Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| import sys | |
| import pkg_resources | |
| import requests | |
| from pathlib import Path | |
| import json | |
| from typing import Dict, List, Optional, Set, Tuple | |
| import configparser | |
| import re | |
| from dataclasses import dataclass | |
@dataclass
class PackageLicense:
    """Outcome of the license check for a single package.

    The class must be a dataclass: ``LicenseChecker.check_package`` builds
    instances with keyword arguments, which requires the generated
    ``__init__``.
    """

    # Package name as written in the requirements file.
    name: str
    # Version taken from the requirement specifier, or None when unpinned.
    version: Optional[str]
    # License text that was found, or None when it could not be determined.
    license_type: Optional[str]
    # True when the package passed the check (by license or by config entry).
    is_authorized: bool
    # Human-readable explanation of the verdict.
    reason: str
class LicenseChecker:
    """Check the licenses of required packages against an INI policy file.

    The policy file is read with ``configparser`` and may contain:

    * a ``[Licenses]`` section with the multi-line options
      ``authorized_licenses`` and ``unauthorized_licenses``;
    * an ``[Authorized Packages]`` section mapping a package name to a
      version spec, optionally followed by ``# comment``.

    License strings are fetched from the PyPI JSON API and cached in
    ``license_cache.json`` in the current working directory.
    """

    def __init__(
        self, config_file: Path = Path("./tests/code_coverage_tests/liccheck.ini")
    ):
        """Load the policy file and the on-disk license cache.

        Args:
            config_file: Path of the INI policy file.

        Exits the process with status 1 when ``config_file`` does not exist.
        """
        if not config_file.exists():
            print(f"Error: Config file {config_file} not found")
            sys.exit(1)

        # allow_no_value lets a bare package name (no ":" / "=") be a valid key.
        self.config = configparser.ConfigParser(allow_no_value=True)
        self.config.read(config_file)

        # Initialize license sets
        self.authorized_licenses = self._parse_license_list(
            "Licenses", "authorized_licenses"
        )
        self.unauthorized_licenses = self._parse_license_list(
            "Licenses", "unauthorized_licenses"
        )

        # Parse authorized packages
        self.authorized_packages = self._parse_authorized_packages()

        # Initialize cache
        self.cache_file = Path("license_cache.json")
        self.license_cache: Dict[str, str] = self._load_cache()

        # Track package results
        self.package_results: List[PackageLicense] = []

    def _load_cache(self) -> Dict[str, str]:
        """Return the cached licenses, or an empty dict if the cache is unusable."""
        if not self.cache_file.exists():
            return {}
        try:
            with open(self.cache_file, encoding="utf-8") as f:
                cached = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            # A broken cache only costs extra lookups, so it must not abort.
            print(f"Warning: Ignoring unreadable license cache {self.cache_file}: {e}")
            return {}
        if not isinstance(cached, dict):
            print(f"Warning: Ignoring malformed license cache {self.cache_file}")
            return {}
        return cached

    def _save_cache(self) -> None:
        """Write the license cache to disk; a write failure is only a warning."""
        try:
            with open(self.cache_file, "w", encoding="utf-8") as f:
                json.dump(self.license_cache, f, indent=2)
        except OSError as e:
            print(f"Warning: Could not write license cache {self.cache_file}: {e}")

    def _parse_license_list(self, section: str, option: str) -> Set[str]:
        """Parse license list from config, handling comments and whitespace.

        Returns:
            The lower-cased, stripped, non-empty, non-comment lines of the
            option. An empty set when the option is absent or has no value.
        """
        if not self.config.has_option(section, option):
            return set()
        # With allow_no_value=True a key written without a value yields None.
        raw = self.config.get(section, option) or ""
        licenses = set()
        for line in raw.split("\n"):
            entry = line.strip().lower()
            if entry and not entry.startswith("#"):
                licenses.add(entry)
        return licenses

    def _parse_authorized_packages(self) -> Dict[str, Dict[str, str]]:
        """Parse authorized packages with their version specs and comments.

        Returns:
            A dict keyed by lower-cased package name. Each value holds
            ``version_spec`` (text before the first ``#``) and ``comment``
            (text after it, or ``""``).
        """
        authorized: Dict[str, Dict[str, str]] = {}
        if not self.config.has_section("Authorized Packages"):
            return authorized
        for package, spec in self.config.items("Authorized Packages"):
            if package.startswith("#"):
                continue
            # Guard against a value-less key being reported as None.
            version_spec, _, comment = (spec or "").partition("#")
            authorized[package.strip().lower()] = {
                "version_spec": version_spec.strip(),
                "comment": comment.strip(),
            }
        return authorized

    @staticmethod
    def _pypi_json_url(package_name: str, version: Optional[str]) -> str:
        """Return the PyPI JSON API URL for a release, or for the project if unversioned."""
        if version:
            return f"https://pypi.org/pypi/{package_name}/{version}/json"
        # Without a version the endpoint has no version segment; an empty
        # segment ("name//json") is not the project endpoint.
        return f"https://pypi.org/pypi/{package_name}/json"

    @staticmethod
    def _extract_license(info: dict) -> Optional[str]:
        """Pick a license string out of the ``info`` object of a PyPI response.

        Preference order: ``license``, then ``license_expression``, then the
        last segment of every ``License ::`` trove classifier joined by "; ".
        Returns None when none of them carries a value.
        """
        declared = info.get("license") or info.get("license_expression")
        if declared:
            return declared
        from_classifiers = [
            classifier.split(" :: ")[-1]
            for classifier in info.get("classifiers") or []
            if classifier.startswith("License :: ")
        ]
        return "; ".join(from_classifiers) or None

    def get_package_license_from_pypi(
        self, package_name: str, version: Optional[str]
    ) -> Optional[str]:
        """Fetch license information for a package from PyPI.

        Args:
            package_name: Project name on PyPI.
            version: Release to query; empty or None queries the project
                endpoint without a version.

        Returns:
            The license string, or None when the request fails or the
            response carries no license information.
        """
        try:
            url = self._pypi_json_url(package_name, version)
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            info = response.json().get("info") or {}
            return self._extract_license(info)
        except Exception as e:
            # Deliberately broad: a lookup failure of any kind is reported
            # and treated as "license unknown" rather than aborting the run.
            print(
                f"Warning: Failed to fetch license for {package_name} {version}: {str(e)}"
            )
            return None

    @staticmethod
    def _normalize_license(text: str) -> str:
        """Lower-case ``text`` and turn ``-`` and ``_`` into spaces."""
        return text.lower().replace("-", " ").replace("_", " ")

    def is_license_acceptable(self, license_str: Optional[str]) -> Tuple[bool, str]:
        """Check if a license is acceptable based on configured lists.

        Matching is by substring on normalized text. Both the package's
        license and the configured entries are normalized the same way, so
        a configured ``apache-2.0`` matches ``Apache-2.0``.

        Returns:
            ``(is_acceptable, reason)``.
        """
        if not license_str:
            return False, "Unknown license"

        # Normalize license string to handle common variations
        normalized_license = self._normalize_license(license_str)

        # Special case for BSD licenses. Hyphens are already spaces here, so
        # "3 clause" also covers the "3-clause" spelling.
        if "bsd" in normalized_license and any(
            variation in normalized_license
            for variation in ("3 clause", "new", "simplified")
        ):
            return True, "Matches authorized license: BSD 3-Clause"

        # Check unauthorized licenses first. Sorting makes the reported
        # match deterministic when several entries match.
        for unauth in sorted(self.unauthorized_licenses):
            if self._normalize_license(unauth) in normalized_license:
                return False, f"Matches unauthorized license: {unauth}"

        # Then check authorized licenses
        for auth in sorted(self.authorized_licenses):
            if self._normalize_license(auth) in normalized_license:
                return True, f"Matches authorized license: {auth}"

        return False, "License not in authorized list"

    def _lookup_license(
        self, package_name: str, version: Optional[str]
    ) -> Optional[str]:
        """Return the license from the cache, else from PyPI (caching any hit)."""
        cache_key = f"{package_name}:{version}" if version else package_name
        cached = self.license_cache.get(cache_key)
        if cached:
            return cached
        license_type = self.get_package_license_from_pypi(package_name, version or "")
        if license_type:
            self.license_cache[cache_key] = license_type
        return license_type

    def check_package(self, package_name: str, version: Optional[str] = None) -> bool:
        """Check if a specific package version is compliant.

        Appends one ``PackageLicense`` to ``self.package_results`` and prints
        a one-line verdict.

        Returns:
            True when the package is accepted: it is listed in
            ``[Authorized Packages]`` with a comment, it is listed there and
            a license could be found, or its license passes
            ``is_license_acceptable``. False otherwise, including when no
            license could be determined.
        """
        pkg_info = self.authorized_packages.get(package_name.lower())

        # If there's a comment, consider it manually verified
        if pkg_info is not None and pkg_info.get("comment"):
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=pkg_info["comment"],
                    is_authorized=True,
                    reason="Manually verified in config",
                )
            )
            print(f"✅ {package_name}: Manually verified - {pkg_info['comment']}")
            return True

        # One lookup serves both the authorized-package path and the normal
        # path, so a failed request is not repeated.
        license_type = self._lookup_license(package_name, version)

        if not license_type:
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=None,
                    is_authorized=False,
                    reason="Could not determine license",
                )
            )
            print(f"⚠️ Warning: Could not determine license for {package_name}")
            return False

        if pkg_info is not None:
            # Package is authorized regardless of what its license is.
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=license_type,
                    is_authorized=True,
                    reason=f"Listed in authorized packages - {license_type}",
                )
            )
            print(f"✅ {package_name}: {license_type} (Listed in authorized packages)")
            return True

        is_acceptable, reason = self.is_license_acceptable(license_type)
        self.package_results.append(
            PackageLicense(
                name=package_name,
                version=version,
                license_type=license_type,
                is_authorized=is_acceptable,
                reason=reason,
            )
        )
        if is_acceptable:
            print(f"✅ {package_name}: {license_type}")
        else:
            print(f"❌ {package_name}: {license_type} - {reason}")
        return is_acceptable

    def check_requirements(self, requirements_file: Path) -> bool:
        """Check all packages in a requirements file.

        Blank lines and comment lines are skipped. The version passed to
        ``check_package`` is the version of the requirement's first
        specifier, whatever its operator.

        Returns:
            True when every package is compliant. False when any package
            fails, or when the file cannot be read or parsed (in which case
            no package is checked).
        """
        print(f"\nChecking licenses for packages in {requirements_file}...")

        try:
            with open(requirements_file, encoding="utf-8") as f:
                requirements = [
                    pkg_resources.Requirement.parse(entry)
                    # Strip first so indented comment lines are skipped too.
                    for entry in (line.strip() for line in f)
                    if entry and not entry.startswith("#")
                ]
        except Exception as e:
            print(f"Error parsing {requirements_file}: {str(e)}")
            return False

        all_compliant = True
        for req in requirements:
            specs = getattr(req, "specs", None) or []
            version = specs[0][1] if specs else None
            if not self.check_package(req.name, version):
                all_compliant = False

        # Save updated cache
        self._save_cache()

        return all_compliant
def main():
    """Run the license check and exit non-zero when verification is needed.

    The requirements file is the first command-line argument when one is
    given, otherwise ``./requirements.txt``.

    Exit status:
        0 when every dependency is acceptable, or when every problematic
        package is listed in ``[Authorized Packages]``.
        1 when some problematic package is not listed there, or when the
        requirements file could not be checked at all.
    """
    req_file = sys.argv[1] if len(sys.argv) > 1 else "./requirements.txt"
    checker = LicenseChecker()

    # Check requirements
    if checker.check_requirements(Path(req_file)):
        print("\n✅ All dependencies have acceptable licenses.")
        sys.exit(0)

    def version_suffix(pkg) -> str:
        """Return " (version)" for a pinned package, else an empty string."""
        return f" ({pkg.version})" if pkg.version else ""

    # Get lists of problematic packages
    unverified = [p for p in checker.package_results if not p.license_type]
    invalid = [
        p for p in checker.package_results if p.license_type and not p.is_authorized
    ]

    if not unverified and not invalid:
        # Every failed package check records an unverified or invalid result,
        # so a failure with neither means the requirements file itself could
        # not be read or parsed. That must not pass as success.
        print("\n❌ Error: Requirements could not be checked; see the error above.")
        sys.exit(1)

    # Print detailed information about problematic packages
    if unverified:
        print("\n❌ Packages with unknown licenses:")
        for pkg in unverified:
            print(f"- {pkg.name}{version_suffix(pkg)}")

    if invalid:
        print("\n❌ Packages with unauthorized licenses:")
        for pkg in invalid:
            print(f"- {pkg.name}{version_suffix(pkg)}: {pkg.license_type}")

    # Only error if there are packages that aren't explicitly authorized
    unhandled_packages = [
        p
        for p in (unverified + invalid)
        if p.name.lower() not in checker.authorized_packages
    ]

    if unhandled_packages:
        print("\n❌ Error: Found packages that need verification:")
        for pkg in unhandled_packages:
            license_str = (
                f" - {pkg.license_type}" if pkg.license_type else " - Unknown license"
            )
            print(f"- {pkg.name}{version_suffix(pkg)}{license_str}")
        print(
            "\nAdd these packages to the [Authorized Packages] section in liccheck.ini with a comment about their license verification."
        )
        print("Example:")
        print("package-name: >=1.0.0 # MIT license manually verified")
        sys.exit(1)


if __name__ == "__main__":
    main()