Source code for pkgcheck.checks.pkgdir

import os
import stat
from collections import defaultdict
from datetime import datetime

from pkgcore.ebuild.atom import MalformedAtom
from pkgcore.ebuild.atom import atom as atom_cls
from snakeoil.chksum import get_chksums
from snakeoil.osutils import listdir, pjoin, sizeof_fmt
from snakeoil.strings import pluralism

from .. import addons, results, sources
from . import Check, GentooRepoCheck

# allowed filename characters: "a-zA-Z0-9._-+:"
# Built from one literal so the whole whitelist is visible at a glance.
allowed_filename_chars = set(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "._-+:"
)

class MismatchedPN(results.PackageResult, results.Error):
    """Ebuilds that have different names than their parent directory."""

    def __init__(self, ebuilds, **kwargs):
        """Store the offending ebuild names as an immutable tuple."""
        super().__init__(**kwargs)
        self.ebuilds = tuple(ebuilds)

    @property
    def desc(self):
        """Human-readable summary listing every mismatched ebuild name."""
        suffix = pluralism(self.ebuilds)
        listing = ", ".join(self.ebuilds)
        return f"mismatched package name{suffix}: [ {listing} ]"
class InvalidPN(results.PackageResult, results.Error):
    """Ebuilds that have invalid package names."""

    def __init__(self, ebuilds, **kwargs):
        """Store the offending ebuild names as an immutable tuple."""
        super().__init__(**kwargs)
        self.ebuilds = tuple(ebuilds)

    @property
    def desc(self):
        """Human-readable summary listing every invalid ebuild name."""
        suffix = pluralism(self.ebuilds)
        listing = ", ".join(self.ebuilds)
        return f"invalid package name{suffix}: [ {listing} ]"
class EqualVersions(results.PackageResult, results.Error):
    """Ebuilds that have equal versions.

    For example, cat/pn-1.0.2, cat/pn-1.0.2-r0, cat/pn-1.0.2-r00 and
    cat/pn-1.000.2 all have equal versions according to PMS and therefore
    shouldn't exist in the same repository.
    """

    def __init__(self, versions, **kwargs):
        """Store the colliding version strings as an immutable tuple."""
        super().__init__(**kwargs)
        self.versions = tuple(versions)

    @property
    def desc(self):
        """Human-readable summary listing the versions that compare equal."""
        listing = ", ".join(self.versions)
        return f"equal package versions: [ {listing} ]"
class DuplicateFiles(results.PackageResult, results.Warning):
    """Two or more identical files in FILESDIR."""

    def __init__(self, files, **kwargs):
        """Store the duplicated file paths as an immutable tuple."""
        super().__init__(**kwargs)
        self.files = tuple(files)

    @property
    def desc(self):
        """Human-readable summary with each duplicate path shown via repr()."""
        listing = ", ".join(repr(name) for name in self.files)
        return f"duplicate identical files in FILESDIR: {listing}"
class EmptyFile(results.PackageResult, results.Warning):
    """File in FILESDIR is empty."""

    def __init__(self, filename, **kwargs):
        """Remember which file was found to be empty."""
        super().__init__(**kwargs)
        self.filename = filename

    @property
    def desc(self):
        """Human-readable summary naming the empty file."""
        quoted = repr(self.filename)
        return f"empty file in FILESDIR: {quoted}"
class ExecutableFile(results.PackageResult, results.Warning):
    """File has executable bit, but doesn't need it."""

    def __init__(self, filename, **kwargs):
        """Remember which file carries the executable bit."""
        super().__init__(**kwargs)
        self.filename = filename

    @property
    def desc(self):
        """Human-readable summary naming the executable file."""
        quoted = repr(self.filename)
        return f"unnecessary executable bit: {quoted}"
class UnknownPkgDirEntry(results.PackageResult, results.Warning):
    """Unknown files or directories in package directory.

    Relevant for the gentoo repo only since the spec states that a package
    directory may contain other files or directories.
    """

    def __init__(self, filenames, **kwargs):
        """Store the unrecognized entry names as an immutable tuple."""
        super().__init__(**kwargs)
        self.filenames = tuple(filenames)

    @property
    def desc(self):
        """Human-readable summary; picks "entry" or "entries" by count."""
        ending = pluralism(self.filenames, singular="y", plural="ies")
        listing = ", ".join(repr(name) for name in self.filenames)
        return f"unknown entr{ending}: {listing}"
class SizeViolation(results.PackageResult, results.Warning):
    """File in $FILESDIR is too large."""

    # per-file size threshold in bytes (20 KiB)
    limit = 20 * 1024

    def __init__(self, filename, size, **kwargs):
        """Remember the oversized file and its size in bytes."""
        super().__init__(**kwargs)
        self.filename = filename
        self.size = size

    @property
    def desc(self):
        """Human-readable summary comparing the file size to the limit."""
        allowed = sizeof_fmt(self.limit)
        actual = sizeof_fmt(self.size)
        return f"{self.filename!r} exceeds {allowed} in size; {actual} total"
class TotalSizeViolation(results.PackageResult, results.Warning):
    """The total size of $FILESDIR is too large."""

    # cumulative size threshold in bytes (50 KiB)
    limit = 50 * 1024

    def __init__(self, size, **kwargs):
        """Remember the cumulative size in bytes."""
        super().__init__(**kwargs)
        self.size = size

    @property
    def desc(self):
        """Human-readable summary comparing the total size to the limit."""
        allowed = sizeof_fmt(self.limit)
        actual = sizeof_fmt(self.size)
        return f"files/ directory exceeds {allowed} in size; {actual} total"
class BannedCharacter(results.PackageResult, results.Error):
    """File or directory name doesn't abide by GLEP 31 requirements.

    See the official GLEP 31 documentation [#]_ for details.

    .. [#] https://www.gentoo.org/glep/glep-0031.html
    """

    def __init__(self, filename, chars, **kwargs):
        """Remember the offending name and the disallowed characters in it."""
        super().__init__(**kwargs)
        self.filename = filename
        self.chars = tuple(chars)

    @property
    def desc(self):
        """Human-readable summary listing each disallowed character."""
        suffix = pluralism(self.chars)
        listing = ", ".join(repr(char) for char in self.chars)
        return f"filename {self.filename!r} character{suffix} outside allowed set: {listing}"
class InvalidUTF8(results.PackageResult, results.Error):
    """File isn't UTF-8 compliant."""

    def __init__(self, filename, err, **kwargs):
        """Remember the offending file and the decoding error message."""
        super().__init__(**kwargs)
        self.filename = filename
        self.err = err

    @property
    def desc(self):
        """Human-readable summary with the error followed by the file name."""
        quoted = repr(self.filename)
        return f"invalid UTF-8: {self.err}: {quoted}"
class PkgDirCheck(Check):
    """Scan ebuild directory for various file-related issues."""

    _source = (sources.PackageRepoSource, (), (("source", sources.RawRepoSource),))

    # VCS bookkeeping directories that are never descended into under files/
    ignore_dirs = frozenset(["cvs", ".svn", ".bzr"])
    required_addons = (addons.git.GitAddon,)
    known_results = frozenset(
        [
            DuplicateFiles,
            EmptyFile,
            ExecutableFile,
            UnknownPkgDirEntry,
            SizeViolation,
            BannedCharacter,
            InvalidUTF8,
            MismatchedPN,
            InvalidPN,
            TotalSizeViolation,
        ]
    )

    # TODO: put some 'preferred algorithms by purpose' into snakeoil?
    digest_algo = "sha256"

    def __init__(self, *args, git_addon):
        """Keep the git addon's ``gitignored`` callable for path filtering."""
        super().__init__(*args)
        self.gitignored = git_addon.gitignored

    def feed(self, pkgset):
        """Yield results for the package directory of the first package in *pkgset*.

        The top level of the package directory is checked for executable
        files, banned filename characters, ebuilds that fail to decode,
        mismatched or invalid package names and (when ``gentoo_repo`` is set)
        unknown entries. The ``files`` subdirectory is then walked for
        executable, empty, oversized and duplicate files. Paths for which
        ``self.gitignored`` returns true are skipped in both passes.
        """
        pkg = pkgset[0]
        pkg_path = pjoin(self.options.target_repo.location, pkg.category, pkg.package)
        ebuild_ext = ".ebuild"
        mismatched = []
        invalid = []
        unknown = []

        # first pass: direct children of the package directory only
        for filename in listdir(pkg_path):
            path = pjoin(pkg_path, filename)

            if self.gitignored(path):
                continue

            if os.path.isfile(path) and os.stat(path).st_mode & 0o111:
                yield ExecutableFile(filename, pkg=pkg)

            # a single set difference collects every disallowed character
            if banned_chars := set(filename) - allowed_filename_chars:
                yield BannedCharacter(filename, sorted(banned_chars), pkg=pkg)

            if filename.endswith(ebuild_ext):
                try:
                    with open(path, mode="rb") as f:
                        # NOTE(review): the statement inside this ``with`` was
                        # missing from the source as received (the ``try`` had
                        # no body); reconstructed as a strict UTF-8 decode of
                        # the file content -- confirm against upstream.
                        f.read().decode("utf-8")
                except UnicodeDecodeError as e:
                    yield InvalidUTF8(filename, str(e), pkg=pkg)

                pkg_name = os.path.basename(filename[: -len(ebuild_ext)])
                try:
                    pkg_atom = atom_cls(f"={pkg.category}/{pkg_name}")
                    if pkg_atom.package != os.path.basename(pkg_path):
                        mismatched.append(pkg_name)
                except MalformedAtom:
                    invalid.append(pkg_name)
            elif self.options.gentoo_repo and filename not in ("Manifest", "metadata.xml", "files"):
                unknown.append(filename)

        if mismatched:
            yield MismatchedPN(sorted(mismatched), pkg=pkg)
        if invalid:
            yield InvalidPN(sorted(invalid), pkg=pkg)
        if unknown:
            yield UnknownPkgDirEntry(sorted(unknown), pkg=pkg)

        # second pass: everything below files/
        files_by_size = defaultdict(list)
        # +1 also strips the path separator so reported paths start at "files"
        pkg_path_len = len(pkg_path) + 1
        total_size = 0
        for root, dirs, files in os.walk(pjoin(pkg_path, "files")):
            # don't visit any ignored directories; os.walk honours in-place
            # edits of ``dirs``
            for d in self.ignore_dirs.intersection(dirs):
                dirs.remove(d)
            base_dir = root[pkg_path_len:]
            for filename in files:
                path = pjoin(root, filename)

                if self.gitignored(path):
                    continue

                # lstat: a symlink is not reported as a regular file
                file_stat = os.lstat(path)
                if stat.S_ISREG(file_stat.st_mode):
                    rel_path = pjoin(base_dir, filename)
                    if file_stat.st_mode & 0o111:
                        yield ExecutableFile(rel_path, pkg=pkg)
                    if file_stat.st_size == 0:
                        yield EmptyFile(rel_path, pkg=pkg)
                    else:
                        files_by_size[file_stat.st_size].append(rel_path)
                        total_size += file_stat.st_size
                        if file_stat.st_size > SizeViolation.limit:
                            yield SizeViolation(rel_path, file_stat.st_size, pkg=pkg)
                    if banned_chars := set(filename) - allowed_filename_chars:
                        yield BannedCharacter(rel_path, sorted(banned_chars), pkg=pkg)

        if total_size > TotalSizeViolation.limit:
            yield TotalSizeViolation(total_size, pkg=pkg)

        # only files sharing a size can be identical, so hash just those
        files_by_digest = defaultdict(list)
        for same_size in files_by_size.values():
            if len(same_size) > 1:
                for rel_path in same_size:
                    digest = get_chksums(pjoin(pkg_path, rel_path), self.digest_algo)[0]
                    files_by_digest[digest].append(rel_path)

        for identical in files_by_digest.values():
            if len(identical) > 1:
                yield DuplicateFiles(sorted(identical), pkg=pkg)
class EqualVersionsCheck(Check):
    """Scan package ebuilds for semantically equal versions."""

    _source = sources.PackageRepoSource
    known_results = frozenset([EqualVersions])

    def feed(self, pkgset):
        """Yield an EqualVersions result per group of equal-versioned ebuilds.

        The packages are sorted so that equal versions end up adjacent, then
        each neighbouring pair is compared by ``versioned_atom``.
        """
        ordered = sorted(pkgset)
        collisions = defaultdict(set)
        # pair each package with its successor in sort order
        for current, following in zip(ordered, ordered[1:]):
            if current.versioned_atom == following.versioned_atom:
                collisions[current.versioned_atom].update(
                    [current.fullver, following.fullver]
                )

        for pkg, versions in collisions.items():
            yield EqualVersions(sorted(versions), pkg=pkg)
class LiveOnlyPackage(results.PackageResult, results.Warning):
    """Package has only had VCS-based ebuilds."""

    def __init__(self, age, **kwargs):
        """Store the age in days, coerced to ``int``."""
        super().__init__(**kwargs)
        self.age = int(age)

    @property
    def desc(self):
        """Human-readable summary: days below 365, otherwise years (2 decimals)."""
        if self.age >= 365:
            span = f"{round(self.age / 365, 2)} years"
        else:
            span = f"{self.age} days"
        return f"all versions are VCS-based added over {span} ago"
class LiveOnlyCheck(GentooRepoCheck):
    """Scan for packages with only live versions."""

    _source = sources.PackageRepoSource
    required_addons = (addons.git.GitAddon,)
    known_results = frozenset([LiveOnlyPackage])

    def __init__(self, *args, git_addon):
        """Record the reference date and fetch the cached added-packages repo."""
        super().__init__(*args)
        # NOTE(review): both sides of this assignment were missing from the
        # source as received (only "=" survived); reconstructed as the
        # reference date that feed() subtracts from -- confirm against upstream.
        self.today = datetime.today()
        self.added_repo = git_addon.cached_repo(addons.git.GitAddedRepo)

    def feed(self, pkgset):
        """Yield a LiveOnlyPackage result when every package in *pkgset* is live.

        The age reported is the number of days between the reference date
        taken at construction and the timestamp of the first match for the
        highest version in the added-packages repo. Nothing is yielded when
        that repo has no match.
        """
        # NOTE(review): the predicate of this all() was missing from the source
        # as received; ``pkg.live`` is assumed from the check's stated purpose
        # -- confirm against upstream.
        if not all(pkg.live for pkg in pkgset):
            return

        # assume highest package version is most recently committed
        pkg = pkgset[0] if len(pkgset) == 1 else sorted(pkgset)[-1]
        try:
            match = next(self.added_repo.itermatch(pkg.versioned_atom))
        except StopIteration:
            # probably an uncommitted package
            return

        # both datetimes are naive local time, so the subtraction is valid
        added = datetime.fromtimestamp(match.time)
        # NOTE(review): the left operand was missing from the source as
        # received; reconstructed as the reference date set in __init__.
        days_old = (self.today - added).days
        yield LiveOnlyPackage(days_old, pkg=pkg)