Source code for pkgcore.cache

"""
cache subsystem, typically used for storing package metadata
"""

# Public API of this module: the cache template class and its
# bulk-writing variant, both defined below.
__all__ = ("base", "bulk")

import math
import operator
import os
from functools import partial

from snakeoil import klass
from snakeoil.chksum import get_handler
from snakeoil.mappings import ProtectedDict

from ..ebuild.const import metadata_keys
from . import errors


class base:
    # this is for metadata/cache transfer.
    # basically flags the cache needs be updated when transferred cache to cache.
    # leave this.

    """Template for a mapping-style metadata cache keyed by cpv.

    Derived classes supply the storage primitives (:meth:`_getitem`,
    :meth:`_setitem`, :meth:`_delitem`, :meth:`keys`, :meth:`__contains__`
    and, unless ``autocommits`` is set, :meth:`commit`).  This class adds the
    readonly checks, checksum (chf) serialization, ``_eclasses_``
    (de)serialization and the update counting that triggers commits.

    :ivar autocommits: Controls whether the template commits every update,
        or queues up updates.
    :ivar cleanse_keys: Boolean controlling whether the template should drop
        empty keys for storing.
    """

    autocommits = False
    cleanse_keys = False
    # number of updates allowed to accumulate before commit() is invoked
    default_sync_rate = 1
    # attribute name of the checksum used to validate an entry
    chf_type = "mtime"
    # checksum types stored, in order, after each eclass name
    eclass_chf_types = ("mtime",)
    # separator used when flattening eclass data into a single string
    eclass_splitter = "\t"

    default_keys = metadata_keys

    frozen = klass.alias_attr("readonly")

    def __init__(self, auxdbkeys=None, readonly=False):
        """Store the allowed keys and set up the checksum (de)serializers.

        :param auxdbkeys: sequence of allowed keys for each cache entry;
            :attr:`default_keys` is used when this is None
        :param readonly: defaults to False,
            controls whether the cache is mutable.
        """
        if auxdbkeys is None:
            auxdbkeys = self.default_keys
        self._known_keys = frozenset(auxdbkeys)
        self._chf_key = "_%s_" % self.chf_type
        self._chf_serializer = self._get_chf_serializer(self.chf_type)
        self._chf_deserializer = self._get_chf_deserializer(self.chf_type)
        # the checksum key is always a legal key, whatever auxdbkeys held
        self._known_keys |= frozenset([self._chf_key])
        self._cdict_kls = dict
        self.readonly = readonly
        # note: a rate of 0 makes set_sync_rate() call commit() right here
        self.set_sync_rate(self.default_sync_rate)
        self.updates = 0

    @staticmethod
    def _eclassdir_serializer(data):
        """Return the directory portion of ``data.path``."""
        return os.path.dirname(data.path)

    @staticmethod
    def _mtime_serializer(data):
        """Return ``data.mtime`` floored to whole units, as a string."""
        return "%.0f" % math.floor(data.mtime)

    @staticmethod
    def _default_serializer(chf, data):
        """Serialize the ``chf`` attribute of ``data`` via its chksum handler."""
        # Skip the leading 0x...
        # NOTE(review): presumably long2str emits bare hex -- confirm.
        getter = operator.attrgetter(chf)
        return get_handler(chf).long2str(getter(data))

    def _get_chf_serializer(self, chf):
        """Return the callable that serializes checksum type ``chf``."""
        if chf == "eclassdir":
            return self._eclassdir_serializer
        if chf == "mtime":
            return self._mtime_serializer
        return partial(self._default_serializer, chf)

    @staticmethod
    def _mtime_deserializer(data):
        """Parse a stored mtime string into an int, flooring fractions."""
        return int(math.floor(float(data)))

    @staticmethod
    def _default_deserializer(data):
        """Parse a stored hexadecimal checksum string into an int."""
        return int(data, 16)

    def _get_chf_deserializer(self, chf):
        """Return the callable that deserializes checksum type ``chf``."""
        if chf == "eclassdir":
            return str
        if chf == "mtime":
            return self._mtime_deserializer
        return self._default_deserializer

    @klass.jit_attr
    def eclass_chf_serializers(self):
        """Serializers for :attr:`eclass_chf_types`, in the same order."""
        return tuple(self._get_chf_serializer(chf) for chf in self.eclass_chf_types)

    @klass.jit_attr
    def eclass_chf_deserializers(self):
        """``(chf, deserializer)`` pairs for :attr:`eclass_chf_types`."""
        return tuple(
            (chf, self._get_chf_deserializer(chf)) for chf in self.eclass_chf_types
        )

    def _sync_if_needed(self, increment=False):
        """Commit once at least ``sync_rate`` updates have accumulated.

        :param increment: if True, count this call as one more update.
        """
        if self.autocommits:
            return
        if increment:
            self.updates += 1
        if self.updates >= self.sync_rate:
            self.commit()
            self.updates = 0

    def __getitem__(self, cpv):
        """get a cpv's values

        This shouldn't be overridden in derived classes since it
        handles the __eclasses__ conversion.
        That said, if the class handles it, they can override it.
        """
        self._sync_if_needed()
        d = self._getitem(cpv)
        if "_eclasses_" in d:
            d["_eclasses_"] = self.reconstruct_eclasses(cpv, d["_eclasses_"])
        return d

    def _getitem(self, cpv):
        """get cpv's values.

        override this in derived classes.
        """
        raise NotImplementedError

    def __setitem__(self, cpv, values):
        """set a cpv to values

        This shouldn't be overridden in derived classes since it
        handles the readonly checks.

        :param cpv: key to store under.
        :param values: mapping of metadata; it must contain ``_chf_``, which
            is serialized and stored under the cache's checksum key.
        :raises errors.ReadOnly: if the cache is readonly.
        """
        if self.readonly:
            raise errors.ReadOnly()
        # all edits below go through the wrapper rather than ``values``
        d = ProtectedDict(values)
        if self.cleanse_keys:
            # snapshot the keys first: entries are deleted while walking them
            for k in list(d.keys()):
                if not d[k]:
                    del d[k]
        # Test the wrapper, not ``values``: cleansing may just have removed an
        # empty _eclasses_, and reading it back would raise KeyError.
        if "_eclasses_" in d:
            d["_eclasses_"] = self.deconstruct_eclasses(d["_eclasses_"])

        d[self._chf_key] = self._chf_serializer(d.pop("_chf_"))
        self._setitem(cpv, d)
        self._sync_if_needed(True)

    def _setitem(self, name, values):
        """__setitem__ calls this after readonly checks.

        override it in derived classes.
        note _eclasses_ key *must* be handled.
        """
        raise NotImplementedError

    def __delitem__(self, cpv):
        """delete a key from the cache.

        This shouldn't be overridden in derived classes since it
        handles the readonly checks.

        :raises errors.ReadOnly: if the cache is readonly.
        """
        if self.readonly:
            raise errors.ReadOnly()
        self._delitem(cpv)
        self._sync_if_needed(True)

    def _delitem(self, cpv):
        """__delitem__ calls this after readonly checks.

        override it in derived classes.
        """
        raise NotImplementedError

    def __contains__(self, cpv):
        """Membership test; must be provided by derived classes."""
        raise NotImplementedError

    def has_key(self, cpv):
        """Return whether ``cpv`` is in the cache (same as ``cpv in self``)."""
        return cpv in self

    def keys(self):
        """Return the cache's keys; must be provided by derived classes."""
        raise NotImplementedError

    def __iter__(self):
        return self.keys()

    def items(self):
        """Yield ``(key, entry)`` pairs, looking each entry up via ``self[key]``."""
        for x in self.keys():
            yield (x, self[x])

    def clear(self):
        """Delete every entry, going through the normal deletion path."""
        # materialize the keys first; the cache shrinks as we delete
        for key in list(self):
            del self[key]

    def set_sync_rate(self, rate=0):
        """Set how many updates may accumulate between commits.

        A rate of 0 triggers an immediate :meth:`commit`.
        """
        self.sync_rate = rate
        if rate == 0:
            self.commit()

    def commit(self, force=False):
        """Flush queued updates; a no-op here when ``autocommits`` is set.

        :raises NotImplementedError: if ``autocommits`` is False and the
            derived class did not override this method.
        """
        if not self.autocommits:
            raise NotImplementedError

    def deconstruct_eclasses(self, eclass_dict):
        """takes a dict, returns a string representing said dict

        Each eclass name is followed by one serialized value per entry in
        :attr:`eclass_chf_types`; fields are joined with
        :attr:`eclass_splitter`.
        """
        fields = []
        converters = self.eclass_chf_serializers
        for eclass, data in eclass_dict.items():
            fields.append(eclass)
            fields.extend(f(data) for f in converters)
        return self.eclass_splitter.join(fields)

    def _deserialize_eclass_chfs(self, data):
        """Yield ``(chf, value)`` pairs, one per configured eclass chf type.

        :param data: iterator positioned at the first checksum field.
        """
        # The deserializers must stay the first zip() argument: zip stops as
        # soon as they run out, so no extra field is pulled from ``data``.
        pairs = zip(self.eclass_chf_deserializers, data)
        for (chf, convert), item in pairs:
            yield chf, convert(item)

    def reconstruct_eclasses(self, cpv, eclass_string):
        """Turn a string from :meth:`deconstruct_eclasses` back into data.

        :param cpv: key being read; only used for error reporting.
        :param eclass_string: the serialized ``_eclasses_`` value.
        :return: list of ``(eclass_name, ((chf, value), ...))`` tuples; an
            empty list if the string holds no data.
        :raises TypeError: if ``eclass_string`` is not a str.
        :raises errors.CacheCorruption: if the field count is not a multiple
            of the per-eclass record size, or a value cannot be parsed.
        """
        if not isinstance(eclass_string, str):
            raise TypeError(f"eclass_string must be str, got {eclass_string!r}")
        eclass_data = eclass_string.strip().split(self.eclass_splitter)
        if eclass_data == [""]:
            # occasionally this occurs in the fs backends.  they suck.
            return []

        chf_funcs = self.eclass_chf_deserializers
        # one field for the eclass name plus one per checksum type
        tuple_len = len(chf_funcs) + 1
        if len(eclass_data) % tuple_len:
            raise errors.CacheCorruption(
                cpv,
                f"_eclasses_ was of invalid len {len(eclass_data)} "
                f"(must be mod {tuple_len})",
            )

        i = iter(eclass_data)
        # The outer loop and the helper share one iterator: each pass takes
        # the eclass name from it, then the helper consumes that eclass's
        # checksum fields, yielding (chf, value) pairs.  With 2 chfs this
        # produces (eclass_name, ((chf1, chf1_val), (chf2, chf2_val))).
        try:
            return [(eclass, tuple(self._deserialize_eclass_chfs(i))) for eclass in i]
        except ValueError as e:
            raise errors.CacheCorruption(
                cpv, f"ValueError reading {eclass_string!r}"
            ) from e

    def validate_entry(self, cache_item, ebuild_hash_item, eclass_db):
        """Check whether a cache entry is still usable.

        :param cache_item: mapping holding the cached entry; its
            ``_eclasses_`` value is replaced in place on success.
        :param ebuild_hash_item: object whose ``chf_type`` attribute is
            compared against the stored checksum.
        :param eclass_db: object providing ``rebuild_cache_entry``.
        :return: True if the entry is valid, False if it must be regenerated.
        """
        chf_hash = cache_item.get(self._chf_key)
        if chf_hash is None or chf_hash != getattr(
            ebuild_hash_item, self.chf_type, None
        ):
            return False
        eclass_data = cache_item.get("_eclasses_")
        if eclass_data is None:
            return True
        # if the INHERIT key is missing yet we did inherit some eclasses,
        # trigger a refresh to upgrade metadata cache
        if cache_item.get("INHERIT") is None:
            return False
        update = eclass_db.rebuild_cache_entry(eclass_data)
        if update is None:
            return False
        cache_item["_eclasses_"] = update
        return True
class bulk(base):
    """Cache template that keeps all entries in memory and writes in bulk.

    Derived classes implement :meth:`_read_data`, whose result is used as
    the in-memory mapping, and :meth:`_write_data`, which persists it.
    Modifications are applied to the mapping at once and recorded in
    ``_pending_updates`` until :meth:`commit` writes them out.
    """

    default_sync_rate = 100

    def __init__(self, *args, **kwds):
        # Must exist before the base constructor runs: the base calls
        # set_sync_rate(), which invokes commit() when the rate is 0.
        self._pending_updates = []
        super().__init__(*args, **kwds)

    @klass.jit_attr
    def data(self):
        """The in-memory mapping, as returned by :meth:`_read_data`."""
        return self._read_data()

    def _read_data(self):
        """Return the full cache contents; derived classes must override."""
        raise NotImplementedError(self, "_read_data")

    def _write_data(self):
        """Persist the cache contents; derived classes must override."""
        raise NotImplementedError(self, "_write_data")

    def __contains__(self, key):
        return key in self.data

    def _getitem(self, key):
        return self.data[key]

    def _setitem(self, key, val):
        """Store the known keys of ``val`` under ``key`` and queue the update."""
        known = self._known_keys
        # drop any key the cache was not configured to store
        val = self._cdict_kls((k, v) for k, v in val.items() if k in known)
        self._pending_updates.append((key, val))
        self.data[key] = val

    def _delitem(self, key):
        """Remove ``key`` and queue the deletion (recorded as a None value)."""
        del self.data[key]
        self._pending_updates.append((key, None))

    def commit(self, force=False):
        """Write the data out if there are queued updates.

        :param force: write even when nothing is queued.
        """
        if self._pending_updates or force:
            self._write_data()
            self._pending_updates = []