Source code for pkgcore.sync.http

__all__ = ("http_syncer",)

import errno
import os
import ssl
import sys
import urllib.request

from snakeoil.fileutils import AtomicWriteFile, readfile_ascii
from snakeoil.osutils import pjoin

from ..log import logger
from . import base


class http_syncer(base.Syncer):
    """Syncer that fetches files over HTTP(S).

    A single file named after the last path component of the URI is
    downloaded into ``basedir``.  The ``ETag`` and ``Last-Modified`` response
    headers are cached in ``.etag`` and ``.modified`` files inside ``basedir``
    so that later syncs can skip the download when nothing changed.
    """

    # NOTE(review): presumably tells the syncer machinery that forced syncs
    # are supported -- confirm against base.Syncer.
    forcable = True

    def __init__(self, basedir, uri, dest=None, **kwargs):
        """Record the target file name and initialize the base syncer.

        Args:
            basedir: directory the file is downloaded into
            uri: HTTP(S) location of the file to fetch
            dest: accepted for interface compatibility; unused here
            **kwargs: forwarded to ``base.Syncer.__init__``
        """
        self.basename = os.path.basename(uri)
        super().__init__(basedir, uri, **kwargs)

    def _sync(self, verbosity, force=False, **kwargs):
        """Download the remote file unless the cached validators still match.

        Args:
            verbosity: not used by this method
            force: when true, skip every cache check and always download
            **kwargs: ignored

        Returns:
            True, both when a download completed and when no update was
            needed.

        Raises:
            base.SyncError: the request failed or the repo dir could not be
                created
            base.PathError: the download file could not be created
        """
        dest = self._pre_download()

        if self.uri.lower().startswith("https://"):
            # default to using system ssl certs
            context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        else:
            context = None

        headers = {}
        etag_path = pjoin(self.basedir, ".etag")
        modified_path = pjoin(self.basedir, ".modified")
        previous_etag = None
        previous_modified = None

        if not force:
            # use cached ETag to check if updates exist
            previous_etag = readfile_ascii(etag_path, none_on_missing=True)
            if previous_etag:
                headers["If-None-Match"] = previous_etag

            # use cached modification timestamp to check if updates exist
            previous_modified = readfile_ascii(modified_path, none_on_missing=True)
            if previous_modified:
                headers["If-Modified-Since"] = previous_modified

        req = urllib.request.Request(self.uri, headers=headers, method="GET")

        # TODO: add customizable timeout
        try:
            resp = urllib.request.urlopen(req, context=context)
        except urllib.error.URLError as e:
            # Only HTTPError carries a status code; a plain URLError (DNS
            # failure, refused connection, ...) has no getcode()/code, so
            # look the attribute up defensively.
            if getattr(e, "code", None) == 304:  # Not Modified
                logger.debug("content is unchanged")
                return True
            raise base.SyncError(f"failed fetching {self.uri!r}: {e.reason}") from e

        def normalize(value):
            """Strip surrounding whitespace; map empty/missing values to None."""
            return value.strip() if value else None

        # Make sure the connection is released on every exit path below.
        try:
            # Manually check cached values ourselves since some servers appear
            # to ignore If-None-Match or If-Modified-Since headers.
            etag = resp.getheader("ETag")
            modified = resp.getheader("Last-Modified")

            if not force:
                if etag is not None and normalize(etag) == normalize(previous_etag):
                    logger.debug(f"etag {etag} is equal, no update available")
                    return True
                if modified is not None and normalize(modified) == normalize(
                    previous_modified
                ):
                    logger.debug(f"header mtime is unmodified: {modified}")
                    return True

            try:
                os.makedirs(self.basedir, exist_ok=True)
            except OSError as e:
                raise base.SyncError(
                    f"failed creating repo dir {self.basedir!r}: {e.strerror}"
                ) from e

            length = resp.getheader("content-length")
            if length:
                try:
                    length = int(length)
                except ValueError:
                    # malformed header: treat the size as unknown
                    length = None
            if length:
                # read roughly 1% of the file per iteration for the progress bar
                blocksize = max(4096, length // 100)
            else:
                blocksize = 1000000

            try:
                self._download = AtomicWriteFile(dest, binary=True, perms=0o644)
            except OSError as e:
                raise base.PathError(self.basedir, e.strerror) from e

            # retrieve the file while providing simple progress output
            size = 0
            while True:
                buf = resp.read(blocksize)
                if not buf:
                    if length:
                        sys.stdout.write("\n")
                    break
                self._download.write(buf)
                size += len(buf)
                if length:
                    sys.stdout.write("\r")
                    progress = "=" * int(size / length * 50)
                    percent = int(size / length * 100)
                    sys.stdout.write("[%-50s] %d%%" % (progress, percent))
                    sys.stdout.flush()

            self._post_download(dest)

            # TODO: store this in pkgcore cache dir instead?
            # update cached ETag/Last-Modified values
            if etag:
                with open(etag_path, "w") as f:
                    f.write(etag)
            if modified:
                with open(modified_path, "w") as f:
                    f.write(modified)
        finally:
            resp.close()

        return True

    def _pre_download(self):
        """Pre-download initialization.

        Returns file path to download file to.
        """
        return pjoin(self.basedir, self.basename)

    def _post_download(self, path):
        """Post-download file processing.

        Args:
            path (str): path to downloaded file
        """
        # atomically create file
        self._download.close()