Source code for snakeoil.bash

"""Functionality for reading bash like files

Please note that while this functionality can do variable interpolation,
it strictly treats the source as non-executable code.  It cannot parse
subshells, variable additions, etc.

Its primary usage is for reading things like gentoo make.conf's, or
libtool .la files that are bash compatible, but non-executable.
"""

from shlex import shlex

from .demandload import demand_compile_regexp
from .fileutils import readlines
from .log import logger
from .mappings import ProtectedDict

demand_compile_regexp("line_cont_regexp", r"^(.*[^\\]|)\\$")
demand_compile_regexp("inline_comment_regexp", r"^.*\s#.*$")
demand_compile_regexp("var_find", r"\\?(\${\w+}|\$\w+)")
demand_compile_regexp("backslash_find", r"\\.")
demand_compile_regexp("ansi_escape_re", r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]")

__all__ = (
    "iter_read_bash",
    "read_bash",
    "read_dict",
    "read_bash_dict",
    "bash_parser",
    "BashParseError",
)



[docs]
def iter_read_bash(
    bash_source, allow_inline_comments=True, allow_line_cont=False, enum_line=False
):
    """Iterate over a file honoring bash commenting rules and line continuations.

    Note that it's considered good behaviour to close filehandles, as
    such, either iterate fully through this, or use read_bash instead.
    Once the file object is no longer referenced the handle will be
    closed, but be proactive instead of relying on the garbage
    collector.

    :param bash_source: either a file to read from
        or a string holding the filename to open.
    :param allow_inline_comments: whether or not to prune characters
        after a # that isn't at the start of a line.
    :param allow_line_cont: whether or not to respect line continuations
    :return: yields lines w/ commenting stripped out
    """
    if isinstance(bash_source, str):
        bash_source = readlines(bash_source, True)
    s = ""
    for lineno, line in enumerate(bash_source, 1):
        if allow_line_cont and s:
            s += line
        else:
            s = line.lstrip()

        if s:
            if s[0] != "#":
                if allow_inline_comments:
                    if not allow_line_cont or (
                        allow_line_cont and inline_comment_regexp.match(line)
                    ):
                        s = s.split("#", 1)[0].rstrip()
                if allow_line_cont and line_cont_regexp.match(line):
                    s = s.rstrip("\\\n")
                    continue
                if enum_line:
                    yield lineno, s.rstrip()
                else:
                    yield s.rstrip()
            s = ""
    if s:
        if enum_line:
            yield lineno, s
        else:
            yield s




[docs]
def read_bash(*args, **kwargs):
    """Read a file honoring bash commenting rules.

    See :py:func:`iter_read_bash` for parameter details.

    Returns a list of lines w/ comments stripped out.
    """
    return list(iter_read_bash(*args, **kwargs))




[docs]
def read_bash_dict(bash_source, vars_dict=None, sourcing_command=None):
    """Read bash source, yielding a dict of vars.

    :param bash_source: either a file to read from
        or a string holding the filename to open
    :param vars_dict: initial 'env' for the sourcing.
        Is protected from modification.
    :type vars_dict: dict or None
    :param sourcing_command: controls whether a source command exists.
        If one does and is encountered, then this func is called.
    :type sourcing_command: callable
    :raise BashParseError: thrown if invalid syntax is encountered.
    :return: dict representing the resultant env if bash executed the source.
    """

    # quite possibly I'm missing something here, but the original
    # portage_util getconfig/varexpand seemed like it only went
    # halfway. The shlex posix mode *should* cover everything.

    if vars_dict is not None:
        d, protected = ProtectedDict(vars_dict), True
    else:
        d, protected = {}, False

    close = False
    infile = None
    if isinstance(bash_source, str):
        f = open(bash_source, "r")
        close = True
        infile = bash_source
    else:
        f = bash_source
    s = bash_parser(f, sourcing_command=sourcing_command, env=d, infile=infile)

    try:
        tok = ""
        try:
            while tok is not None:
                key = s.get_token()
                if key == "export":
                    # discard 'export' token from "export VAR=VALUE" lines
                    key = s.get_token()
                if key is None:
                    break
                elif key.isspace():
                    # we specifically have to check this, since we're
                    # screwing with the whitespace filters below to
                    # detect empty assigns
                    continue
                eq = s.get_token()
                if eq != "=":
                    raise BashParseError(
                        bash_source, s.lineno, "got token %r, was expecting '='" % eq
                    )
                val = s.get_token()
                if val is None:
                    val = ""
                elif val == "export":
                    val = s.get_token()
                # look ahead to see if we just got an empty assign.
                next_tok = s.get_token()
                if next_tok == "=":
                    # ... we did.
                    # leftmost insertions, thus reversed ordering
                    s.push_token(next_tok)
                    s.push_token(val)
                    val = ""
                else:
                    s.push_token(next_tok)
                d[key] = val
        except ValueError as e:
            raise BashParseError(bash_source, s.lineno, str(e)) from e
    finally:
        if close and f is not None:
            f.close()
    if protected:
        d = d.new
    return d




[docs]
def read_dict(
    bash_source,
    splitter="=",
    source_isiter=False,
    allow_inline_comments=True,
    strip=False,
    filename=None,
    ignore_errors=False,
):
    """Read key value pairs from a file, ignoring bash-style comments.

    :param splitter: the string to split on.  Can be None to
        default to str.split's default
    :param bash_source: either a file to read from,
        or a string holding the filename to open.
    :param allow_inline_comments: whether or not to prune characters
        after a # that isn't at the start of a line.
    :param ignore_errors: parse errors are logged instead of raised
    :raise: :py:class:`BashParseError` if there are parse errors found.
    """
    d = {}
    if not source_isiter:
        filename = bash_source
        i = iter_read_bash(bash_source, allow_inline_comments=allow_inline_comments)
    else:
        if filename is None:
            # XXX what to do?
            filename = "<unknown>"
        i = bash_source
    line_count = 0
    try:
        for k in i:
            line_count += 1
            try:
                k, v = k.split(splitter, 1)
            except ValueError as e:
                if filename == "<unknown>":
                    filename = getattr(bash_source, "name", bash_source)
                if ignore_errors:
                    logger.error(
                        "bash parse error in %r, line %s", filename, line_count
                    )
                    continue
                else:
                    raise BashParseError(filename, line_count) from e
            if strip:
                k, v = k.strip(), v.strip()
            if len(v) > 2 and v[0] == v[-1] and v[0] in ("'", '"'):
                v = v[1:-1]
            d[k] = v
    finally:
        del i
    return d



def _nuke_backslash(s):
    s = s.group()
    if s == "\\\n":
        return "\n"
    try:
        return chr(ord(s))
    except TypeError:
        return s[1]



[docs]
class bash_parser(shlex):
    """Fixed up shlex version for bash parsing.

    Corrects corner cases in quote expansion and adds variable interpolation.
    While it's a fair bit slower than stdlib shlex, it parses a more complete
    subset of bash syntax than stdlib shlex.
    """

    def __init__(self, source, sourcing_command=None, env=None, infile=None):
        """
        :param source: file handle to read from
        :param sourcing_command: token to treat as an include command
        :type sourcing_command: either None, or a string; if None, no includes
            are allowed in this parsing
        :param env: initial environment to use for variable interpolation
        :type env: must be a mapping; if None, an empty dict is used
        """
        self.__dict__["state"] = " "
        super().__init__(source, posix=True, infile=infile)
        self.wordchars += "@${}/.-+/:~^*"
        self.wordchars = frozenset(self.wordchars)
        if sourcing_command is not None:
            self.source = sourcing_command
        if env is None:
            env = {}
        self.env = env
        self.__pos = 0

    def __setattr__(self, attr, val):
        if attr == "state":
            if (self.state, val) in (('"', "a"), ("a", '"'), ("a", " "), ("'", "a")):
                strl = len(self.token)
                if self.__pos != strl:
                    self.changed_state.append((self.state, self.token[self.__pos :]))
                self.__pos = strl
        self.__dict__[attr] = val


[docs]
    def sourcehook(self, newfile):
        try:
            return super().sourcehook(newfile)
        except IOError as e:
            raise BashParseError(newfile, 0, str(e)) from e



[docs]
    def read_token(self):
        self.changed_state = []
        self.__pos = 0
        token = super().read_token()
        if token is None:
            return token
        if self.state is None:
            # eof reached.
            self.changed_state.append((self.state, token[self.__pos :]))
        else:
            self.changed_state.append((self.state, self.token[self.__pos :]))
        tok = ""
        for s, t in self.changed_state:
            if s in ('"', "a"):
                tok += self.var_expand(t).replace("\\\n", "")
            else:
                tok += t
        return tok



[docs]
    def var_expand(self, val):
        prev, pos = 0, 0
        l = []
        while match := var_find.search(val, pos):
            pos = match.start()
            if val[pos] == "\\":
                # it's escaped. either it's \\$ or \\${ , either way,
                # skipping two ahead handles it.
                pos += 2
            else:
                var = val[match.start() : match.end()].strip("${}")
                if prev != pos:
                    l.append(val[prev:pos])
                if var in self.env:
                    if not isinstance(self.env[var], str):
                        raise ValueError(
                            "env key %r must be a string, not %s: %r"
                            % (var, type(self.env[var]), self.env[var])
                        )
                    l.append(self.env[var])
                else:
                    l.append("")
                prev = pos = match.end()

        # do \\ cleansing, collapsing val down also.
        val = backslash_find.sub(_nuke_backslash, "".join(l) + val[prev:])
        return val





[docs]
class BashParseError(Exception):
    """Exception thrown when a handle being parsed isn't valid bash."""

    def __init__(self, filename, line, errmsg=None):
        if errmsg is not None:
            super().__init__(
                "error parsing '%s' on or before line %i: err %s"
                % (filename, line, errmsg)
            )
        else:
            super().__init__(
                "error parsing '%s' on or before line %i" % (filename, line)
            )
        self.file, self.line, self.errmsg = filename, line, errmsg