"""Functionality for reading bash-like files.

Please note that while this functionality can do variable interpolation,
it strictly treats the source as non-executable code. It cannot parse
subshells, variable additions, etc.

Its primary usage is for reading things like Gentoo make.conf files, or
libtool .la files that are bash compatible, but non-executable.
"""
from shlex import shlex
from .demandload import demand_compile_regexp
from .fileutils import readlines
from .log import logger
from .mappings import ProtectedDict
# Patterns used by the readers below.  ``demand_compile_regexp`` is defined in
# ``.demandload`` (not visible here); the names registered with it are used as
# module globals further down, so it evidently binds each pattern under the
# given name -- presumably compiling it lazily on first use (TODO confirm).

# A line ending in a backslash that is not itself preceded by a backslash
# (or a line consisting of just a backslash): a line continuation.
demand_compile_regexp("line_cont_regexp", r"^(.*[^\\]|)\\$")
# A line containing a "#" that is preceded by a whitespace character.
demand_compile_regexp("inline_comment_regexp", r"^.*\s#.*$")
# ``$name`` or ``${name}``, optionally preceded by a single backslash.
demand_compile_regexp("var_find", r"\\?(\${\w+}|\$\w+)")
# A backslash followed by one character.
demand_compile_regexp("backslash_find", r"\\.")
# ANSI CSI escape sequence: 0x9B or ESC "[", then parameter bytes,
# intermediate bytes and a final byte.  Not referenced by the code visible
# in this part of the file.
demand_compile_regexp("ansi_escape_re", r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]")

# Names exported by a star-import of this module.
__all__ = (
    "iter_read_bash",
    "read_bash",
    "read_dict",
    "read_bash_dict",
    "bash_parser",
    "BashParseError",
)
def iter_read_bash(
    bash_source, allow_inline_comments=True, allow_line_cont=False, enum_line=False
):
    """Yield the meaningful lines of a bash-style file.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    dropped; optionally inline comments are removed and backslash line
    continuations are joined.

    The underlying handle is only released once this generator is exhausted
    or garbage collected, so either consume it completely or use
    :py:func:`read_bash` instead.

    :param bash_source: either an iterable of lines (e.g. an open file)
        or a string holding the filename to open.
    :param allow_inline_comments: whether or not to prune everything from
        the first ``#`` onwards on lines that do not start with one. When
        line continuations are enabled this only happens for lines that
        contain a ``#`` preceded by whitespace.
    :param allow_line_cont: whether or not to respect line continuations,
        i.e. join a line ending in a single backslash with the next one.
    :param enum_line: if true, yield ``(lineno, line)`` tuples instead of
        bare lines; ``lineno`` is the 1-based number of the last physical
        line that contributed to the yielded line.
    :return: yields lines w/ commenting stripped out
    """
    if isinstance(bash_source, str):
        bash_source = readlines(bash_source, True)

    def _emit(number, text):
        # Shape one result according to the enum_line flag.
        return (number, text) if enum_line else text

    pending = ""
    for lineno, line in enumerate(bash_source, 1):
        if allow_line_cont and pending:
            # We are in the middle of a continued logical line.
            pending += line
        else:
            pending = line.lstrip()

        if not pending:
            continue
        if pending[0] == "#":
            # Whole-line comment: discard whatever was accumulated.
            pending = ""
            continue

        if allow_inline_comments and (
            not allow_line_cont or inline_comment_regexp.match(line)
        ):
            pending = pending.split("#", 1)[0].rstrip()

        if allow_line_cont and line_cont_regexp.match(line):
            # Drop the trailing backslash/newline and keep accumulating.
            pending = pending.rstrip("\\\n")
            continue

        yield _emit(lineno, pending.rstrip())
        pending = ""

    # A continuation left dangling at end of input is yielded as-is.
    if pending:
        yield _emit(lineno, pending)
def read_bash(*args, **kwargs):
    """Eagerly read a file honoring bash commenting rules.

    All positional and keyword arguments are forwarded unchanged to
    :py:func:`iter_read_bash`; see it for parameter details.

    :return: a list holding everything :py:func:`iter_read_bash` yields,
        i.e. the lines w/ comments stripped out.
    """
    line_iter = iter_read_bash(*args, **kwargs)
    return [*line_iter]
def read_bash_dict(bash_source, vars_dict=None, sourcing_command=None):
    """Read bash source, yielding a dict of vars.

    The source is tokenized with :py:class:`bash_parser` and interpreted as a
    sequence of ``KEY=VALUE`` assignments, each optionally prefixed with
    ``export``. Values may reference variables assigned earlier in the
    source or present in ``vars_dict``.

    :param bash_source: either a file to read from
        or a string holding the filename to open
    :param vars_dict: initial 'env' for the sourcing.
        Is protected from modification.
    :type vars_dict: dict or None
    :param sourcing_command: controls whether a source command exists.
        If one does and is encountered, then this func is called.
    :type sourcing_command: callable
    :raise BashParseError: thrown if invalid syntax is encountered.
    :return: dict representing the resultant env if bash executed the source.
    """
    # quite possibly I'm missing something here, but the original
    # portage_util getconfig/varexpand seemed like it only went
    # halfway. The shlex posix mode *should* cover everything.
    if vars_dict is not None:
        # Wrap the caller's mapping so assignments never touch it.
        d, protected = ProtectedDict(vars_dict), True
    else:
        d, protected = {}, False

    # Only a handle opened here is ours to close.
    opened = None
    infile = None
    if isinstance(bash_source, str):
        opened = f = open(bash_source, "r")
        infile = bash_source
    else:
        f = bash_source

    try:
        # Built inside the try so the handle is released even if the parser
        # cannot be constructed.
        s = bash_parser(f, sourcing_command=sourcing_command, env=d, infile=infile)
        try:
            while True:
                key = s.get_token()
                if key == "export":
                    # discard 'export' token from "export VAR=VALUE" lines
                    key = s.get_token()
                if key is None:
                    # end of input
                    break
                if key.isspace():
                    # we specifically have to check this, since we're
                    # screwing with the whitespace filters below to
                    # detect empty assigns
                    continue
                eq = s.get_token()
                if eq != "=":
                    raise BashParseError(
                        bash_source, s.lineno, "got token %r, was expecting '='" % eq
                    )
                val = s.get_token()
                if val == "export":
                    # The assignment was empty and the next statement begins
                    # with 'export'; skip it so the look-ahead below sees the
                    # following variable name.
                    # NOTE: a literal value of ``export`` is indistinguishable
                    # from this case and is therefore not supported.
                    val = s.get_token()
                if val is None:
                    # Input ended right after '=' (or after a dangling
                    # 'export'): an empty assignment.
                    val = ""
                # look ahead to see if we just got an empty assign.
                next_tok = s.get_token()
                if next_tok == "=":
                    # ... we did; what we took for the value is really the
                    # next key.
                    # leftmost insertions, thus reversed ordering
                    s.push_token(next_tok)
                    s.push_token(val)
                    val = ""
                else:
                    s.push_token(next_tok)
                d[key] = val
        except ValueError as e:
            # Tokenizer/expansion errors surface as ValueError.
            raise BashParseError(bash_source, s.lineno, str(e)) from e
    finally:
        if opened is not None:
            opened.close()
    if protected:
        # ``new`` comes from ProtectedDict (defined elsewhere); presumably it
        # holds the assignments made on top of vars_dict -- verify against
        # ProtectedDict.
        d = d.new
    return d
def read_dict(
    bash_source,
    splitter="=",
    source_isiter=False,
    allow_inline_comments=True,
    strip=False,
    filename=None,
    ignore_errors=False,
):
    """Read key value pairs from a file, ignoring bash-style comments.

    Each line is split once on ``splitter``; a value wrapped in a matching
    pair of single or double quotes (including an empty pair) has the quotes
    removed.

    :param bash_source: either a file to read from,
        or a string holding the filename to open. If ``source_isiter`` is
        true, an iterable of already comment-stripped lines instead.
    :param splitter: the string to split on. Can be None to
        default to str.split's default
    :param source_isiter: if true, ``bash_source`` is iterated directly
        rather than being passed through :py:func:`iter_read_bash`.
    :param allow_inline_comments: whether or not to prune characters
        after a # that isn't at the start of a line. Only used when
        ``source_isiter`` is false.
    :param strip: whether to strip surrounding whitespace from keys and
        values before unquoting.
    :param filename: name used in error reports when ``source_isiter`` is
        true; otherwise it is replaced by ``bash_source``.
    :param ignore_errors: parse errors are logged instead of raised
    :raise: :py:class:`BashParseError` if there are parse errors found.
    :return: dict mapping keys to values.
    """
    d = {}
    if not source_isiter:
        filename = bash_source
        # enum_line gives the physical line number, so errors point at the
        # real offending line even when comments/blank lines were skipped.
        lines = iter_read_bash(
            bash_source, allow_inline_comments=allow_inline_comments, enum_line=True
        )
    else:
        if filename is None:
            # XXX what to do?
            filename = "<unknown>"
        lines = enumerate(bash_source, 1)

    try:
        for lineno, line in lines:
            try:
                key, val = line.split(splitter, 1)
            except ValueError as e:
                # The splitter is missing from this line.
                if filename == "<unknown>":
                    filename = getattr(bash_source, "name", bash_source)
                if ignore_errors:
                    logger.error(
                        "bash parse error in %r, line %s", filename, lineno
                    )
                    continue
                raise BashParseError(filename, lineno) from e
            if strip:
                key, val = key.strip(), val.strip()
            # >= 2 so that empty quoted values ("" or '') are unquoted too,
            # while a lone quote character is left untouched.
            if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
                val = val[1:-1]
            d[key] = val
    finally:
        # Drop our reference to the line iterator promptly.
        del lines
    return d
def _nuke_backslash(s):
    """Substitution callback that removes the backslash from an escape.

    :param s: a regex match object; its matched text is normally a backslash
        followed by one character.
    :return: a newline for an escaped newline, the matched text itself when
        it is a single character, otherwise the character that follows the
        first one.
    """
    matched = s.group()
    if matched == "\\\n":
        return "\n"
    # A lone character is handed back untouched; for anything longer the
    # leading backslash is dropped and the escaped character is kept.
    return matched if len(matched) == 1 else matched[1]
class bash_parser(shlex):
    """Fixed up shlex version for bash parsing.

    Corrects corner cases in quote expansion and adds variable interpolation.

    While it's a fair bit slower than stdlib shlex, it parses a more complete
    subset of bash syntax than stdlib shlex.

    The parser watches the tokenizer's ``state`` attribute: every time the
    state moves between unquoted, double-quoted and single-quoted text, the
    piece of the token collected so far is recorded together with the state
    it was collected in. Only unquoted and double-quoted pieces are later
    run through :py:meth:`var_expand`.
    """

    def __init__(self, source, sourcing_command=None, env=None, infile=None):
        """
        :param source: file handle to read from
        :param sourcing_command: token to treat as an include command
        :type sourcing_command: either None, or a string; if None, no includes
            are allowed in this parsing
        :param env: initial environment to use for variable interpolation
        :type env: must be a mapping; if None, an empty dict is used
        :param infile: passed through to :py:class:`shlex.shlex` as ``infile``.
        """
        # __setattr__ below reads self.state when "state" is assigned, so it
        # must exist before shlex.__init__ assigns it for the first time.
        self.__dict__["state"] = " "
        super().__init__(source, posix=True, infile=infile)
        # Characters that may appear in variable references and paths are
        # treated as part of a word.
        self.wordchars += "@${}/.-+/:~^*"
        self.wordchars = frozenset(self.wordchars)
        if sourcing_command is not None:
            self.source = sourcing_command
        if env is None:
            env = {}
        self.env = env
        # Offset into self.token up to which text has already been recorded
        # in self.changed_state.
        self.__pos = 0

    def __setattr__(self, attr, val):
        """Record token fragments whenever the quoting state changes."""
        if attr == "state":
            # (old state, new state) pairs that mark the boundary between
            # differently quoted pieces of the same token.  ("a", "'") is
            # needed so that unquoted text directly followed by a
            # single-quoted section (``$X'lit'``) is recorded as unquoted
            # rather than being lumped in with the literal part.
            if (self.state, val) in (
                ('"', "a"),
                ("a", '"'),
                ("a", "'"),
                ("a", " "),
                ("'", "a"),
            ):
                strl = len(self.token)
                if self.__pos != strl:
                    self.changed_state.append((self.state, self.token[self.__pos :]))
                self.__pos = strl
        self.__dict__[attr] = val

    def sourcehook(self, newfile):
        """Open an included file, reporting I/O failures as parse errors.

        :raise BashParseError: if the base implementation raises ``IOError``.
        """
        try:
            return super().sourcehook(newfile)
        except IOError as e:
            raise BashParseError(newfile, 0, str(e)) from e

    def read_token(self):
        """Read one token, expanding variables in its non-literal pieces.

        :return: the token with unquoted and double-quoted fragments passed
            through :py:meth:`var_expand`, or None when the base tokenizer
            returns None.
        """
        self.changed_state = []
        self.__pos = 0
        token = super().read_token()
        if token is None:
            return token
        if self.state is None:
            # eof reached.  shlex raises ValueError for end of input inside
            # quotes or after an escape character, so whatever has not been
            # recorded yet was read as unquoted text and must be expanded
            # like any other unquoted fragment.
            self.changed_state.append(("a", token[self.__pos :]))
        else:
            self.changed_state.append((self.state, self.token[self.__pos :]))
        tok = ""
        for s, t in self.changed_state:
            if s in ('"', "a"):
                tok += self.var_expand(t).replace("\\\n", "")
            else:
                # single-quoted text is taken literally
                tok += t
        return tok

    def var_expand(self, val):
        """Substitute ``$name`` / ``${name}`` references and drop escapes.

        Variables missing from ``self.env`` expand to the empty string;
        references preceded by a backslash are left alone. Afterwards every
        backslash escape in the result is collapsed.

        :param val: text to expand.
        :raise ValueError: if a referenced env value is not a string.
        :return: the expanded text.
        """
        prev, pos = 0, 0
        parts = []
        while match := var_find.search(val, pos):
            pos = match.start()
            if val[pos] == "\\":
                # it's escaped. either it's \\$ or \\${ , either way,
                # skipping two ahead handles it.
                pos += 2
                continue
            var = match.group().strip("${}")
            if prev != pos:
                # literal text between the previous reference and this one
                parts.append(val[prev:pos])
            if var in self.env:
                if not isinstance(self.env[var], str):
                    raise ValueError(
                        "env key %r must be a string, not %s: %r"
                        % (var, type(self.env[var]), self.env[var])
                    )
                parts.append(self.env[var])
            else:
                parts.append("")
            prev = pos = match.end()

        # do \\ cleansing, collapsing val down also.
        val = backslash_find.sub(_nuke_backslash, "".join(parts) + val[prev:])
        return val
class BashParseError(Exception):
    """Exception thrown when a handle being parsed isn't valid bash.

    The constructor arguments are kept on the instance as ``file``,
    ``line`` and ``errmsg``.
    """

    def __init__(self, filename, line, errmsg=None):
        """
        :param filename: what was being parsed; interpolated into the message.
        :param line: line number the error was detected on (formatted as an
            integer).
        :param errmsg: optional detail appended to the message.
        """
        if errmsg is None:
            message = "error parsing '%s' on or before line %i" % (filename, line)
        else:
            message = "error parsing '%s' on or before line %i: err %s" % (
                filename,
                line,
                errmsg,
            )
        super().__init__(message)
        self.file = filename
        self.line = line
        self.errmsg = errmsg