initial commit

author: Nicolas James <Eele1Ephe7uZahRie@tutanota.com> 2025-02-13 18:00:17 +1100
committer: Nicolas James <Eele1Ephe7uZahRie@tutanota.com> 2025-02-13 18:00:17 +1100
commit: 98cef5e9a772602d42acfcf233838c760424db9a (patch)
tree: 5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp2041/slippy/slippy.py
1 files changed, 385 insertions, 0 deletions
diff --git a/comp2041/slippy/slippy.py b/comp2041/slippy/slippy.py
new file mode 100755
index 0000000..8cd02ba
--- /dev/null
+++ b/comp2041/slippy/slippy.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+
+import re, sys, os, getopt, fileinput
+
+def get_error_strs():
+    '''
+    Builds the table of error messages that are raised from more than one
+    place. The keys are short identifiers ("usage", "invalid_command"), the
+    values are the exact text shown to the user.
+    '''
+    usage = ("usage: slippy [-i] [-n] [-f <script-file> | <sed-command>] "
+             "[<files>...]")
+    invalid_command = "slippy: command line: invalid command"
+    return dict(usage=usage, invalid_command=invalid_command)
+
+
+def find_regex(regex, str):
+    '''
+    Searches str for the first occurrence of regex.
+    Returns None if nothing matched, otherwise a pair:
+    The first element is a string holding the entire match captured.
+    The second element is the list of captured groups that took part in the
+    match (groups that did not participate are dropped, not kept as None).
+    '''
+    # NOTE: the parameter name shadows the builtin str. It is kept as-is so
+    # that any caller passing it by keyword continues to work.
+    match = re.search(regex, str)
+    if match is None:
+        return None
+
+    captured = [group for group in match.groups() if group is not None]
+    return (match.group(0), captured)
+
+
+# We use getopt instead of argparse because we need to match the spec's errors.
+def get_args():
+    '''
+    Parses sys.argv[1:] with getopt.
+    Returns a tuple where the first element is a dictionary of parsed options
+    and the second element is a dictionary of remaining args (sed command +
+    files). The key in both dictionaries is the position of the argument.
+    Each parsed option is an (option, value) pair; value is "" for options
+    that take no argument.
+    Raises RuntimeError carrying the usage string if the options are invalid.
+    Eg: ({0: ("-n", ""), 1: ("-f", "script")}, {0: "input.txt"})
+    '''
+    short = "inf:"
+    long = []
+    try:
+        (parsed, unparsed) = getopt.getopt(sys.argv[1:], short, long)
+    except getopt.GetoptError as error:
+        # Only option-parsing failures are turned into a usage error. A bare
+        # except here would also swallow things like KeyboardInterrupt.
+        raise RuntimeError(get_error_strs()["usage"]) from error
+    return (dict(enumerate(parsed)), dict(enumerate(unparsed)))
+
+
+def get_command_address(command, delimit):
+    '''
+    Returns a pair (<string>, list<string>) of addresses for a command.
+    The first element in the pair is the entire address prefix as written.
+    The second element in the pair is a list of addresses.
+    If the address is a line number, it will NOT be contained by the delimiter.
+    If the address is regex, it will be surrounded by "/".
+    If the list has more than one element, it is a range.
+    If the command has no address, ("", []) is returned.
+    3p               -> ("3", ["3"])
+    /.1/q            -> ("/.1/", ["/.1/"])
+    /4/,/6/s/[12]/9/ -> ("/4/,/6/", ["/4/", "/6/"])        NOTE the lack of ","
+    10,/x/p          -> ("10,/x/", ["10", "/x/"])
+
+    delimit is accepted for interface compatibility only: addresses are always
+    delimited by "/", whatever delimiter an s command uses for its arguments.
+    '''
+
+    line = r"[0-9$]+"
+
+    # Alternatives, tried in order: a range, a lone regex, a lone line number.
+    # The start of a range must accept multi-digit line numbers, otherwise
+    # "10,/x/p" is read as the address "10" followed by a command ",".
+    regex = (rf"^(\/[^\/]*\/|{line}),(\/[^\/]+\/|{line})"
+             r"|^(\/[^\/]+\/)"
+             rf"|^({line})")
+    find = find_regex(regex, command)
+    return ("", []) if find is None else find
+
+
+def get_command_arguments(command, delimit):
+    '''
+    Splits the text that follows the command letter on delimit and returns the
+    pieces found after the first delimiter. Whatever precedes the first
+    delimiter is discarded. Returns [] if delimit never occurs.
+    /megamaid    -> ["megamaid"]
+    /asdf/qwer/g -> ["asdf", "qwer", "g"]
+    /asdf/qwer/  -> ["asdf", "qwer", ""]        NOTE the trailing empty string
+    '''
+    (_, found, remainder) = command.partition(delimit)
+    if not found:
+        return []
+    return remainder.split(delimit)
+
+
+def parse_command(command):
+    '''
+    Transforms a command represented as a string into a dictionary describing
+    the command.
+    Raises RuntimeError (invalid command) if nothing follows the address or
+    if the command letter is not one that get_command_funcs() knows about.
+    Examples:
+    "1d" ->
+                "addresses" : ["1"],             // A string list of addresses
+                "type"      : "d",               // Type as a string
+                "arguments" : []                 // List of args
+                "enabled"   : False              // Stateful range indicator
+
+    "s/.1/qwer/g" ->
+                "addresses" : [],
+                "type"      : "s",
+                "arguments" : [".1", "qwer", "g"]
+                "enabled"   : False
+
+    "/4/,/6/sX[12]X9X" ->
+                "addresses" : ["/4/", "/6/"],
+                "type"      : "s",
+                "arguments" : ["[12]", "9", ""]  // "" is the empty flags field
+                "enabled"   : False
+    '''
+
+    # Addresses are always written with "/", so strip them off first.
+    (address_prefix, addresses) = get_command_address(command, "/")
+    body = command.removeprefix(address_prefix)
+
+    if not body or body[0] not in get_command_funcs():
+        raise RuntimeError(get_error_strs()["invalid_command"])
+
+    kind = body[0]
+    rest = body[1:]
+
+    # The s delimiter is the character right after the "s" itself. It must be
+    # read after the address has been removed, otherwise an addressed command
+    # such as "/4/,/6/sXaXbX" would be split on "/" and end up without
+    # arguments.
+    delimit = rest[0] if kind == "s" and rest else "/"
+
+    arguments = get_command_arguments(rest, delimit)
+
+    return {"addresses": addresses,
+            "type": kind,
+            "arguments": arguments,
+            "enabled": False}
+
+
+def unroll_commands(commands):
+    '''
+    Transforms a list of command strings into a list of the results of
+    parse_command. Each string may hold several commands separated by newlines
+    or ";". Comments ("#" to the end of the line) and spaces are removed, and
+    empty commands are skipped.
+    NOTE: the splitting is purely textual, so "#", ";" and spaces are not
+    supported inside the pattern or replacement of an s command.
+    '''
+    ret = []
+    for unrolled in commands:
+        for line in unrolled.splitlines():
+            # A comment runs to the end of the line, so it has to be cut off
+            # before splitting on ";". Otherwise "p # a;b" would produce a
+            # bogus command "b" out of the comment text.
+            line = line.split("#", 1)[0]
+
+            for command in line.split(";"):
+                command = command.replace(" ", "") # remove whitespace
+                if command:
+                    ret.append(command)
+
+    return [parse_command(command) for command in ret]
+
+
+
+def get_state():
+    '''
+    Collects everything the main loop needs from the command line.
+    Returns a dictionary with the following key-values (with examples):
+    "commands" : [parse_command("s/qwer/asdf/g"), parse_command("etc")],
+    "args": ["-n", "-f"],
+    "files": ["readme.txt", "example.py"],
+
+    The commands come from the -f script file when -f is given; otherwise the
+    first non-option argument is the command. Every other non-option argument
+    is an input file.
+    Raises RuntimeError carrying the usage string if no command was supplied.
+    '''
+    (parsed_args, unparsed_args) = get_args()
+
+    commands = []
+    args = []
+    files = []
+
+    for (option, value) in parsed_args.values():
+        if option == "-f":
+            with open(value) as script:
+                commands = script.read().splitlines()
+        args.append(option)
+
+    has_script = "-f" in args
+    for position in sorted(unparsed_args):
+        operand = unparsed_args[position]
+        if position == 0 and not has_script:
+            commands.append(operand)
+        else:
+            files.append(operand)
+
+    # No command parsed is invalid syntax.
+    if not commands:
+        raise RuntimeError(get_error_strs()["usage"])
+
+    return dict(commands=unroll_commands(commands), args=args, files=files)
+
+
+def is_regex_match(regex, count, current_line, next_line):
+    '''
+    Returns true if regex is found anywhere in current_line, false otherwise.
+    count and next_line are not used; they are accepted so that the parameters
+    line up with is_line_match.
+    '''
+    found = find_regex(regex, current_line)
+    return found is not None
+
+
+def is_line_match(target, count, current_line, next_line, lequal=False):
+    '''
+    Tests a line-number address against the current line.
+    target is a line number as a string, or "$" for the last line of input.
+    The last line is recognised by next_line being a zero length string.
+
+    With lequal=False: returns true if the current line IS the target line.
+    With lequal=True:  returns true if the target line is still ahead of the
+                       current line, ie a range ending at target stays open.
+    Raises ValueError if target is neither "$" nor an integer.
+    '''
+    if target == "$":
+        on_last_line = len(next_line) == 0
+        # A range ending at "$" has to stay open until the last line is
+        # reached. Answering "is this the last line" here as well would close
+        # a range such as 2,$ straight after its first line.
+        return not on_last_line if lequal else on_last_line
+
+    conv = int(target)
+    if lequal:
+        return conv > count
+    return conv == count
+
+
+def is_address_regex(address):
+    '''
+    Returns true if the address is written as a regex, ie it both starts and
+    ends with "/". An empty address is not a regex.
+    '''
+    return address.startswith("/") and address.endswith("/")
+
+
+def is_address_match(address, count, current_line, next_line):
+    '''
+    Returns true if the current line is selected by the address.
+    A regex address (/.../) is searched for in current_line with its
+    surrounding "/" removed; anything else is treated as a line number
+    address and compared against count.
+    '''
+    if not is_address_regex(address):
+        return is_line_match(address, count, current_line, next_line)
+
+    pattern = address[1:-1]
+    return is_regex_match(pattern, count, current_line, next_line)
+
+
+def is_address_within(address, count, current_line, next_line):
+    '''
+    Tests the END address of a range against the current line.
+    For a regex address, returns true if the regex is NOT found in
+    current_line. For a line number address, returns the result of
+    is_line_match with lequal=True.
+    '''
+    if not is_address_regex(address):
+        return is_line_match(address, count, current_line, next_line, True)
+
+    end_seen = is_regex_match(address[1:-1], count, current_line, next_line)
+    return not end_seen
+
+
+def is_command_within(command, count, current_line, next_line):
+    '''
+    Decides whether a command with a two-address range applies to the current
+    line, and updates command["enabled"] to record whether the range is still
+    open for the lines that follow.
+    Returns true on the line that matches the start address and on every line
+    seen while the range is open (including the line that closes it).
+    Returns false otherwise.
+    '''
+    addresses = command["addresses"]
+
+    start = addresses[0]
+    end = addresses[1]
+
+    # If we're not in the range...
+    if not command["enabled"]:
+        if not is_address_match(start, count, current_line, next_line):
+            return False
+
+        # The start address matched: open the range. A non-regex end is
+        # checked straight away and may close the range again on this same
+        # line; a regex end is only tested from the next line onwards.
+        command["enabled"] = True
+        if not is_address_regex(end) and not is_address_within(end, count, current_line, next_line):
+            command["enabled"] = False
+        return True
+
+    # We're in the range, check if we should be out.
+    # The current line still counts as part of the range either way.
+    if not is_address_within(end, count, current_line, next_line):
+        command["enabled"] = False
+    return True
+
+
+def handle_q(command, count, current_line, next_line, args):
+    '''
+    Quit command: prints the current line (unless -n is in args) and then
+    terminates the program by raising SystemExit. Never returns.
+    command, count and next_line are unused; they are accepted so that every
+    handler can be called the same way.
+    '''
+    if "-n" not in args:
+        print(current_line, end="")
+    # Quitting is a normal way to finish, so the exit status is 0 rather than
+    # an error status.
+    sys.exit(0)
+
+
+def handle_p(command, count, current_line, next_line, args):
+    '''
+    Print command: writes current_line to stdout exactly as it is (no newline
+    is added) and hands the line back unchanged.
+    '''
+    line = current_line
+    print(line, end="")
+    return line
+
+
+def handle_d(command, count, current_line, next_line, args):
+    '''
+    Delete command: replaces the current line with a zero length string,
+    whatever the line was. None of the parameters are inspected.
+    '''
+    deleted = ""
+    return deleted
+
+
+def handle_s(command, count, current_line, next_line, args):
+    '''
+    Substitute command: returns current_line with matches of the pattern
+    replaced. command["arguments"] holds [pattern, replacement] optionally
+    followed by a flags field; if that field is exactly "g" every match is
+    replaced, otherwise only the first one.
+    Raises RuntimeError (invalid command) if the pattern or the replacement
+    is missing.
+    '''
+    command_arguments = command["arguments"]
+    if len(command_arguments) < 2:
+        raise RuntimeError(get_error_strs()["invalid_command"])
+
+    (pattern, replace) = command_arguments[:2]
+    replace_all = len(command_arguments) >= 3 and command_arguments[2] == "g"
+
+    # count is passed by keyword because passing it positionally to re.sub is
+    # deprecated. A count of 0 means "replace every match".
+    limit = 0 if replace_all else 1
+    return re.sub(pattern, replace, current_line, count=limit)
+    
+
+def get_command_funcs():
+    '''
+    Maps each supported command letter to the function that carries it out.
+    Every handler is called as (command, count, current_line, next_line, args).
+    '''
+    return dict(q=handle_q, p=handle_p, d=handle_d, s=handle_s)
+
+
+def should_run_command(command, count, current_line, next_line):
+    '''
+    Returns true if the command applies to the current line.
+    No address:    always applies.
+    One address:   applies if the address matches the current line.
+    Two addresses: applies if the current line is within the range (this
+                   updates the command's "enabled" state).
+    Raises RuntimeError carrying the usage string for more than two addresses.
+    '''
+    addresses = command["addresses"]
+    total = len(addresses)
+
+    if total > 2:
+        raise RuntimeError(get_error_strs()["usage"])
+    if total == 0:
+        return True
+    if total == 1:
+        (only,) = addresses
+        return is_address_match(only, count, current_line, next_line)
+    return is_command_within(command, count, current_line, next_line)
+
+
+def run_command(command, count, current_line, next_line, args):
+    '''
+    Applies one command to the current line and returns the resulting line.
+    If the command's addresses do not select this line, the line is returned
+    untouched and the handler is not called.
+    '''
+    if should_run_command(command, count, current_line, next_line):
+        # Look the handler up by the command's type letter.
+        handler = get_command_funcs()[command["type"]]
+        return handler(command, count, current_line, next_line, args)
+
+    return current_line
+
+
+def make_file_lines(files):
+    '''
+    Reads every file named in files, in the order given, and returns all of
+    their lines as one flat list. Line endings are kept on the lines.
+    '''
+    lines = []
+    for filename in files:
+        with open(filename) as handle:
+            contents = handle.read()
+        lines.extend(contents.splitlines(True))
+    return lines
+
+
+def get_new_line(file_lines, files):
+    '''
+    Gets the next line of input: from stdin when no files were given,
+    otherwise from the front of file_lines (the line is removed from the
+    list).
+    Returns a zero length string if out of data. This is distinct from an empty
+    line, which will be "\n".
+    '''
+
+    if not files:
+        return sys.stdin.readline()
+
+    if not file_lines:
+        return ""
+
+    return file_lines.pop(0)
+
+
+def loop(state):
+    '''
+    Main loop: reads the input one line at a time, runs every command over
+    each line in order, and prints the resulting line unless -n was given.
+    One line is always read ahead, so that commands can tell when they are on
+    the last line of input (next_line is then a zero length string).
+    '''
+    commands = state["commands"]
+    args = state["args"]
+    files = state["files"]
+    file_lines = make_file_lines(files)
+    quiet = "-n" in args
+
+    count = 0
+    current_line = get_new_line(file_lines, files)
+    while current_line:
+        count += 1
+        next_line = get_new_line(file_lines, files)
+
+        for command in commands:
+            current_line = run_command(command, count, current_line,
+                                       next_line, args)
+            # A zero length line means it was deleted, so the remaining
+            # commands are skipped for this line.
+            if not current_line:
+                break
+
+        if not quiet:
+            print(current_line, end="")
+
+        current_line = next_line
+
+
+# We use a try block to handle error messages with the appropriate exit code.
+# Any error raised while parsing or running is printed to stderr and the
+# program exits with status 1; otherwise it exits with status 0.
+try:
+    state = get_state()
+    #print(state)
+    loop(state)
+except SystemExit:
+    # A sys.exit() made while running (handle_q does this) lands here. Its
+    # exit code is discarded and control falls through to sys.exit(0) below.
+    pass
+except BaseException as error:
+    # Everything else, including the RuntimeErrors carrying usage text, is
+    # reported as a one-line message instead of a traceback.
+    print(error, file=sys.stderr)
+    sys.exit(1)
+sys.exit(0)
author	Nicolas James <Eele1Ephe7uZahRie@tutanota.com>	2025-02-13 18:00:17 +1100
committer	Nicolas James <Eele1Ephe7uZahRie@tutanota.com>	2025-02-13 18:00:17 +1100
commit	98cef5e9a772602d42acfcf233838c760424db9a (patch)
tree	5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp2041/slippy/slippy.py