diff options
| author | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2025-02-13 18:00:17 +1100 |
|---|---|---|
| committer | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2025-02-13 18:00:17 +1100 |
| commit | 98cef5e9a772602d42acfcf233838c760424db9a (patch) | |
| tree | 5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp2041/slippy/slippy.py | |
initial commit
Diffstat (limited to 'comp2041/slippy/slippy.py')
| -rwxr-xr-x | comp2041/slippy/slippy.py | 385 |
1 files changed, 385 insertions, 0 deletions
#!/usr/bin/env python3

import fileinput
import getopt
import os
import re
import sys


def get_error_strs():
    '''
    Returns a dictionary of error strings we throw in multiple locations.
    The keys are "usage" and "invalid_command".
    '''
    return {
        "usage": "usage: slippy [-i] [-n] [-f <script-file> | <sed-command>] "
                 "[<files>...]",
        "invalid_command": "slippy: command line: invalid command",
    }


def find_regex(regex, str):
    '''
    Returns a pair describing matched regex - None if not found.
    The first element is a string describing the entire match captured.
    The second element is the list of captured groups, with groups that did
    not participate in the match left out.
    '''
    # NOTE: the parameter name shadows the builtin; it is kept so that any
    # caller passing it by keyword keeps working.
    match = re.search(regex, str)
    if match is None:
        return None
    return (match.group(0), [g for g in match.groups() if g is not None])


# We use getopt instead of argparse because we need to match the spec's errors.
def get_args():
    '''
    Returns a tuple where the first element is a dictionary of parsed args and
    the second element is a dictionary of remaining args (sed regex + files).
    The key in both dictionaries is the position of the argument.
    Raises RuntimeError with the usage string if the arguments are invalid.
    Eg: ({0: ("-n", ""), 1: ("-f", "script")}, {0: "file.txt"})

    NOTE: "-i" is accepted here, but nothing in this file acts on it.
    '''
    short = "inf:"
    long = []
    try:
        (parsed, unparsed) = getopt.getopt(sys.argv[1:], short, long)
    except getopt.GetoptError as error:
        # Only option-parsing failures are turned into the usage message.
        raise RuntimeError(get_error_strs()["usage"]) from error
    return (dict(enumerate(parsed)), dict(enumerate(unparsed)))


def get_command_address(command, delimit):
    '''
    Returns a pair (<string>, list<string>) of addresses for a command.
    The first element in the pair is the entire address.
    The second element in the pair is a list of addresses.
    If the address is a line number, it will NOT be contained by slashes.
    If the address is regex, it will be surrounded by slashes.
    If the list has more than one element, it is a range.
    Returns ("", []) when the command has no address.
    3p -> ("3", ["3"])
    /.1/q -> ("/.1/", ["/.1/"])
    /4/,/6/s/[12]/9/ -> ("/4/,/6/", ["/4/", "/6/"]) NOTE the lack of ","
    10,/x/p -> ("10,/x/", ["10", "/x/"])

    delimit is accepted for interface compatibility; addresses are always
    matched with "/" as the regex delimiter.
    '''
    # Every alternative is anchored at the start of the command. The numeric
    # start of a mixed range uses "+" so multi-digit line numbers are accepted.
    regex = (r"^(\/[^\/]*\/|[0-9$]+),(\/[^\/]+\/|[0-9$]+)"
             r"|^(\/[^\/]+\/)"
             r"|(^[0-9$]+),([0-9$]+)"
             r"|(^[0-9$]+)")
    found = find_regex(regex, command)
    return ("", []) if found is None else found


def get_command_arguments(command, delimit):
    '''
    Returns the contents of the command split by the delimiter, dropping
    whatever precedes the first delimiter.
    /megamaid -> ["megamaid"]
    /asdf/qwer/g -> ["asdf", "qwer", "g"]
    '''
    return command.split(delimit)[1:]


def parse_command(command):
    '''
    Transforms a command represented as a string into a dictionary describing
    the command.
    Raises RuntimeError with the "invalid_command" message when the command
    has no type character, an unknown type, or an "s" command with fewer than
    two arguments.
    Examples:
    "1d" ->
        "addresses" : ["1"],    // A string list of addresses
        "type" : "d",           // Type as a string
        "arguments" : []        // List of args
        "enabled" : False       // Stateful range indicator

    "s/.1/qwer/g" ->
        "addresses" : [],
        "type" : "s",
        "arguments" : [".1", "qwer", "g"]
        "enabled" : False

    "/4/,/6/s/[12]/9/" ->
        "addresses" : ["/4/", "/6/"],
        "type" : "s",
        "arguments" : ["[12]", "9", ""]   // trailing delimiter -> empty flag
        "enabled" : False
    '''
    invalid_command = get_error_strs()["invalid_command"]

    (address_prefix, addresses) = get_command_address(command, "/")
    command = command.removeprefix(address_prefix)

    # An address with nothing after it (or an empty command) has no type.
    if not command:
        raise RuntimeError(invalid_command)

    kind = command[0]
    if kind not in get_command_funcs():
        raise RuntimeError(invalid_command)
    command = command[1:]

    # The substitute delimiter is the character right after the "s", so it
    # has to be read after the address has been stripped.
    delimit = command[0] if kind == "s" and command else "/"
    arguments = get_command_arguments(command, delimit)

    # handle_s indexes arguments[0] and arguments[1]; reject short forms here.
    if kind == "s" and len(arguments) < 2:
        raise RuntimeError(invalid_command)

    return {"addresses": addresses,
            "type": kind,
            "arguments": arguments,
            "enabled": False}


def unroll_commands(commands):
    '''
    Transforms a list of commands into a list of the results of parse_command.
    Splits commands into multiple on ";" if necessary. Removes spaces and
    comments (everything from the first "#" onwards).
    '''
    ret = []
    for unrolled in commands:
        for command in unrolled.split(";"):
            command = command.replace(" ", "")  # remove whitespace
            if not command:
                continue

            command = command.split("#", 1)[0]  # remove comments
            if not command:
                continue

            ret.append(command)

    return [parse_command(command) for command in ret]


def get_state():
    '''
    Returns a dictionary containing elements describing the commands passed to
    the program.
    The dictionary will contain the following key-values (with examples):
    "commands" : [parse_command("s/qwer/asdf/g"), parse_command("etc")],
    "args": ["-n", "-f"],
    "files": ["readme.txt", "example.py"],
    Raises RuntimeError with the usage string when no command was supplied.
    '''
    (parsed_args, unparsed_args) = get_args()

    commands = []
    args = []
    files = []

    for (flag, value) in parsed_args.values():
        if flag == "-f":
            # The script file supplies the commands, one per line.
            with open(value) as f:
                commands = f.read().splitlines()
        args.append(flag)

    # Without -f, the first positional argument is the command itself.
    script_from_file = "-f" in args
    for (position, arg) in sorted(unparsed_args.items()):
        if not script_from_file and position == 0:
            commands.append(arg)
        else:
            files.append(arg)

    # No command parsed is invalid syntax.
    if not commands:
        raise RuntimeError(get_error_strs()["usage"])

    return {
        "commands": unroll_commands(commands),
        "args": args,
        "files": files,
    }


def is_regex_match(regex, count, current_line, next_line):
    '''
    Returns True if the regex is found in current_line, False otherwise.
    count and next_line are unused; they keep the signature parallel to
    is_line_match.
    '''
    return find_regex(regex, current_line) is not None


def is_line_match(target, count, current_line, next_line, lequal=False):
    '''
    Returns True if the target line address matches, False otherwise.
    "$" matches when next_line is empty (no more input).
    Otherwise target is converted to an int and compared with count: equal by
    default, or strictly greater than count when lequal is True.
    '''
    if target == "$":
        return len(next_line) == 0

    line_number = int(target)
    if lequal:
        return line_number > count
    return line_number == count


def is_address_regex(address):
    '''
    Returns True if the address is non-empty and both starts and ends with "/".
    '''
    return len(address) >= 1 and address[0] == "/" and address[-1] == "/"


def is_address_match(address, count, current_line, next_line):
    '''
    Deduces whether the address is a line count or regex address and returns
    whether it matches the current line.
    '''
    if is_address_regex(address):
        return is_regex_match(address[1:-1], count, current_line, next_line)
    return is_line_match(address, count, current_line, next_line)


def is_address_within(address, count, current_line, next_line):
    '''
    Returns True while a range ending at address has not reached its end:
    a regex end address that does not match current_line, or a line end
    address that is still greater than count (or "$" with no more input).
    '''
    if is_address_regex(address):
        return not is_regex_match(address[1:-1], count, current_line, next_line)
    return is_line_match(address, count, current_line, next_line, True)


def is_command_within(command, count, current_line, next_line):
    '''
    Returns True if the current line falls inside the command's two-address
    range. The open/closed state of the range is stored in command["enabled"]
    and updated on every call.
    '''
    addresses = command["addresses"]

    start = addresses[0]
    end = addresses[1]

    # If we're not in the range...
    if not command["enabled"]:
        if not is_address_match(start, count, current_line, next_line):
            return False

        command["enabled"] = True
        # A line-number end can close the range on the line that opened it;
        # a regex end is only tested from the following line onwards.
        if not is_address_regex(end) and \
                not is_address_within(end, count, current_line, next_line):
            command["enabled"] = False
        return True

    # We're in the range, check if we should be out.
    if not is_address_within(end, count, current_line, next_line):
        command["enabled"] = False
    return True


def handle_q(command, count, current_line, next_line, args):
    '''
    Quit: prints the current line unless "-n" is in args, then raises
    SystemExit with status 0.
    '''
    if "-n" not in args:
        print(current_line, end="")
    sys.exit(0)


def handle_p(command, count, current_line, next_line, args):
    '''
    Print: prints the current line and returns it unchanged.
    '''
    print(current_line, end="")
    return current_line


def handle_d(command, count, current_line, next_line, args):
    '''
    Delete: returns an empty string, which loop() treats as a deleted line.
    '''
    return ""


def handle_s(command, count, current_line, next_line, args):
    '''
    Substitute: returns current_line with arguments[0] (a regex) replaced by
    arguments[1]. Every match is replaced when arguments[2] is "g", otherwise
    only the first.
    '''
    command_arguments = command["arguments"]
    pattern = command_arguments[0]
    replace = command_arguments[1]

    replace_all = len(command_arguments) >= 3 and command_arguments[2] == "g"
    if replace_all:
        return re.sub(pattern, replace, current_line)
    # count is passed by keyword; the positional form is deprecated.
    return re.sub(pattern, replace, current_line, count=1)


def get_command_funcs():
    '''
    Returns a dictionary mapping each command type to its handler function.
    '''
    return {
        "q": handle_q,
        "p": handle_p,
        "d": handle_d,
        "s": handle_s,
    }


def should_run_command(command, count, current_line, next_line):
    '''
    Returns True if the command applies to the current line: always with no
    address, on an address match with one, and inside the range with two.
    Raises RuntimeError with the usage string for more than two addresses.
    '''
    addresses = command["addresses"]
    length = len(addresses)
    if length == 0:
        return True
    if length == 1:
        return is_address_match(addresses[0], count, current_line, next_line)
    if length == 2:
        return is_command_within(command, count, current_line, next_line)
    raise RuntimeError(get_error_strs()["usage"])


def run_command(command, count, current_line, next_line, args):
    '''
    Runs the command if its address applies and returns the resulting line;
    otherwise returns current_line unchanged.
    '''
    # Check if the command should be run before executing.
    if not should_run_command(command, count, current_line, next_line):
        return current_line

    command_func = get_command_funcs()[command["type"]]
    return command_func(command, count, current_line, next_line, args)


def make_file_lines(files):
    '''
    Turns a list of filenames into a list of lines, in order as they appear.
    Line endings are kept.
    '''
    ret = []
    for filename in files:
        with open(filename) as f:
            ret += f.read().splitlines(True)
    return ret


def get_new_line(file_lines, files):
    '''
    Gets a new line from stdin when no files were given, otherwise removes and
    returns the first entry of file_lines.
    Returns a zero length string if out of data. This is distinct from an empty
    line, which will be "\\n".
    '''
    if len(files) <= 0:
        return sys.stdin.readline()

    if len(file_lines) <= 0:
        return ""

    return file_lines.pop(0)


def loop(state):
    '''
    Main loop, handles running of commands.
    Reads one line ahead so that "$" addresses can detect the last line.
    '''
    args = state["args"]
    commands = state["commands"]
    files = state["files"]
    file_lines = make_file_lines(files)

    count = 1
    current_line = get_new_line(file_lines, files)
    while current_line:
        next_line = get_new_line(file_lines, files)

        for command in commands:
            current_line = run_command(command, count, current_line,
                                       next_line, args)
            # An empty result means the line was deleted; skip the rest.
            if len(current_line) == 0:
                break

        if "-n" not in args:
            print(current_line, end="")

        count += 1
        current_line = next_line


def main():
    '''
    Parses the command line and runs the main loop, then exits.
    Exits with status 0 on success or after a "q" command, and with status 1
    (message on stderr) when any other exception is raised.
    '''
    # We use a try block to handle error messages with the appropriate exit
    # code.
    try:
        state = get_state()
        loop(state)
    except SystemExit:
        # Raised by handle_q to stop processing early.
        pass
    except BaseException as error:
        print(error, file=sys.stderr)
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()
