#!/usr/bin/env python3
"""Slippy: apply sed-style line commands (q, p, d, s) to files or stdin."""
import re
import sys
import os
import getopt
import fileinput


def get_error_strs():
    """Return the error messages that are raised from several places.

    The result is a fresh dictionary keyed by a short error name.
    """
    # NOTE(review): the bracketed placeholders in the usage text look as if
    # their names were lost at some point - confirm against the spec before
    # changing the string, since it is printed verbatim to the user.
    return {
        "usage": "usage: slippy [-i] [-n] [-f | ] [...]",
        "invalid_command": "slippy: command line: invalid command",
    }


def find_regex(regex, str):
    """Search ``str`` for ``regex`` and describe the first match.

    Returns ``None`` when nothing matches. Otherwise returns a pair
    ``(whole_match, groups)`` where ``whole_match`` is the full matched text
    and ``groups`` lists, in order, every capture group that took part in the
    match (groups that did not participate are dropped).

    The parameter name ``str`` shadows the builtin; it is kept so existing
    keyword callers continue to work.
    """
    match = re.search(regex, str)
    if match is None:
        return None
    return (match.group(0), [g for g in match.groups() if g is not None])


# We use getopt instead of argparse because we need to match the spec's errors.
def get_args():
    """Parse ``sys.argv[1:]`` with getopt.

    Returns a pair of dictionaries, both keyed by position:

    * parsed options, each an ``(option, value)`` tuple as produced by getopt;
    * the remaining positional arguments as plain strings.

    Eg: ({0: ("-n", ""), 1: ("-f", "script")}, {0: "input.txt"})

    Raises:
        RuntimeError: carrying the usage message when getopt rejects the
            command line.
    """
    short_opts = "inf:"
    long_opts = []
    try:
        parsed, unparsed = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        raise RuntimeError(get_error_strs()["usage"]) from err
    return (dict(enumerate(parsed)), dict(enumerate(unparsed)))


def get_command_address(command, delimit):
    """Extract the leading address (or address range) from a command string.

    Returns a pair ``(prefix, addresses)``. ``prefix`` is the exact text of
    the address part as it appears at the start of ``command``; ``addresses``
    is the list of individual addresses (one element for a single address,
    two for a range, with the separating "," removed). Line-number addresses
    are returned bare, regex addresses keep their surrounding "/".
    ``("", [])`` is returned when the command has no address.

    3p -> ("3", ["3"])
    /.1/q -> ("/.1/", ["/.1/"])
    /4/,/6/s/[12]/9/ -> ("/4/,/6/", ["/4/", "/6/"])
    10,/x/p -> ("10,/x/", ["10", "/x/"])

    ``delimit`` is accepted for interface compatibility but is not used:
    addresses are always matched with "/" as the regex delimiter.
    """
    # The first alternative handles every "start,end" range.  Its numeric
    # start must allow several digits ("[0-9$]+"), otherwise "10,/x/" would
    # fall through to the bare line-number alternative and leave "," behind.
    regex = (
        r"^(\/[^\/]*\/|[0-9$]+),(\/[^\/]+\/|[0-9$]+)"
        r"|^(\/[^\/]+\/)"
        r"|(^[0-9$]+),([0-9$]+)"
        r"|(^[0-9$]+)"
    )
    found = find_regex(regex, command)
    return ("", []) if found is None else found


def get_command_arguments(command, delimit):
    """Split ``command`` on ``delimit`` and drop the first piece.

    /megamaid -> ["megamaid"]
    /asdf/qwer/g -> ["asdf", "qwer", "g"]
    /[12]/9/ -> ["[12]", "9", ""]   (a trailing delimiter yields a final "")
    """
    return command.split(delimit)[1:]


def parse_command(command):
    """Turn one command string into a dictionary describing it.

    The dictionary has the keys:

    * "addresses": list of address strings (see get_command_address);
    * "type": the single command character that follows the address;
    * "arguments": the text after the type, split on the delimiter;
    * "enabled": always False here; is_command_within flips it while a
      two-address range is active.

    Examples:
    "1d"               -> addresses ["1"], type "d", arguments []
    "s/.1/qwer/g"      -> addresses [], type "s",
                          arguments [".1", "qwer", "g"]
    "/4/,/6/s/[12]/9/" -> addresses ["/4/", "/6/"], type "s",
                          arguments ["[12]", "9", ""]

    For "s" the character right after the "s" is used as the delimiter, so
    "sX[12]X9X" works as well, with or without a leading address. Every
    other command is split on "/".

    Raises:
        RuntimeError: with the "invalid command" message when nothing is
            left after the address (e.g. the command "3").
    """
    address_prefix, addresses = get_command_address(command, "/")
    body = command.removeprefix(address_prefix)
    if not body:
        raise RuntimeError(get_error_strs()["invalid_command"])

    kind = body[0]
    # The delimiter must be read after the address has been stripped,
    # otherwise an addressed "s" command would never see its own delimiter.
    delimit = body[1] if kind == "s" and len(body) >= 2 else "/"
    arguments = get_command_arguments(body[1:], delimit)
    return {
        "addresses": addresses,
        "type": kind,
        "arguments": arguments,
        "enabled": False,
    }


def unroll_commands(commands):
    """Parse a list of raw command strings into command dictionaries.

    Each entry is split on ";", all spaces are removed from every piece,
    anything from the first "#" onwards is discarded as a comment, and empty
    pieces are skipped. The survivors are passed through parse_command.
    """
    # NOTE(review): the ";" split happens before comments are stripped and
    # every space is removed, including any inside a pattern - confirm this
    # is the intended behaviour before relying on either.
    pieces = []
    for unrolled in commands:
        for command in unrolled.split(";"):
            command = command.replace(" ", "")  # remove whitespace
            if not command:
                continue
            command = command.split("#", 1)[0]  # remove comments
            if not command:
                continue
            pieces.append(command)
    return [parse_command(piece) for piece in pieces]


def get_state():
    """Build the program state from the command line.

    Returns a dictionary with:

    * "commands": list of parse_command results;
    * "args": the option flags that were given, e.g. ["-n", "-f"];
    * "files": the input file names.

    With "-f FILE" the commands are the lines of FILE and every positional
    argument is an input file. Without "-f" the first positional argument is
    the command string and the rest are input files.

    Raises:
        RuntimeError: carrying the usage message when no command was given
            (or when get_args rejects the command line).
    """
    parsed_args, unparsed_args = get_args()
    commands = []
    args = []
    files = []

    for key in parsed_args:
        arg = parsed_args[key][0]
        if arg == "-f":
            filename = parsed_args[key][1]
            with open(filename) as f:
                commands = f.read().splitlines()
        args.append(arg)

    for key in sorted(unparsed_args):
        arg = unparsed_args[key]
        if "-f" not in args and key == 0:
            commands.append(arg)
            continue
        files.append(arg)

    # No command parsed is invalid syntax.
    if not commands:
        raise RuntimeError(get_error_strs()["usage"])

    return {
        "commands": unroll_commands(commands),
        "args": args,
        "files": files,
    }


def is_regex_match(regex, count, current_line, next_line):
    """Return True if ``regex`` is found anywhere in ``current_line``.

    ``count`` and ``next_line`` are unused; they are accepted so this
    function has the same signature as is_line_match.
    """
    return find_regex(regex, current_line) is not None


def is_line_match(target, count, current_line, next_line, lequal=False):
    """Compare a line-number address against the current position.

    ``target`` is either a decimal line number or "$" (the last line, which
    is recognised by ``next_line`` being empty).

    With ``lequal=False`` the result is True when the current line IS the
    target line. With ``lequal=True`` the result is True when the target
    line is still ahead of the current line, i.e. a range ending at
    ``target`` must stay open.
    """
    if target == "$":
        at_last_line = len(next_line) == 0
        # "Still ahead" for "$" means we have not reached the last line yet.
        return (not at_last_line) if lequal else at_last_line
    line_number = int(target)
    if lequal:
        return line_number > count
    return line_number == count


def is_address_regex(address):
    """Return True if ``address`` is non-empty and starts and ends with "/"."""
    return len(address) >= 1 and address[0] == "/" and address[-1] == "/"


def is_address_match(address, count, current_line, next_line):
    """Return True if the current line is selected by ``address``.

    Regex addresses ("/.../") are searched for in ``current_line``; anything
    else is treated as a line number (or "$") and compared with ``count``.
    """
    if is_address_regex(address):
        return is_regex_match(address[1:-1], count, current_line, next_line)
    return is_line_match(address, count, current_line, next_line)


def is_address_within(address, count, current_line, next_line):
    """Return True while a range ending at ``address`` should stay open.

    For a regex end address that means the current line does not match it;
    for a line-number end address that the end line is still ahead.
    """
    if is_address_regex(address):
        return not is_regex_match(address[1:-1], count, current_line, next_line)
    return is_line_match(address, count, current_line, next_line, True)


def is_command_within(command, count, current_line, next_line):
    """Decide whether a two-address command applies to the current line.

    Uses and updates ``command["enabled"]`` to remember, between calls,
    whether the range is currently open. Returns True for every line from
    the one matching the start address up to and including the one that
    closes the range.
    """
    start, end = command["addresses"][0], command["addresses"][1]

    # If we're not in the range...
    if not command["enabled"]:
        if not is_address_match(start, count, current_line, next_line):
            return False
        command["enabled"] = True
        # A regex end is never tested on the line that opened the range; a
        # numeric end that is not ahead of us closes the range immediately.
        if not is_address_regex(end) and not is_address_within(
            end, count, current_line, next_line
        ):
            command["enabled"] = False
        return True

    # We're in the range, check if we should be out.
    if not is_address_within(end, count, current_line, next_line):
        command["enabled"] = False
    return True


def handle_q(command, count, current_line, next_line, args):
    """Quit: print the current line (unless "-n" was given) and exit."""
    if "-n" not in args:
        print(current_line, end="")
    # NOTE(review): the status passed here is not what the process reports
    # when run as a script - the top-level driver catches SystemExit and
    # then exits with 0.
    sys.exit(1)
    return current_line  # superfluous


def handle_p(command, count, current_line, next_line, args):
    """Print the current line and return it unchanged."""
    print(current_line, end="")
    return current_line


def handle_d(command, count, current_line, next_line, args):
    """Delete: return an empty string in place of the current line."""
    return ""


def handle_s(command, count, current_line, next_line, args):
    """Substitute: replace matches of arguments[0] with arguments[1].

    All matches are replaced when the third argument is "g"; otherwise only
    the first match is replaced. Returns the resulting line.
    """
    command_arguments = command["arguments"]
    pattern = command_arguments[0]
    replace = command_arguments[1]
    replace_all = len(command_arguments) >= 3 and command_arguments[2] == "g"
    if replace_all:
        return re.sub(pattern, replace, current_line)
    # count is passed by keyword: the positional form is deprecated.
    return re.sub(pattern, replace, current_line, count=1)


def get_command_funcs():
    """Return a dictionary mapping each command type to its handler."""
    return {
        "q": handle_q,
        "p": handle_p,
        "d": handle_d,
        "s": handle_s,
    }


def should_run_command(command, count, current_line, next_line):
    """Return True if ``command`` applies to the current line.

    No address: always. One address: when it matches. Two addresses: when
    the line is inside the range (see is_command_within).

    Raises:
        RuntimeError: carrying the usage message for more than two addresses.
    """
    addresses = command["addresses"]
    length = len(addresses)
    if length == 0:
        return True
    if length == 1:
        return is_address_match(addresses[0], count, current_line, next_line)
    if length == 2:
        return is_command_within(command, count, current_line, next_line)
    raise RuntimeError(get_error_strs()["usage"])


def run_command(command, count, current_line, next_line, args):
    """Run ``command`` on the current line if its address selects it.

    Returns the (possibly modified) line; the line is returned untouched
    when the command does not apply. An unknown command type raises KeyError
    from the handler lookup.
    """
    # Check if the command should be run before executing.
    if not should_run_command(command, count, current_line, next_line):
        return current_line
    command_func = get_command_funcs()[command["type"]]
    return command_func(command, count, current_line, next_line, args)


def make_file_lines(files):
    """Read every file in ``files`` and return all their lines in order.

    Line endings are kept on each line.
    """
    lines = []
    for filename in files:
        with open(filename) as f:
            lines += f.read().splitlines(True)
    return lines


def get_new_line(file_lines, files):
    """Return the next input line, or "" when the input is exhausted.

    When ``files`` is empty the line is read from stdin; otherwise the first
    entry of ``file_lines`` is removed and returned. An exhausted input is
    distinct from an empty line, which is "\\n".
    """
    if not files:
        return sys.stdin.readline()
    if not file_lines:
        return ""
    return file_lines.pop(0)


def loop(state):
    """Main loop: feed every input line through every command.

    ``state`` is the dictionary produced by get_state. One line of lookahead
    is kept so that commands can recognise the last line. After the commands
    have run, the line is printed unless "-n" was given; a command that
    empties the line stops the remaining commands for that line.
    """
    args = state["args"]
    commands = state["commands"]
    files = state["files"]
    file_lines = make_file_lines(files)

    count = 1
    current_line = get_new_line(file_lines, files)
    while current_line:
        next_line = get_new_line(file_lines, files)
        for command in commands:
            current_line = run_command(
                command, count, current_line, next_line, args
            )
            if len(current_line) == 0:
                break
        if "-n" not in args:
            print(current_line, end="")
        count += 1
        current_line = next_line


def main():
    """Script entry point: run the program and map failures to exit codes."""
    # We use a try block to handle error messages with the appropriate exit
    # code.
    try:
        state = get_state()
        loop(state)
    except SystemExit:
        # Raised by handle_q to stop early; fall through to a normal exit.
        pass
    except BaseException as error:
        print(error, file=sys.stderr)
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()