comp2041/slippy/slippy.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385

#!/usr/bin/env python3

import re, sys, os, getopt, fileinput

def get_error_strs():
    '''
    Builds the table of error messages that are raised from more than one
    place. Keys are short identifiers, values are the exact text to report.
    '''
    usage = "usage: slippy [-i] [-n] [-f <script-file> | <sed-command>] " \
            "[<files>...]"
    invalid_command = "slippy: command line: invalid command"
    return {"usage": usage, "invalid_command": invalid_command}


def find_regex(regex, str):
    '''
    Searches str for the first occurrence of regex.
    Gives back None when nothing matches. Otherwise gives back a pair:
    the full matched text, followed by the list of capture groups that
    took part in the match (groups that did not participate are left out).
    '''
    found = re.search(regex, str)
    if found is None:
        return None

    captured = [group for group in found.groups() if group is not None]
    return (found.group(0), captured)


# We use getopt instead of argparse because we need to match the spec's errors.
def get_args():
    '''
    Parses sys.argv[1:] with getopt, accepting the short options -i, -n and
    -f <script-file>.

    Returns a tuple of two dictionaries, both keyed by position:
      - the first holds an (option, value) pair for every parsed option
        (value is "" for options that take no argument);
      - the second holds the remaining arguments as plain strings
        (the sed command and/or the input files).
    Eg: ({0: ("-n", ""), 1: ("-f", "script")}, {0: "a.txt"})

    Raises RuntimeError carrying the usage message if getopt rejects the
    options.
    '''
    short = "inf:"
    long = []
    try:
        (parsed, unparsed) = getopt.getopt(sys.argv[1:], short, long)
    except getopt.GetoptError as error:
        # Only option-parsing failures become a usage error; anything else
        # (e.g. KeyboardInterrupt) must not be disguised as bad usage.
        raise RuntimeError(get_error_strs()["usage"]) from error
    return (dict(enumerate(parsed)), dict(enumerate(unparsed)))


def get_command_address(command, delimit):
    '''
    Returns a pair (<string>, list<string>) of addresses for a command.
    The first element in the pair is the entire address prefix as written.
    The second element in the pair is the list of individual addresses.
    A line-number address ("3", "$") is returned bare.
    A regex address is returned still surrounded by "/".
    If the list has two elements, it is a range; the "," is not included.
    If the command has no address, ("", []) is returned.
    3p               -> ("3", ["3"])
    /.1/q            -> ("/.1/", ["/.1/"])
    12,/x/p          -> ("12,/x/", ["12", "/x/"])
    /4/,/6/s/[12]/9/ -> ("/4/,/6/", ["/4/", "/6/"])

    delimit is accepted for interface compatibility only: addresses are
    always delimited by "/", whatever delimiter an s command uses.
    '''

    # Start of a range: a /regex/ (possibly empty) or a line number / "$".
    range_start = r"(\/[^\/]*\/|[0-9$]+)"
    # End of a range, or a lone address: a non-empty /regex/ or a line
    # number / "$".
    other = r"(\/[^\/]+\/|[0-9$]+)"

    # The range form is tried first so "3,5" is not cut short at "3".
    regex = "^" + range_start + "," + other + "|^" + other

    match = re.match(regex, command)
    if match is None:
        return ("", [])
    # Only one alternative participates, so drop the unused groups.
    addresses = [group for group in match.groups() if group is not None]
    return (match.group(0), addresses)


def get_command_arguments(command, delimit):
    '''
    Splits command on delimit and discards whatever precedes the first
    delimiter.
    "/megamaid"    with "/" -> ["megamaid"]
    "/asdf/qwer/g" with "/" -> ["asdf", "qwer", "g"]
    '''
    _before_first, *pieces = command.split(delimit)
    return pieces


def parse_command(command):
    '''
    Transforms a command represented as a string into a dictionary describing
    the command.
    Examples:
    "1d" ->
                "addresses" : ["1"],             // A string list of addresses
                "type"      : "d",               // Type as a string
                "arguments" : []                 // List of args
                "enabled"   : False              // Stateful range indicator

    "s/.1/qwer/g" ->
                "addresses" : [],
                "type"      : "s",
                "arguments" : [".1", "qwer", "g"]
                "enabled"   : False

    "/4/,/6/s/[12]/9/" ->
                "addresses" : ["/4/", "/6/"],
                "type"      : "s",
                "arguments" : ["[12]", "9", ""]
                "enabled"   : False

    Raises RuntimeError with the "invalid command" message when the text has
    no command letter after its address, names a command that has no handler,
    or is an s command with fewer than two arguments.
    '''
    invalid = get_error_strs()["invalid_command"]

    # Addresses are always written with "/", so strip them before looking
    # at the command's own delimiter.
    (address_prefix, addresses) = get_command_address(command, "/")
    body = command.removeprefix(address_prefix)

    # An address with nothing after it has no command letter to index.
    if not body:
        raise RuntimeError(invalid)

    kind = body[0]
    if kind not in get_command_funcs():
        raise RuntimeError(invalid)

    rest = body[1:]

    # For s the character right after the command letter is the delimiter;
    # it is read after the address is stripped so "3sX1X2X" works too.
    delimit = rest[0] if kind == "s" and rest else "/"
    arguments = get_command_arguments(rest, delimit)

    # s needs at least a pattern and a replacement.
    if kind == "s" and len(arguments) < 2:
        raise RuntimeError(invalid)

    return {"addresses": addresses,
            "type": kind,
            "arguments": arguments,
            "enabled": False}


def unroll_commands(commands):
    '''
    Transforms a list of command strings into a list of the results of
    parse_command.

    Each entry may hold several commands separated by newlines or ";".
    A "#" starts a comment that runs to the end of its line. Spaces are
    removed from every command and empty commands are skipped.

    NOTE: "#" and ";" are not recognised as literal characters inside a
    regex or replacement; they always act as comment start / separator.
    '''
    ret = []
    for unrolled in commands:
        for line in unrolled.splitlines():
            # Strip the comment BEFORE splitting on ";": a comment runs to
            # the end of the line, so a ";" inside it must not start a new
            # command.
            code = line.split("#", 1)[0]

            for command in code.split(";"):
                command = command.replace(" ", "") # remove whitespace
                if command:
                    ret.append(command)

    return [parse_command(command) for command in ret]


def get_state():
    '''
    Assembles the program state from the command line.
    The returned dictionary has these entries (with examples):
    "commands" : [parse_command("s/qwer/asdf/g"), parse_command("etc")],
    "args": ["-n", "-f"],
    "files": ["readme.txt", "example.py"],

    Commands come from the -f script file when that option is given;
    otherwise the first non-option argument is the command. Every other
    non-option argument is an input file.
    Raises RuntimeError with the usage message when no command was found.
    '''
    (options, positionals) = get_args()

    commands = []
    args = []
    files = []

    for index in options:
        option = options[index]
        flag = option[0]
        if flag == "-f":
            # The script file supplies one command string per line.
            with open(option[1]) as script:
                commands = script.read().splitlines()
        args.append(flag)

    script_given = "-f" in args
    for index in sorted(positionals):
        word = positionals[index]
        if index == 0 and not script_given:
            commands.append(word)
        else:
            files.append(word)

    # Running without any command is a usage error.
    if not commands:
        raise RuntimeError(get_error_strs()["usage"])

    return {
        "commands": unroll_commands(commands),
        "args": args,
        "files": files,
    }


def is_regex_match(regex, count, current_line, next_line):
    '''
    Reports whether regex can be found anywhere in current_line.
    count and next_line are not used here.
    '''
    found = find_regex(regex, current_line)
    return found is not None


def is_line_match(target, count, current_line, next_line, lequal=False):
    '''
    Tests a line-number address against the current position.

    target is either "$" (the last line) or a decimal line number given as a
    string. count is the current line number. The last line is recognised by
    next_line being empty; current_line is not used.

    With lequal false, returns True when the current line is the target.
    With lequal true, returns True while the target lies strictly after the
    current line (target > count); for "$" that means any line that is not
    the last one.

    Raises ValueError if target is neither "$" nor an integer string.
    '''
    if target == "$":
        at_last_line = len(next_line) == 0
        # "Still before $" is the opposite of "on the last line"; without
        # this a range ending in "$" would close right after it opened.
        return (not at_last_line) if lequal else at_last_line

    conv = int(target)
    if lequal:
        return conv > count
    return conv == count


def is_address_regex(address):
    '''
    Reports whether address is written as a regex, i.e. it is non-empty and
    both begins and ends with "/".
    '''
    return address.startswith("/") and address.endswith("/")


def is_address_match(address, count, current_line, next_line):
    '''
    Decides whether the current line is selected by address.
    A "/.../" address is tested as a regex, with the slashes stripped,
    against current_line; anything else is tested as a line number.
    '''
    if not is_address_regex(address):
        return is_line_match(address, count, current_line, next_line)

    pattern = address[1:-1]
    return is_regex_match(pattern, count, current_line, next_line)


def is_address_within(address, count, current_line, next_line):
    '''
    Decides whether a range whose END address is address is still open
    after the current line.
    For a regex end address that is the case while the regex does NOT match
    current_line; for a line-number end address the decision is delegated
    to is_line_match with lequal set.
    '''
    if not is_address_regex(address):
        return is_line_match(address, count, current_line, next_line, True)

    end_reached = is_regex_match(address[1:-1], count, current_line, next_line)
    return not end_reached


def is_command_within(command, count, current_line, next_line):
    '''
    Decides whether the current line falls inside the command's two-address
    range, updating command["enabled"] to remember whether the range stays
    open for the following line.
    Returns True when the command applies to this line, False otherwise.
    '''
    start = command["addresses"][0]
    end = command["addresses"][1]

    if command["enabled"]:
        # Already inside the range: this line is included, and the range
        # stays open only while the end address has not been reached.
        command["enabled"] = is_address_within(end, count, current_line,
                                               next_line)
        return True

    # Outside the range: only a line matching the start address opens it.
    if not is_address_match(start, count, current_line, next_line):
        return False

    # A regex end address is never tested on the opening line, so the range
    # stays open. A line-number end address that is not beyond this line
    # closes the range straight away.
    command["enabled"] = is_address_regex(end) or \
        is_address_within(end, count, current_line, next_line)
    return True


def handle_q(command, count, current_line, next_line, args):
    '''
    Implements the q (quit) command.
    Writes current_line to standard output unless "-n" is in args, then
    terminates by raising SystemExit with status 0, since quitting on
    request is a normal exit. Never returns.
    command, count and next_line are not used.
    '''
    if "-n" not in args:
        print(current_line, end="")
    sys.exit(0)


def handle_p(command, count, current_line, next_line, args):
    '''
    Implements the p (print) command: writes current_line to standard output
    exactly as given (nothing is appended) and hands the line back unchanged.
    '''
    sys.stdout.write(current_line)
    return current_line


def handle_d(command, count, current_line, next_line, args):
    '''
    Implements the d (delete) command by replacing the line with the empty
    string. None of the arguments are used.
    '''
    deleted = ""
    return deleted


def handle_s(command, count, current_line, next_line, args):
    '''
    Implements the s (substitute) command.
    command["arguments"] holds the pattern, the replacement and, optionally,
    a flag. When the flag is "g" every match in current_line is replaced;
    otherwise only the first match is.
    Returns the line after substitution.
    Raises IndexError if fewer than two arguments are present.
    '''
    command_arguments = command["arguments"]
    pattern = command_arguments[0]
    replace = command_arguments[1]

    replace_all = len(command_arguments) >= 3 and command_arguments[2] == "g"

    # re.sub treats count=0 as "replace every match". count is passed by
    # keyword because passing it positionally is deprecated in newer Pythons.
    return re.sub(pattern, replace, current_line, count=0 if replace_all else 1)
    

def get_command_funcs():
    '''
    Builds the dispatch table that maps each one-letter command type to the
    function implementing it.
    '''
    return dict(q=handle_q, p=handle_p, d=handle_d, s=handle_s)


def should_run_command(command, count, current_line, next_line):
    '''
    Decides whether command applies to the current line.
    No address: always applies. One address: applies when that address
    matches. Two addresses: applies while the line is inside the range.
    Raises RuntimeError with the usage message for any other address count.
    '''
    addresses = command["addresses"]

    if not addresses:
        return True

    how_many = len(addresses)
    if how_many == 1:
        (only,) = addresses
        return is_address_match(only, count, current_line, next_line)
    if how_many == 2:
        return is_command_within(command, count, current_line, next_line)

    raise RuntimeError(get_error_strs()["usage"])


def run_command(command, count, current_line, next_line, args):
    '''
    Applies command to current_line when its addresses select this line.
    Returns whatever the command's handler returns, or current_line
    untouched when the command does not apply.
    '''
    if should_run_command(command, count, current_line, next_line):
        # Look the handler up by the command's type letter and delegate.
        handler = get_command_funcs()[command["type"]]
        return handler(command, count, current_line, next_line, args)

    return current_line


def make_file_lines(files):
    '''
    Reads every named file in turn and gives back all of their lines as one
    list, in order of appearance. Line endings are kept on each line.
    '''
    lines = []
    for path in files:
        with open(path) as handle:
            contents = handle.read()
        lines.extend(contents.splitlines(True))
    return lines


def get_new_line(file_lines, files):
    '''
    Fetches the next input line.
    When files is empty the line is read from stdin; otherwise it is removed
    from the front of file_lines.
    Returns a zero length string if out of data. This is distinct from an
    empty line, which will be "\n".
    '''
    if not files:
        return sys.stdin.readline()

    return file_lines.pop(0) if file_lines else ""


def loop(state):
    '''
    Main loop: feeds every input line through the command list.
    Input comes from state["files"], or from stdin when no files were given.
    One line is read ahead so commands can tell when they are on the last
    line. After the commands have run, the resulting line is printed unless
    "-n" is among state["args"].
    '''
    args = state["args"]
    commands = state["commands"]
    files = state["files"]
    pending_lines = make_file_lines(files)
    quiet = "-n" in args

    line_number = 1
    current_line = get_new_line(pending_lines, files)
    while current_line:
        next_line = get_new_line(pending_lines, files)

        for command in commands:
            current_line = run_command(command, line_number, current_line,
                                       next_line, args)
            # An empty result means the line is gone; skip the remaining
            # commands for it.
            if not current_line:
                break

        if not quiet:
            print(current_line, end="")

        line_number += 1
        current_line = next_line


# Run the program. Every failure is reported on stderr with exit status 1;
# a SystemExit raised while running (e.g. by the q command) ends the run
# with exit status 0.
exit_status = 0
try:
    state = get_state()
    loop(state)
except SystemExit:
    pass
except BaseException as error:
    print(error, file=sys.stderr)
    exit_status = 1
sys.exit(exit_status)