aboutsummaryrefslogtreecommitdiff
path: root/comp2521/tf_idf/io.c
blob: 566b1b983fd29a10fbbf4ff75f4ce67269ceae31 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include "io.h"

// Returns the next whitespace-delimited word read from fptr as a malloc'ed,
// NUL-terminated string. The caller owns the result and must free() it.
// Returns NULL when no word remains before EOF, or when memory allocation
// fails.
// Words are expected to be at most 100 chars (101 bytes including the '\0'),
// but a longer word grows the buffer instead of writing past its end.
// The whitespace char that ends the word, if any, is consumed from the stream.
char *getNextWord(FILE *const fptr) {
    // 100 chars plus the null terminator covers every word the specification
    // allows, so growth is only needed for out-of-spec input.
    enum { INITIAL_CAPACITY = 101 };

    // Skip leading whitespace. c ends up as the first char of the word, or
    // EOF if the rest of the stream is blank.
    int c = fgetc(fptr);
    while (c != EOF && isspace(c)) {
        c = fgetc(fptr);
    }
    if (c == EOF) {
        return NULL;
    }

    size_t capacity = INITIAL_CAPACITY;
    char *ret = malloc(capacity);
    if (ret == NULL) {
        // Leave the unread char in the stream rather than silently eating it.
        ungetc(c, fptr);
        return NULL;
    }

    // Copy chars until the next whitespace or EOF, always keeping one byte
    // spare for the null terminator.
    size_t len = 0;
    for (; c != EOF && !isspace(c); c = fgetc(fptr)) {
        if (len + 1 >= capacity) {
            const size_t bigger = capacity * 2;
            // Use a temporary so that ret is not lost if realloc fails.
            char *grown = realloc(ret, bigger);
            if (grown == NULL) {
                free(ret);
                return NULL;
            }
            ret = grown;
            capacity = bigger;
        }
        ret[len++] = (char)c;
    }
    ret[len] = '\0';

    // Trim the allocation to the word plus its null terminator. If the shrink
    // fails the original buffer is still valid, so return that instead.
    char *trimmed = realloc(ret, len + 1);
    return trimmed != NULL ? trimmed : ret;
}

// Returns true if a word that normalises to `string` occurs at or after the
// current position of fptr, false otherwise.
// The stream is read forward with getNextWord and then moved back to the
// position it had on entry, so the caller's read position is preserved.
bool exists_after(FILE *const fptr, char *const string) {
    // Remember where the caller was so the scan below can be undone.
    const long pos = ftell(fptr);
    bool exists = false;
    char *word = NULL;
    while (!exists && (word = getNextWord(fptr)) != NULL) {
        word = normaliseWord(word);
        exists = (strcmp(word, string) == 0);
        // Each word is heap-allocated by getNextWord and must be released
        // here, otherwise every scanned word leaks.
        // NOTE(review): assumes normaliseWord returns the same owning heap
        // pointer it was given (or another one safe to free) - confirm.
        free(word);
    }
    // Drop any EOF/error state picked up while scanning ahead.
    clearerr(fptr);
    // Restore the saved position. If it was never obtained or cannot be
    // restored, fall back to the start of the stream.
    if (pos < 0 || fseek(fptr, pos, SEEK_SET) != 0) {
        rewind(fptr);
    }
    return exists;
}

// Writes the inverted index rooted at `node` to fptr using an in-order
// traversal (left subtree, this node, right subtree).
// Each node produces one line: the word, then a "filename (tf) " entry for
// every element of its file list, then a newline.
// A NULL node writes nothing.
void writeInvertedIndex(FILE *const fptr, struct InvertedIndexNode *node) {
    if (node != NULL) {
        // Everything in the left subtree is written before this word.
        writeInvertedIndex(fptr, node->left);

        fprintf(fptr, "%s ", node->word);
        struct FileListNode *entry = node->fileList;
        while (entry != NULL) {
            fprintf(fptr, "%s (%lf) ", entry->filename, entry->tf);
            entry = entry->next;
        }
        fprintf(fptr, "\n");

        // Everything in the right subtree is written after this word.
        writeInvertedIndex(fptr, node->right);
    }
}