aboutsummaryrefslogtreecommitdiff
path: root/comp2521/tf_idf/fileList.c
blob: 53dc0f97a6aeaa34cf12eee652e515bb586842ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include "fileList.h"

// Returns a newly malloc'ed file list node, with word initialised to word
// and tf set to 1.
struct FileListNode *createFileListNode(char *const word) {
    struct FileListNode *ret = calloc(1, sizeof(struct FileListNode));
    ret->filename = word;
    ret->tf = 1;
    return ret;
}

// Inserts a fileListNode into an InvertedIndexNode->fileList based off a
// string in alphabetical order. Increments tf by 1 already exists.
// Returns new or preexisting fileListNode *.
struct FileListNode *insertFileList(struct InvertedIndexNode *node,
                                    char *const filename) {
    struct FileListNode *ret = createFileListNode(filename);
    // Empty case.
    if (!node->fileList) {
        node->fileList = ret;
        return ret;
    }
    // Front case.
    int comparison = strcmp(filename, node->fileList->filename);
    if (comparison < 0) {
        ret->next = node->fileList;
        node->fileList = ret;
        return ret;
    } else if (comparison == 0) {
        ++node->fileList->tf;
        free(ret);
        return node->fileList;
    }
    // Middle case.
    struct FileListNode *last = node->fileList;
    for (struct FileListNode *i = node->fileList->next; i; i = i->next) {
        int comp = strcmp(filename, i->filename);
        if (comp < 0) {
            last->next = ret;
            ret->next = i;
            return ret;
        } else if (comp == 0) {
            ++i->tf;
            free(ret);
            return i;
        }
        last = i;
    }
    // Back case.
    last->next = ret;
    return ret;
}

// Inserts a fileListNode into a list via ordering of tf values. If the tf
// values are equal, inserts based off the filename in ascending order.
// Returns the new head of the list.
struct FileListNode *insertFileOrderedList(struct FileListNode *head,
                                           struct FileListNode *insert,
                                           double mult) {
    struct FileListNode *ret = calloc(1, sizeof(struct FileListNode));
    ret->filename = strdup(insert->filename);
    ret->tf = insert->tf * mult;
    // Empty case.
    if (!head) {
        return ret;
    }

    struct FileListNode *prev_duplicate = NULL;
    struct FileListNode *duplicate = NULL;
    for (struct FileListNode *i = head; i; i = i->next) {
        if (!strcmp(ret->filename, i->filename)) {
            duplicate = i;
            break;
        }
        prev_duplicate = i;
    }

    if (duplicate != NULL) {
        if (prev_duplicate != NULL) {
            prev_duplicate->next = duplicate->next;
        } else {
            head = duplicate->next;
        }
        duplicate->tf += ret->tf;
        free(ret->filename);
        free(ret);
        return insertFileOrderedList(head, duplicate, mult);
    }

    struct FileListNode *last = NULL;
    for (struct FileListNode *i = head; i; i = i->next) {
        const double scmp = strcmp(ret->filename, i->filename);

        if (ret->tf > i->tf) {
            break;
        } else if (insert->tf == i->tf) {
            if (scmp <= 0) {
                break;
            } else {
                last = i;
            }
        } else {
            last = i;
        }
    }

    // Front case.
    if (!last) {
        ret->next = head;
        return ret;
    }

    // Middle or end case.
    ret->next = last->next;
    last->next = ret;

    return head;
}