1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
#include "fileList.h"
// Returns a newly malloc'ed file list node, with word initialised to word
// and tf set to 1.
struct FileListNode *createFileListNode(char *const word) {
struct FileListNode *ret = calloc(1, sizeof(struct FileListNode));
ret->filename = word;
ret->tf = 1;
return ret;
}
// Inserts a fileListNode into an InvertedIndexNode->fileList based off a
// string in alphabetical order. Increments tf by 1 already exists.
// Returns new or preexisting fileListNode *.
struct FileListNode *insertFileList(struct InvertedIndexNode *node,
char *const filename) {
struct FileListNode *ret = createFileListNode(filename);
// Empty case.
if (!node->fileList) {
node->fileList = ret;
return ret;
}
// Front case.
int comparison = strcmp(filename, node->fileList->filename);
if (comparison < 0) {
ret->next = node->fileList;
node->fileList = ret;
return ret;
} else if (comparison == 0) {
++node->fileList->tf;
free(ret);
return node->fileList;
}
// Middle case.
struct FileListNode *last = node->fileList;
for (struct FileListNode *i = node->fileList->next; i; i = i->next) {
int comp = strcmp(filename, i->filename);
if (comp < 0) {
last->next = ret;
ret->next = i;
return ret;
} else if (comp == 0) {
++i->tf;
free(ret);
return i;
}
last = i;
}
// Back case.
last->next = ret;
return ret;
}
// Inserts a fileListNode into a list via ordering of tf values. If the tf
// values are equal, inserts based off the filename in ascending order.
// Returns the new head of the list.
struct FileListNode *insertFileOrderedList(struct FileListNode *head,
struct FileListNode *insert,
double mult) {
struct FileListNode *ret = calloc(1, sizeof(struct FileListNode));
ret->filename = strdup(insert->filename);
ret->tf = insert->tf * mult;
// Empty case.
if (!head) {
return ret;
}
struct FileListNode *prev_duplicate = NULL;
struct FileListNode *duplicate = NULL;
for (struct FileListNode *i = head; i; i = i->next) {
if (!strcmp(ret->filename, i->filename)) {
duplicate = i;
break;
}
prev_duplicate = i;
}
if (duplicate != NULL) {
if (prev_duplicate != NULL) {
prev_duplicate->next = duplicate->next;
} else {
head = duplicate->next;
}
duplicate->tf += ret->tf;
free(ret->filename);
free(ret);
return insertFileOrderedList(head, duplicate, mult);
}
struct FileListNode *last = NULL;
for (struct FileListNode *i = head; i; i = i->next) {
const double scmp = strcmp(ret->filename, i->filename);
if (ret->tf > i->tf) {
break;
} else if (insert->tf == i->tf) {
if (scmp <= 0) {
break;
} else {
last = i;
}
} else {
last = i;
}
}
// Front case.
if (!last) {
ret->next = head;
return ret;
}
// Middle or end case.
ret->next = last->next;
last->next = ret;
return head;
}
|