aboutsummaryrefslogtreecommitdiff
path: root/comp2521/tf_idf/fileList.c
diff options
context:
space:
mode:
authorNicolas James <Eele1Ephe7uZahRie@tutanota.com>2025-02-13 18:00:17 +1100
committerNicolas James <Eele1Ephe7uZahRie@tutanota.com>2025-02-13 18:00:17 +1100
commit98cef5e9a772602d42acfcf233838c760424db9a (patch)
tree5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp2521/tf_idf/fileList.c
initial commit
Diffstat (limited to 'comp2521/tf_idf/fileList.c')
-rw-r--r--comp2521/tf_idf/fileList.c118
1 files changed, 118 insertions, 0 deletions
diff --git a/comp2521/tf_idf/fileList.c b/comp2521/tf_idf/fileList.c
new file mode 100644
index 0000000..53dc0f9
--- /dev/null
+++ b/comp2521/tf_idf/fileList.c
@@ -0,0 +1,118 @@
+#include "fileList.h"
+
+// Returns a newly malloc'ed file list node, with word initialised to word
+// and tf set to 1.
+struct FileListNode *createFileListNode(char *const word) {
+ struct FileListNode *ret = calloc(1, sizeof(struct FileListNode));
+ ret->filename = word;
+ ret->tf = 1;
+ return ret;
+}
+
+// Inserts a fileListNode into an InvertedIndexNode->fileList based off a
+// string in alphabetical order. Increments tf by 1 already exists.
+// Returns new or preexisting fileListNode *.
+struct FileListNode *insertFileList(struct InvertedIndexNode *node,
+ char *const filename) {
+ struct FileListNode *ret = createFileListNode(filename);
+ // Empty case.
+ if (!node->fileList) {
+ node->fileList = ret;
+ return ret;
+ }
+ // Front case.
+ int comparison = strcmp(filename, node->fileList->filename);
+ if (comparison < 0) {
+ ret->next = node->fileList;
+ node->fileList = ret;
+ return ret;
+ } else if (comparison == 0) {
+ ++node->fileList->tf;
+ free(ret);
+ return node->fileList;
+ }
+ // Middle case.
+ struct FileListNode *last = node->fileList;
+ for (struct FileListNode *i = node->fileList->next; i; i = i->next) {
+ int comp = strcmp(filename, i->filename);
+ if (comp < 0) {
+ last->next = ret;
+ ret->next = i;
+ return ret;
+ } else if (comp == 0) {
+ ++i->tf;
+ free(ret);
+ return i;
+ }
+ last = i;
+ }
+ // Back case.
+ last->next = ret;
+ return ret;
+}
+
+// Inserts a fileListNode into a list via ordering of tf values. If the tf
+// values are equal, inserts based off the filename in ascending order.
+// Returns the new head of the list.
+struct FileListNode *insertFileOrderedList(struct FileListNode *head,
+ struct FileListNode *insert,
+ double mult) {
+ struct FileListNode *ret = calloc(1, sizeof(struct FileListNode));
+ ret->filename = strdup(insert->filename);
+ ret->tf = insert->tf * mult;
+ // Empty case.
+ if (!head) {
+ return ret;
+ }
+
+ struct FileListNode *prev_duplicate = NULL;
+ struct FileListNode *duplicate = NULL;
+ for (struct FileListNode *i = head; i; i = i->next) {
+ if (!strcmp(ret->filename, i->filename)) {
+ duplicate = i;
+ break;
+ }
+ prev_duplicate = i;
+ }
+
+ if (duplicate != NULL) {
+ if (prev_duplicate != NULL) {
+ prev_duplicate->next = duplicate->next;
+ } else {
+ head = duplicate->next;
+ }
+ duplicate->tf += ret->tf;
+ free(ret->filename);
+ free(ret);
+ return insertFileOrderedList(head, duplicate, mult);
+ }
+
+ struct FileListNode *last = NULL;
+ for (struct FileListNode *i = head; i; i = i->next) {
+ const double scmp = strcmp(ret->filename, i->filename);
+
+ if (ret->tf > i->tf) {
+ break;
+ } else if (insert->tf == i->tf) {
+ if (scmp <= 0) {
+ break;
+ } else {
+ last = i;
+ }
+ } else {
+ last = i;
+ }
+ }
+
+ // Front case.
+ if (!last) {
+ ret->next = head;
+ return ret;
+ }
+
+ // Middle or end case.
+ ret->next = last->next;
+ last->next = ret;
+
+ return head;
+}