aboutsummaryrefslogtreecommitdiff
path: root/comp2521/tf_idf/io.c
diff options
context:
space:
mode:
Diffstat (limited to 'comp2521/tf_idf/io.c')
-rw-r--r--comp2521/tf_idf/io.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/comp2521/tf_idf/io.c b/comp2521/tf_idf/io.c
new file mode 100644
index 0000000..566b1b9
--- /dev/null
+++ b/comp2521/tf_idf/io.c
@@ -0,0 +1,69 @@
+#include "io.h"
+
+// Reads the next whitespace-delimited word from fptr.
+//
+// Leading whitespace (as classified by isspace) is skipped, then characters
+// are collected until the next whitespace character or EOF. The whitespace
+// character that ends the word is consumed from the stream.
+//
+// Returns a malloc'ed, NUL-terminated string that the caller must free(), or
+// NULL if there is no further word in the file or an allocation fails.
+// The buffer starts with room for a 100 character word plus its terminator
+// and grows on demand, so longer words no longer overflow it.
+char *getNextWord(FILE *const fptr) {
+    // Skip leading whitespace; c ends up as the first word char or EOF.
+    int c = fgetc(fptr);
+    while (c != EOF && isspace(c)) {
+        c = fgetc(fptr);
+    }
+    if (c == EOF) {
+        // Nothing but whitespace was left, so there is no word to return.
+        return NULL;
+    }
+
+    // 100 chars + '\0' covers the expected maximum word length.
+    size_t capacity = 101;
+    char *ret = malloc(capacity);
+    if (ret == NULL) {
+        return NULL;
+    }
+
+    size_t len = 0;
+    for (; c != EOF && !isspace(c); c = fgetc(fptr)) {
+        // Always keep one spare byte for the terminator.
+        if (len + 1 == capacity) {
+            char *grown = realloc(ret, capacity * 2);
+            if (grown == NULL) {
+                free(ret);
+                return NULL;
+            }
+            ret = grown;
+            capacity *= 2;
+        }
+        ret[len++] = (char)c;
+    }
+    ret[len] = '\0';
+
+    // Trim to len chars + 1 for the terminator. If the shrink fails the
+    // original buffer is still valid, so hand that back instead of leaking it.
+    char *trimmed = realloc(ret, len + 1);
+    return trimmed != NULL ? trimmed : ret;
+}
+
+// Returns true if `string` matches a word found between the current position
+// of fptr and the end of the file.
+//
+// Each word read with getNextWord() is passed through normaliseWord() before
+// it is compared with strcmp(). Every word is freed after the comparison.
+//
+// The position fptr had on entry is restored before returning. If that
+// position could not be read or restored, the stream is rewound to the start.
+bool exists_after(FILE *const fptr, char *const string) {
+    // Save the position so it can be restored when the function ends.
+    const long pos = ftell(fptr);
+    bool exists = false;
+    char *word = NULL;
+    while (!exists && (word = getNextWord(fptr)) != NULL) {
+        // NOTE(review): assumes normaliseWord() returns a pointer that is
+        // safe to free (the same buffer, edited in place) - confirm.
+        word = normaliseWord(word);
+        exists = strcmp(word, string) == 0;
+        free(word);
+    }
+    // clearerr() resets the error indicator and a successful fseek() resets
+    // the EOF indicator, which together cover what rewind() did here.
+    clearerr(fptr);
+    if (pos < 0 || fseek(fptr, pos, SEEK_SET) != 0) {
+        rewind(fptr);
+    }
+    return exists;
+}
+
+// Writes the tree rooted at node to fptr using an in-order traversal.
+// Each node produces one line: its word, then "filename (tf) " for every
+// entry in its file list, then a newline. A NULL node writes nothing.
+void writeInvertedIndex(FILE *const fptr, struct InvertedIndexNode *node) {
+    // The left subtree is handled recursively; the right subtree is handled
+    // by advancing node, which visits nodes in the same order as recursing.
+    for (; node != NULL; node = node->right) {
+        writeInvertedIndex(fptr, node->left);
+        fprintf(fptr, "%s ", node->word);
+        struct FileListNode *entry = node->fileList;
+        while (entry != NULL) {
+            fprintf(fptr, "%s (%lf) ", entry->filename, entry->tf);
+            entry = entry->next;
+        }
+        fprintf(fptr, "\n");
+    }
+}