#include "io.h"

// Reads the next whitespace-delimited word from fptr.
//
// Leading whitespace is skipped, then characters are collected up to (and
// not including) the next whitespace character or EOF. The whitespace
// character that terminates the word is consumed from the stream.
//
// Returns a malloc'ed, NUL-terminated string that the caller must free, or
// NULL if there is no further word before EOF or if allocation fails.
char *getNextWord(FILE *const fptr) {
    // Words are expected to be at most 100 chars plus the terminator, so
    // that is the starting capacity. Longer words grow the buffer instead of
    // writing past its end.
    enum { INITIAL_CAPACITY = 101 };

    // Skip leading whitespace; c ends up as the first char of the word.
    int c = fgetc(fptr);
    while (c != EOF && isspace(c)) {
        c = fgetc(fptr);
    }
    if (c == EOF) {
        return NULL;
    }

    size_t capacity = INITIAL_CAPACITY;
    char *word = malloc(capacity);
    if (word == NULL) {
        return NULL;
    }

    size_t length = 0;
    for (; c != EOF && !isspace(c); c = fgetc(fptr)) {
        // Always keep one byte spare for the NUL terminator.
        if (length + 1 >= capacity) {
            size_t new_capacity = capacity * 2;
            // new_capacity <= capacity means the multiplication wrapped.
            char *grown =
                new_capacity > capacity ? realloc(word, new_capacity) : NULL;
            if (grown == NULL) {
                free(word);
                return NULL;
            }
            word = grown;
            capacity = new_capacity;
        }
        word[length++] = (char)c;
    }
    word[length] = '\0';

    // Shrink to the exact size: length chars plus the NUL terminator. If the
    // shrink fails the original buffer is still valid, so return that rather
    // than leaking it.
    char *trimmed = realloc(word, length + 1);
    return trimmed != NULL ? trimmed : word;
}

// Returns true if a word equal to `string` (after normaliseWord) occurs
// between the current position of fptr and the end of the file.
//
// The stream position recorded on entry is restored before returning.
bool exists_after(FILE *const fptr, char *const string) {
    // Remember where we started so the caller's position can be restored.
    const long start = ftell(fptr);

    bool found = false;
    char *word = NULL;
    while (!found && (word = getNextWord(fptr)) != NULL) {
        // NOTE(review): assumes normaliseWord returns a pointer that owns the
        // word's storage (e.g. it normalises in place and returns its
        // argument) and never NULL - confirm against its definition.
        word = normaliseWord(word);
        found = strcmp(word, string) == 0;
        free(word);
    }

    // rewind also clears the EOF/error indicators set by the scan above.
    rewind(fptr);
    if (start >= 0) {
        fseek(fptr, start, SEEK_SET);
    }
    return found;
}

// Writes the inverted index rooted at `node` to fptr using an in-order
// traversal (left subtree, node, right subtree).
//
// Each node produces one line: the word, then "filename (tf) " for every
// entry in the node's file list, then a newline. A NULL node writes nothing.
void writeInvertedIndex(FILE *const fptr, struct InvertedIndexNode *node) {
    if (node == NULL) {
        return;
    }

    writeInvertedIndex(fptr, node->left);

    fprintf(fptr, "%s ", node->word);
    struct FileListNode *entry = node->fileList;
    while (entry != NULL) {
        fprintf(fptr, "%s (%lf) ", entry->filename, entry->tf);
        entry = entry->next;
    }
    fprintf(fptr, "\n");

    writeInvertedIndex(fptr, node->right);
}