1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#include "io.h"
// Reads the next whitespace-delimited word from fptr.
//
// Leading whitespace (as classified by isspace) is skipped, then characters
// are collected up to the next whitespace character or EOF. The whitespace
// character that terminates the word is consumed from the stream.
//
// Returns a malloc'ed, NUL-terminated string which the caller must free, or
// NULL if EOF is reached before any word character is found or if memory
// cannot be allocated.
//
// The buffer starts at 101 bytes (100 chars + '\0') and grows on demand, so
// a word longer than 100 characters no longer overruns the allocation.
char *getNextWord(FILE *const fptr) {
    size_t capacity = 101;
    char *ret = malloc(capacity);
    if (ret == NULL) {
        return NULL;
    }

    // Skip leading whitespace; afterwards c is the first word char or EOF.
    int c = fgetc(fptr);
    while (c != EOF && isspace(c)) {
        c = fgetc(fptr);
    }

    // Copy characters until whitespace or EOF ends the word.
    size_t len = 0;
    for (; c != EOF && !isspace(c); c = fgetc(fptr)) {
        // Always keep one byte spare for the terminating '\0'.
        if (len + 1 >= capacity) {
            const size_t grown = capacity * 2;
            char *tmp = realloc(ret, grown);
            if (tmp == NULL) {
                free(ret);
                return NULL;
            }
            ret = tmp;
            capacity = grown;
        }
        ret[len++] = (char)c;
    }

    // Nothing was read: only whitespace (or nothing at all) before EOF.
    if (len == 0) {
        free(ret);
        return NULL;
    }
    ret[len] = '\0';

    // Shrink to exactly len chars + terminator. If the shrink fails the
    // original buffer is still valid, so hand that back instead of leaking it.
    char *fitted = realloc(ret, len + 1);
    return fitted != NULL ? fitted : ret;
}
// Reports whether `string` occurs as a word anywhere between the current
// position of fptr and the end of the file.
//
// Each word is read with getNextWord, passed through normaliseWord and then
// compared to `string` with strcmp, so `string` must match the normalised
// form exactly.
//
// The stream is moved back to the position it had on entry before returning.
// If that position could not be determined (ftell failed), the stream is left
// at the start of the file.
bool exists_after(FILE *const fptr, char *const string) {
    // Remember where the caller was so the scan can be undone afterwards.
    const long pos = ftell(fptr);

    bool exists = false;
    char *word = NULL;
    while (!exists && (word = getNextWord(fptr)) != NULL) {
        // NOTE(review): assumes normaliseWord returns the pointer that owns
        // the buffer allocated by getNextWord (i.e. it normalises in place or
        // hands back a replacement allocation) -- confirm against its
        // definition, since that pointer is what gets freed below.
        word = normaliseWord(word);
        exists = (strcmp(word, string) == 0);
        // Each word is heap-allocated; release it once compared.
        free(word);
    }

    // rewind also clears the stream's error indicator before seeking back.
    rewind(fptr);
    if (pos >= 0) {
        fseek(fptr, pos, SEEK_SET);
    }
    return exists;
}
// Writes an inverted index to fptr using an in-order walk of the tree rooted
// at `node` (left subtree, then the node itself, then the right subtree).
//
// Each node produces one line: the node's word followed by a space, then a
// "filename (tf) " entry for every element of its file list, then a newline.
void writeInvertedIndex(FILE *const fptr, struct InvertedIndexNode *node) {
    // The left subtree is handled recursively; the right subtree is handled
    // by advancing `node`, which visits nodes in the same order as recursing.
    while (node != NULL) {
        writeInvertedIndex(fptr, node->left);

        fprintf(fptr, "%s ", node->word);
        struct FileListNode *entry = node->fileList;
        while (entry != NULL) {
            fprintf(fptr, "%s (%lf) ", entry->filename, entry->tf);
            entry = entry->next;
        }
        fprintf(fptr, "\n");

        node = node->right;
    }
}
|