aboutsummaryrefslogtreecommitdiff
path: root/comp6771/1/src/word_ladder.cpp
blob: 22b322368a54a97875e6ba9ba5e81a49a6641520 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#include "word_ladder.h"

using word_t = std::shared_ptr<std::string>;
using words_t = std::vector<word_t>;

namespace {

// Returns valid words which neighbour current by one letter.
// Performs caching on neighbours so future lookups of the same arguments are
// not computed unnecessarily.
//
// `from` contributes only its length (every word in play shares it); only
// words present in `readable` are emitted. The returned iterator points at
// the cached (word, neighbour-list) entry inside `neighbours` — note it is
// invalidated if the map later rehashes, so use it before further inserts.
auto get_neighbours(const std::string& current, const std::string& from,
                    const std::unordered_set<std::string>& readable,
                    std::unordered_map<std::string,
                                       std::vector<std::shared_ptr<std::string>>>& neighbours)
    -> std::unordered_map<std::string,
                          std::vector<std::shared_ptr<std::string>>>::iterator {
    // Cache hit: this word's neighbour list was computed on an earlier call.
    if (const auto it = neighbours.find(current); it != std::end(neighbours)) {
        return it;
    }

    std::vector<std::shared_ptr<std::string>> lookup_neighbours{};
    // Mutate a single buffer rather than copying `current` once per position;
    // the changed character is restored before moving to the next position.
    std::string potential = current;
    for (std::size_t pos = 0; pos < std::size(from); ++pos) {
        const char original = potential[pos];

        for (char c = 'a'; c <= 'z'; ++c) {
            potential[pos] = c;

            if (readable.count(potential) == 0) {
                continue;
            }

            lookup_neighbours.emplace_back(
                std::make_shared<std::string>(potential));
        }

        potential[pos] = original;
    }
    return neighbours.emplace(current, std::move(lookup_neighbours)).first;
}

// Performs a breadth first search using only words found in lexicon. Words are
// considered connected if they differ by one character only.
//
// NOTE(review): the search starts at `from` and finishes at `to`, and each
// completed path is reversed (rbegin/rend below) before being stored, so the
// returned ladders run to -> ... -> from in this function's own naming.
// generate() passes its arguments swapped into (to, from) to obtain ladders
// in its caller's from -> to order — valid because one-letter adjacency is
// symmetric. All returned ladders have the same (shortest) length thanks to
// the pruning check inside the loop.
auto do_bfs(const std::string& to, const std::string& from,
            const std::unordered_set<std::string>& lexicon)
    -> std::vector<std::vector<std::string>> {
    std::vector<std::vector<std::string>> results{};

    // Optimisation where we copy strings from the lexicon into our own set. By
    // reducing the size of the hashmap to words of only size from/to, we get
    // faster lookup. Also we can remove words and avoid unnecessary neighbour
    // calculations later by ignoring them (treating them as if they did not
    // exist in the lexicon entirely avoids a second hashing).
    std::unordered_set<std::string> readable = std::accumulate(
        std::begin(lexicon), std::end(lexicon),
        std::unordered_set<std::string>{}, [&](auto&& l, const auto& word) {
            if (std::size(word) == std::size(to)) {
                l.emplace(word);
            }
            return l;
        });

    // Per-word neighbour cache shared across the whole search.
    std::unordered_map<std::string, words_t> neighbours{};

    // Each queue entry is a full path; the BFS starts from `from` alone.
    std::queue<words_t> paths_queue{
        {words_t{std::make_shared<std::string>(from)}}};

    while (!paths_queue.empty()) {
        const words_t path = paths_queue.front();
        const std::string& current = *path.back();
        paths_queue.pop();

        // Erase only when a word is dequeued (not when enqueued): other
        // same-length paths enqueued earlier may still pass through it, so
        // every distinct shortest ladder is preserved.
        readable.erase(current);

        // Once any result exists it has the shortest possible length (queue
        // is processed in level order). Completing a path appends one more
        // word, so a path already at least as long as a result can only
        // produce a longer ladder — drop it. Ties (path one shorter than the
        // result) are kept.
        if (const auto it = std::begin(results);
            it != std::end(results) && std::size(path) >= std::size(*it)) {
            continue;
        }

        // `to` is passed only for its length inside get_neighbours.
        const auto lookup = ::get_neighbours(current, to, readable, neighbours);

        for (const auto& neighbour : lookup->second) {
            const auto concatenate_path = [&]() {
                words_t new_path{};
                // Vector optimisation where we allocate exactly as needed,
                // reducing syscalls asking for more memory.
                new_path.reserve(std::size(path) + 1);
                std::copy(std::begin(path), std::end(path),
                          std::back_inserter(new_path));
                new_path.emplace_back(neighbour);
                return new_path;
            };

            if (*neighbour == to) {
                // Reached the target: dereference the shared words in reverse
                // so the stored ladder reads to -> ... -> from (see NOTE).
                const std::vector<word_t> result_path = concatenate_path();
                results.emplace_back(std::accumulate(
                    std::rbegin(result_path), std::rend(result_path),
                    std::vector<std::string>{}, [](auto&& v, const auto& ptr) {
                        v.emplace_back(*ptr);
                        return v;
                    }));
                continue;
            }

            // The cached neighbour list may be stale (computed against a
            // larger `readable`), so re-check membership before enqueueing.
            if (!readable.contains(*neighbour)) {
                continue;
            }

            paths_queue.push(concatenate_path());
        }
    }

    return results;
}

}  // namespace

namespace word_ladder {

// Reads one word per line from the file at `path` into a set.
//
// Blank lines are skipped, and a trailing carriage return is stripped so a
// CRLF-encoded lexicon ("word\r\n") yields the same entries as an LF one —
// previously the '\r' was kept, making every lookup against the set fail.
//
// Throws std::runtime_error if the file cannot be opened.
auto read_lexicon(const std::string& path) -> std::unordered_set<std::string> {
    std::ifstream in{path};
    if (!in.is_open()) {
        throw std::runtime_error("unable to open file '" + path + "'");
    }

    std::unordered_set<std::string> words{};

    // Read lines straight from the stream; buffering the whole file into a
    // stringstream first (as before) doubled peak memory for no benefit.
    for (std::string line; std::getline(in, line);) {
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();  // tolerate Windows CRLF line endings
        }

        if (line.empty()) {
            continue;
        }

        words.emplace(std::move(line));
    }

    return words;
}

// Generates every shortest word ladder from `from` to `to` using `lexicon`,
// sorted lexicographically. Each ladder includes both endpoints; an empty
// vector is returned when no ladder exists.
//
// ::do_bfs takes (to, from): passing (from, to) here makes it search backward
// from the destination, and its internal path reversal yields ladders in this
// function's from -> to order.
auto generate(const std::string& from, const std::string& to,
              const std::unordered_set<std::string>& lexicon)
    -> std::vector<std::vector<std::string>> {
    std::vector<std::vector<std::string>> results = ::do_bfs(from, to, lexicon);

    // std::vector's built-in operator< already compares element-wise
    // (lexicographically, without concatenating strings), so the default sort
    // is identical to the previous hand-rolled comparator for the equal-length
    // ladders do_bfs produces — and, unlike that comparator, it never indexes
    // past the shorter vector if lengths were ever to differ.
    std::sort(std::begin(results), std::end(results));

    return results;
}

}  // namespace word_ladder