| author | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2025-02-13 18:00:17 +1100 |
|---|---|---|
| committer | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2025-02-13 18:00:17 +1100 |
| commit | 98cef5e9a772602d42acfcf233838c760424db9a (patch) | |
| tree | 5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp6771/1/src/word_ladder.cpp | |
initial commit
Diffstat (limited to 'comp6771/1/src/word_ladder.cpp')
| -rw-r--r-- | comp6771/1/src/word_ladder.cpp | 157 |
1 file changed, 157 insertions, 0 deletions
```diff
diff --git a/comp6771/1/src/word_ladder.cpp b/comp6771/1/src/word_ladder.cpp
new file mode 100644
index 0000000..22b3223
--- /dev/null
+++ b/comp6771/1/src/word_ladder.cpp
@@ -0,0 +1,157 @@
+#include "word_ladder.h"
+
+using word_t = std::shared_ptr<std::string>;
+using words_t = std::vector<word_t>;
+
+namespace {
+
+// Returns valid words which neighbour current by one letter.
+// Performs caching on neighbours so future lookups of the same arguments are
+// not computed unnecessarily.
+auto get_neighbours(const std::string& current, const std::string& from,
+                    const std::unordered_set<std::string>& readable,
+                    std::unordered_map<std::string, words_t>& neighbours)
+    -> std::unordered_map<std::string, words_t>::iterator {
+    if (const auto it = neighbours.find(current); it != std::end(neighbours)) {
+        return it;
+    }
+
+    words_t lookup_neighbours{};
+    for (std::size_t pos = 0; pos < std::size(from); ++pos) {
+        std::string potential = current;
+
+        for (char c = 'a'; c <= 'z'; ++c) {
+            potential[pos] = c;
+
+            if (!readable.contains(potential)) {
+                continue;
+            }
+
+            lookup_neighbours.emplace_back(
+                std::make_shared<std::string>(potential));
+        }
+    }
+    return neighbours.emplace(current, std::move(lookup_neighbours)).first;
+}
+
+// Performs a breadth first search using only words found in lexicon. Words are
+// considered connected if they differ by one character only.
+auto do_bfs(const std::string& to, const std::string& from,
+            const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+    std::vector<std::vector<std::string>> results{};
+
+    // Optimisation where we copy strings from the lexicon into our own set. By
+    // reducing the size of the hashmap to words of only size from/to, we get
+    // faster lookup. Also we can remove words and avoid unnecessary neighbour
+    // calculations later by ignoring them (treating them as if they did not
+    // exist in the lexicon entirely avoids a second hashing).
+    std::unordered_set<std::string> readable = std::accumulate(
+        std::begin(lexicon), std::end(lexicon),
+        std::unordered_set<std::string>{}, [&](auto&& l, const auto& word) {
+            if (std::size(word) == std::size(to)) {
+                l.emplace(word);
+            }
+            return l;
+        });
+
+    std::unordered_map<std::string, words_t> neighbours{};
+
+    std::queue<words_t> paths_queue{
+        {words_t{std::make_shared<std::string>(from)}}};
+
+    while (!paths_queue.empty()) {
+        const words_t path = paths_queue.front();
+        const std::string& current = *path.back();
+        paths_queue.pop();
+
+        readable.erase(current);
+
+        if (const auto it = std::begin(results);
+            it != std::end(results) && std::size(path) >= std::size(*it)) {
+            continue;
+        }
+
+        const auto lookup = ::get_neighbours(current, to, readable, neighbours);
+
+        for (const auto& neighbour : lookup->second) {
+            const auto concatenate_path = [&]() {
+                words_t new_path{};
+                // Vector optimisation where we allocate exactly as needed,
+                // reducing syscalls asking for more memory.
+                new_path.reserve(std::size(path) + 1);
+                std::copy(std::begin(path), std::end(path),
+                          std::back_inserter(new_path));
+                new_path.emplace_back(neighbour);
+                return new_path;
+            };
+
+            if (*neighbour == to) {
+                const std::vector<word_t> result_path = concatenate_path();
+                results.emplace_back(std::accumulate(
+                    std::rbegin(result_path), std::rend(result_path),
+                    std::vector<std::string>{}, [](auto&& v, const auto& ptr) {
+                        v.emplace_back(*ptr);
+                        return v;
+                    }));
+                continue;
+            }
+
+            if (!readable.contains(*neighbour)) {
+                continue;
+            }
+
+            paths_queue.push(concatenate_path());
+        }
+    }
+
+    return results;
+}
+
+} // namespace
+
+namespace word_ladder {
+
+auto read_lexicon(const std::string& path) -> std::unordered_set<std::string> {
+    std::ifstream in{path};
+    if (!in.is_open()) {
+        throw std::runtime_error("unable to open file '" + path + "'");
+    }
+
+    std::unordered_set<std::string> words{};
+
+    std::stringstream contents{std::string{std::istreambuf_iterator<char>{in},
+                                           std::istreambuf_iterator<char>{}}};
+    for (std::string line; std::getline(contents, line);) {
+        if (line.empty()) {
+            continue;
+        }
+
+        words.emplace(std::move(line));
+    }
+
+    return words;
+}
+
+auto generate(const std::string& from, const std::string& to,
+              const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+    std::vector<std::vector<std::string>> results = ::do_bfs(from, to, lexicon);
+
+    // Sorts results lexicographically (avoiding large string concatenation).
+    std::sort(std::begin(results), std::end(results),
+              [](const auto& a, const auto& b) {
+                  for (std::size_t i = 0; i < std::size(a); ++i) {
+                      const auto result = a[i].compare(b[i]);
+                      if (result == 0) {
+                          continue;
+                      }
+                      return result < 0;
+                  }
+                  return false;
+              });
+
+    return results;
+}
+
+} // namespace word_ladder
\ No newline at end of file
```
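
For context, a minimal driver for the two public entry points added here might look like the sketch below. The lexicon path `english.txt` and the word pair `work`/`play` are placeholders, and `word_ladder.h` is assumed to declare `read_lexicon` and `generate` with the signatures shown in the diff.

```cpp
// Hypothetical usage sketch; "english.txt", "work" and "play" are
// placeholders, not part of the commit above.
#include "word_ladder.h"

#include <iostream>
#include <string>
#include <unordered_set>

int main() {
    // read_lexicon throws std::runtime_error if the file cannot be opened.
    const std::unordered_set<std::string> lexicon =
        word_ladder::read_lexicon("english.txt");

    // Each ladder is a shortest sequence of one-letter steps from "work"
    // to "play"; generate() returns the ladders sorted lexicographically.
    for (const auto& ladder : word_ladder::generate("work", "play", lexicon)) {
        for (const auto& word : ladder) {
            std::cout << word << ' ';
        }
        std::cout << '\n';
    }
}
```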

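The core trick in `get_neighbours` is generating candidates by mutating one character position at a time and probing the hash set, rather than scanning the whole lexicon per word. A self-contained sketch of that technique, without the caching and `shared_ptr` plumbing of the commit, might look like this (assuming lowercase ASCII words and C++20 for `contains`):

```cpp
#include <iterator>
#include <string>
#include <unordered_set>
#include <vector>

// Standalone sketch of one-letter-neighbour generation: roughly
// 26 * word-length hash lookups instead of a full lexicon scan.
auto one_letter_neighbours(const std::string& word,
                           const std::unordered_set<std::string>& lexicon)
    -> std::vector<std::string> {
    std::vector<std::string> out{};
    std::string candidate = word;
    for (std::size_t pos = 0; pos < std::size(word); ++pos) {
        const char original = candidate[pos];
        for (char c = 'a'; c <= 'z'; ++c) {
            if (c == original) {
                continue; // a word is not its own neighbour
            }
            candidate[pos] = c;
            if (lexicon.contains(candidate)) {
                out.push_back(candidate);
            }
        }
        candidate[pos] = original; // restore before mutating the next position
    }
    return out;
}
```

Unlike this sketch, the committed `get_neighbours` does not skip `c == original`; it instead relies on `do_bfs` erasing `current` from `readable` before the neighbour lookup, so a word is never recorded as its own neighbour.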