author    Nicolas James <Eele1Ephe7uZahRie@tutanota.com>  2025-02-13 18:00:17 +1100
committer Nicolas James <Eele1Ephe7uZahRie@tutanota.com>  2025-02-13 18:00:17 +1100
commit    98cef5e9a772602d42acfcf233838c760424db9a (patch)
tree      5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp6771/1/src
initial commit
Diffstat (limited to 'comp6771/1/src')
-rw-r--r--  comp6771/1/src/main.cpp                         39
-rw-r--r--  comp6771/1/src/word_ladder.cpp                 162
-rw-r--r--  comp6771/1/src/word_ladder.h                    34
-rw-r--r--  comp6771/1/src/word_ladder.test.cpp            171
-rw-r--r--  comp6771/1/src/word_ladder_benchmark.test.cpp   11
5 files changed, 417 insertions, 0 deletions
diff --git a/comp6771/1/src/main.cpp b/comp6771/1/src/main.cpp
new file mode 100644
index 0000000..94c8e9f
--- /dev/null
+++ b/comp6771/1/src/main.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+
+#include "word_ladder.h"
+
+// Please note: it's not good practice to test your code via a main function
+// that does printing. Instead, you should be using your test folder. This file
+// should only really be used for more "primitive" debugging as we know that
+// working solely with test frameworks might be overwhelming for some.
+
+namespace {
+
+void print_generate(const std::string& from, const std::string& to,
+ const auto& lexicon) {
+ const auto ladder = word_ladder::generate(from, to, lexicon);
+ std::cout << from << ':' << to << " has " << std::size(ladder)
+ << " solutions\n";
+ for (const auto& solution : ladder) {
+ std::cout << " ";
+ for (const auto& word : solution) {
+ std::cout << word << ' ';
+ }
+ std::cout << '\n';
+ }
+ std::cout << '\n';
+}
+
+} // namespace
+
+auto main() -> int {
+ const auto english_lexicon = word_ladder::read_lexicon("./english.txt");
+
+ print_generate("at", "it", english_lexicon);
+ print_generate("fly", "sky", english_lexicon);
+ print_generate("code", "data", english_lexicon);
+ print_generate("work", "play", english_lexicon);
+ print_generate("awake", "sleep", english_lexicon);
+ print_generate("airplane", "tricycle", english_lexicon);
+ print_generate("atlases", "cabaret", english_lexicon);
+}
diff --git a/comp6771/1/src/word_ladder.cpp b/comp6771/1/src/word_ladder.cpp
new file mode 100644
index 0000000..22b3223
--- /dev/null
+++ b/comp6771/1/src/word_ladder.cpp
@@ -0,0 +1,162 @@
+#include "word_ladder.h"
+
+using word_t = std::shared_ptr<std::string>;
+using words_t = std::vector<word_t>;
+
+namespace {
+
+// Returns the valid words that differ from current by exactly one letter.
+// Results are cached in neighbours, so repeated lookups of the same word
+// return the previously computed entry instead of recomputing it.
+auto get_neighbours(const std::string& current, const std::string& from,
+ const std::unordered_set<std::string>& readable,
+ std::unordered_map<std::string, words_t>& neighbours)
+ -> std::unordered_map<std::string, words_t>::iterator {
+ if (const auto it = neighbours.find(current); it != std::end(neighbours)) {
+ return it;
+ }
+
+ words_t lookup_neighbours{};
+ for (std::size_t pos = 0; pos < std::size(from); ++pos) {
+ std::string potential = current;
+
+ for (char c = 'a'; c <= 'z'; ++c) {
+ potential[pos] = c;
+
+ if (!readable.contains(potential)) {
+ continue;
+ }
+
+ lookup_neighbours.emplace_back(
+ std::make_shared<std::string>(potential));
+ }
+ }
+ return neighbours.emplace(current, std::move(lookup_neighbours)).first;
+}
+
+// Performs a breadth-first search using only words found in the lexicon.
+// Words are considered connected if they differ by exactly one character.
+// Note: generate() calls this with its arguments swapped, so the search in
+// fact runs from the destination back to the start word; each completed path
+// is reversed (rbegin/rend below) so results still read start-to-finish.
+auto do_bfs(const std::string& to, const std::string& from,
+            const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+ std::vector<std::vector<std::string>> results{};
+
+    // Optimisation: copy words of the required length from the lexicon into
+    // our own set. The smaller set is cheaper to probe, and because we own
+    // the copy we can erase words once they are visited, so later neighbour
+    // calculations treat them as if they were never in the lexicon at all,
+    // avoiding a separate "visited" lookup.
+ std::unordered_set<std::string> readable = std::accumulate(
+ std::begin(lexicon), std::end(lexicon),
+ std::unordered_set<std::string>{}, [&](auto&& l, const auto& word) {
+ if (std::size(word) == std::size(to)) {
+ l.emplace(word);
+ }
+ return l;
+ });
+
+ std::unordered_map<std::string, words_t> neighbours{};
+
+ std::queue<words_t> paths_queue{
+ {words_t{std::make_shared<std::string>(from)}}};
+
+ while (!paths_queue.empty()) {
+ const words_t path = paths_queue.front();
+ const std::string& current = *path.back();
+ paths_queue.pop();
+
+ readable.erase(current);
+
+        // Once any ladder is complete, abandon paths that are already as
+        // long as it: in BFS order they can never finish strictly shorter.
+ if (const auto it = std::begin(results);
+ it != std::end(results) && std::size(path) >= std::size(*it)) {
+ continue;
+ }
+
+ const auto lookup = ::get_neighbours(current, to, readable, neighbours);
+
+ for (const auto& neighbour : lookup->second) {
+ const auto concatenate_path = [&]() {
+ words_t new_path{};
+                // Reserve exactly the capacity needed so the vector never
+                // reallocates while being filled below.
+ new_path.reserve(std::size(path) + 1);
+ std::copy(std::begin(path), std::end(path),
+ std::back_inserter(new_path));
+ new_path.emplace_back(neighbour);
+ return new_path;
+ };
+
+ if (*neighbour == to) {
+ const std::vector<word_t> result_path = concatenate_path();
+ results.emplace_back(std::accumulate(
+ std::rbegin(result_path), std::rend(result_path),
+ std::vector<std::string>{}, [](auto&& v, const auto& ptr) {
+ v.emplace_back(*ptr);
+ return v;
+ }));
+ continue;
+ }
+
+ if (!readable.contains(*neighbour)) {
+ continue;
+ }
+
+ paths_queue.push(concatenate_path());
+ }
+ }
+
+ return results;
+}
+
+} // namespace
+
+namespace word_ladder {
+
+auto read_lexicon(const std::string& path) -> std::unordered_set<std::string> {
+ std::ifstream in{path};
+ if (!in.is_open()) {
+ throw std::runtime_error("unable to open file '" + path + "'");
+ }
+
+ std::unordered_set<std::string> words{};
+
+ std::stringstream contents{std::string{std::istreambuf_iterator<char>{in},
+ std::istreambuf_iterator<char>{}}};
+ for (std::string line; std::getline(contents, line);) {
+ if (line.empty()) {
+ continue;
+ }
+
+ words.emplace(std::move(line));
+ }
+
+ return words;
+}
+
+auto generate(const std::string& from, const std::string& to,
+ const std::unordered_set<std::string>& lexicon)
+ -> std::vector<std::vector<std::string>> {
+ std::vector<std::vector<std::string>> results = ::do_bfs(from, to, lexicon);
+
+ // Sorts results lexicographically (avoiding large string concatenation).
+ std::sort(std::begin(results), std::end(results),
+ [](const auto& a, const auto& b) {
+ for (std::size_t i = 0; i < std::size(a); ++i) {
+ const auto result = a[i].compare(b[i]);
+ if (result == 0) {
+ continue;
+ }
+ return result < 0;
+ }
+ return false;
+ });
+
+ return results;
+}
+
+} // namespace word_ladder
\ No newline at end of file
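A note on the caching in get_neighbours above: it is the standard memoisation idiom of probing a map first and, on a miss, computing and inserting in one step, returning an iterator either way so the cached vector is never copied. A minimal standalone sketch of just that idiom (the function name and the toy computation are illustrative, not part of the assignment code):

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>

using cache_t = std::unordered_map<std::string, std::size_t>;

// Illustrative memoisation: probe the cache first; on a miss, compute the
// value, store it, and return the iterator that emplace() hands back.
auto memoised_length(const std::string& key, cache_t& cache) -> cache_t::iterator {
    if (const auto it = cache.find(key); it != cache.end()) {
        return it; // cache hit: nothing is recomputed
    }
    // Cache miss: do the (stand-in) expensive work, then insert the result.
    // emplace() returns {iterator, inserted}; .first is the new entry.
    return cache.emplace(key, key.size()).first;
}

int main() {
    cache_t cache{};
    memoised_length("ladder", cache);                              // computed
    std::cout << memoised_length("ladder", cache)->second << '\n'; // hit: 6
}
```

Returning the iterator rather than the value is what keeps repeat visits in do_bfs free of copies of the neighbour vectors.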
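On the comparator at the end of generate: since std::vector already defines lexicographic operator<, a plain std::sort with no custom comparator orders these equal-length ladders identically. A sketch (sort_ladders is a hypothetical helper, not in the diff):

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Equivalent ordering for the equal-length ladders generate produces:
// std::vector's operator< already compares element by element.
auto sort_ladders(std::vector<std::vector<std::string>>& results) -> void {
    std::sort(results.begin(), results.end());
}
```

The difference only appears for ladders of unequal length: operator< handles a shorter prefix safely, while the hand-written loop indexes b by a's size. All ladders returned here are shortest paths of equal length, so both behave the same.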
diff --git a/comp6771/1/src/word_ladder.h b/comp6771/1/src/word_ladder.h
new file mode 100644
index 0000000..ef0f04a
--- /dev/null
+++ b/comp6771/1/src/word_ladder.h
@@ -0,0 +1,34 @@
+#ifndef COMP6771_WORD_LADDER_H
+#define COMP6771_WORD_LADDER_H
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <memory>
+#include <numeric>
+#include <queue>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace word_ladder {
+// Given a file path to a newline-separated list of words, loads those words
+// into an unordered set and returns it.
+auto read_lexicon(const std::string& path) -> std::unordered_set<std::string>;
+
+// Given a start word and destination word, returns all the shortest possible
+// paths from the start word to the destination, where each word in an
+// individual path is a valid word per the provided lexicon. Preconditions:
+// - from.size() == to.size()
+// - lexicon.contains(from)
+// - lexicon.contains(to)
+auto generate(const std::string& from, const std::string& to,
+ const std::unordered_set<std::string>& lexicon)
+ -> std::vector<std::vector<std::string>>;
+} // namespace word_ladder
+
+#endif // COMP6771_WORD_LADDER_H
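For reference, a minimal consumer of the interface declared above, mirroring main.cpp (it assumes an english.txt word list in the working directory, as the rest of this diff does):

```cpp
#include <iostream>

#include "word_ladder.h"

int main() {
    // Per the preconditions above: equal lengths, both words in the lexicon.
    const auto lexicon = word_ladder::read_lexicon("./english.txt");

    // Each inner vector is one shortest ladder, beginning with "fly" and
    // ending with "sky"; generate returns the ladders in sorted order.
    for (const auto& ladder : word_ladder::generate("fly", "sky", lexicon)) {
        for (const auto& word : ladder) {
            std::cout << word << ' ';
        }
        std::cout << '\n';
    }
}
```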
diff --git a/comp6771/1/src/word_ladder.test.cpp b/comp6771/1/src/word_ladder.test.cpp
new file mode 100644
index 0000000..34eec4a
--- /dev/null
+++ b/comp6771/1/src/word_ladder.test.cpp
@@ -0,0 +1,171 @@
+#include "word_ladder.h"
+
+#include <catch2/catch.hpp>
+
+// Fixture that removes the temporary word-list file after each of the
+// word_ladder::read_lexicon tests below.
+struct lexicon_fixture {
+  static constexpr const char* path = "words.txt";
+ ~lexicon_fixture() noexcept { std::filesystem::remove(path); }
+};
+
+TEST_CASE_METHOD(lexicon_fixture,
+ "word_ladder::read_lexicon works as expected") {
+ const std::vector<std::string> words = {"test", "words", "for", "writing"};
+ {
+ std::ofstream out{lexicon_fixture::path};
+ if (!out.is_open()) {
+ FAIL("failed to open test file for writing");
+ }
+ for (const auto& word : words) {
+ out << word << '\n';
+ }
+ }
+
+ const auto lexicon = word_ladder::read_lexicon(lexicon_fixture::path);
+ CHECK(std::size(lexicon) == std::size(words));
+ for (const auto& word : words) {
+ CHECK(lexicon.contains(word));
+ }
+}
+
+TEST_CASE_METHOD(lexicon_fixture,
+ "word_ladder::read_lexicon fails with a non-existent file") {
+ REQUIRE_THROWS(word_ladder::read_lexicon(lexicon_fixture::path));
+}
+
+namespace {
+
+auto check_generate(const std::string& from, const std::string& to,
+ const std::unordered_set<std::string>& lexicon,
+ const std::vector<std::vector<std::string>>& expected)
+ -> void {
+ const auto ladders = word_ladder::generate(from, to, lexicon);
+ CHECK(ladders == expected);
+}
+
+} // namespace
+
+TEST_CASE("\"\" -> \"\"") {
+ check_generate("", "", word_ladder::read_lexicon("./english.txt"), {});
+}
+
+TEST_CASE("at -> it") {
+ const auto expected = std::vector<std::vector<std::string>>{{"at", "it"}};
+ check_generate("at", "it", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+// The previous test establishes that at->it is correct. This one checks that
+// the lexicon is respected: the same query against an empty lexicon must
+// produce no solutions.
+TEST_CASE("at -> it, empty lexicon") { check_generate("at", "it", {}, {}); }
+
+TEST_CASE("fly -> sky") {
+ const auto expected =
+ std::vector<std::vector<std::string>>{{"fly", "sly", "sky"}};
+ check_generate("fly", "sky", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+TEST_CASE("code -> data") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"code", "cade", "cate", "date", "data"},
+ {"code", "cote", "cate", "date", "data"},
+ {"code", "cote", "dote", "date", "data"}};
+ check_generate("code", "data", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+// The previous test established that code->data returns correct solutions.
+// Here we remove one word ("cote") from the lexicon and check that the
+// solutions passing through it are no longer produced.
+TEST_CASE("code -> data, modified lexicon") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"code", "cade", "cate", "date", "data"}};
+ const auto lexicon = []() -> std::unordered_set<std::string> {
+ auto lexicon = word_ladder::read_lexicon("./english.txt");
+ lexicon.erase("cote");
+ return lexicon;
+ }();
+ check_generate("code", "data", lexicon, expected);
+}
+
+TEST_CASE("work -> play") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"work", "fork", "form", "foam", "flam", "flay", "play"},
+ {"work", "pork", "perk", "peak", "pean", "plan", "play"},
+ {"work", "pork", "perk", "peak", "peat", "plat", "play"},
+ {"work", "pork", "perk", "pert", "peat", "plat", "play"},
+ {"work", "pork", "porn", "pirn", "pian", "plan", "play"},
+ {"work", "pork", "port", "pert", "peat", "plat", "play"},
+ {"work", "word", "wood", "pood", "plod", "ploy", "play"},
+ {"work", "worm", "form", "foam", "flam", "flay", "play"},
+ {"work", "worn", "porn", "pirn", "pian", "plan", "play"},
+ {"work", "wort", "bort", "boat", "blat", "plat", "play"},
+ {"work", "wort", "port", "pert", "peat", "plat", "play"},
+ {"work", "wort", "wert", "pert", "peat", "plat", "play"}};
+ check_generate("work", "play", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+TEST_CASE("awake -> sleep") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"awake", "aware", "sware", "share", "sharn", "shawn", "shewn", "sheen",
+ "sheep", "sleep"},
+ {"awake", "aware", "sware", "share", "shire", "shirr", "shier", "sheer",
+ "sheep", "sleep"}};
+ check_generate("awake", "sleep", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+TEST_CASE("airplane -> tricycle") {
+ check_generate("airplane", "tricycle",
+ word_ladder::read_lexicon("./english.txt"), {});
+}
+
+TEST_CASE("warm -> cold") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"warm", "ward", "card", "cord", "cold"},
+ {"warm", "ward", "word", "cord", "cold"},
+ {"warm", "ward", "word", "wold", "cold"},
+ {"warm", "worm", "corm", "cord", "cold"},
+ {"warm", "worm", "word", "cord", "cold"},
+ {"warm", "worm", "word", "wold", "cold"},
+ };
+ check_generate("warm", "cold", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+// There are many solutions to this problem, but we can check that one of them
+// matches a solution published at:
+// http://datagenetics.com/blog/april22019/index.html
+// Some words had to be added to the lexicon because that author uses a larger
+// dictionary than english.txt (e.g. "unfaded" is not present).
+TEST_CASE("atlases -> cabaret") {
+ const auto solution = std::vector<std::string>{
+ "atlases", "anlases", "anlaces", "unlaces", "unlaced", "unladed",
+ "unfaded", "unfaked", "uncaked", "uncakes", "uncases", "uneases",
+ "ureases", "creases", "cresses", "crosses", "crosser", "crasser",
+ "crasher", "brasher", "brasier", "brakier", "beakier", "peakier",
+ "peckier", "pickier", "dickier", "dickies", "hickies", "hackies",
+ "hackles", "heckles", "deckles", "deciles", "defiles", "defiled",
+ "deviled", "develed", "reveled", "raveled", "ravened", "havened",
+ "havered", "wavered", "watered", "catered", "capered", "tapered",
+ "tabered", "tabored", "taboret", "tabaret", "cabaret"};
+ const auto lexicon =
+ std::accumulate(std::begin(solution), std::end(solution),
+ word_ladder::read_lexicon("./english.txt"),
+ [](auto&& lexicon, const auto& word) {
+ lexicon.emplace(word);
+ return lexicon;
+ });
+ const auto ladders = word_ladder::generate("atlases", "cabaret", lexicon);
+
+ if (const auto it = std::find_if(
+ std::begin(ladders), std::end(ladders),
+ [&solution](const auto& ladder) { return ladder == solution; });
+ it == std::end(ladders)) {
+ FAIL("failed to find solution");
+ }
+}
diff --git a/comp6771/1/src/word_ladder_benchmark.test.cpp b/comp6771/1/src/word_ladder_benchmark.test.cpp
new file mode 100644
index 0000000..2a512d5
--- /dev/null
+++ b/comp6771/1/src/word_ladder_benchmark.test.cpp
@@ -0,0 +1,11 @@
+#include <catch2/catch.hpp>
+
+#include "word_ladder.h"
+
+TEST_CASE("atlases -> cabaret") {
+ const auto english_lexicon = word_ladder::read_lexicon("./english.txt");
+ const auto ladders =
+ word_ladder::generate("atlases", "cabaret", english_lexicon);
+
+ CHECK(std::size(ladders) != 0);
+}