author    Nicolas James <Eele1Ephe7uZahRie@tutanota.com>  2025-02-13 18:00:17 +1100
committer Nicolas James <Eele1Ephe7uZahRie@tutanota.com>  2025-02-13 18:00:17 +1100
commit    98cef5e9a772602d42acfcf233838c760424db9a (patch)
tree      5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp6771/1/src
initial commit
Diffstat (limited to 'comp6771/1/src')
-rw-r--r--  comp6771/1/src/main.cpp                         39
-rw-r--r--  comp6771/1/src/word_ladder.cpp                 162
-rw-r--r--  comp6771/1/src/word_ladder.h                    34
-rw-r--r--  comp6771/1/src/word_ladder.test.cpp            171
-rw-r--r--  comp6771/1/src/word_ladder_benchmark.test.cpp   11
5 files changed, 417 insertions, 0 deletions
diff --git a/comp6771/1/src/main.cpp b/comp6771/1/src/main.cpp
new file mode 100644
index 0000000..94c8e9f
--- /dev/null
+++ b/comp6771/1/src/main.cpp
@@ -0,0 +1,39 @@
+#include <iostream>
+
+#include "word_ladder.h"
+
+// Please note: it's not good practice to test your code via a main function
+// that does printing. Instead, you should be using your test folder. This file
+// should only really be used for more "primitive" debugging as we know that
+// working solely with test frameworks might be overwhelming for some.
+
+namespace {
+
+void print_generate(const std::string& from, const std::string& to,
+ const auto& lexicon) {
+ const auto ladder = word_ladder::generate(from, to, lexicon);
+ std::cout << from << ':' << to << " has " << std::size(ladder)
+ << " solutions\n";
+ for (const auto& solution : ladder) {
+ std::cout << " ";
+ for (const auto& word : solution) {
+ std::cout << word << ' ';
+ }
+ std::cout << '\n';
+ }
+ std::cout << '\n';
+}
+
+} // namespace
+
+auto main() -> int {
+ const auto english_lexicon = word_ladder::read_lexicon("./english.txt");
+
+ print_generate("at", "it", english_lexicon);
+ print_generate("fly", "sky", english_lexicon);
+ print_generate("code", "data", english_lexicon);
+ print_generate("work", "play", english_lexicon);
+ print_generate("awake", "sleep", english_lexicon);
+ print_generate("airplane", "tricycle", english_lexicon);
+ print_generate("atlases", "cabaret", english_lexicon);
+}
diff --git a/comp6771/1/src/word_ladder.cpp b/comp6771/1/src/word_ladder.cpp
new file mode 100644
index 0000000..22b3223
--- /dev/null
+++ b/comp6771/1/src/word_ladder.cpp
@@ -0,0 +1,162 @@
+#include "word_ladder.h"
+
+using word_t = std::shared_ptr<std::string>;
+using words_t = std::vector<word_t>;
+
+namespace {
+
+// Returns the valid words that differ from current by exactly one letter.
+// Results are cached in neighbours, so repeated lookups of the same word
+// return the previously computed entry instead of recomputing it.
+auto get_neighbours(const std::string& current, const std::string& from,
+ const std::unordered_set<std::string>& readable,
+ std::unordered_map<std::string, words_t>& neighbours)
+ -> std::unordered_map<std::string, words_t>::iterator {
+ if (const auto it = neighbours.find(current); it != std::end(neighbours)) {
+ return it;
+ }
+
+ words_t lookup_neighbours{};
+ for (std::size_t pos = 0; pos < std::size(from); ++pos) {
+ std::string potential = current;
+
+ for (char c = 'a'; c <= 'z'; ++c) {
+ potential[pos] = c;
+
+ if (!readable.contains(potential)) {
+ continue;
+ }
+
+ lookup_neighbours.emplace_back(
+ std::make_shared<std::string>(potential));
+ }
+ }
+ return neighbours.emplace(current, std::move(lookup_neighbours)).first;
+}
+
+// Performs a breadth-first search using only words found in the lexicon.
+// Words are considered connected if they differ by exactly one character.
+// Note: generate() calls this with its arguments swapped, so the search in
+// fact runs from the destination back to the start word; each completed path
+// is reversed (rbegin/rend below) so results still read start-to-finish.
+auto do_bfs(const std::string& to, const std::string& from,
+            const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+ std::vector<std::vector<std::string>> results{};
+
+    // Optimisation: copy words of the required length from the lexicon into
+    // our own set. The smaller set is cheaper to probe, and because we own
+    // the copy we can erase words once they are visited, so later neighbour
+    // calculations treat them as if they were never in the lexicon at all,
+    // avoiding a separate "visited" lookup.
+ std::unordered_set<std::string> readable = std::accumulate(
+ std::begin(lexicon), std::end(lexicon),
+ std::unordered_set<std::string>{}, [&](auto&& l, const auto& word) {
+ if (std::size(word) == std::size(to)) {
+ l.emplace(word);
+ }
+ return l;
+ });
+
+ std::unordered_map<std::string, words_t> neighbours{};
+
+ std::queue<words_t> paths_queue{
+ {words_t{std::make_shared<std::string>(from)}}};
+
+ while (!paths_queue.empty()) {
+ const words_t path = paths_queue.front();
+ const std::string& current = *path.back();
+ paths_queue.pop();
+
+ readable.erase(current);
+
+        // Once any ladder is complete, abandon paths that are already as
+        // long as it: in BFS order they can never finish strictly shorter.
+ if (const auto it = std::begin(results);
+ it != std::end(results) && std::size(path) >= std::size(*it)) {
+ continue;
+ }
+
+ const auto lookup = ::get_neighbours(current, to, readable, neighbours);
+
+ for (const auto& neighbour : lookup->second) {
+ const auto concatenate_path = [&]() {
+ words_t new_path{};
+                // Reserve exactly the capacity needed so the vector never
+                // reallocates while being filled below.
+ new_path.reserve(std::size(path) + 1);
+ std::copy(std::begin(path), std::end(path),
+ std::back_inserter(new_path));
+ new_path.emplace_back(neighbour);
+ return new_path;
+ };
+
+ if (*neighbour == to) {
+ const std::vector<word_t> result_path = concatenate_path();
+ results.emplace_back(std::accumulate(
+ std::rbegin(result_path), std::rend(result_path),
+ std::vector<std::string>{}, [](auto&& v, const auto& ptr) {
+ v.emplace_back(*ptr);
+ return v;
+ }));
+ continue;
+ }
+
+ if (!readable.contains(*neighbour)) {
+ continue;
+ }
+
+ paths_queue.push(concatenate_path());
+ }
+ }
+
+ return results;
+}
+
+} // namespace
+
+namespace word_ladder {
+
+auto read_lexicon(const std::string& path) -> std::unordered_set<std::string> {
+ std::ifstream in{path};
+ if (!in.is_open()) {
+ throw std::runtime_error("unable to open file '" + path + "'");
+ }
+
+ std::unordered_set<std::string> words{};
+
+ std::stringstream contents{std::string{std::istreambuf_iterator<char>{in},
+ std::istreambuf_iterator<char>{}}};
+ for (std::string line; std::getline(contents, line);) {
+ if (line.empty()) {
+ continue;
+ }
+
+ words.emplace(std::move(line));
+ }
+
+ return words;
+}
+
+auto generate(const std::string& from, const std::string& to,
+ const std::unordered_set<std::string>& lexicon)
+ -> std::vector<std::vector<std::string>> {
+ std::vector<std::vector<std::string>> results = ::do_bfs(from, to, lexicon);
+
+ // Sorts results lexicographically (avoiding large string concatenation).
+ std::sort(std::begin(results), std::end(results),
+ [](const auto& a, const auto& b) {
+ for (std::size_t i = 0; i < std::size(a); ++i) {
+ const auto result = a[i].compare(b[i]);
+ if (result == 0) {
+ continue;
+ }
+ return result < 0;
+ }
+ return false;
+ });
+
+ return results;
+}
+
+} // namespace word_ladder
\ No newline at end of file
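A note on the caching in get_neighbours above: it is the standard memoisation idiom of probing a map first and, on a miss, computing and inserting in one step, returning an iterator either way so the cached vector is never copied. A minimal standalone sketch of just that idiom (the function name and the toy computation are illustrative, not part of the assignment code):

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>

using cache_t = std::unordered_map<std::string, std::size_t>;

// Illustrative memoisation: probe the cache first; on a miss, compute the
// value, store it, and return the iterator that emplace() hands back.
auto memoised_length(const std::string& key, cache_t& cache) -> cache_t::iterator {
    if (const auto it = cache.find(key); it != cache.end()) {
        return it; // cache hit: nothing is recomputed
    }
    // Cache miss: do the (stand-in) expensive work, then insert the result.
    // emplace() returns {iterator, inserted}; .first is the new entry.
    return cache.emplace(key, key.size()).first;
}

int main() {
    cache_t cache{};
    memoised_length("ladder", cache);                              // computed
    std::cout << memoised_length("ladder", cache)->second << '\n'; // hit: 6
}
```

Returning the iterator rather than the value is what keeps repeat visits in do_bfs free of copies of the neighbour vectors.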
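On the comparator at the end of generate: since std::vector already defines lexicographic operator<, a plain std::sort with no custom comparator orders these equal-length ladders identically. A sketch (sort_ladders is a hypothetical helper, not in the diff):

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Equivalent ordering for the equal-length ladders generate produces:
// std::vector's operator< already compares element by element.
auto sort_ladders(std::vector<std::vector<std::string>>& results) -> void {
    std::sort(results.begin(), results.end());
}
```

The difference only appears for ladders of unequal length: operator< handles a shorter prefix safely, while the hand-written loop indexes b by a's size. All ladders returned here are shortest paths of equal length, so both behave the same.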
diff --git a/comp6771/1/src/word_ladder.h b/comp6771/1/src/word_ladder.h
new file mode 100644
index 0000000..ef0f04a
--- /dev/null
+++ b/comp6771/1/src/word_ladder.h
@@ -0,0 +1,34 @@
+#ifndef COMP6771_WORD_LADDER_H
+#define COMP6771_WORD_LADDER_H
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <memory>
+#include <numeric>
+#include <queue>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace word_ladder {
+// Given a file path to a newline-separated list of words, loads those words
+// into an unordered set and returns it.
+auto read_lexicon(const std::string& path) -> std::unordered_set<std::string>;
+
+// Given a start word and destination word, returns all the shortest possible
+// paths from the start word to the destination, where each word in an
+// individual path is a valid word per the provided lexicon. Preconditions:
+// - from.size() == to.size()
+// - lexicon.contains(from)
+// - lexicon.contains(to)
+auto generate(const std::string& from, const std::string& to,
+ const std::unordered_set<std::string>& lexicon)
+ -> std::vector<std::vector<std::string>>;
+} // namespace word_ladder
+
+#endif // COMP6771_WORD_LADDER_H
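For reference, a minimal consumer of the interface declared above, mirroring main.cpp (it assumes an english.txt word list in the working directory, as the rest of this diff does):

```cpp
#include <iostream>

#include "word_ladder.h"

int main() {
    // Per the preconditions above: equal lengths, both words in the lexicon.
    const auto lexicon = word_ladder::read_lexicon("./english.txt");

    // Each inner vector is one shortest ladder, beginning with "fly" and
    // ending with "sky"; generate returns the ladders in sorted order.
    for (const auto& ladder : word_ladder::generate("fly", "sky", lexicon)) {
        for (const auto& word : ladder) {
            std::cout << word << ' ';
        }
        std::cout << '\n';
    }
}
```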
diff --git a/comp6771/1/src/word_ladder.test.cpp b/comp6771/1/src/word_ladder.test.cpp
new file mode 100644
index 0000000..34eec4a
--- /dev/null
+++ b/comp6771/1/src/word_ladder.test.cpp
@@ -0,0 +1,171 @@
+#include "word_ladder.h"
+
+#include <catch2/catch.hpp>
+
+// Fixture that removes the temporary word-list file after each of the
+// word_ladder::read_lexicon tests below.
+struct lexicon_fixture {
+  static constexpr const char* path = "words.txt";
+ ~lexicon_fixture() noexcept { std::filesystem::remove(path); }
+};
+
+TEST_CASE_METHOD(lexicon_fixture,
+ "word_ladder::read_lexicon works as expected") {
+ const std::vector<std::string> words = {"test", "words", "for", "writing"};
+ {
+ std::ofstream out{lexicon_fixture::path};
+ if (!out.is_open()) {
+ FAIL("failed to open test file for writing");
+ }
+ for (const auto& word : words) {
+ out << word << '\n';
+ }
+ }
+
+ const auto lexicon = word_ladder::read_lexicon(lexicon_fixture::path);
+ CHECK(std::size(lexicon) == std::size(words));
+ for (const auto& word : words) {
+ CHECK(lexicon.contains(word));
+ }
+}
+
+TEST_CASE_METHOD(lexicon_fixture,
+ "word_ladder::read_lexicon fails with a non-existent file") {
+ REQUIRE_THROWS(word_ladder::read_lexicon(lexicon_fixture::path));
+}
+
+namespace {
+
+auto check_generate(const std::string& from, const std::string& to,
+ const std::unordered_set<std::string>& lexicon,
+ const std::vector<std::vector<std::string>>& expected)
+ -> void {
+ const auto ladders = word_ladder::generate(from, to, lexicon);
+ CHECK(ladders == expected);
+}
+
+} // namespace
+
+TEST_CASE("\"\" -> \"\"") {
+ check_generate("", "", word_ladder::read_lexicon("./english.txt"), {});
+}
+
+TEST_CASE("at -> it") {
+ const auto expected = std::vector<std::vector<std::string>>{{"at", "it"}};
+ check_generate("at", "it", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+// The previous test establishes that at->it is correct. This one checks that
+// the lexicon is respected: the same query against an empty lexicon must
+// produce no solutions.
+TEST_CASE("at -> it, empty lexicon") { check_generate("at", "it", {}, {}); }
+
+TEST_CASE("fly -> sky") {
+ const auto expected =
+ std::vector<std::vector<std::string>>{{"fly", "sly", "sky"}};
+ check_generate("fly", "sky", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+TEST_CASE("code -> data") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"code", "cade", "cate", "date", "data"},
+ {"code", "cote", "cate", "date", "data"},
+ {"code", "cote", "dote", "date", "data"}};
+ check_generate("code", "data", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+// The previous test established that code->data returns correct solutions.
+// Here we remove one word ("cote") from the lexicon and check that the
+// solutions passing through it are no longer produced.
+TEST_CASE("code -> data, modified lexicon") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"code", "cade", "cate", "date", "data"}};
+ const auto lexicon = []() -> std::unordered_set<std::string> {
+ auto lexicon = word_ladder::read_lexicon("./english.txt");
+ lexicon.erase("cote");
+ return lexicon;
+ }();
+ check_generate("code", "data", lexicon, expected);
+}
+
+TEST_CASE("work -> play") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"work", "fork", "form", "foam", "flam", "flay", "play"},
+ {"work", "pork", "perk", "peak", "pean", "plan", "play"},
+ {"work", "pork", "perk", "peak", "peat", "plat", "play"},
+ {"work", "pork", "perk", "pert", "peat", "plat", "play"},
+ {"work", "pork", "porn", "pirn", "pian", "plan", "play"},
+ {"work", "pork", "port", "pert", "peat", "plat", "play"},
+ {"work", "word", "wood", "pood", "plod", "ploy", "play"},
+ {"work", "worm", "form", "foam", "flam", "flay", "play"},
+ {"work", "worn", "porn", "pirn", "pian", "plan", "play"},
+ {"work", "wort", "bort", "boat", "blat", "plat", "play"},
+ {"work", "wort", "port", "pert", "peat", "plat", "play"},
+ {"work", "wort", "wert", "pert", "peat", "plat", "play"}};
+ check_generate("work", "play", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+TEST_CASE("awake -> sleep") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"awake", "aware", "sware", "share", "sharn", "shawn", "shewn", "sheen",
+ "sheep", "sleep"},
+ {"awake", "aware", "sware", "share", "shire", "shirr", "shier", "sheer",
+ "sheep", "sleep"}};
+ check_generate("awake", "sleep", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+TEST_CASE("airplane -> tricycle") {
+ check_generate("airplane", "tricycle",
+ word_ladder::read_lexicon("./english.txt"), {});
+}
+
+TEST_CASE("warm -> cold") {
+ const auto expected = std::vector<std::vector<std::string>>{
+ {"warm", "ward", "card", "cord", "cold"},
+ {"warm", "ward", "word", "cord", "cold"},
+ {"warm", "ward", "word", "wold", "cold"},
+ {"warm", "worm", "corm", "cord", "cold"},
+ {"warm", "worm", "word", "cord", "cold"},
+ {"warm", "worm", "word", "wold", "cold"},
+ };
+ check_generate("warm", "cold", word_ladder::read_lexicon("./english.txt"),
+ expected);
+}
+
+// There are many solutions to this problem, but we can check that one of them
+// matches a solution published at:
+// http://datagenetics.com/blog/april22019/index.html
+// Some words had to be added to the lexicon because that author uses a larger
+// dictionary than english.txt (e.g. "unfaded" is not present).
+TEST_CASE("atlases -> cabaret") {
+ const auto solution = std::vector<std::string>{
+ "atlases", "anlases", "anlaces", "unlaces", "unlaced", "unladed",
+ "unfaded", "unfaked", "uncaked", "uncakes", "uncases", "uneases",
+ "ureases", "creases", "cresses", "crosses", "crosser", "crasser",
+ "crasher", "brasher", "brasier", "brakier", "beakier", "peakier",
+ "peckier", "pickier", "dickier", "dickies", "hickies", "hackies",
+ "hackles", "heckles", "deckles", "deciles", "defiles", "defiled",
+ "deviled", "develed", "reveled", "raveled", "ravened", "havened",
+ "havered", "wavered", "watered", "catered", "capered", "tapered",
+ "tabered", "tabored", "taboret", "tabaret", "cabaret"};
+ const auto lexicon =
+ std::accumulate(std::begin(solution), std::end(solution),
+ word_ladder::read_lexicon("./english.txt"),
+ [](auto&& lexicon, const auto& word) {
+ lexicon.emplace(word);
+ return lexicon;
+ });
+ const auto ladders = word_ladder::generate("atlases", "cabaret", lexicon);
+
+ if (const auto it = std::find_if(
+ std::begin(ladders), std::end(ladders),
+ [&solution](const auto& ladder) { return ladder == solution; });
+ it == std::end(ladders)) {
+ FAIL("failed to find solution");
+ }
+}
diff --git a/comp6771/1/src/word_ladder_benchmark.test.cpp b/comp6771/1/src/word_ladder_benchmark.test.cpp
new file mode 100644
index 0000000..2a512d5
--- /dev/null
+++ b/comp6771/1/src/word_ladder_benchmark.test.cpp
@@ -0,0 +1,11 @@
+#include <catch2/catch.hpp>
+
+#include "word_ladder.h"
+
+TEST_CASE("atlases -> cabaret") {
+ const auto english_lexicon = word_ladder::read_lexicon("./english.txt");
+ const auto ladders =
+ word_ladder::generate("atlases", "cabaret", english_lexicon);
+
+ CHECK(std::size(ladders) != 0);
+}