From 98cef5e9a772602d42acfcf233838c760424db9a Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Thu, 13 Feb 2025 18:00:17 +1100 Subject: initial commit --- comp6771/1/src/main.cpp | 39 ++++++ comp6771/1/src/word_ladder.cpp | 157 +++++++++++++++++++++++ comp6771/1/src/word_ladder.h | 31 +++++ comp6771/1/src/word_ladder.test.cpp | 171 ++++++++++++++++++++++++++ comp6771/1/src/word_ladder_benchmark.test.cpp | 11 ++ 5 files changed, 409 insertions(+) create mode 100644 comp6771/1/src/main.cpp create mode 100644 comp6771/1/src/word_ladder.cpp create mode 100644 comp6771/1/src/word_ladder.h create mode 100644 comp6771/1/src/word_ladder.test.cpp create mode 100644 comp6771/1/src/word_ladder_benchmark.test.cpp (limited to 'comp6771/1/src') diff --git a/comp6771/1/src/main.cpp b/comp6771/1/src/main.cpp new file mode 100644 index 0000000..94c8e9f --- /dev/null +++ b/comp6771/1/src/main.cpp @@ -0,0 +1,39 @@ +#include + +#include "word_ladder.h" + +// Please note: it's not good practice to test your code via a main function +// that does printing. Instead, you should be using your test folder. This file +// should only really be used for more "primitive" debugging as we know that +// working solely with test frameworks might be overwhelming for some. + +namespace { + +void print_generate(const std::string& from, const std::string& to, + const auto& lexicon) { + const auto ladder = word_ladder::generate(from, to, lexicon); + std::cout << from << ':' << to << " has " << std::size(ladder) + << " solutions\n"; + for (const auto& solution : ladder) { + std::cout << " "; + for (const auto& word : solution) { + std::cout << word << ' '; + } + std::cout << '\n'; + } + std::cout << '\n'; +} + +} // namespace + +auto main() -> int { + const auto english_lexicon = word_ladder::read_lexicon("./english.txt"); + + print_generate("at", "it", english_lexicon); + print_generate("fly", "sky", english_lexicon); + print_generate("code", "data", english_lexicon); + print_generate("work", "play", english_lexicon); + print_generate("awake", "sleep", english_lexicon); + print_generate("airplane", "tricycle", english_lexicon); + print_generate("atlases", "cabaret", english_lexicon); +} diff --git a/comp6771/1/src/word_ladder.cpp b/comp6771/1/src/word_ladder.cpp new file mode 100644 index 0000000..22b3223 --- /dev/null +++ b/comp6771/1/src/word_ladder.cpp @@ -0,0 +1,157 @@ +#include "word_ladder.h" + +using word_t = std::shared_ptr; +using words_t = std::vector; + +namespace { + +// Returns valid words which neighbour current by one letter. +// Performs caching on neighbours so future lookups of the same arguments are +// not computed unnecessarily. +auto get_neighbours(const std::string& current, const std::string& from, + const std::unordered_set& readable, + std::unordered_map& neighbours) + -> std::unordered_map::iterator { + if (const auto it = neighbours.find(current); it != std::end(neighbours)) { + return it; + } + + words_t lookup_neighbours{}; + for (std::size_t pos = 0; pos < std::size(from); ++pos) { + std::string potential = current; + + for (char c = 'a'; c <= 'z'; ++c) { + potential[pos] = c; + + if (!readable.contains(potential)) { + continue; + } + + lookup_neighbours.emplace_back( + std::make_shared(potential)); + } + } + return neighbours.emplace(current, std::move(lookup_neighbours)).first; +} + +// Performs a breadth first search using only words found in lexicon. Words are +// considered connected if they differ by one character only. +auto do_bfs(const std::string& to, const std::string& from, + const std::unordered_set& lexicon) + -> std::vector> { + std::vector> results{}; + + // Optimisation where we copy strings from the lexicon into our own set. By + // reducing the size of the hashmap to words of only size from/to, we get + // faster lookup. Also we can remove words and avoid unnecessary neighbour + // calculations later by ignoring them (treating them as if they did not + // exist in the lexicon entirely avoids a second hashing). + std::unordered_set readable = std::accumulate( + std::begin(lexicon), std::end(lexicon), + std::unordered_set{}, [&](auto&& l, const auto& word) { + if (std::size(word) == std::size(to)) { + l.emplace(word); + } + return l; + }); + + std::unordered_map neighbours{}; + + std::queue paths_queue{ + {words_t{std::make_shared(from)}}}; + + while (!paths_queue.empty()) { + const words_t path = paths_queue.front(); + const std::string& current = *path.back(); + paths_queue.pop(); + + readable.erase(current); + + if (const auto it = std::begin(results); + it != std::end(results) && std::size(path) >= std::size(*it)) { + continue; + } + + const auto lookup = ::get_neighbours(current, to, readable, neighbours); + + for (const auto& neighbour : lookup->second) { + const auto concatenate_path = [&]() { + words_t new_path{}; + // Vector optimisation where we allocate exactly as needed, + // reducing syscalls asking for more memory. + new_path.reserve(std::size(path) + 1); + std::copy(std::begin(path), std::end(path), + std::back_inserter(new_path)); + new_path.emplace_back(neighbour); + return new_path; + }; + + if (*neighbour == to) { + const std::vector result_path = concatenate_path(); + results.emplace_back(std::accumulate( + std::rbegin(result_path), std::rend(result_path), + std::vector{}, [](auto&& v, const auto& ptr) { + v.emplace_back(*ptr); + return v; + })); + continue; + } + + if (!readable.contains(*neighbour)) { + continue; + } + + paths_queue.push(concatenate_path()); + } + } + + return results; +} + +} // namespace + +namespace word_ladder { + +auto read_lexicon(const std::string& path) -> std::unordered_set { + std::ifstream in{path}; + if (!in.is_open()) { + throw std::runtime_error("unable to open file '" + path + "'"); + } + + std::unordered_set words{}; + + std::stringstream contents{std::string{std::istreambuf_iterator{in}, + std::istreambuf_iterator{}}}; + for (std::string line; std::getline(contents, line);) { + if (line.empty()) { + continue; + } + + words.emplace(std::move(line)); + } + + return words; +} + +auto generate(const std::string& from, const std::string& to, + const std::unordered_set& lexicon) + -> std::vector> { + std::vector> results = ::do_bfs(from, to, lexicon); + + // Sorts results lexicographically (avoiding large string concatenation). + std::sort(std::begin(results), std::end(results), + [](const auto& a, const auto& b) { + for (std::size_t i = 0; i < std::size(a); ++i) { + const auto result = a[i].compare(b[i]); + if (result == 0) { + continue; + } + return result < 0; + } + return false; + }); + + return results; +} + +} // namespace word_ladder \ No newline at end of file diff --git a/comp6771/1/src/word_ladder.h b/comp6771/1/src/word_ladder.h new file mode 100644 index 0000000..ef0f04a --- /dev/null +++ b/comp6771/1/src/word_ladder.h @@ -0,0 +1,31 @@ +#ifndef COMP6771_WORD_LADDER_H +#define COMP6771_WORD_LADDER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace word_ladder { +// Given a file path to a newline-separated list of words... +// Loads those words into an unordered set and returns it. +auto read_lexicon(const std::string& path) -> std::unordered_set; + +// Given a start word and destination word, returns all the shortest possible +// paths from the start word to the destination, where each word in an +// individual path is a valid word per the provided lexicon. Preconditions: +// - from.size() == to.size() +// - lexicon.contains(from) +// - lexicon.contains(to) +auto generate(const std::string& from, const std::string& to, + const std::unordered_set& lexicon) + -> std::vector>; +} // namespace word_ladder + +#endif // COMP6771_WORD_LADDER_H diff --git a/comp6771/1/src/word_ladder.test.cpp b/comp6771/1/src/word_ladder.test.cpp new file mode 100644 index 0000000..34eec4a --- /dev/null +++ b/comp6771/1/src/word_ladder.test.cpp @@ -0,0 +1,171 @@ +#include "word_ladder.h" + +#include + +// Fixture to handle cleanup of temporary file between both tests for +// word_ladder::read_lexicon. +struct lexicon_fixture { + static const constexpr char* const path = "words.txt"; + ~lexicon_fixture() noexcept { std::filesystem::remove(path); } +}; + +TEST_CASE_METHOD(lexicon_fixture, + "word_ladder::read_lexicon works as expected") { + const std::vector words = {"test", "words", "for", "writing"}; + { + std::ofstream out{lexicon_fixture::path}; + if (!out.is_open()) { + FAIL("failed to open test file for writing"); + } + for (const auto& word : words) { + out << word << '\n'; + } + } + + const auto lexicon = word_ladder::read_lexicon(lexicon_fixture::path); + CHECK(std::size(lexicon) == std::size(words)); + for (const auto& word : words) { + CHECK(lexicon.contains(word)); + } +} + +TEST_CASE_METHOD(lexicon_fixture, + "word_ladder::read_lexicon fails with a non-existent file") { + REQUIRE_THROWS(word_ladder::read_lexicon(lexicon_fixture::path)); +} + +namespace { + +auto check_generate(const std::string& from, const std::string& to, + const std::unordered_set& lexicon, + const std::vector>& expected) + -> void { + const auto ladders = word_ladder::generate(from, to, lexicon); + CHECK(ladders == expected); +} + +} // namespace + +TEST_CASE("\"\" -> \"\"") { + check_generate("", "", word_ladder::read_lexicon("./english.txt"), {}); +} + +TEST_CASE("at -> it") { + const auto expected = std::vector>{{"at", "it"}}; + check_generate("at", "it", word_ladder::read_lexicon("./english.txt"), + expected); +} + +// Previous test ensures at->it is correct. +// Ensure that the words in the lexicon are respected and do not generate +// solutions for an empty lexicon, for this same test case. +TEST_CASE("at -> it, empty lexicon") { check_generate("at", "it", {}, {}); } + +TEST_CASE("fly -> sky") { + const auto expected = + std::vector>{{"fly", "sly", "sky"}}; + check_generate("fly", "sky", word_ladder::read_lexicon("./english.txt"), + expected); +} + +TEST_CASE("code -> data") { + const auto expected = std::vector>{ + {"code", "cade", "cate", "date", "data"}, + {"code", "cote", "cate", "date", "data"}, + {"code", "cote", "dote", "date", "data"}}; + check_generate("code", "data", word_ladder::read_lexicon("./english.txt"), + expected); +} + +// Previous test ensured that code->data returned correct solutions. We now +// ensure that some of the solutions are impossible to retrieve given that we +// modify the lexicon to make those solutions impossible. +TEST_CASE("code -> data, modified lexicon") { + const auto expected = std::vector>{ + {"code", "cade", "cate", "date", "data"}}; + const auto lexicon = []() -> std::unordered_set { + auto lexicon = word_ladder::read_lexicon("./english.txt"); + lexicon.erase("cote"); + return lexicon; + }(); + check_generate("code", "data", lexicon, expected); +} + +TEST_CASE("work -> play") { + const auto expected = std::vector>{ + {"work", "fork", "form", "foam", "flam", "flay", "play"}, + {"work", "pork", "perk", "peak", "pean", "plan", "play"}, + {"work", "pork", "perk", "peak", "peat", "plat", "play"}, + {"work", "pork", "perk", "pert", "peat", "plat", "play"}, + {"work", "pork", "porn", "pirn", "pian", "plan", "play"}, + {"work", "pork", "port", "pert", "peat", "plat", "play"}, + {"work", "word", "wood", "pood", "plod", "ploy", "play"}, + {"work", "worm", "form", "foam", "flam", "flay", "play"}, + {"work", "worn", "porn", "pirn", "pian", "plan", "play"}, + {"work", "wort", "bort", "boat", "blat", "plat", "play"}, + {"work", "wort", "port", "pert", "peat", "plat", "play"}, + {"work", "wort", "wert", "pert", "peat", "plat", "play"}}; + check_generate("work", "play", word_ladder::read_lexicon("./english.txt"), + expected); +} + +TEST_CASE("awake -> sleep") { + const auto expected = std::vector>{ + {"awake", "aware", "sware", "share", "sharn", "shawn", "shewn", "sheen", + "sheep", "sleep"}, + {"awake", "aware", "sware", "share", "shire", "shirr", "shier", "sheer", + "sheep", "sleep"}}; + check_generate("awake", "sleep", word_ladder::read_lexicon("./english.txt"), + expected); +} + +TEST_CASE("airplane -> tricycle") { + check_generate("airplane", "tricycle", + word_ladder::read_lexicon("./english.txt"), {}); +} + +TEST_CASE("warm -> cold") { + const auto expected = std::vector>{ + {"warm", "ward", "card", "cord", "cold"}, + {"warm", "ward", "word", "cord", "cold"}, + {"warm", "ward", "word", "wold", "cold"}, + {"warm", "worm", "corm", "cord", "cold"}, + {"warm", "worm", "word", "cord", "cold"}, + {"warm", "worm", "word", "wold", "cold"}, + }; + check_generate("warm", "cold", word_ladder::read_lexicon("./english.txt"), + expected); +} + +// There are many solutions to this problem, but we can check that one of them +// is equal to a solution found here: +// http://datagenetics.com/blog/april22019/index.html +// Unfortunately I had to add some words that were not present in english.txt +// as the author uses an expanded dictionary (eg, unfaded didn't exist). +TEST_CASE("atlases -> cabaret") { + const auto solution = std::vector{ + "atlases", "anlases", "anlaces", "unlaces", "unlaced", "unladed", + "unfaded", "unfaked", "uncaked", "uncakes", "uncases", "uneases", + "ureases", "creases", "cresses", "crosses", "crosser", "crasser", + "crasher", "brasher", "brasier", "brakier", "beakier", "peakier", + "peckier", "pickier", "dickier", "dickies", "hickies", "hackies", + "hackles", "heckles", "deckles", "deciles", "defiles", "defiled", + "deviled", "develed", "reveled", "raveled", "ravened", "havened", + "havered", "wavered", "watered", "catered", "capered", "tapered", + "tabered", "tabored", "taboret", "tabaret", "cabaret"}; + const auto lexicon = + std::accumulate(std::begin(solution), std::end(solution), + word_ladder::read_lexicon("./english.txt"), + [](auto&& lexicon, const auto& word) { + lexicon.emplace(word); + return lexicon; + }); + const auto ladders = word_ladder::generate("atlases", "cabaret", lexicon); + + if (const auto it = std::find_if( + std::begin(ladders), std::end(ladders), + [&solution](const auto& ladder) { return ladder == solution; }); + it == std::end(ladders)) { + FAIL("failed to find solution"); + } +} diff --git a/comp6771/1/src/word_ladder_benchmark.test.cpp b/comp6771/1/src/word_ladder_benchmark.test.cpp new file mode 100644 index 0000000..2a512d5 --- /dev/null +++ b/comp6771/1/src/word_ladder_benchmark.test.cpp @@ -0,0 +1,11 @@ +#include + +#include "word_ladder.h" + +TEST_CASE("atlases -> cabaret") { + const auto english_lexicon = word_ladder::read_lexicon("./english.txt"); + const auto ladders = + word_ladder::generate("atlases", "cabaret", english_lexicon); + + CHECK(std::size(ladders) != 0); +} -- cgit v1.2.3