From 98cef5e9a772602d42acfcf233838c760424db9a Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Thu, 13 Feb 2025 18:00:17 +1100 Subject: initial commit --- comp6771/1/src/word_ladder.cpp | 157 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 comp6771/1/src/word_ladder.cpp (limited to 'comp6771/1/src/word_ladder.cpp') diff --git a/comp6771/1/src/word_ladder.cpp b/comp6771/1/src/word_ladder.cpp new file mode 100644 index 0000000..22b3223 --- /dev/null +++ b/comp6771/1/src/word_ladder.cpp @@ -0,0 +1,157 @@ +#include "word_ladder.h" + +using word_t = std::shared_ptr; +using words_t = std::vector; + +namespace { + +// Returns valid words which neighbour current by one letter. +// Performs caching on neighbours so future lookups of the same arguments are +// not computed unnecessarily. +auto get_neighbours(const std::string& current, const std::string& from, + const std::unordered_set& readable, + std::unordered_map& neighbours) + -> std::unordered_map::iterator { + if (const auto it = neighbours.find(current); it != std::end(neighbours)) { + return it; + } + + words_t lookup_neighbours{}; + for (std::size_t pos = 0; pos < std::size(from); ++pos) { + std::string potential = current; + + for (char c = 'a'; c <= 'z'; ++c) { + potential[pos] = c; + + if (!readable.contains(potential)) { + continue; + } + + lookup_neighbours.emplace_back( + std::make_shared(potential)); + } + } + return neighbours.emplace(current, std::move(lookup_neighbours)).first; +} + +// Performs a breadth first search using only words found in lexicon. Words are +// considered connected if they differ by one character only. +auto do_bfs(const std::string& to, const std::string& from, + const std::unordered_set& lexicon) + -> std::vector> { + std::vector> results{}; + + // Optimisation where we copy strings from the lexicon into our own set. By + // reducing the size of the hashmap to words of only size from/to, we get + // faster lookup. Also we can remove words and avoid unnecessary neighbour + // calculations later by ignoring them (treating them as if they did not + // exist in the lexicon entirely avoids a second hashing). + std::unordered_set readable = std::accumulate( + std::begin(lexicon), std::end(lexicon), + std::unordered_set{}, [&](auto&& l, const auto& word) { + if (std::size(word) == std::size(to)) { + l.emplace(word); + } + return l; + }); + + std::unordered_map neighbours{}; + + std::queue paths_queue{ + {words_t{std::make_shared(from)}}}; + + while (!paths_queue.empty()) { + const words_t path = paths_queue.front(); + const std::string& current = *path.back(); + paths_queue.pop(); + + readable.erase(current); + + if (const auto it = std::begin(results); + it != std::end(results) && std::size(path) >= std::size(*it)) { + continue; + } + + const auto lookup = ::get_neighbours(current, to, readable, neighbours); + + for (const auto& neighbour : lookup->second) { + const auto concatenate_path = [&]() { + words_t new_path{}; + // Vector optimisation where we allocate exactly as needed, + // reducing syscalls asking for more memory. + new_path.reserve(std::size(path) + 1); + std::copy(std::begin(path), std::end(path), + std::back_inserter(new_path)); + new_path.emplace_back(neighbour); + return new_path; + }; + + if (*neighbour == to) { + const std::vector result_path = concatenate_path(); + results.emplace_back(std::accumulate( + std::rbegin(result_path), std::rend(result_path), + std::vector{}, [](auto&& v, const auto& ptr) { + v.emplace_back(*ptr); + return v; + })); + continue; + } + + if (!readable.contains(*neighbour)) { + continue; + } + + paths_queue.push(concatenate_path()); + } + } + + return results; +} + +} // namespace + +namespace word_ladder { + +auto read_lexicon(const std::string& path) -> std::unordered_set { + std::ifstream in{path}; + if (!in.is_open()) { + throw std::runtime_error("unable to open file '" + path + "'"); + } + + std::unordered_set words{}; + + std::stringstream contents{std::string{std::istreambuf_iterator{in}, + std::istreambuf_iterator{}}}; + for (std::string line; std::getline(contents, line);) { + if (line.empty()) { + continue; + } + + words.emplace(std::move(line)); + } + + return words; +} + +auto generate(const std::string& from, const std::string& to, + const std::unordered_set& lexicon) + -> std::vector> { + std::vector> results = ::do_bfs(from, to, lexicon); + + // Sorts results lexicographically (avoiding large string concatenation). + std::sort(std::begin(results), std::end(results), + [](const auto& a, const auto& b) { + for (std::size_t i = 0; i < std::size(a); ++i) { + const auto result = a[i].compare(b[i]); + if (result == 0) { + continue; + } + return result < 0; + } + return false; + }); + + return results; +} + +} // namespace word_ladder \ No newline at end of file -- cgit v1.2.3