| author | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2025-02-13 18:00:17 +1100 |
|---|---|---|
| committer | Nicolas James <Eele1Ephe7uZahRie@tutanota.com> | 2025-02-13 18:00:17 +1100 |
| commit | 98cef5e9a772602d42acfcf233838c760424db9a (patch) | |
| tree | 5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp6771/1/src/word_ladder.cpp | |
initial commit
Diffstat (limited to 'comp6771/1/src/word_ladder.cpp')
| -rw-r--r-- | comp6771/1/src/word_ladder.cpp | 157 |
1 file changed, 157 insertions, 0 deletions
```diff
diff --git a/comp6771/1/src/word_ladder.cpp b/comp6771/1/src/word_ladder.cpp
new file mode 100644
index 0000000..22b3223
--- /dev/null
+++ b/comp6771/1/src/word_ladder.cpp
@@ -0,0 +1,157 @@
+#include "word_ladder.h"
+
+using word_t = std::shared_ptr<std::string>;
+using words_t = std::vector<word_t>;
+
+namespace {
+
+// Returns valid words which neighbour current by one letter.
+// Performs caching on neighbours so future lookups of the same arguments are
+// not computed unnecessarily.
+auto get_neighbours(const std::string& current, const std::string& from,
+                    const std::unordered_set<std::string>& readable,
+                    std::unordered_map<std::string, words_t>& neighbours)
+    -> std::unordered_map<std::string, words_t>::iterator {
+    if (const auto it = neighbours.find(current); it != std::end(neighbours)) {
+        return it;
+    }
+
+    words_t lookup_neighbours{};
+    for (std::size_t pos = 0; pos < std::size(from); ++pos) {
+        std::string potential = current;
+
+        for (char c = 'a'; c <= 'z'; ++c) {
+            potential[pos] = c;
+
+            if (!readable.contains(potential)) {
+                continue;
+            }
+
+            lookup_neighbours.emplace_back(
+                std::make_shared<std::string>(potential));
+        }
+    }
+    return neighbours.emplace(current, std::move(lookup_neighbours)).first;
+}
+
+// Performs a breadth first search using only words found in lexicon. Words are
+// considered connected if they differ by one character only.
+auto do_bfs(const std::string& to, const std::string& from,
+            const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+    std::vector<std::vector<std::string>> results{};
+
+    // Optimisation where we copy strings from the lexicon into our own set. By
+    // reducing the size of the hashmap to words of only size from/to, we get
+    // faster lookup. Also we can remove words and avoid unnecessary neighbour
+    // calculations later by ignoring them (treating them as if they did not
+    // exist in the lexicon entirely avoids a second hashing).
+    std::unordered_set<std::string> readable = std::accumulate(
+        std::begin(lexicon), std::end(lexicon),
+        std::unordered_set<std::string>{}, [&](auto&& l, const auto& word) {
+            if (std::size(word) == std::size(to)) {
+                l.emplace(word);
+            }
+            return l;
+        });
+
+    std::unordered_map<std::string, words_t> neighbours{};
+
+    std::queue<words_t> paths_queue{
+        {words_t{std::make_shared<std::string>(from)}}};
+
+    while (!paths_queue.empty()) {
+        const words_t path = paths_queue.front();
+        const std::string& current = *path.back();
+        paths_queue.pop();
+
+        readable.erase(current);
+
+        if (const auto it = std::begin(results);
+            it != std::end(results) && std::size(path) >= std::size(*it)) {
+            continue;
+        }
+
+        const auto lookup = ::get_neighbours(current, to, readable, neighbours);
+
+        for (const auto& neighbour : lookup->second) {
+            const auto concatenate_path = [&]() {
+                words_t new_path{};
+                // Vector optimisation where we allocate exactly as needed,
+                // reducing syscalls asking for more memory.
+                new_path.reserve(std::size(path) + 1);
+                std::copy(std::begin(path), std::end(path),
+                          std::back_inserter(new_path));
+                new_path.emplace_back(neighbour);
+                return new_path;
+            };
+
+            if (*neighbour == to) {
+                const std::vector<word_t> result_path = concatenate_path();
+                results.emplace_back(std::accumulate(
+                    std::rbegin(result_path), std::rend(result_path),
+                    std::vector<std::string>{}, [](auto&& v, const auto& ptr) {
+                        v.emplace_back(*ptr);
+                        return v;
+                    }));
+                continue;
+            }
+
+            if (!readable.contains(*neighbour)) {
+                continue;
+            }
+
+            paths_queue.push(concatenate_path());
+        }
+    }
+
+    return results;
+}
+
+} // namespace
+
+namespace word_ladder {
+
+auto read_lexicon(const std::string& path) -> std::unordered_set<std::string> {
+    std::ifstream in{path};
+    if (!in.is_open()) {
+        throw std::runtime_error("unable to open file '" + path + "'");
+    }
+
+    std::unordered_set<std::string> words{};
+
+    std::stringstream contents{std::string{std::istreambuf_iterator<char>{in},
+                                           std::istreambuf_iterator<char>{}}};
+    for (std::string line; std::getline(contents, line);) {
+        if (line.empty()) {
+            continue;
+        }
+
+        words.emplace(std::move(line));
+    }
+
+    return words;
+}
+
+auto generate(const std::string& from, const std::string& to,
+              const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+    std::vector<std::vector<std::string>> results = ::do_bfs(from, to, lexicon);
+
+    // Sorts results lexicographically (avoiding large string concatenation).
+    std::sort(std::begin(results), std::end(results),
+              [](const auto& a, const auto& b) {
+                  for (std::size_t i = 0; i < std::size(a); ++i) {
+                      const auto result = a[i].compare(b[i]);
+                      if (result == 0) {
+                          continue;
+                      }
+                      return result < 0;
+                  }
+                  return false;
+              });
+
+    return results;
+}
+
+} // namespace word_ladder
\ No newline at end of file
```
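
For context, a minimal driver for the two public entry points added here might look like the sketch below. The lexicon path `english.txt` and the word pair `work`/`play` are placeholders, and `word_ladder.h` is assumed to declare `read_lexicon` and `generate` with the signatures shown in the diff.

```cpp
// Hypothetical usage sketch; "english.txt", "work" and "play" are
// placeholders, not part of the commit above.
#include "word_ladder.h"

#include <iostream>
#include <string>
#include <unordered_set>

int main() {
    // read_lexicon throws std::runtime_error if the file cannot be opened.
    const std::unordered_set<std::string> lexicon =
        word_ladder::read_lexicon("english.txt");

    // Each ladder is a shortest sequence of one-letter steps from "work"
    // to "play"; generate() returns the ladders sorted lexicographically.
    for (const auto& ladder : word_ladder::generate("work", "play", lexicon)) {
        for (const auto& word : ladder) {
            std::cout << word << ' ';
        }
        std::cout << '\n';
    }
}
```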

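The core trick in `get_neighbours` is generating candidates by mutating one character position at a time and probing the hash set, rather than scanning the whole lexicon per word. A self-contained sketch of that technique, without the caching and `shared_ptr` plumbing of the commit, might look like this (assuming lowercase ASCII words and C++20 for `contains`):

```cpp
#include <iterator>
#include <string>
#include <unordered_set>
#include <vector>

// Standalone sketch of one-letter-neighbour generation: roughly
// 26 * word-length hash lookups instead of a full lexicon scan.
auto one_letter_neighbours(const std::string& word,
                           const std::unordered_set<std::string>& lexicon)
    -> std::vector<std::string> {
    std::vector<std::string> out{};
    std::string candidate = word;
    for (std::size_t pos = 0; pos < std::size(word); ++pos) {
        const char original = candidate[pos];
        for (char c = 'a'; c <= 'z'; ++c) {
            if (c == original) {
                continue; // a word is not its own neighbour
            }
            candidate[pos] = c;
            if (lexicon.contains(candidate)) {
                out.push_back(candidate);
            }
        }
        candidate[pos] = original; // restore before mutating the next position
    }
    return out;
}
```

Unlike this sketch, the committed `get_neighbours` does not skip `c == original`; it instead relies on `do_bfs` erasing `current` from `readable` before the neighbour lookup, so a word is never recorded as its own neighbour.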