author    Nicolas James <Eele1Ephe7uZahRie@tutanota.com>  2025-02-13 18:00:17 +1100
committer Nicolas James <Eele1Ephe7uZahRie@tutanota.com>  2025-02-13 18:00:17 +1100
commit    98cef5e9a772602d42acfcf233838c760424db9a (patch)
tree      5277fa1d7cc0a69a0f166fcbf10fd320f345f049 /comp6771/1/src/word_ladder.cpp
initial commit
Diffstat (limited to 'comp6771/1/src/word_ladder.cpp')
-rw-r--r--  comp6771/1/src/word_ladder.cpp  157
1 file changed, 157 insertions(+), 0 deletions(-)
diff --git a/comp6771/1/src/word_ladder.cpp b/comp6771/1/src/word_ladder.cpp
new file mode 100644
index 0000000..22b3223
--- /dev/null
+++ b/comp6771/1/src/word_ladder.cpp
@@ -0,0 +1,157 @@
+#include "word_ladder.h"
+
+using word_t = std::shared_ptr<std::string>;
+using words_t = std::vector<word_t>;
+
+namespace {
+
+// Returns the valid words that differ from `current` by exactly one letter.
+// Results are cached in `neighbours` so repeated lookups for the same word
+// are not recomputed; `from` is used only for its length (every word in a
+// ladder shares the endpoints' length).
+auto get_neighbours(const std::string& current, const std::string& from,
+                    const std::unordered_set<std::string>& readable,
+                    std::unordered_map<std::string, words_t>& neighbours)
+    -> std::unordered_map<std::string, words_t>::iterator {
+  if (const auto it = neighbours.find(current); it != std::end(neighbours)) {
+    return it;
+  }
+
+  words_t lookup_neighbours{};
+  for (std::size_t pos = 0; pos < std::size(from); ++pos) {
+    std::string potential = current;
+
+    // Assumes a lowercase-ASCII lexicon.
+    for (char c = 'a'; c <= 'z'; ++c) {
+      potential[pos] = c;
+
+      if (!readable.contains(potential)) {
+        continue;
+      }
+
+      lookup_neighbours.emplace_back(std::make_shared<std::string>(potential));
+    }
+  }
+  return neighbours.emplace(current, std::move(lookup_neighbours)).first;
+}
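+
+// Illustrative cache behaviour (hypothetical words, not from any lexicon in
+// this repository): with readable = {"cot", "car", "dog"}, the first call
+// get_neighbours("cat", "cat", readable, neighbours) computes and stores
+// {"cot", "car"}; later lookups for "cat" return that cached entry even
+// after "cot" or "car" have been erased from `readable`, which is why
+// do_bfs re-checks `readable` before extending a path.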
+
+// Performs a breadth-first search using only words found in the lexicon.
+// Two words are connected when they differ by exactly one character. Note
+// that the caller passes its arguments swapped: the search runs from the
+// destination back to the source, and every completed path is reversed
+// before being stored, so results still read source-to-destination.
+auto do_bfs(const std::string& to, const std::string& from,
+            const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+  std::vector<std::vector<std::string>> results{};
+
+  // Optimisation: copy candidate words from the lexicon into our own set,
+  // keeping only words whose length matches from/to. The smaller hash set
+  // gives faster lookups, and because it is ours to mutate we can erase
+  // words as they are visited, which prunes later neighbour calculations
+  // without paying for a second "visited" lookup.
+  std::unordered_set<std::string> readable = std::accumulate(
+      std::begin(lexicon), std::end(lexicon),
+      std::unordered_set<std::string>{}, [&](auto&& l, const auto& word) {
+        if (std::size(word) == std::size(to)) {
+          l.emplace(word);
+        }
+        // Move the accumulator back out: returning it by copy would make
+        // this fold quadratic in the size of the lexicon.
+        return std::move(l);
+      });
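+
+  // e.g. for to = "cat", a hypothetical lexicon {"cat", "cot", "dog",
+  // "geese"} yields readable = {"cat", "cot", "dog"}; "geese" is dropped
+  // for its length.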
+
+  std::unordered_map<std::string, words_t> neighbours{};
+
+  // Seed the queue with a single-word path holding the starting word.
+  std::queue<words_t> paths_queue{
+      {words_t{std::make_shared<std::string>(from)}}};
+
+  while (!paths_queue.empty()) {
+    const words_t path = paths_queue.front();
+    const std::string& current = *path.back();
+    paths_queue.pop();
+
+    readable.erase(current);
+
+    // Once a complete ladder exists, any partial path at least as long can
+    // only produce a longer ladder, so skip it (every result is a shortest
+    // ladder, hence all results share one length).
+    if (const auto it = std::begin(results);
+        it != std::end(results) && std::size(path) >= std::size(*it)) {
+      continue;
+    }
+
+    const auto lookup = ::get_neighbours(current, to, readable, neighbours);
+
+    for (const auto& neighbour : lookup->second) {
+      const auto concatenate_path = [&]() {
+        words_t new_path{};
+        // Reserve exactly the final size, so the copy and emplace below
+        // trigger a single allocation instead of repeated reallocations.
+        new_path.reserve(std::size(path) + 1);
+        std::copy(std::begin(path), std::end(path),
+                  std::back_inserter(new_path));
+        new_path.emplace_back(neighbour);
+        return new_path;
+      };
+
+      if (*neighbour == to) {
+        // The path was built from the destination, so walk it in reverse
+        // while converting the shared words back into plain strings.
+        const std::vector<word_t> result_path = concatenate_path();
+        results.emplace_back(std::accumulate(
+            std::rbegin(result_path), std::rend(result_path),
+            std::vector<std::string>{}, [](auto&& v, const auto& ptr) {
+              v.emplace_back(*ptr);
+              // As above, move rather than copy the accumulator.
+              return std::move(v);
+            }));
+        continue;
+      }
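+
+      // e.g. a backwards path ["dog", "cog", "cot", "cat"] (hypothetical
+      // words) is emitted here as {"cat", "cot", "cog", "dog"}.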
+
+      // A word already erased from `readable` was reached at this level or
+      // an earlier one, so extending through it cannot give a new shortest
+      // ladder.
+      if (!readable.contains(*neighbour)) {
+        continue;
+      }
+
+      paths_queue.push(concatenate_path());
+    }
+  }
+
+  return results;
+}
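+
+// Worked example of do_bfs (hypothetical four-word lexicon): for the lexicon
+// {"cat", "cot", "cog", "dog"}, do_bfs("cat", "dog", lexicon) seeds the
+// queue with the path {"dog"}, expands dog -> cog -> cot -> cat, and
+// returns {{"cat", "cot", "cog", "dog"}} after the reversal above.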
+
+} // namespace
+
+namespace word_ladder {
+
+auto read_lexicon(const std::string& path) -> std::unordered_set<std::string> {
+  std::ifstream in{path};
+  if (!in.is_open()) {
+    throw std::runtime_error("unable to open file '" + path + "'");
+  }
+
+  std::unordered_set<std::string> words{};
+
+  // std::ifstream already buffers its reads, so getline straight from the
+  // stream rather than copying the whole file into a stringstream first.
+  for (std::string line; std::getline(in, line);) {
+    if (line.empty()) {
+      continue;
+    }
+
+    words.emplace(std::move(line));
+  }
+
+  return words;
+}
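+
+// e.g. a hypothetical lexicon file containing "at\ncat\n\ncot\n" produces
+// the set {"at", "cat", "cot"}; the empty line is skipped.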
+
+auto generate(const std::string& from, const std::string& to,
+              const std::unordered_set<std::string>& lexicon)
+    -> std::vector<std::vector<std::string>> {
+  // Note the argument order: do_bfs takes (to, from), so passing (from, to)
+  // deliberately runs the search backwards from `to`; do_bfs reverses each
+  // completed path before returning it.
+  std::vector<std::vector<std::string>> results = ::do_bfs(from, to, lexicon);
+
+  // Sort ladders lexicographically. std::vector's operator< already compares
+  // element-wise, so no large string concatenation is needed.
+  std::sort(std::begin(results), std::end(results));
+
+  return results;
+}
+
+} // namespace word_ladder
\ No newline at end of file
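
A minimal usage sketch of the API this commit introduces; the lexicon filename
and the word pair are illustrative assumptions, not part of the commit:

#include "word_ladder.h"

#include <iostream>

int main() {
  // "english.txt" is a hypothetical one-word-per-line lexicon file.
  const auto lexicon = word_ladder::read_lexicon("english.txt");

  // Each inner vector is one shortest ladder from "work" to "play",
  // and the ladders arrive lexicographically sorted.
  for (const auto& ladder : word_ladder::generate("work", "play", lexicon)) {
    for (const auto& word : ladder) {
      std::cout << word << ' ';
    }
    std::cout << '\n';
  }
}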