commit 3fffaa1927db2bde91cde273377628c3b81430c8 Author: feathers <> Date: Sun Oct 8 18:32:35 2023 +0200 search using levenshtein distance diff --git a/resources/qml/MessageInput.qml b/resources/qml/MessageInput.qml index 59b19d4d..298a5697 100644 --- a/resources/qml/MessageInput.qml +++ b/resources/qml/MessageInput.qml @@ -129,7 +129,7 @@ Rectangle { completerTriggeredAt = pos; completer.completerName = type; popup.open(); - completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText); + completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText); } function positionCursorAtEnd() { @@ -182,12 +182,12 @@ Rectangle { popup.close(); if (popup.opened) - completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText); + completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText); } onPreeditTextChanged: { if (popup.opened) - completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText); + completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText); } onSelectionStartChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text) onSelectionEndChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text) diff --git a/src/CompletionProxyModel.h b/src/CompletionProxyModel.h index 4d9c9f0e..24767a2a 100644 --- a/src/CompletionProxyModel.h +++ b/src/CompletionProxyModel.h @@ -8,6 +8,8 @@ // Class for showing a limited amount of completions at a time +#include + #include enum class ElementRank @@ -19,140 +21,126 @@ enum class ElementRank template struct trie { - std::vector values; - std::map next; + struct entry { + QVector key; + Value value; + }; + + std::vector entries; 
template void insert(const QVector &keys, const Value &v) { - auto t = this; - for (const auto k : keys) { - t = &t->next[k]; - } - if constexpr (r == ElementRank::first) { - t->values.insert(t->values.begin(), v); + entries.emplace(entries.begin(), keys, v); } else if constexpr (r == ElementRank::second) { - t->values.push_back(v); + entries.emplace_back(keys, v); } } - std::vector valuesAndSubvalues(size_t limit = -1) const + // mostly stolen from https://nasauber.de/blog/2019/levenshtein-distance-and-longest-common-subsequence-in-qt/ + static size_t levenshteinDistance(const QVector &source, const QVector &target) { - std::vector ret; - if (limit < 200) - ret.reserve(limit); - - for (const auto &v : values) { - if (ret.size() >= limit) - return ret; - else - ret.push_back(v); + // Mostly stolen from https://qgis.org/api/2.14/qgsstringutils_8cpp_source.html + + if (source == target) { + return 0; } - for (const auto &[k, t] : next) { - (void)k; - if (ret.size() >= limit) - return ret; - else { - auto temp = t.valuesAndSubvalues(limit - ret.size()); - for (auto &&v : temp) { - if (ret.size() >= limit) - return ret; - - if (std::find(ret.begin(), ret.end(), v) == ret.end()) { - ret.push_back(std::move(v)); - } - } + const size_t sourceCount = source.count(); + const size_t targetCount = target.count(); + + if (source.isEmpty()) { + return targetCount; + } + + if (target.isEmpty()) { + return sourceCount; + } + + if (sourceCount > targetCount) { + return levenshteinDistance(target, source); + } + + QVector column; + column.fill(0, targetCount + 1); + QVector previousColumn; + previousColumn.reserve(targetCount + 1); + for (size_t i = 0; i < targetCount + 1; i++) { + previousColumn.append(i); + } + + for (size_t i = 0; i < sourceCount; i++) { + column[0] = i + 1; + for (size_t j = 0; j < targetCount; j++) { + column[j + 1] = std::min({ + 1 + column.at(j), + 1 + previousColumn.at(1 + j), + previousColumn.at(j) + ((source.at(i) == target.at(j)) ? 
0 : 1) + }); } + column.swap(previousColumn); } - return ret; + return previousColumn.at(targetCount); } - std::vector search(const QVector &keys, //< TODO(Nico): replace this with a span + std::vector search(QVector keys, //< TODO(Nico): replace this with a span size_t result_count_limit, size_t max_edit_distance_ = 2) const { std::vector ret; + std::set seen; + // mapping -> + std::vector> matches(max_edit_distance_ + 1); if (!result_count_limit) - return ret; + goto done; - if (keys.isEmpty()) - return valuesAndSubvalues(result_count_limit); - - auto append = [&ret, result_count_limit](std::vector &&in) { - for (auto &&v : in) { - if (ret.size() >= result_count_limit) - return; - - if (std::find(ret.begin(), ret.end(), v) == ret.end()) { - ret.push_back(std::move(v)); - } + if (keys.isEmpty()) { + for (size_t i = 0; i < result_count_limit; i++) { + ret.push_back(entries[i].value); } - }; - - auto limit = [&ret, result_count_limit] { - return std::min(result_count_limit, (result_count_limit - ret.size()) * 2); - }; - - // Try first exact matches, then with maximum errors - for (size_t max_edit_distance = 0; - max_edit_distance <= max_edit_distance_ && ret.size() < result_count_limit; - max_edit_distance += 1) { - if (max_edit_distance && ret.size() < result_count_limit) { - max_edit_distance -= 1; - - // swap chars case - if (keys.size() >= 2) { - auto t = this; - for (int i = 1; i >= 0; i--) { - if (auto e = t->next.find(keys[i]); e != t->next.end()) { - t = &e->second; - } else { - t = nullptr; - break; - } - } - - if (t) { - append(t->search(keys.mid(2), limit(), max_edit_distance)); - } - } - - // insert case - for (const auto &[k, t] : this->next) { - if (k == keys[0]) - continue; - if (ret.size() >= limit()) - break; - - // insert - append(t.search(keys, limit(), max_edit_distance)); - } - - // delete character case - append(this->search(keys.mid(1), limit(), max_edit_distance)); - - // substitute case - for (const auto &[k, t] : this->next) { - if (k == 
keys[0]) - continue; - if (ret.size() >= limit()) - break; + goto done; + } - // substitute - append(t.search(keys.mid(1), limit(), max_edit_distance)); - } + // search for perfect infixes and order them early. this is an imperfect heuristic, + // but Works For Us™ + for (const auto& entry : entries) { + if (std::search(entry.key.begin(), entry.key.end(), keys.begin(), keys.end()) + != entry.key.end()) { + matches.at(0).insert(entry.value); + } + } - max_edit_distance += 1; + // find all matches. we truncate the entry we're matching against if it's longer than + // the search key + edit distance, to allow prefix searches + for (const auto& entry : entries) { + auto truncated_key = entry.key; + if (truncated_key.size() > keys.count() + max_edit_distance_) { + truncated_key.resize(keys.count() + max_edit_distance_); + } + const auto distance = levenshteinDistance(keys, truncated_key); + if (distance <= max_edit_distance_) { + matches.at(distance).insert(entry.value); } + } - if (auto e = this->next.find(keys[0]); e != this->next.end()) { - append(e->second.search(keys.mid(1), limit(), max_edit_distance)); + for (const auto& match_set : matches) { + for (const auto match : match_set) { + if (seen.insert(match).second) { + ret.push_back(match); + if (ret.size() >= result_count_limit) + goto done; + } } } + done: + // ugly hack because nheko currently sometimes just hangs if the completion list + // empties and refills multiple times during typing. shows as `(undefined)`, which + // is good enough. + if (ret.empty()) + ret.push_back(-1); return ret; } };