dotfiles/nixos/overlays/fixup-nheko/nheko-search.patch
2024-11-07 16:25:41 -07:00

277 lines
10 KiB
Diff

commit 3fffaa1927db2bde91cde273377628c3b81430c8
Author: feathers <>
Date: Sun Oct 8 18:32:35 2023 +0200
search using levenshtein distance
diff --git a/resources/qml/MessageInput.qml b/resources/qml/MessageInput.qml
index 59b19d4d..298a5697 100644
--- a/resources/qml/MessageInput.qml
+++ b/resources/qml/MessageInput.qml
@@ -129,7 +129,7 @@ Rectangle {
completerTriggeredAt = pos;
completer.completerName = type;
popup.open();
- completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
+ completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
}
function positionCursorAtEnd() {
@@ -182,12 +182,12 @@ Rectangle {
popup.close();
if (popup.opened)
- completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
+ completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
}
onPreeditTextChanged: {
if (popup.opened)
- completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
+ completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
}
onSelectionStartChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text)
onSelectionEndChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text)
diff --git a/src/CompletionProxyModel.h b/src/CompletionProxyModel.h
index 4d9c9f0e..24767a2a 100644
--- a/src/CompletionProxyModel.h
+++ b/src/CompletionProxyModel.h
@@ -8,6 +8,8 @@
// Class for showing a limited amount of completions at a time
+#include <set>
+
#include <QAbstractProxyModel>
enum class ElementRank
@@ -19,140 +21,126 @@ enum class ElementRank
template<typename Key, typename Value>
struct trie
{
- std::vector<Value> values;
- std::map<Key, trie> next;
+ struct entry {
+ QVector<Key> key;
+ Value value;
+ };
+
+ std::vector<entry> entries;
template<ElementRank r>
void insert(const QVector<Key> &keys, const Value &v)
{
- auto t = this;
- for (const auto k : keys) {
- t = &t->next[k];
- }
-
if constexpr (r == ElementRank::first) {
- t->values.insert(t->values.begin(), v);
+ entries.emplace(entries.begin(), keys, v);
} else if constexpr (r == ElementRank::second) {
- t->values.push_back(v);
+ entries.emplace_back(keys, v);
}
}
- std::vector<Value> valuesAndSubvalues(size_t limit = -1) const
+ // mostly stolen from https://nasauber.de/blog/2019/levenshtein-distance-and-longest-common-subsequence-in-qt/
+ static size_t levenshteinDistance(const QVector<Key> &source, const QVector<Key> &target)
{
- std::vector<Value> ret;
- if (limit < 200)
- ret.reserve(limit);
-
- for (const auto &v : values) {
- if (ret.size() >= limit)
- return ret;
- else
- ret.push_back(v);
+ // Mostly stolen from https://qgis.org/api/2.14/qgsstringutils_8cpp_source.html
+
+ if (source == target) {
+ return 0;
}
- for (const auto &[k, t] : next) {
- (void)k;
- if (ret.size() >= limit)
- return ret;
- else {
- auto temp = t.valuesAndSubvalues(limit - ret.size());
- for (auto &&v : temp) {
- if (ret.size() >= limit)
- return ret;
-
- if (std::find(ret.begin(), ret.end(), v) == ret.end()) {
- ret.push_back(std::move(v));
- }
- }
+ const size_t sourceCount = source.count();
+ const size_t targetCount = target.count();
+
+ if (source.isEmpty()) {
+ return targetCount;
+ }
+
+ if (target.isEmpty()) {
+ return sourceCount;
+ }
+
+ if (sourceCount > targetCount) {
+ return levenshteinDistance(target, source);
+ }
+
+ QVector<size_t> column;
+ column.fill(0, targetCount + 1);
+ QVector<size_t> previousColumn;
+ previousColumn.reserve(targetCount + 1);
+ for (size_t i = 0; i < targetCount + 1; i++) {
+ previousColumn.append(i);
+ }
+
+ for (size_t i = 0; i < sourceCount; i++) {
+ column[0] = i + 1;
+ for (size_t j = 0; j < targetCount; j++) {
+ column[j + 1] = std::min({
+ 1 + column.at(j),
+ 1 + previousColumn.at(1 + j),
+ previousColumn.at(j) + ((source.at(i) == target.at(j)) ? 0 : 1)
+ });
}
+ column.swap(previousColumn);
}
- return ret;
+ return previousColumn.at(targetCount);
}
- std::vector<Value> search(const QVector<Key> &keys, //< TODO(Nico): replace this with a span
+ std::vector<Value> search(QVector<Key> keys, //< TODO(Nico): replace this with a span
size_t result_count_limit,
size_t max_edit_distance_ = 2) const
{
std::vector<Value> ret;
+ std::set<Value> seen;
+ // mapping <edit distance> -> <matching values>
+ std::vector<std::set<Value>> matches(max_edit_distance_ + 1);
if (!result_count_limit)
- return ret;
+ goto done;
- if (keys.isEmpty())
- return valuesAndSubvalues(result_count_limit);
-
- auto append = [&ret, result_count_limit](std::vector<Value> &&in) {
- for (auto &&v : in) {
- if (ret.size() >= result_count_limit)
- return;
-
- if (std::find(ret.begin(), ret.end(), v) == ret.end()) {
- ret.push_back(std::move(v));
- }
+ if (keys.isEmpty()) {
+            // entries may hold fewer items than the limit; never index past the end
+            for (size_t i = 0; i < std::min(result_count_limit, entries.size()); i++)
+                ret.push_back(entries[i].value);
- };
-
- auto limit = [&ret, result_count_limit] {
- return std::min(result_count_limit, (result_count_limit - ret.size()) * 2);
- };
-
- // Try first exact matches, then with maximum errors
- for (size_t max_edit_distance = 0;
- max_edit_distance <= max_edit_distance_ && ret.size() < result_count_limit;
- max_edit_distance += 1) {
- if (max_edit_distance && ret.size() < result_count_limit) {
- max_edit_distance -= 1;
-
- // swap chars case
- if (keys.size() >= 2) {
- auto t = this;
- for (int i = 1; i >= 0; i--) {
- if (auto e = t->next.find(keys[i]); e != t->next.end()) {
- t = &e->second;
- } else {
- t = nullptr;
- break;
- }
- }
-
- if (t) {
- append(t->search(keys.mid(2), limit(), max_edit_distance));
- }
- }
-
- // insert case
- for (const auto &[k, t] : this->next) {
- if (k == keys[0])
- continue;
- if (ret.size() >= limit())
- break;
-
- // insert
- append(t.search(keys, limit(), max_edit_distance));
- }
-
- // delete character case
- append(this->search(keys.mid(1), limit(), max_edit_distance));
-
- // substitute case
- for (const auto &[k, t] : this->next) {
- if (k == keys[0])
- continue;
- if (ret.size() >= limit())
- break;
+ goto done;
+ }
- // substitute
- append(t.search(keys.mid(1), limit(), max_edit_distance));
- }
+ // search for perfect infixes and order them early. this is an imperfect heuristic,
+ // but Works For Us™
+ for (const auto& entry : entries) {
+ if (std::search(entry.key.begin(), entry.key.end(), keys.begin(), keys.end())
+ != entry.key.end()) {
+ matches.at(0).insert(entry.value);
+ }
+ }
- max_edit_distance += 1;
+        // find all matches. we truncate the entry we're matching against if it's longer
+        // than the search key + edit distance, to allow prefix searches
+ for (const auto& entry : entries) {
+ auto truncated_key = entry.key;
+ if (truncated_key.size() > keys.count() + max_edit_distance_) {
+ truncated_key.resize(keys.count() + max_edit_distance_);
+ }
+ const auto distance = levenshteinDistance(keys, truncated_key);
+ if (distance <= max_edit_distance_) {
+ matches.at(distance).insert(entry.value);
}
+ }
- if (auto e = this->next.find(keys[0]); e != this->next.end()) {
- append(e->second.search(keys.mid(1), limit(), max_edit_distance));
+ for (const auto& match_set : matches) {
+ for (const auto match : match_set) {
+ if (seen.insert(match).second) {
+ ret.push_back(match);
+ if (ret.size() >= result_count_limit)
+ goto done;
+ }
}
}
+ done:
+ // ugly hack because nheko currently sometimes just hangs if the completion list
+ // empties and refills multiple times during typing. shows as `(undefined)`, which
+ // is good enough.
+ if (ret.empty())
+ ret.push_back(-1);
return ret;
}
};