dotfiles/nixos/overlays/fixup-nheko/nheko-search.patch

commit 3fffaa1927db2bde91cde273377628c3b81430c8
Author: feathers <>
Date:   Sun Oct 8 18:32:35 2023 +0200

    search using levenshtein distance

diff --git a/resources/qml/MessageInput.qml b/resources/qml/MessageInput.qml
index 59b19d4d..298a5697 100644
--- a/resources/qml/MessageInput.qml
+++ b/resources/qml/MessageInput.qml
@@ -129,7 +129,7 @@ Rectangle {
                     completerTriggeredAt = pos;
                     completer.completerName = type;
                     popup.open();
-                    completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
+                    completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
                 }

                 function positionCursorAtEnd() {
@@ -182,12 +182,12 @@ Rectangle {
                         popup.close();

                     if (popup.opened)
-                        completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
+                        completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);

                 }
                 onPreeditTextChanged: {
                     if (popup.opened)
-                        completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
+                        completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
                 }
                 onSelectionStartChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text)
                 onSelectionEndChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text)
diff --git a/src/CompletionProxyModel.h b/src/CompletionProxyModel.h
index 4d9c9f0e..24767a2a 100644
--- a/src/CompletionProxyModel.h
+++ b/src/CompletionProxyModel.h
@@ -8,6 +8,8 @@

 // Class for showing a limited amount of completions at a time

+#include <set>
+
 #include <QAbstractProxyModel>

 enum class ElementRank
@@ -19,140 +21,126 @@ enum class ElementRank
 template<typename Key, typename Value>
 struct trie
 {
-    std::vector<Value> values;
-    std::map<Key, trie> next;
+    struct entry { // one inserted key sequence and the value stored for it
+        QVector<Key> key;
+        Value value;
+    };
+
+    std::vector<entry> entries; // flat list; search() scans it linearly

     template<ElementRank r>
     void insert(const QVector<Key> &keys, const Value &v)
     {
-        auto t = this;
-        for (const auto k : keys) {
-            t = &t->next[k];
-        }
-
         if constexpr (r == ElementRank::first) {
-            t->values.insert(t->values.begin(), v);
+            entries.emplace(entries.begin(), keys, v); // first-ranked: goes to the front
         } else if constexpr (r == ElementRank::second) {
-            t->values.push_back(v);
+            entries.emplace_back(keys, v); // second-ranked: appended at the back
         }
     }

-    std::vector<Value> valuesAndSubvalues(size_t limit = -1) const
+    // mostly stolen from https://nasauber.de/blog/2019/levenshtein-distance-and-longest-common-subsequence-in-qt/
+    static size_t levenshteinDistance(const QVector<Key> &source, const QVector<Key> &target)
     {
-        std::vector<Value> ret;
-        if (limit < 200)
-            ret.reserve(limit);
-
-        for (const auto &v : values) {
-            if (ret.size() >= limit)
-                return ret;
-            else
-                ret.push_back(v);
+        // Mostly stolen from https://qgis.org/api/2.14/qgsstringutils_8cpp_source.html
+
+        if (source == target) {
+            return 0;
         }

-        for (const auto &[k, t] : next) {
-            (void)k;
-            if (ret.size() >= limit)
-                return ret;
-            else {
-                auto temp = t.valuesAndSubvalues(limit - ret.size());
-                for (auto &&v : temp) {
-                    if (ret.size() >= limit)
-                        return ret;
-
-                    if (std::find(ret.begin(), ret.end(), v) == ret.end()) {
-                        ret.push_back(std::move(v));
-                    }
-                }
+        const size_t sourceCount = source.count();
+        const size_t targetCount = target.count();
+
+        if (source.isEmpty()) {
+            return targetCount;
+        }
+
+        if (target.isEmpty()) {
+            return sourceCount;
+        }
+
+        if (sourceCount > targetCount) { // recurse so that source is the shorter sequence
+            return levenshteinDistance(target, source);
+        }
+
+        QVector<size_t> column;
+        column.fill(0, targetCount + 1);
+        QVector<size_t> previousColumn;
+        previousColumn.reserve(targetCount + 1);
+        for (size_t i = 0; i < targetCount + 1; i++) {
+            previousColumn.append(i);
+        }
+
+        for (size_t i = 0; i < sourceCount; i++) {
+            column[0] = i + 1;
+            for (size_t j = 0; j < targetCount; j++) {
+                column[j + 1] = std::min({
+                        1 + column.at(j),
+                        1 + previousColumn.at(1 + j),
+                        previousColumn.at(j) + ((source.at(i) == target.at(j)) ? 0 : 1)
+                    });
             }
+            column.swap(previousColumn); // finished column becomes "previous" for the next i
         }

-        return ret;
+        return previousColumn.at(targetCount); // the last computed column, due to the swap
     }

-    std::vector<Value> search(const QVector<Key> &keys, //< TODO(Nico): replace this with a span
+    std::vector<Value> search(QVector<Key> keys, //< TODO(Nico): replace this with a span
                               size_t result_count_limit,
                               size_t max_edit_distance_ = 2) const
     {
         std::vector<Value> ret;
+        std::set<Value> seen;
+        // mapping <edit distance> -> <matching values>
+        std::vector<std::set<Value>> matches(max_edit_distance_ + 1);
         if (!result_count_limit)
-            return ret;
+            goto done; // not a plain return: still gets the non-empty fallback at done:

-        if (keys.isEmpty())
-            return valuesAndSubvalues(result_count_limit);
-
-        auto append = [&ret, result_count_limit](std::vector<Value> &&in) {
-            for (auto &&v : in) {
-                if (ret.size() >= result_count_limit)
-                    return;
-
-                if (std::find(ret.begin(), ret.end(), v) == ret.end()) {
-                    ret.push_back(std::move(v));
-                }
+        if (keys.isEmpty()) { // no filter: first distinct values, bounded by entries.size()
+            for (size_t i = 0; i < entries.size() && ret.size() < result_count_limit; i++) {
+                if (seen.insert(entries[i].value).second) ret.push_back(entries[i].value);
             }
-        };
-
-        auto limit = [&ret, result_count_limit] {
-            return std::min(result_count_limit, (result_count_limit - ret.size()) * 2);
-        };
-
-        // Try first exact matches, then with maximum errors
-        for (size_t max_edit_distance = 0;
-             max_edit_distance <= max_edit_distance_ && ret.size() < result_count_limit;
-             max_edit_distance += 1) {
-            if (max_edit_distance && ret.size() < result_count_limit) {
-                max_edit_distance -= 1;
-
-                // swap chars case
-                if (keys.size() >= 2) {
-                    auto t = this;
-                    for (int i = 1; i >= 0; i--) {
-                        if (auto e = t->next.find(keys[i]); e != t->next.end()) {
-                            t = &e->second;
-                        } else {
-                            t = nullptr;
-                            break;
-                        }
-                    }
-
-                    if (t) {
-                        append(t->search(keys.mid(2), limit(), max_edit_distance));
-                    }
-                }
-
-                // insert case
-                for (const auto &[k, t] : this->next) {
-                    if (k == keys[0])
-                        continue;
-                    if (ret.size() >= limit())
-                        break;
-
-                    // insert
-                    append(t.search(keys, limit(), max_edit_distance));
-                }
-
-                // delete character case
-                append(this->search(keys.mid(1), limit(), max_edit_distance));
-
-                // substitute case
-                for (const auto &[k, t] : this->next) {
-                    if (k == keys[0])
-                        continue;
-                    if (ret.size() >= limit())
-                        break;
+            goto done;
+        }

-                    // substitute
-                    append(t.search(keys.mid(1), limit(), max_edit_distance));
-                }
+        // search for perfect infixes and order them early. this is an imperfect heuristic,
+        // but Works For Us™
+        for (const auto& entry : entries) {
+            if (std::search(entry.key.begin(), entry.key.end(), keys.begin(), keys.end())
+                != entry.key.end()) {
+                matches.at(0).insert(entry.value);
+            }
+        }

-                max_edit_distance += 1;
+        // find all matches. we truncate the entry we're matching against if it's longer than
+        // the search key + edit distance, to allow prefix searches
+        for (const auto& entry : entries) {
+            auto truncated_key = entry.key;
+            if (truncated_key.size() > keys.count() + max_edit_distance_) {
+                truncated_key.resize(keys.count() + max_edit_distance_);
+            }
+            const auto distance = levenshteinDistance(keys, truncated_key);
+            if (distance <= max_edit_distance_) {
+                matches.at(distance).insert(entry.value);
             }
+        }

-            if (auto e = this->next.find(keys[0]); e != this->next.end()) {
-                append(e->second.search(keys.mid(1), limit(), max_edit_distance));
+        for (const auto& match_set : matches) {
+            for (const auto match : match_set) {
+                if (seen.insert(match).second) {
+                    ret.push_back(match);
+                    if (ret.size() >= result_count_limit)
+                        goto done;
+                }
             }
         }

+    done:
+        // ugly hack because nheko currently sometimes just hangs if the completion list
+        // empties and refills multiple times during typing. shows as `(undefined)`, which
+        // is good enough.
+        if (ret.empty())
+            ret.push_back(-1); // placeholder; requires Value to be constructible from -1
         return ret;
     }
 };