277 lines
10 KiB
Diff
277 lines
10 KiB
Diff
commit 3fffaa1927db2bde91cde273377628c3b81430c8
|
|
Author: feathers <>
|
|
Date: Sun Oct 8 18:32:35 2023 +0200
|
|
|
|
search using levenshtein distance
|
|
|
|
diff --git a/resources/qml/MessageInput.qml b/resources/qml/MessageInput.qml
|
|
index 59b19d4d..298a5697 100644
|
|
--- a/resources/qml/MessageInput.qml
|
|
+++ b/resources/qml/MessageInput.qml
|
|
@@ -129,7 +129,7 @@ Rectangle {
|
|
completerTriggeredAt = pos;
|
|
completer.completerName = type;
|
|
popup.open();
|
|
- completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
|
|
+ completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
|
|
}
|
|
|
|
function positionCursorAtEnd() {
|
|
@@ -182,12 +182,12 @@ Rectangle {
|
|
popup.close();
|
|
|
|
if (popup.opened)
|
|
- completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
|
|
+ completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
|
|
|
|
}
|
|
onPreeditTextChanged: {
|
|
if (popup.opened)
|
|
- completer.completer.setSearchString(messageInput.getText(completerTriggeredAt, cursorPosition)+messageInput.preeditText);
|
|
+ completer.completer.setSearchString(messageInput.getText(completerTriggeredAt+1, cursorPosition)+messageInput.preeditText);
|
|
}
|
|
onSelectionStartChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text)
|
|
onSelectionEndChanged: room.input.updateState(selectionStart, selectionEnd, cursorPosition, text)
|
|
diff --git a/src/CompletionProxyModel.h b/src/CompletionProxyModel.h
|
|
index 4d9c9f0e..24767a2a 100644
|
|
--- a/src/CompletionProxyModel.h
|
|
+++ b/src/CompletionProxyModel.h
|
|
@@ -8,6 +8,8 @@
|
|
|
|
// Class for showing a limited amount of completions at a time
|
|
|
|
+#include <set>
|
|
+
|
|
#include <QAbstractProxyModel>
|
|
|
|
enum class ElementRank
|
|
@@ -19,140 +21,126 @@ enum class ElementRank
|
|
template<typename Key, typename Value>
|
|
struct trie
|
|
{
|
|
- std::vector<Value> values;
|
|
- std::map<Key, trie> next;
|
|
+ struct entry {
|
|
+ QVector<Key> key;
|
|
+ Value value;
|
|
+ };
|
|
+
|
|
+ std::vector<entry> entries;
|
|
|
|
template<ElementRank r>
|
|
void insert(const QVector<Key> &keys, const Value &v)
|
|
{
|
|
- auto t = this;
|
|
- for (const auto k : keys) {
|
|
- t = &t->next[k];
|
|
- }
|
|
-
|
|
if constexpr (r == ElementRank::first) {
|
|
- t->values.insert(t->values.begin(), v);
|
|
+ entries.emplace(entries.begin(), keys, v);
|
|
} else if constexpr (r == ElementRank::second) {
|
|
- t->values.push_back(v);
|
|
+ entries.emplace_back(keys, v);
|
|
}
|
|
}
|
|
|
|
- std::vector<Value> valuesAndSubvalues(size_t limit = -1) const
|
|
+ // mostly stolen from https://nasauber.de/blog/2019/levenshtein-distance-and-longest-common-subsequence-in-qt/
|
|
+ static size_t levenshteinDistance(const QVector<Key> &source, const QVector<Key> &target)
|
|
{
|
|
- std::vector<Value> ret;
|
|
- if (limit < 200)
|
|
- ret.reserve(limit);
|
|
-
|
|
- for (const auto &v : values) {
|
|
- if (ret.size() >= limit)
|
|
- return ret;
|
|
- else
|
|
- ret.push_back(v);
|
|
+ // Mostly stolen from https://qgis.org/api/2.14/qgsstringutils_8cpp_source.html
|
|
+
|
|
+ if (source == target) {
|
|
+ return 0;
|
|
}
|
|
|
|
- for (const auto &[k, t] : next) {
|
|
- (void)k;
|
|
- if (ret.size() >= limit)
|
|
- return ret;
|
|
- else {
|
|
- auto temp = t.valuesAndSubvalues(limit - ret.size());
|
|
- for (auto &&v : temp) {
|
|
- if (ret.size() >= limit)
|
|
- return ret;
|
|
-
|
|
- if (std::find(ret.begin(), ret.end(), v) == ret.end()) {
|
|
- ret.push_back(std::move(v));
|
|
- }
|
|
- }
|
|
+ const size_t sourceCount = source.count();
|
|
+ const size_t targetCount = target.count();
|
|
+
|
|
+ if (source.isEmpty()) {
|
|
+ return targetCount;
|
|
+ }
|
|
+
|
|
+ if (target.isEmpty()) {
|
|
+ return sourceCount;
|
|
+ }
|
|
+
|
|
+ if (sourceCount > targetCount) {
|
|
+ return levenshteinDistance(target, source);
|
|
+ }
|
|
+
|
|
+ QVector<size_t> column;
|
|
+ column.fill(0, targetCount + 1);
|
|
+ QVector<size_t> previousColumn;
|
|
+ previousColumn.reserve(targetCount + 1);
|
|
+ for (size_t i = 0; i < targetCount + 1; i++) {
|
|
+ previousColumn.append(i);
|
|
+ }
|
|
+
|
|
+ for (size_t i = 0; i < sourceCount; i++) {
|
|
+ column[0] = i + 1;
|
|
+ for (size_t j = 0; j < targetCount; j++) {
|
|
+ column[j + 1] = std::min({
|
|
+ 1 + column.at(j),
|
|
+ 1 + previousColumn.at(1 + j),
|
|
+ previousColumn.at(j) + ((source.at(i) == target.at(j)) ? 0 : 1)
|
|
+ });
|
|
}
|
|
+ column.swap(previousColumn);
|
|
}
|
|
|
|
- return ret;
|
|
+ return previousColumn.at(targetCount);
|
|
}
|
|
|
|
- std::vector<Value> search(const QVector<Key> &keys, //< TODO(Nico): replace this with a span
|
|
+ std::vector<Value> search(QVector<Key> keys, //< TODO(Nico): replace this with a span
|
|
size_t result_count_limit,
|
|
size_t max_edit_distance_ = 2) const
|
|
{
|
|
std::vector<Value> ret;
|
|
+ std::set<Value> seen;
|
|
+ // mapping <edit distance> -> <matching values>
|
|
+ std::vector<std::set<Value>> matches(max_edit_distance_ + 1);
|
|
if (!result_count_limit)
|
|
- return ret;
|
|
+ goto done;
|
|
|
|
- if (keys.isEmpty())
|
|
- return valuesAndSubvalues(result_count_limit);
|
|
-
|
|
- auto append = [&ret, result_count_limit](std::vector<Value> &&in) {
|
|
- for (auto &&v : in) {
|
|
- if (ret.size() >= result_count_limit)
|
|
- return;
|
|
-
|
|
- if (std::find(ret.begin(), ret.end(), v) == ret.end()) {
|
|
- ret.push_back(std::move(v));
|
|
- }
|
|
+ if (keys.isEmpty()) {
|
|
+ for (size_t i = 0; i < result_count_limit; i++) {
|
|
+ ret.push_back(entries[i].value);
|
|
}
|
|
- };
|
|
-
|
|
- auto limit = [&ret, result_count_limit] {
|
|
- return std::min(result_count_limit, (result_count_limit - ret.size()) * 2);
|
|
- };
|
|
-
|
|
- // Try first exact matches, then with maximum errors
|
|
- for (size_t max_edit_distance = 0;
|
|
- max_edit_distance <= max_edit_distance_ && ret.size() < result_count_limit;
|
|
- max_edit_distance += 1) {
|
|
- if (max_edit_distance && ret.size() < result_count_limit) {
|
|
- max_edit_distance -= 1;
|
|
-
|
|
- // swap chars case
|
|
- if (keys.size() >= 2) {
|
|
- auto t = this;
|
|
- for (int i = 1; i >= 0; i--) {
|
|
- if (auto e = t->next.find(keys[i]); e != t->next.end()) {
|
|
- t = &e->second;
|
|
- } else {
|
|
- t = nullptr;
|
|
- break;
|
|
- }
|
|
- }
|
|
-
|
|
- if (t) {
|
|
- append(t->search(keys.mid(2), limit(), max_edit_distance));
|
|
- }
|
|
- }
|
|
-
|
|
- // insert case
|
|
- for (const auto &[k, t] : this->next) {
|
|
- if (k == keys[0])
|
|
- continue;
|
|
- if (ret.size() >= limit())
|
|
- break;
|
|
-
|
|
- // insert
|
|
- append(t.search(keys, limit(), max_edit_distance));
|
|
- }
|
|
-
|
|
- // delete character case
|
|
- append(this->search(keys.mid(1), limit(), max_edit_distance));
|
|
-
|
|
- // substitute case
|
|
- for (const auto &[k, t] : this->next) {
|
|
- if (k == keys[0])
|
|
- continue;
|
|
- if (ret.size() >= limit())
|
|
- break;
|
|
+ goto done;
|
|
+ }
|
|
|
|
- // substitute
|
|
- append(t.search(keys.mid(1), limit(), max_edit_distance));
|
|
- }
|
|
+ // search for perfect infixes and order them early. this is an imperfect heuristic,
|
|
+ // but Works For Us™
|
|
+ for (const auto& entry : entries) {
|
|
+ if (std::search(entry.key.begin(), entry.key.end(), keys.begin(), keys.end())
|
|
+ != entry.key.end()) {
|
|
+ matches.at(0).insert(entry.value);
|
|
+ }
|
|
+ }
|
|
|
|
- max_edit_distance += 1;
|
|
+ // find all matches. we truncate the entry we're matching with if it's longer than
|
|
+ // the search key + edit distance to allow prefix searches
|
|
+ for (const auto& entry : entries) {
|
|
+ auto truncated_key = entry.key;
|
|
+ if (truncated_key.size() > keys.count() + max_edit_distance_) {
|
|
+ truncated_key.resize(keys.count() + max_edit_distance_);
|
|
+ }
|
|
+ const auto distance = levenshteinDistance(keys, truncated_key);
|
|
+ if (distance <= max_edit_distance_) {
|
|
+ matches.at(distance).insert(entry.value);
|
|
}
|
|
+ }
|
|
|
|
- if (auto e = this->next.find(keys[0]); e != this->next.end()) {
|
|
- append(e->second.search(keys.mid(1), limit(), max_edit_distance));
|
|
+ for (const auto& match_set : matches) {
|
|
+ for (const auto match : match_set) {
|
|
+ if (seen.insert(match).second) {
|
|
+ ret.push_back(match);
|
|
+ if (ret.size() >= result_count_limit)
|
|
+ goto done;
|
|
+ }
|
|
}
|
|
}
|
|
|
|
+ done:
|
|
+ // ugly hack because nheko currently sometimes just hangs if the completion list
|
|
+ // empties and refills multiple times during typing. shows as `(undefined)`, which
|
|
+ // is good enough.
|
|
+ if (ret.empty())
|
|
+ ret.push_back(-1);
|
|
return ret;
|
|
}
|
|
};
|