Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ranked match: prefer when each path component matches within a path component #5033

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/memory.hh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ enum class MemoryDomain
Events,
Completion,
Regex,
RankedMatch,
Count
};

Expand Down Expand Up @@ -68,6 +69,7 @@ inline const char* domain_name(MemoryDomain domain)
case MemoryDomain::Events: return "Events";
case MemoryDomain::Completion: return "Completion";
case MemoryDomain::Regex: return "Regex";
case MemoryDomain::RankedMatch: return "RankedMatch";
case MemoryDomain::Count: break;
}
kak_assert(false);
Expand Down
45 changes: 45 additions & 0 deletions src/ranges.hh
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,51 @@ ConcatView<DecayRange<Range1>, DecayRange<Range2>> concatenated(Range1&& range1,
return {range1, range2};
}

template<typename Range1, typename Range2>
struct ZipView
{
using RangeIt1 = decltype(std::declval<Range1>().begin());
using RangeIt2 = decltype(std::declval<Range2>().begin());
using ValueType = typename std::tuple<
const typename std::iterator_traits<RangeIt1>::value_type*,
const typename std::iterator_traits<RangeIt2>::value_type*>;

struct Iterator
{
using difference_type = ptrdiff_t;
using value_type = ValueType;
using pointer = ValueType*;
using reference = ValueType&;
using iterator_category = std::forward_iterator_tag;

Iterator(RangeIt1 it1, RangeIt2 it2)
: m_it1(std::move(it1)), m_it2(std::move(it2)) {}

ValueType operator*() { return std::make_tuple(&*m_it1, &*m_it2); }
Iterator& operator++() { ++m_it1; ++m_it2; return *this; }
Iterator operator++(int) { auto copy = *this; ++*this; return copy; }

friend bool operator==(const Iterator& lhs, const Iterator& rhs)
{ return lhs.m_it1 == rhs.m_it1 or lhs.m_it2 == rhs.m_it2; }

private:
RangeIt1 m_it1;
RangeIt2 m_it2;
};

Iterator begin() const { return {m_range1.begin(), m_range2.begin()}; }
Iterator end() const { return {m_range1.end(), m_range2.end()}; }

Range1 m_range1;
Range2 m_range2;
};

template<typename Range1, typename Range2>
ZipView<DecayRange<Range1>, DecayRange<Range2>> zip(Range1&& range1, Range2&& range2)
{
return {range1, range2};
}

template<typename Range, typename T>
auto find(Range&& range, const T& value)
{
Expand Down
52 changes: 52 additions & 0 deletions src/ranked_match.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,39 @@ RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
if (not res)
return;

if (contains(query, '/'))
{
auto c_component_begin = candidate.begin();
auto c_component_end = find(candidate, '/');;
for (StringView query_component : query | split('/') |
transform([](auto t) { auto &[begin, end] = t; return StringView{begin, end}; }))
{
if (c_component_end == candidate.end())
{
m_path_component_matches.clear();
break;
}
bool match = false;
while (not match)
{
StringView c_component{c_component_begin, c_component_end};
RankedMatch component_match{c_component, query_component};
match = (bool)component_match;
if (match)
m_path_component_matches.push_back(std::move(component_match));
if (c_component_end == candidate.end())
break;
c_component_begin = c_component_end + 1;
c_component_end = std::find(c_component_begin, candidate.end(), '/');
}
if (not match)
{
m_path_component_matches.clear();
break;
}
}
}

m_candidate = candidate;
m_matches = true;
m_max_index = res->max_index;
Expand Down Expand Up @@ -199,6 +232,13 @@ bool RankedMatch::operator<(const RankedMatch& other) const
if (diff != Flags::None)
return (int)(m_flags & diff) > (int)(other.m_flags & diff);

if (m_path_component_matches.empty() ^ other.m_path_component_matches.empty())
return m_path_component_matches.size() > other.m_path_component_matches.size();
kak_assert(m_path_component_matches.size() == other.m_path_component_matches.size());
for (auto [lhs, rhs] : zip(m_path_component_matches, other.m_path_component_matches))
if (*lhs < *rhs)
return true;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably if we use the same recursive-matching for the basename (even if the query contains no slash) then we can make @raiguard's test work:

kak_assert(preferred("luaremote", "src/script/LuaRemote.cpp", "tests/TestLuaRemote.cpp"));

because luaremote is a prefix (or is it?) of the basename so the RankedMatch will win


// If we are SingleWord, we dont want to take word boundaries from other
// words into account.
if (not (m_flags & (Flags::Prefix | Flags::SingleWord)) and
Expand Down Expand Up @@ -282,6 +322,18 @@ UnitTest test_ranked_match{[] {
kak_assert(preferred("fb", "foo_bar/", "foo.bar"));
kak_assert(preferred("foo_bar", "test_foo_bar", "foo_test_bar"));
kak_assert(preferred("rm.cc", "src/ranked_match.cc", "test/README.asciidoc"));
kak_assert(preferred("CAPRAN", "CAPABILITY_RANGE_FORMATTING", "CAPABILITY_SELECTION_RANGE"));
kak_assert(preferred("mal", "malt", "formal"));
kak_assert(preferred("cne", "cargo-next-error", "comment-line"));
kak_assert(preferred("cne", "cargo-next-error", "ccls-navigate"));
kak_assert(preferred("cpe", "cargo-previous-error", "cpp-alternative-file"));
kak_assert(preferred("server_", "server_capabilities", "SERVER_CANCELLED"));
kak_assert(preferred("server_", "server_capabilities_capabilities", "SERVER_CANCELLED"));
kak_assert(preferred("codegen", "clang/test/CodeGen/asm.c", "clang/test/ASTMerge/codegen-body/test.c"));
kak_assert(preferred("cho", "tchou kanaky", "tachou kanay")); // Prefer the leftmost match.
kak_assert(preferred("clangd", "clang-tools-extra/clangd/README.md", "clang/docs/conf.py"));
kak_assert(preferred("lang/haystack/needle.c", "git.evilcorp.com/language/haystack/aaa/needle.c", "git.evilcorp.com/aaa/ng/wrong-haystack/needle.cpp"));
kak_assert(preferred("evilcorp-lint/bar.go", "scripts/evilcorp-lint/foo/bar.go", "src/evilcorp-client/foo/bar.go"));
}};

UnitTest test_used_letters{[]()
Expand Down
2 changes: 2 additions & 0 deletions src/ranked_match.hh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "string.hh"
#include "meta.hh"
#include "vector.hh"

#include <cstdint>

Expand Down Expand Up @@ -52,6 +53,7 @@ private:
StringView m_candidate{};
bool m_matches = false;
Flags m_flags = Flags::None;
Vector<RankedMatch, MemoryDomain::RankedMatch> m_path_component_matches;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If these semantics are okay we could get rid of this allocation by computing the path component matches lazily in RankedMatch::operator<. I think this could work fine because we use a heap to get the top 100 or so completions, so we never need to sort everything; the number of calls to RankedMatch::operator< is linear.
I'd need to check.

int m_word_boundary_match_count = 0;
int m_max_index = 0;
};
Expand Down