summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam McCall <sam.mccall@gmail.com>2018-10-04 14:08:11 +0000
committerSam McCall <sam.mccall@gmail.com>2018-10-04 14:08:11 +0000
commit2cbfd57b54c5aaea3c7f7fa38f95dc205e4beebb (patch)
tree492c2ff258294132276181d5168e87bbe36e6e09
parente99f435298d4982cc5620ac1f3415bfbdc7fcd26 (diff)
[clangd] Remove one-segment-skipping from Dex trigrams.
Summary: Currently queries like "ab" can match identifiers like a_yellow_bee. The value of allowing this for exactly one segment but no more seems dubious. It costs ~3% of overall ram (~9% of posting list ram) and some quality. Reviewers: ilya-biryukov, ioeric Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D52885
-rw-r--r--clang-tools-extra/clangd/index/dex/Trigram.cpp6
-rw-r--r--clang-tools-extra/clangd/index/dex/Trigram.h2
-rw-r--r--clang-tools-extra/unittests/clangd/DexTests.cpp12
3 files changed, 7 insertions, 13 deletions
diff --git a/clang-tools-extra/clangd/index/dex/Trigram.cpp b/clang-tools-extra/clangd/index/dex/Trigram.cpp
index eb5473ddd53..641c899ab84 100644
--- a/clang-tools-extra/clangd/index/dex/Trigram.cpp
+++ b/clang-tools-extra/clangd/index/dex/Trigram.cpp
@@ -36,17 +36,15 @@ std::vector<Token> generateIdentifierTrigrams(llvm::StringRef Identifier) {
//
// * Next Tail - next character from the same segment
// * Next Head - front character of the next segment
- // * Skip-1-Next Head - front character of the skip-1-next segment
//
// Next stores tuples of three indices in the presented order, if a variant is
// not available then 0 is stored.
std::vector<std::array<unsigned, 3>> Next(LowercaseIdentifier.size());
- unsigned NextTail = 0, NextHead = 0, NextNextHead = 0;
+ unsigned NextTail = 0, NextHead = 0;
for (int I = LowercaseIdentifier.size() - 1; I >= 0; --I) {
- Next[I] = {{NextTail, NextHead, NextNextHead}};
+ Next[I] = {{NextTail, NextHead}};
NextTail = Roles[I] == Tail ? I : 0;
if (Roles[I] == Head) {
- NextNextHead = NextHead;
NextHead = I;
}
}
diff --git a/clang-tools-extra/clangd/index/dex/Trigram.h b/clang-tools-extra/clangd/index/dex/Trigram.h
index d0fb474ffd7..adce9f42340 100644
--- a/clang-tools-extra/clangd/index/dex/Trigram.h
+++ b/clang-tools-extra/clangd/index/dex/Trigram.h
@@ -37,7 +37,7 @@ namespace dex {
///
/// The symbol's name is broken into segments, e.g. "FooBar" has two segments.
/// Trigrams can start at any character in the input. Then we can choose to move
-/// to the next character, move to the start of the next segment, or stop.
+/// to the next character or to the start of the next segment.
///
/// Short trigrams (length 1-2) are used for short queries. These are:
/// - prefixes of the identifier, of length 1 and 2
diff --git a/clang-tools-extra/unittests/clangd/DexTests.cpp b/clang-tools-extra/unittests/clangd/DexTests.cpp
index 29319303e51..d7223ab1904 100644
--- a/clang-tools-extra/unittests/clangd/DexTests.cpp
+++ b/clang-tools-extra/unittests/clangd/DexTests.cpp
@@ -381,8 +381,7 @@ TEST(DexTrigrams, IdentifierTrigrams) {
"cde", "def"}));
EXPECT_THAT(generateIdentifierTrigrams("a_b_c_d_e_"),
- trigramsAre({"a", "a_", "ab", "abc", "abd", "acd", "ace", "bcd",
- "bce", "bde", "cde"}));
+ trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
EXPECT_THAT(generateIdentifierTrigrams("unique_ptr"),
trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
@@ -396,14 +395,11 @@ TEST(DexTrigrams, IdentifierTrigrams) {
EXPECT_THAT(generateIdentifierTrigrams("IsOK"),
trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
- auto X = generateIdentifierTrigrams("abc_defGhij__klm");
EXPECT_THAT(
generateIdentifierTrigrams("abc_defGhij__klm"),
- trigramsAre({"a", "ab", "ad", "abc", "abd", "abg", "ade", "adg",
- "adk", "agh", "agk", "bcd", "bcg", "bde", "bdg", "bdk",
- "bgh", "bgk", "cde", "cdg", "cdk", "cgh", "cgk", "def",
- "deg", "dek", "dgh", "dgk", "dkl", "efg", "efk", "egh",
- "egk", "ekl", "fgh", "fgk", "fkl", "ghi", "ghk", "gkl",
+ trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "adg", "bcd",
+ "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
+ "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
"hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
}