Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F30995
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/cpp/fts_fuzzy_match.hpp b/cpp/fts_fuzzy_match.hpp
new file mode 100644
index 0000000..9b77fe3
--- /dev/null
+++ b/cpp/fts_fuzzy_match.hpp
@@ -0,0 +1,212 @@
+// LICENSE
+//
+// This software is dual-licensed to the public domain and under the following
+// license: you are granted a perpetual, irrevocable license to copy, modify,
+// publish, and distribute this file as you see fit.
+//
+// VERSION
+// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
+// 0.1.0 (2016-03-28) Initial release
+//
+// AUTHOR
+// Forrest Smith
+//
+// NOTES
+// Compiling
+// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
+//
+// fuzzy_match_simple(...)
+// Returns true if each character in pattern is found sequentially within str
+//
+// fuzzy_match(...)
+// Returns true if pattern is found AND calculates a score.
+// Performs exhaustive search via recursion to find all possible matches and match with highest score.
+// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
+// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
+// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
+// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
+
+#ifndef FTS_FUZZY_MATCH_H
+#define FTS_FUZZY_MATCH_H
+
+#include <ctype.h> // ::tolower, ::toupper
+
+#include <cstdint> // uint8_t
+#include <cstdio>
+#include <cstring> // memcpy
+#include <iostream>
+
+// Public interface
+namespace fts {
+static bool fuzzy_match_simple(char const* pattern, char const* str);
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore);
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches);
+} // namespace fts
+
+#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
+namespace fts {
+
+// Forward declarations for "private" implementation
+namespace fuzzy_internal {
+static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, const char* strBegin,
+ uint8_t const* srcMatches, uint8_t* newMatches, int maxMatches, int nextMatch,
+ int& recursionCount, int recursionLimit);
+}
+
+// Public interface
+static bool fuzzy_match_simple(char const* pattern, char const* str) {
+ while (*pattern != '\0' && *str != '\0') {
+ if (tolower(*pattern) == tolower(*str))
+ ++pattern;
+ ++str;
+ }
+
+ return *pattern == '\0' ? true : false;
+}
+
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore) {
+ uint8_t matches[256];
+ return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
+}
+
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches) {
+ int recursionCount = 0;
+ int recursionLimit = 10;
+
+ return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit);
+}
+
+// Private implementation
+static bool fuzzy_internal::fuzzy_match_recursive(const char* pattern, const char* str, int& outScore,
+ const char* strBegin, uint8_t const* srcMatches, uint8_t* matches, int maxMatches,
+ int nextMatch, int& recursionCount, int recursionLimit) {
+ // Count recursions
+ ++recursionCount;
+ if (recursionCount >= recursionLimit)
+ return false;
+
+ // Detect end of strings
+ if (*pattern == '\0' || *str == '\0')
+ return false;
+
+ // Recursion params
+ bool recursiveMatch = false;
+ uint8_t bestRecursiveMatches[256];
+ int bestRecursiveScore = 0;
+
+ // Loop through pattern and str looking for a match
+ bool first_match = true;
+ while (*pattern != '\0' && *str != '\0') {
+ // Found match. We assume a space in the pattern is match so we can match paths better
+ if (tolower(*pattern) == tolower(*str) || *pattern == ' ') {
+ // Supplied matches buffer was too short
+ if (nextMatch >= maxMatches) {
+ return false;
+ }
+
+ // "Copy-on-Write" srcMatches into matches
+ if (first_match && srcMatches) {
+ memcpy(matches, srcMatches, nextMatch);
+ first_match = false;
+ }
+
+ // Recursive call that "skips" this match
+ uint8_t recursiveMatches[256];
+ int recursiveScore;
+ if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
+ // Pick best recursive score
+ if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
+ memcpy(bestRecursiveMatches, recursiveMatches, 256);
+ bestRecursiveScore = recursiveScore;
+ }
+ recursiveMatch = true;
+ }
+
+ // Advance
+ matches[nextMatch++] = (uint8_t)(str - strBegin);
+ ++pattern;
+ }
+ ++str;
+ }
+
+ // Determine if full pattern was matched
+ bool matched = *pattern == '\0' ? true : false;
+
+ // Calculate score
+ if (matched) {
+ const int sequential_bonus = 15; // bonus for adjacent matches
+ const int separator_bonus = 30; // bonus if match occurs after a separator
+ const int camel_bonus = 30; // bonus if match is uppercase and prev is lower
+ const int first_letter_bonus = 15; // bonus if the first letter is matched
+
+ const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
+ const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
+ const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
+
+ // Iterate str to end
+ while (*str != '\0')
+ ++str;
+
+ // Initialize score
+ outScore = 100;
+
+ // Apply leading letter penalty
+ int penalty = leading_letter_penalty * matches[0];
+ if (penalty < max_leading_letter_penalty)
+ penalty = max_leading_letter_penalty;
+ outScore += penalty;
+
+ // Apply unmatched penalty
+ int unmatched = (int)(str - strBegin) - nextMatch;
+ outScore += unmatched_letter_penalty * unmatched;
+
+ // Apply ordering bonuses
+ for (int i = 0; i < nextMatch; ++i) {
+ uint8_t currIdx = matches[i];
+
+ if (i > 0) {
+ uint8_t prevIdx = matches[i - 1];
+
+ // Sequential
+ if (currIdx == (prevIdx + 1))
+ outScore += sequential_bonus;
+ }
+
+ // Check for bonuses based on neighbor character value
+ if (currIdx > 0) {
+ // Camel case
+ char neighbor = strBegin[currIdx - 1];
+ char curr = strBegin[currIdx];
+ if (::islower(neighbor) && ::isupper(curr))
+ outScore += camel_bonus;
+
+ // Separator
+ bool neighborSeparator = neighbor == '_' || neighbor == ' ' || neighbor == '/' || neighbor == '-';
+ if (neighborSeparator)
+ outScore += separator_bonus;
+ } else {
+ // First letter
+ outScore += first_letter_bonus;
+ }
+ }
+ }
+
+ // Return best result
+ if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
+ // Recursive score is better than "this"
+ memcpy(matches, bestRecursiveMatches, maxMatches);
+ outScore = bestRecursiveScore;
+ return true;
+ } else if (matched) {
+ // "this" score is better than recursive
+ return true;
+ } else {
+ // no match
+ return false;
+ }
+}
+} // namespace fts
+
+#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
+
+#endif // FTS_FUZZY_MATCH_H
diff --git a/cpp/lib.cpp b/cpp/lib.cpp
index aadf00a..b97f478 100644
--- a/cpp/lib.cpp
+++ b/cpp/lib.cpp
@@ -1,41 +1,44 @@
#include <cstring>
#include <map>
#include <string>
#include <vector>
+#define FTS_FUZZY_MATCH_IMPLEMENTATION
#include "./file_scanner.hpp"
-#include "./fuzzy_match.hpp"
+#include "./fts_fuzzy_match.hpp"
#include "./match.hpp"
#include "./sorter.hpp"
namespace ivy {
static std::map<std::string, std::vector<std::string>> file_cache;
}; // namespace ivy
extern "C" void ivy_init(const char* dir) {
auto scanner = ivy::FileScanner(dir);
ivy::file_cache[std::string(dir)] = scanner.scan();
}
extern "C" int ivy_match(const char* pattern, const char* text) {
- auto matcher = ivy::FuzzyMatcher(pattern, 0);
- return matcher.match(text, false);
+ int score = 0;
+ fts::fuzzy_match(pattern, text, score);
+
+ return score;
}
extern "C" char* ivy_files(const char* search, const char* base_dir) {
if (!ivy::file_cache.count(base_dir)) {
auto scanner = ivy::FileScanner(base_dir);
ivy::file_cache[std::string(base_dir)] = scanner.scan();
}
auto sorter = ivy::Sorter(search);
// TODO(ade): Sort out how this memory is freed. I am assuming its in lua
// land via ffi
auto* s = new std::string();
for (ivy::Match const& match : sorter.sort(ivy::file_cache.at(base_dir))) {
s->append(match.content + "\n");
}
return s->data();
}
diff --git a/cpp/sorter.hpp b/cpp/sorter.hpp
index ae81bcb..02fa900 100644
--- a/cpp/sorter.hpp
+++ b/cpp/sorter.hpp
@@ -1,45 +1,46 @@
#pragma once
-#include "./fuzzy_match.hpp"
+#define FTS_FUZZY_MATCH_IMPLEMENTATION
+#include "./fts_fuzzy_match.hpp"
#include "./match.hpp"
#include "./thread_pool.hpp"
namespace ivy {
class Sorter {
ivy::ThreadPool m_thread_pool;
- std::string_view m_term;
+ std::string m_term;
std::mutex m_matches_lock;
std::vector<Match> m_matches;
inline void add_entry(const std::string& file) {
- ivy::FuzzyMatcher matcher(m_term, 0);
- int score = matcher.match(file, false);
+ int score = 0;
+ fts::fuzzy_match(m_term.c_str(), file.c_str(), score);
- if (score > -200) {
+ if (score > 50) {
std::unique_lock<std::mutex> lock(m_matches_lock);
m_matches.emplace_back(Match{score, std::move(file)});
}
}
public:
explicit Sorter(std::string_view term) : m_term(term) {}
~Sorter() { m_thread_pool.shutdown(); }
inline std::vector<Match> sort(const std::vector<std::string>& list) {
for (const std::string& item : list) {
m_thread_pool.push([&item, this]() { add_entry(item); });
}
while (!m_thread_pool.empty()) {
// Wait for all of the jobs to be finished
}
std::sort(m_matches.begin(), m_matches.end(), sort_match);
return m_matches;
}
};
} // namespace ivy
diff --git a/lua/ivy/libivy.lua b/lua/ivy/libivy.lua
index 5bc4a10..d6222fa 100644
--- a/lua/ivy/libivy.lua
+++ b/lua/ivy/libivy.lua
@@ -1,30 +1,29 @@
local library_path = (function()
local dirname = string.sub(debug.getinfo(1).source, 2, #"/fzf_lib.lua" * -1)
- -- return dirname .. "/../../build/Debug/lib/libivy.so"
return dirname .. "/../../build/Release/lib/libivy.so"
end)()
local ffi = require "ffi"
local ivy_c = ffi.load(library_path)
ffi.cdef [[
void ivy_init(const char*);
int ivy_match(const char*, const char*);
char* ivy_files(const char*, const char*);
]]
local libivy = {}
libivy.ivy_init = function(dir)
ivy_c.ivy_init(dir)
end
libivy.ivy_match = function(pattern, text)
return ivy_c.ivy_match(pattern, text)
end
libivy.ivy_files = function(pattern, base_dir)
return ffi.string(ivy_c.ivy_files(pattern, base_dir))
end
return libivy
diff --git a/lua/ivy/libivy_test.lua b/lua/ivy/libivy_test.lua
index 44159e5..28fb527 100644
--- a/lua/ivy/libivy_test.lua
+++ b/lua/ivy/libivy_test.lua
@@ -1,9 +1,9 @@
local libivy = require "ivy.libivy"
it("should run a simple match", function(t)
local score = libivy.ivy_match("term", "I am a serch term")
- if score > 0 then
- t.error "Score should not be grater than 0"
+ if score <= 0 then
+ t.error("Score should not be less than 0 found " .. score)
end
end)
diff --git a/lua/ivy/matcher_test.lua b/lua/ivy/matcher_test.lua
index 3b057f5..f80f565 100644
--- a/lua/ivy/matcher_test.lua
+++ b/lua/ivy/matcher_test.lua
@@ -1,30 +1,37 @@
local libivy = require "ivy.libivy"
-- Helper function to test a that string `one` has a higher match score than
-- string `two`. If string `one` has a lower score than string `two` a string
-- will be returned that can be used in body of an error. If not then `nil` is
-- returned and all is good.
local match_test = function(term, one, two)
local score_one = libivy.ivy_match(term, one)
local score_two = libivy.ivy_match(term, two)
if score_one < score_two then
return one .. " should be ranked higher than " .. two
end
return nil
end
it("sould match path separator", function(t)
local result = match_test("file", "some/file.lua", "somefile.lua")
if result then
t.error(result)
end
end)
+it("sould match pattern with spaces", function(t)
+ local result = match_test("so fi", "some/file.lua", "somefile.lua")
+ if result then
+ t.error(result)
+ end
+end)
+
it("sould match the start of a string", function(t)
local result = match_test("file", "file.lua", "somefile.lua")
if result then
t.error(result)
end
end)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Wed, Sep 10, 5:16 PM (9 h, 49 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
8977
Default Alt Text
(13 KB)
Attached To
Mode
R1 ivy.nvim
Attached
Detach File
Event Timeline
Log In to Comment