Page MenuHomePhorge

No OneTemporary

diff --git a/cpp/fts_fuzzy_match.hpp b/cpp/fts_fuzzy_match.hpp
new file mode 100644
index 0000000..9b77fe3
--- /dev/null
+++ b/cpp/fts_fuzzy_match.hpp
@@ -0,0 +1,212 @@
+// LICENSE
+//
+// This software is dual-licensed to the public domain and under the following
+// license: you are granted a perpetual, irrevocable license to copy, modify,
+// publish, and distribute this file as you see fit.
+//
+// VERSION
+// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
+// 0.1.0 (2016-03-28) Initial release
+//
+// AUTHOR
+// Forrest Smith
+//
+// NOTES
+// Compiling
+// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
+//
+// fuzzy_match_simple(...)
+// Returns true if each character in pattern is found sequentially within str
+//
+// fuzzy_match(...)
+// Returns true if pattern is found AND calculates a score.
+// Performs exhaustive search via recursion to find all possible matches and match with highest score.
+// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
+// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
+// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
+// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
+
+#ifndef FTS_FUZZY_MATCH_H
+#define FTS_FUZZY_MATCH_H
+
+#include <ctype.h> // ::tolower, ::toupper
+
+#include <cstdint> // uint8_t
+#include <cstdio>
+#include <cstring> // memcpy
+#include <iostream>
+
+// Public interface
+namespace fts {
+static bool fuzzy_match_simple(char const* pattern, char const* str);
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore);
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches);
+} // namespace fts
+
+#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
+namespace fts {
+
+// Forward declarations for "private" implementation
+namespace fuzzy_internal {
+static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, const char* strBegin,
+ uint8_t const* srcMatches, uint8_t* newMatches, int maxMatches, int nextMatch,
+ int& recursionCount, int recursionLimit);
+}
+
+// Public interface
+static bool fuzzy_match_simple(char const* pattern, char const* str) {
+ while (*pattern != '\0' && *str != '\0') {
+ if (tolower(*pattern) == tolower(*str))
+ ++pattern;
+ ++str;
+ }
+
+ return *pattern == '\0' ? true : false;
+}
+
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore) {
+ uint8_t matches[256];
+ return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
+}
+
+static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches) {
+ int recursionCount = 0;
+ int recursionLimit = 10;
+
+ return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit);
+}
+
+// Private implementation
+static bool fuzzy_internal::fuzzy_match_recursive(const char* pattern, const char* str, int& outScore,
+ const char* strBegin, uint8_t const* srcMatches, uint8_t* matches, int maxMatches,
+ int nextMatch, int& recursionCount, int recursionLimit) {
+ // Count recursions
+ ++recursionCount;
+ if (recursionCount >= recursionLimit)
+ return false;
+
+ // Detect end of strings
+ if (*pattern == '\0' || *str == '\0')
+ return false;
+
+ // Recursion params
+ bool recursiveMatch = false;
+ uint8_t bestRecursiveMatches[256];
+ int bestRecursiveScore = 0;
+
+ // Loop through pattern and str looking for a match
+ bool first_match = true;
+ while (*pattern != '\0' && *str != '\0') {
+ // Found match. We assume a space in the pattern is match so we can match paths better
+ if (tolower(*pattern) == tolower(*str) || *pattern == ' ') {
+ // Supplied matches buffer was too short
+ if (nextMatch >= maxMatches) {
+ return false;
+ }
+
+ // "Copy-on-Write" srcMatches into matches
+ if (first_match && srcMatches) {
+ memcpy(matches, srcMatches, nextMatch);
+ first_match = false;
+ }
+
+ // Recursive call that "skips" this match
+ uint8_t recursiveMatches[256];
+ int recursiveScore;
+ if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
+ // Pick best recursive score
+ if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
+ memcpy(bestRecursiveMatches, recursiveMatches, 256);
+ bestRecursiveScore = recursiveScore;
+ }
+ recursiveMatch = true;
+ }
+
+ // Advance
+ matches[nextMatch++] = (uint8_t)(str - strBegin);
+ ++pattern;
+ }
+ ++str;
+ }
+
+ // Determine if full pattern was matched
+ bool matched = *pattern == '\0' ? true : false;
+
+ // Calculate score
+ if (matched) {
+ const int sequential_bonus = 15; // bonus for adjacent matches
+ const int separator_bonus = 30; // bonus if match occurs after a separator
+ const int camel_bonus = 30; // bonus if match is uppercase and prev is lower
+ const int first_letter_bonus = 15; // bonus if the first letter is matched
+
+ const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
+ const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
+ const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
+
+ // Iterate str to end
+ while (*str != '\0')
+ ++str;
+
+ // Initialize score
+ outScore = 100;
+
+ // Apply leading letter penalty
+ int penalty = leading_letter_penalty * matches[0];
+ if (penalty < max_leading_letter_penalty)
+ penalty = max_leading_letter_penalty;
+ outScore += penalty;
+
+ // Apply unmatched penalty
+ int unmatched = (int)(str - strBegin) - nextMatch;
+ outScore += unmatched_letter_penalty * unmatched;
+
+ // Apply ordering bonuses
+ for (int i = 0; i < nextMatch; ++i) {
+ uint8_t currIdx = matches[i];
+
+ if (i > 0) {
+ uint8_t prevIdx = matches[i - 1];
+
+ // Sequential
+ if (currIdx == (prevIdx + 1))
+ outScore += sequential_bonus;
+ }
+
+ // Check for bonuses based on neighbor character value
+ if (currIdx > 0) {
+ // Camel case
+ char neighbor = strBegin[currIdx - 1];
+ char curr = strBegin[currIdx];
+ if (::islower(neighbor) && ::isupper(curr))
+ outScore += camel_bonus;
+
+ // Separator
+ bool neighborSeparator = neighbor == '_' || neighbor == ' ' || neighbor == '/' || neighbor == '-';
+ if (neighborSeparator)
+ outScore += separator_bonus;
+ } else {
+ // First letter
+ outScore += first_letter_bonus;
+ }
+ }
+ }
+
+ // Return best result
+ if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
+ // Recursive score is better than "this"
+ memcpy(matches, bestRecursiveMatches, maxMatches);
+ outScore = bestRecursiveScore;
+ return true;
+ } else if (matched) {
+ // "this" score is better than recursive
+ return true;
+ } else {
+ // no match
+ return false;
+ }
+}
+} // namespace fts
+
+#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
+
+#endif // FTS_FUZZY_MATCH_H
diff --git a/cpp/lib.cpp b/cpp/lib.cpp
index aadf00a..b97f478 100644
--- a/cpp/lib.cpp
+++ b/cpp/lib.cpp
@@ -1,41 +1,44 @@
#include <cstring>
#include <map>
#include <string>
#include <vector>
+#define FTS_FUZZY_MATCH_IMPLEMENTATION
#include "./file_scanner.hpp"
-#include "./fuzzy_match.hpp"
+#include "./fts_fuzzy_match.hpp"
#include "./match.hpp"
#include "./sorter.hpp"
namespace ivy {
static std::map<std::string, std::vector<std::string>> file_cache;
}; // namespace ivy
extern "C" void ivy_init(const char* dir) {
auto scanner = ivy::FileScanner(dir);
ivy::file_cache[std::string(dir)] = scanner.scan();
}
extern "C" int ivy_match(const char* pattern, const char* text) {
- auto matcher = ivy::FuzzyMatcher(pattern, 0);
- return matcher.match(text, false);
+ int score = 0;
+ fts::fuzzy_match(pattern, text, score);
+
+ return score;
}
extern "C" char* ivy_files(const char* search, const char* base_dir) {
if (!ivy::file_cache.count(base_dir)) {
auto scanner = ivy::FileScanner(base_dir);
ivy::file_cache[std::string(base_dir)] = scanner.scan();
}
auto sorter = ivy::Sorter(search);
// TODO(ade): Sort out how this memory is freed. I am assuming its in lua
// land via ffi
auto* s = new std::string();
for (ivy::Match const& match : sorter.sort(ivy::file_cache.at(base_dir))) {
s->append(match.content + "\n");
}
return s->data();
}
diff --git a/cpp/sorter.hpp b/cpp/sorter.hpp
index ae81bcb..02fa900 100644
--- a/cpp/sorter.hpp
+++ b/cpp/sorter.hpp
@@ -1,45 +1,46 @@
#pragma once
-#include "./fuzzy_match.hpp"
+#define FTS_FUZZY_MATCH_IMPLEMENTATION
+#include "./fts_fuzzy_match.hpp"
#include "./match.hpp"
#include "./thread_pool.hpp"
namespace ivy {
class Sorter {
ivy::ThreadPool m_thread_pool;
- std::string_view m_term;
+ std::string m_term;
std::mutex m_matches_lock;
std::vector<Match> m_matches;
inline void add_entry(const std::string& file) {
- ivy::FuzzyMatcher matcher(m_term, 0);
- int score = matcher.match(file, false);
+ int score = 0;
+ fts::fuzzy_match(m_term.c_str(), file.c_str(), score);
- if (score > -200) {
+ if (score > 50) {
std::unique_lock<std::mutex> lock(m_matches_lock);
m_matches.emplace_back(Match{score, std::move(file)});
}
}
public:
explicit Sorter(std::string_view term) : m_term(term) {}
~Sorter() { m_thread_pool.shutdown(); }
inline std::vector<Match> sort(const std::vector<std::string>& list) {
for (const std::string& item : list) {
m_thread_pool.push([&item, this]() { add_entry(item); });
}
while (!m_thread_pool.empty()) {
// Wait for all of the jobs to be finished
}
std::sort(m_matches.begin(), m_matches.end(), sort_match);
return m_matches;
}
};
} // namespace ivy
diff --git a/lua/ivy/libivy.lua b/lua/ivy/libivy.lua
index 5bc4a10..d6222fa 100644
--- a/lua/ivy/libivy.lua
+++ b/lua/ivy/libivy.lua
@@ -1,30 +1,29 @@
local library_path = (function()
local dirname = string.sub(debug.getinfo(1).source, 2, #"/fzf_lib.lua" * -1)
- -- return dirname .. "/../../build/Debug/lib/libivy.so"
return dirname .. "/../../build/Release/lib/libivy.so"
end)()
local ffi = require "ffi"
local ivy_c = ffi.load(library_path)
ffi.cdef [[
void ivy_init(const char*);
int ivy_match(const char*, const char*);
char* ivy_files(const char*, const char*);
]]
local libivy = {}
libivy.ivy_init = function(dir)
ivy_c.ivy_init(dir)
end
libivy.ivy_match = function(pattern, text)
return ivy_c.ivy_match(pattern, text)
end
libivy.ivy_files = function(pattern, base_dir)
return ffi.string(ivy_c.ivy_files(pattern, base_dir))
end
return libivy
diff --git a/lua/ivy/libivy_test.lua b/lua/ivy/libivy_test.lua
index 44159e5..28fb527 100644
--- a/lua/ivy/libivy_test.lua
+++ b/lua/ivy/libivy_test.lua
@@ -1,9 +1,9 @@
local libivy = require "ivy.libivy"
it("should run a simple match", function(t)
local score = libivy.ivy_match("term", "I am a serch term")
- if score > 0 then
- t.error "Score should not be grater than 0"
+ if score <= 0 then
+ t.error("Score should not be less than 0 found " .. score)
end
end)
diff --git a/lua/ivy/matcher_test.lua b/lua/ivy/matcher_test.lua
index 3b057f5..f80f565 100644
--- a/lua/ivy/matcher_test.lua
+++ b/lua/ivy/matcher_test.lua
@@ -1,30 +1,37 @@
local libivy = require "ivy.libivy"
-- Helper function to test a that string `one` has a higher match score than
-- string `two`. If string `one` has a lower score than string `two` a string
-- will be returned that can be used in body of an error. If not then `nil` is
-- returned and all is good.
local match_test = function(term, one, two)
local score_one = libivy.ivy_match(term, one)
local score_two = libivy.ivy_match(term, two)
if score_one < score_two then
return one .. " should be ranked higher than " .. two
end
return nil
end
it("sould match path separator", function(t)
local result = match_test("file", "some/file.lua", "somefile.lua")
if result then
t.error(result)
end
end)
+it("sould match pattern with spaces", function(t)
+ local result = match_test("so fi", "some/file.lua", "somefile.lua")
+ if result then
+ t.error(result)
+ end
+end)
+
it("sould match the start of a string", function(t)
local result = match_test("file", "file.lua", "somefile.lua")
if result then
t.error(result)
end
end)

File Metadata

Mime Type
text/x-diff
Expires
Wed, Sep 10, 5:16 PM (9 h, 49 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
8977
Default Alt Text
(13 KB)

Event Timeline