diff --git a/cpp/fts_fuzzy_match.hpp b/cpp/fts_fuzzy_match.hpp new file mode 100644 index 0000000..9b77fe3 --- /dev/null +++ b/cpp/fts_fuzzy_match.hpp @@ -0,0 +1,212 @@ +// LICENSE +// +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// VERSION +// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score +// 0.1.0 (2016-03-28) Initial release +// +// AUTHOR +// Forrest Smith +// +// NOTES +// Compiling +// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation. +// +// fuzzy_match_simple(...) +// Returns true if each character in pattern is found sequentially within str +// +// fuzzy_match(...) +// Returns true if pattern is found AND calculates a score. +// Performs exhaustive search via recursion to find all possible matches and match with highest score. +// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern. +// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") +// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters. +// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning. + +#ifndef FTS_FUZZY_MATCH_H +#define FTS_FUZZY_MATCH_H + +#include // ::tolower, ::toupper + +#include // uint8_t +#include +#include // memcpy +#include + +// Public interface +namespace fts { +static bool fuzzy_match_simple(char const* pattern, char const* str); +static bool fuzzy_match(char const* pattern, char const* str, int& outScore); +static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches); +} // namespace fts + +#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION +namespace fts { + +// Forward declarations for "private" implementation +namespace fuzzy_internal { +static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, const char* strBegin, + uint8_t const* srcMatches, uint8_t* newMatches, int maxMatches, int nextMatch, + int& recursionCount, int recursionLimit); +} + +// Public interface +static bool fuzzy_match_simple(char const* pattern, char const* str) { + while (*pattern != '\0' && *str != '\0') { + if (tolower(*pattern) == tolower(*str)) + ++pattern; + ++str; + } + + return *pattern == '\0' ? true : false; +} + +static bool fuzzy_match(char const* pattern, char const* str, int& outScore) { + uint8_t matches[256]; + return fuzzy_match(pattern, str, outScore, matches, sizeof(matches)); +} + +static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches) { + int recursionCount = 0; + int recursionLimit = 10; + + return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit); +} + +// Private implementation +static bool fuzzy_internal::fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, + const char* strBegin, uint8_t const* srcMatches, uint8_t* matches, int maxMatches, + int nextMatch, int& recursionCount, int recursionLimit) { + // Count recursions + ++recursionCount; + if (recursionCount >= recursionLimit) + return false; + + // Detect end of strings + if (*pattern == '\0' || *str == '\0') + return false; + + // Recursion params + bool recursiveMatch = false; + uint8_t bestRecursiveMatches[256]; + int bestRecursiveScore = 0; + + // Loop through pattern and str looking for a match + bool first_match = true; + while (*pattern != '\0' && *str != '\0') { + // Found match. We assume a space in the pattern is match so we can match paths better + if (tolower(*pattern) == tolower(*str) || *pattern == ' ') { + // Supplied matches buffer was too short + if (nextMatch >= maxMatches) { + return false; + } + + // "Copy-on-Write" srcMatches into matches + if (first_match && srcMatches) { + memcpy(matches, srcMatches, nextMatch); + first_match = false; + } + + // Recursive call that "skips" this match + uint8_t recursiveMatches[256]; + int recursiveScore; + if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) { + // Pick best recursive score + if (!recursiveMatch || recursiveScore > bestRecursiveScore) { + memcpy(bestRecursiveMatches, recursiveMatches, 256); + bestRecursiveScore = recursiveScore; + } + recursiveMatch = true; + } + + // Advance + matches[nextMatch++] = (uint8_t)(str - strBegin); + ++pattern; + } + ++str; + } + + // Determine if full pattern was matched + bool matched = *pattern == '\0' ? true : false; + + // Calculate score + if (matched) { + const int sequential_bonus = 15; // bonus for adjacent matches + const int separator_bonus = 30; // bonus if match occurs after a separator + const int camel_bonus = 30; // bonus if match is uppercase and prev is lower + const int first_letter_bonus = 15; // bonus if the first letter is matched + + const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match + const int max_leading_letter_penalty = -15; // maximum penalty for leading letters + const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter + + // Iterate str to end + while (*str != '\0') + ++str; + + // Initialize score + outScore = 100; + + // Apply leading letter penalty + int penalty = leading_letter_penalty * matches[0]; + if (penalty < max_leading_letter_penalty) + penalty = max_leading_letter_penalty; + outScore += penalty; + + // Apply unmatched penalty + int unmatched = (int)(str - strBegin) - nextMatch; + outScore += unmatched_letter_penalty * unmatched; + + // Apply ordering bonuses + for (int i = 0; i < nextMatch; ++i) { + uint8_t currIdx = matches[i]; + + if (i > 0) { + uint8_t prevIdx = matches[i - 1]; + + // Sequential + if (currIdx == (prevIdx + 1)) + outScore += sequential_bonus; + } + + // Check for bonuses based on neighbor character value + if (currIdx > 0) { + // Camel case + char neighbor = strBegin[currIdx - 1]; + char curr = strBegin[currIdx]; + if (::islower(neighbor) && ::isupper(curr)) + outScore += camel_bonus; + + // Separator + bool neighborSeparator = neighbor == '_' || neighbor == ' ' || neighbor == '/' || neighbor == '-'; + if (neighborSeparator) + outScore += separator_bonus; + } else { + // First letter + outScore += first_letter_bonus; + } + } + } + + // Return best result + if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) { + // Recursive score is better than "this" + memcpy(matches, bestRecursiveMatches, maxMatches); + outScore = bestRecursiveScore; + return true; + } else if (matched) { + // "this" score is better than recursive + return true; + } else { + // no match + return false; + } +} +} // namespace fts + +#endif // FTS_FUZZY_MATCH_IMPLEMENTATION + +#endif // FTS_FUZZY_MATCH_H diff --git a/cpp/lib.cpp b/cpp/lib.cpp index aadf00a..b97f478 100644 --- a/cpp/lib.cpp +++ b/cpp/lib.cpp @@ -1,41 +1,44 @@ #include #include #include #include +#define FTS_FUZZY_MATCH_IMPLEMENTATION #include "./file_scanner.hpp" -#include "./fuzzy_match.hpp" +#include "./fts_fuzzy_match.hpp" #include "./match.hpp" #include "./sorter.hpp" namespace ivy { static std::map> file_cache; }; // namespace ivy extern "C" void ivy_init(const char* dir) { auto scanner = ivy::FileScanner(dir); ivy::file_cache[std::string(dir)] = scanner.scan(); } extern "C" int ivy_match(const char* pattern, const char* text) { - auto matcher = ivy::FuzzyMatcher(pattern, 0); - return matcher.match(text, false); + int score = 0; + fts::fuzzy_match(pattern, text, score); + + return score; } extern "C" char* ivy_files(const char* search, const char* base_dir) { if (!ivy::file_cache.count(base_dir)) { auto scanner = ivy::FileScanner(base_dir); ivy::file_cache[std::string(base_dir)] = scanner.scan(); } auto sorter = ivy::Sorter(search); // TODO(ade): Sort out how this memory is freed. I am assuming its in lua // land via ffi auto* s = new std::string(); for (ivy::Match const& match : sorter.sort(ivy::file_cache.at(base_dir))) { s->append(match.content + "\n"); } return s->data(); } diff --git a/cpp/sorter.hpp b/cpp/sorter.hpp index ae81bcb..02fa900 100644 --- a/cpp/sorter.hpp +++ b/cpp/sorter.hpp @@ -1,45 +1,46 @@ #pragma once -#include "./fuzzy_match.hpp" +#define FTS_FUZZY_MATCH_IMPLEMENTATION +#include "./fts_fuzzy_match.hpp" #include "./match.hpp" #include "./thread_pool.hpp" namespace ivy { class Sorter { ivy::ThreadPool m_thread_pool; - std::string_view m_term; + std::string m_term; std::mutex m_matches_lock; std::vector m_matches; inline void add_entry(const std::string& file) { - ivy::FuzzyMatcher matcher(m_term, 0); - int score = matcher.match(file, false); + int score = 0; + fts::fuzzy_match(m_term.c_str(), file.c_str(), score); - if (score > -200) { + if (score > 50) { std::unique_lock lock(m_matches_lock); m_matches.emplace_back(Match{score, std::move(file)}); } } public: explicit Sorter(std::string_view term) : m_term(term) {} ~Sorter() { m_thread_pool.shutdown(); } inline std::vector sort(const std::vector& list) { for (const std::string& item : list) { m_thread_pool.push([&item, this]() { add_entry(item); }); } while (!m_thread_pool.empty()) { // Wait for all of the jobs to be finished } std::sort(m_matches.begin(), m_matches.end(), sort_match); return m_matches; } }; } // namespace ivy diff --git a/lua/ivy/libivy.lua b/lua/ivy/libivy.lua index 5bc4a10..d6222fa 100644 --- a/lua/ivy/libivy.lua +++ b/lua/ivy/libivy.lua @@ -1,30 +1,29 @@ local library_path = (function() local dirname = string.sub(debug.getinfo(1).source, 2, #"/fzf_lib.lua" * -1) - -- return dirname .. "/../../build/Debug/lib/libivy.so" return dirname .. "/../../build/Release/lib/libivy.so" end)() local ffi = require "ffi" local ivy_c = ffi.load(library_path) ffi.cdef [[ void ivy_init(const char*); int ivy_match(const char*, const char*); char* ivy_files(const char*, const char*); ]] local libivy = {} libivy.ivy_init = function(dir) ivy_c.ivy_init(dir) end libivy.ivy_match = function(pattern, text) return ivy_c.ivy_match(pattern, text) end libivy.ivy_files = function(pattern, base_dir) return ffi.string(ivy_c.ivy_files(pattern, base_dir)) end return libivy diff --git a/lua/ivy/libivy_test.lua b/lua/ivy/libivy_test.lua index 44159e5..28fb527 100644 --- a/lua/ivy/libivy_test.lua +++ b/lua/ivy/libivy_test.lua @@ -1,9 +1,9 @@ local libivy = require "ivy.libivy" it("should run a simple match", function(t) local score = libivy.ivy_match("term", "I am a serch term") - if score > 0 then - t.error "Score should not be grater than 0" + if score <= 0 then + t.error("Score should not be less than 0 found " .. score) end end) diff --git a/lua/ivy/matcher_test.lua b/lua/ivy/matcher_test.lua index 3b057f5..f80f565 100644 --- a/lua/ivy/matcher_test.lua +++ b/lua/ivy/matcher_test.lua @@ -1,30 +1,37 @@ local libivy = require "ivy.libivy" -- Helper function to test a that string `one` has a higher match score than -- string `two`. If string `one` has a lower score than string `two` a string -- will be returned that can be used in body of an error. If not then `nil` is -- returned and all is good. local match_test = function(term, one, two) local score_one = libivy.ivy_match(term, one) local score_two = libivy.ivy_match(term, two) if score_one < score_two then return one .. " should be ranked higher than " .. two end return nil end it("sould match path separator", function(t) local result = match_test("file", "some/file.lua", "somefile.lua") if result then t.error(result) end end) +it("sould match pattern with spaces", function(t) + local result = match_test("so fi", "some/file.lua", "somefile.lua") + if result then + t.error(result) + end +end) + it("sould match the start of a string", function(t) local result = match_test("file", "file.lua", "somefile.lua") if result then t.error(result) end end)