Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F131758
fts_fuzzy_match.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
7 KB
Referenced Files
None
Subscribers
None
fts_fuzzy_match.hpp
View Options
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
//
// VERSION
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
// 0.1.0 (2016-03-28) Initial release
//
// AUTHOR
// Forrest Smith
//
// NOTES
// Compiling
// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
//
// fuzzy_match_simple(...)
// Returns true if each character in pattern is found sequentially within str
//
// fuzzy_match(...)
// Returns true if pattern is found AND calculates a score.
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
#ifndef FTS_FUZZY_MATCH_H
#define FTS_FUZZY_MATCH_H
#include
<ctype.h>
// ::tolower, ::toupper
#include
<cstdint>
// uint8_t
#include
<cstdio>
#include
<cstring>
// memcpy
#include
<iostream>
// Public interface
namespace
fts
{
static
bool
fuzzy_match_simple
(
char
const
*
pattern
,
char
const
*
str
);
static
bool
fuzzy_match
(
char
const
*
pattern
,
char
const
*
str
,
int
&
outScore
);
static
bool
fuzzy_match
(
char
const
*
pattern
,
char
const
*
str
,
int
&
outScore
,
uint8_t
*
matches
,
int
maxMatches
);
}
// namespace fts
#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
namespace
fts
{
// Forward declarations for "private" implementation
namespace
fuzzy_internal
{
static
bool
fuzzy_match_recursive
(
const
char
*
pattern
,
const
char
*
str
,
int
&
outScore
,
const
char
*
strBegin
,
uint8_t
const
*
srcMatches
,
uint8_t
*
newMatches
,
int
maxMatches
,
int
nextMatch
,
int
&
recursionCount
,
int
recursionLimit
);
}
// Public interface
static
bool
fuzzy_match_simple
(
char
const
*
pattern
,
char
const
*
str
)
{
while
(
*
pattern
!=
'\0'
&&
*
str
!=
'\0'
)
{
if
(
tolower
(
*
pattern
)
==
tolower
(
*
str
))
++
pattern
;
++
str
;
}
return
*
pattern
==
'\0'
?
true
:
false
;
}
static
bool
fuzzy_match
(
char
const
*
pattern
,
char
const
*
str
,
int
&
outScore
)
{
uint8_t
matches
[
256
];
return
fuzzy_match
(
pattern
,
str
,
outScore
,
matches
,
sizeof
(
matches
));
}
static
bool
fuzzy_match
(
char
const
*
pattern
,
char
const
*
str
,
int
&
outScore
,
uint8_t
*
matches
,
int
maxMatches
)
{
int
recursionCount
=
0
;
int
recursionLimit
=
10
;
return
fuzzy_internal
::
fuzzy_match_recursive
(
pattern
,
str
,
outScore
,
str
,
nullptr
,
matches
,
maxMatches
,
0
,
recursionCount
,
recursionLimit
);
}
// Private implementation
static
bool
fuzzy_internal
::
fuzzy_match_recursive
(
const
char
*
pattern
,
const
char
*
str
,
int
&
outScore
,
const
char
*
strBegin
,
uint8_t
const
*
srcMatches
,
uint8_t
*
matches
,
int
maxMatches
,
int
nextMatch
,
int
&
recursionCount
,
int
recursionLimit
)
{
// Count recursions
++
recursionCount
;
if
(
recursionCount
>=
recursionLimit
)
return
false
;
// Detect end of strings
if
(
*
pattern
==
'\0'
||
*
str
==
'\0'
)
return
false
;
// Recursion params
bool
recursiveMatch
=
false
;
uint8_t
bestRecursiveMatches
[
256
];
int
bestRecursiveScore
=
0
;
// Loop through pattern and str looking for a match
bool
first_match
=
true
;
while
(
*
pattern
!=
'\0'
&&
*
str
!=
'\0'
)
{
// Found match. We assume a space in the pattern is match so we can match paths better
if
(
tolower
(
*
pattern
)
==
tolower
(
*
str
)
||
*
pattern
==
' '
)
{
// Supplied matches buffer was too short
if
(
nextMatch
>=
maxMatches
)
{
return
false
;
}
// "Copy-on-Write" srcMatches into matches
if
(
first_match
&&
srcMatches
)
{
memcpy
(
matches
,
srcMatches
,
nextMatch
);
first_match
=
false
;
}
// Recursive call that "skips" this match
uint8_t
recursiveMatches
[
256
];
int
recursiveScore
;
if
(
fuzzy_match_recursive
(
pattern
,
str
+
1
,
recursiveScore
,
strBegin
,
matches
,
recursiveMatches
,
sizeof
(
recursiveMatches
),
nextMatch
,
recursionCount
,
recursionLimit
))
{
// Pick best recursive score
if
(
!
recursiveMatch
||
recursiveScore
>
bestRecursiveScore
)
{
memcpy
(
bestRecursiveMatches
,
recursiveMatches
,
256
);
bestRecursiveScore
=
recursiveScore
;
}
recursiveMatch
=
true
;
}
// Advance
matches
[
nextMatch
++
]
=
(
uint8_t
)(
str
-
strBegin
);
++
pattern
;
}
++
str
;
}
// Determine if full pattern was matched
bool
matched
=
*
pattern
==
'\0'
?
true
:
false
;
// Calculate score
if
(
matched
)
{
const
int
sequential_bonus
=
15
;
// bonus for adjacent matches
const
int
separator_bonus
=
30
;
// bonus if match occurs after a separator
const
int
camel_bonus
=
30
;
// bonus if match is uppercase and prev is lower
const
int
first_letter_bonus
=
15
;
// bonus if the first letter is matched
const
int
leading_letter_penalty
=
-5
;
// penalty applied for every letter in str before the first match
const
int
max_leading_letter_penalty
=
-15
;
// maximum penalty for leading letters
const
int
unmatched_letter_penalty
=
-1
;
// penalty for every letter that doesn't matter
// Iterate str to end
while
(
*
str
!=
'\0'
)
++
str
;
// Initialize score
outScore
=
100
;
// Apply leading letter penalty
int
penalty
=
leading_letter_penalty
*
matches
[
0
];
if
(
penalty
<
max_leading_letter_penalty
)
penalty
=
max_leading_letter_penalty
;
outScore
+=
penalty
;
// Apply unmatched penalty
int
unmatched
=
(
int
)(
str
-
strBegin
)
-
nextMatch
;
outScore
+=
unmatched_letter_penalty
*
unmatched
;
// Apply ordering bonuses
for
(
int
i
=
0
;
i
<
nextMatch
;
++
i
)
{
uint8_t
currIdx
=
matches
[
i
];
if
(
i
>
0
)
{
uint8_t
prevIdx
=
matches
[
i
-
1
];
// Sequential
if
(
currIdx
==
(
prevIdx
+
1
))
outScore
+=
sequential_bonus
;
}
// Check for bonuses based on neighbor character value
if
(
currIdx
>
0
)
{
// Camel case
char
neighbor
=
strBegin
[
currIdx
-
1
];
char
curr
=
strBegin
[
currIdx
];
if
(
::
islower
(
neighbor
)
&&
::
isupper
(
curr
))
outScore
+=
camel_bonus
;
// Separator
bool
neighborSeparator
=
neighbor
==
'_'
||
neighbor
==
' '
||
neighbor
==
'/'
||
neighbor
==
'-'
;
if
(
neighborSeparator
)
outScore
+=
separator_bonus
;
}
else
{
// First letter
outScore
+=
first_letter_bonus
;
}
}
}
// Return best result
if
(
recursiveMatch
&&
(
!
matched
||
bestRecursiveScore
>
outScore
))
{
// Recursive score is better than "this"
memcpy
(
matches
,
bestRecursiveMatches
,
maxMatches
);
outScore
=
bestRecursiveScore
;
return
true
;
}
else
if
(
matched
)
{
// "this" score is better than recursive
return
true
;
}
else
{
// no match
return
false
;
}
}
}
// namespace fts
#endif
// FTS_FUZZY_MATCH_IMPLEMENTATION
#endif
// FTS_FUZZY_MATCH_H
File Metadata
Details
Attached
Mime Type
text/x-c++
Expires
Apr 6 2026, 6:24 PM (5 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15623
Default Alt Text
fts_fuzzy_match.hpp (7 KB)
Attached To
Mode
R1 ivy.nvim
Attached
Detach File
Event Timeline
Log In to Comment