1 /** 2 * Implements a fuzzy search algorithm, yielding the same results as what the 3 * fuzzy match algorithm in VSCode matches. This is basically a fuzzy `contains` 4 * / `canFind` method for strings and arrays. 5 * 6 * However this library does not offer any fuzzy match scoring. This 7 * functionality might be added in the future in new methods. The check-only 8 * methods are ideal if the result is intended to be passed into other systems 9 * that are responsible for display and sorting. (e.g. from DCD / serve-d into 10 * VSCode, IDEs or other editors) 11 * 12 * It is quite efficient, allocates no memory and works with character ranges as 13 * well as simple arrays. Pre-compiled string versions are available for reduced 14 * compilation time for simple string/wstring/dstring matches. 15 * 16 * This works by going through the search string character by character, on 17 * every matching character, the matcher string advances by one character. If 18 * the matcher string is completely checked, the fuzzy match returns true. 19 * 20 * Methods: 21 * - $(LREF fuzzyMatchesUni) - fuzzy contains method with unicode decoding 22 * - $(LREF fuzzyMatchesRaw) - fuzzy contains method on arbitrary arrays 23 */ 24 module fuzzymatch; 25 26 version (D_BetterC) 27 { 28 } 29 else 30 version = HasUnicodeFuzzymatch; 31 32 // import std.traits : isSomeString; 33 private enum bool shouldBeDecoded(T) = is(immutable T == immutable C[], C) && (is(C == char) || is(C == wchar)); 34 35 pragma(inline, true) 36 private bool empty(T)(scope const(T)[] s) @safe pure nothrow @nogc 37 { 38 return s.length == 0; 39 } 40 41 version (HasUnicodeFuzzymatch) 42 { 43 /** 44 * Checks if doesThis contains matchThis in a way that a fuzzy search would find 45 * it. 46 * 47 * Performs basic case-insensitivity checks. UTF decodes strings and wstrings, 48 * skipping invalid characters. Note that the case-insensitive version does not 49 * check for unicode sequences, such as German `ß` matching `ss`, but only by 50 * comparing single codepoints using their upper variant. 51 * 52 * To perform no UTF decoding, either call this method with dstrings (UTF32 53 * strings) or, if you checked that the string ONLY contains single code unit 54 * per user-conceived character, by using `.representation` and then 55 * $(LREF fuzzyMatchesRaw) - note that this method only works case-sensitive and 56 * won't perform any case-transformations! 57 * 58 * If you have strings, you can save compilation speed by using the pre-compiled 59 * method $(LREF fuzzyMatchesString), which accepts strings, wstring or dstrings. 60 * 61 * The $(LEF fuzzyMatchesStringCS) method is another pre-compiled version of 62 * this fuzzyMatchesUni function, but performs caseSensitive checks. 63 * 64 * See_Also: 65 * - $(LREF fuzzyMatchesRaw) - performs no unicode decoding, not usable with 66 * strings, but with representations. 67 */ 68 bool fuzzyMatchesUni(bool caseSensitive = false, R1, R2)(scope R1 doesThis, scope R2 matchThis) @safe pure nothrow @nogc 69 if (!(is(R1 == dstring) && is(R2 == dstring) && caseSensitive)) 70 { 71 import std.utf : byUTF, decode, UseReplacementDchar; 72 static if (!caseSensitive) 73 import std.uni : toUpper; 74 75 if (matchThis.empty) 76 return true; 77 78 size_t i = 0; 79 dchar next = matchThis.decode!(UseReplacementDchar.yes)(i); 80 const matchThisLength = matchThis.length; 81 static if (!caseSensitive) 82 next = next.toUpper; 83 foreach (c; doesThis.byUTF!dchar) 84 { 85 static if (!caseSensitive) 86 bool match = toUpper(c) == next; 87 else 88 bool match = c == next; 89 if (match) 90 { 91 if (i >= matchThisLength) 92 return true; 93 next = matchThis.decode!(UseReplacementDchar.yes)(i); 94 static if (!caseSensitive) 95 next = next.toUpper; 96 } 97 } 98 return false; 99 } 100 101 /// ditto 102 pragma(inline, true) 103 bool fuzzyMatchesUni(bool caseSensitive = false, R1, R2)(scope R1 doesThis, scope R2 matchThis) @safe pure nothrow @nogc 104 if (is(R1 == dstring) && is(R2 == dstring) && caseSensitive) 105 { 106 // fast code for simple dstring, dstring + case sensitive code path. 107 return fuzzyMatchesRaw(doesThis, matchThis); 108 } 109 110 /// ditto 111 bool fuzzyMatchesString(scope const(char)[] doesThis, scope const(char)[] matchThis) @safe pure nothrow @nogc 112 { 113 return fuzzyMatchesUni(doesThis, matchThis); 114 } 115 116 /// ditto 117 bool fuzzyMatchesString(scope const(wchar)[] doesThis, scope const(wchar)[] matchThis) @safe pure nothrow @nogc 118 { 119 return fuzzyMatchesUni(doesThis, matchThis); 120 } 121 122 /// ditto 123 bool fuzzyMatchesString(scope const(dchar)[] doesThis, scope const(dchar)[] matchThis) @safe pure nothrow @nogc 124 { 125 return fuzzyMatchesUni(doesThis, matchThis); 126 } 127 128 /// ditto 129 bool fuzzyMatchesStringCS(scope const(char)[] doesThis, scope const(char)[] matchThis) @safe pure nothrow @nogc 130 { 131 return fuzzyMatchesUni!true(doesThis, matchThis); 132 } 133 134 /// ditto 135 bool fuzzyMatchesStringCS(scope const(wchar)[] doesThis, scope const(wchar)[] matchThis) @safe pure nothrow @nogc 136 { 137 return fuzzyMatchesUni!true(doesThis, matchThis); 138 } 139 140 /// ditto 141 bool fuzzyMatchesStringCS(scope const(dchar)[] doesThis, scope const(dchar)[] matchThis) @safe pure nothrow @nogc 142 { 143 return fuzzyMatchesUni!true(doesThis, matchThis); 144 } 145 146 /// 147 @safe unittest 148 { 149 assert( "foo".fuzzyMatchesString("")); 150 assert( "foo".fuzzyMatchesString("Fo")); 151 assert(!"foo".fuzzyMatchesString("b")); 152 153 assert( "foo".fuzzyMatchesStringCS("")); 154 assert(!"foo".fuzzyMatchesStringCS("Fo")); 155 assert( "foo".fuzzyMatchesStringCS("fo")); 156 assert(!"foo".fuzzyMatchesStringCS("b")); 157 158 assert( "path/to/game.txt".fuzzyMatchesString("path/to/game.txt")); 159 assert( "path/to/game.txt".fuzzyMatchesString("path/to/game.")); 160 assert( "path/to/game.txt".fuzzyMatchesString("pathgametxt")); 161 assert( "path/to/game.txt".fuzzyMatchesString("ptg")); 162 assert(!"path/to/game.txt".fuzzyMatchesString("ptf")); 163 assert("path/to/game.txt".fuzzyMatchesString("game.txt")); 164 assert(!"path/to/game.txt".fuzzyMatchesString("work.txt")); 165 } 166 167 /// 168 @safe unittest 169 { 170 import std.path : chainPath; 171 assert( chainPath("path", "to", "game.txt").fuzzyMatchesUni("ptg"w)); 172 assert(!chainPath("path", "to", "game.txt").fuzzyMatchesUni("root"w)); 173 } 174 175 @safe unittest 176 { 177 assert("foo".fuzzyMatchesString("")); 178 assert("foo"w.fuzzyMatchesString(""w)); 179 assert("foo"d.fuzzyMatchesString(""d)); 180 assert("foo".fuzzyMatchesString("Fo")); 181 assert("foo"w.fuzzyMatchesString("Fo"w)); 182 assert("foo"d.fuzzyMatchesString("Fo"d)); 183 assert(!"foo".fuzzyMatchesString("b")); 184 assert(!"foo"w.fuzzyMatchesString("b"w)); 185 assert(!"foo"d.fuzzyMatchesString("b"d)); 186 187 assert("path/to/game.txt".fuzzyMatchesString("pathgametxt")); 188 assert("path/to/game.txt".fuzzyMatchesString("ptg")); 189 assert(!"path/to/game.txt".fuzzyMatchesString("ptf")); 190 assert("path/to/game.txt"w.fuzzyMatchesString("pathgametxt")); 191 assert("path/to/game.txt"w.fuzzyMatchesString("ptg")); 192 assert(!"path/to/game.txt"w.fuzzyMatchesString("ptf")); 193 assert("path/to/game.txt".fuzzyMatchesString("pathgametxt"w)); 194 assert("path/to/game.txt".fuzzyMatchesString("ptg"w)); 195 assert(!"path/to/game.txt".fuzzyMatchesString("ptf"w)); 196 197 assert("path/to/game.txt".fuzzyMatchesStringCS("pathgametxt")); 198 assert("path/to/game.txt".fuzzyMatchesStringCS("ptg")); 199 assert(!"path/to/game.txt".fuzzyMatchesStringCS("ptf")); 200 assert("path/to/game.txt"w.fuzzyMatchesStringCS("pathgametxt")); 201 assert("path/to/game.txt"w.fuzzyMatchesStringCS("ptg")); 202 assert(!"path/to/game.txt"w.fuzzyMatchesStringCS("ptf")); 203 assert("path/to/game.txt".fuzzyMatchesStringCS("pathgametxt"w)); 204 assert("path/to/game.txt".fuzzyMatchesStringCS("ptg"w)); 205 assert(!"path/to/game.txt".fuzzyMatchesStringCS("ptf"w)); 206 } 207 } 208 209 /** 210 * Works like $(LREF fuzzyMatchesUni), but does not do any UTF decoding, but 211 * rather just goes through the arrays element-by-element. 212 * 213 * This method works case-sensitive if dstrings are passed into it. 214 * 215 * This method has no dependency on the standard library and should work with 216 * betterC. 217 */ 218 bool fuzzyMatchesRaw(R1, R2)(scope const R1 doesThis, scope const R2 matchThis) @safe pure nothrow @nogc 219 if (!shouldBeDecoded!R1 && !shouldBeDecoded!R2) 220 { 221 if (matchThis.empty) 222 return true; 223 224 size_t i; 225 dchar next = matchThis[i++]; 226 const matchThisLength = matchThis.length; 227 foreach (c; doesThis) 228 { 229 if (c == next) 230 { 231 if (i >= matchThisLength) 232 return true; 233 next = matchThis[i++]; 234 } 235 } 236 return false; 237 } 238 239 /// 240 @safe unittest 241 { 242 assert( "foo"d.fuzzyMatchesRaw(""d)); 243 assert( "foo"d.fuzzyMatchesRaw("fo"d)); 244 assert(!"foo"d.fuzzyMatchesRaw("Fo"d)); 245 assert(!"foo"d.fuzzyMatchesRaw("b"d)); 246 247 assert( "path/to/game.txt"d.fuzzyMatchesRaw("pathgametxt"d)); 248 assert( "path/to/game.txt"d.fuzzyMatchesRaw("ptg"d)); 249 assert(!"path/to/game.txt"d.fuzzyMatchesRaw("ptf"d)); 250 251 assert([1, 2, 3, 4, 5].fuzzyMatchesRaw([1, 3, 5])); 252 assert(![1, 2, 3, 4, 5].fuzzyMatchesRaw([1, 5, 3])); 253 assert([1, 2, 3, 4, 5].fuzzyMatchesRaw([1, 5])); 254 assert([1, 2, 3, 4, 5].fuzzyMatchesRaw([5])); 255 assert(![1, 2, 3, 4, 5].fuzzyMatchesRaw([0])); 256 } 257