1 /**
2  * Implements a fuzzy search algorithm, yielding the same results as what the
3  * fuzzy match algorithm in VSCode matches. This is basically a fuzzy `contains`
4  * / `canFind` method for strings and arrays.
5  *
6  * However this library does not offer any fuzzy match scoring. This
7  * functionality might be added in the future in new methods. The check-only
8  * methods are ideal if the result is intended to be passed into other systems
9  * that are responsible for display and sorting. (e.g. from DCD / serve-d into
10  * VSCode, IDEs or other editors)
11  *
12  * It is quite efficient, allocates no memory and works with character ranges as
13  * well as simple arrays. Pre-compiled string versions are available for reduced
14  * compilation time for simple string/wstring/dstring matches.
15  *
16  * This works by going through the search string character by character, on
17  * every matching character, the matcher string advances by one character. If
18  * the matcher string is completely checked, the fuzzy match returns true.
19  *
20  * Methods:
21  * - $(LREF fuzzyMatchesUni) - fuzzy contains method with unicode decoding
22  * - $(LREF fuzzyMatchesRaw) - fuzzy contains method on arbitrary arrays
23  */
24 module fuzzymatch;
25 
26 version (D_BetterC)
27 {
28 }
29 else
30 	version = HasUnicodeFuzzymatch;
31 
32 // import std.traits : isSomeString;
33 private enum bool shouldBeDecoded(T) = is(immutable T == immutable C[], C) && (is(C == char) || is(C == wchar));
34 
35 pragma(inline, true)
36 private bool empty(T)(scope const(T)[] s) @safe pure nothrow @nogc
37 {
38 	return s.length == 0;
39 }
40 
41 version (HasUnicodeFuzzymatch)
42 {
43 	/**
44 	 * Checks if doesThis contains matchThis in a way that a fuzzy search would find
45 	 * it.
46 	 *
47 	 * Performs basic case-insensitivity checks. UTF decodes strings and wstrings,
48 	 * skipping invalid characters. Note that the case-insensitive version does not
49 	 * check for unicode sequences, such as German `ß` matching `ss`, but only by
50 	 * comparing single codepoints using their upper variant.
51 	 *
52 	 * To perform no UTF decoding, either call this method with dstrings (UTF32
53 	 * strings) or, if you checked that the string ONLY contains single code unit
54 	 * per user-conceived character, by using `.representation` and then
55 	 * $(LREF fuzzyMatchesRaw) - note that this method only works case-sensitive and
56 	 * won't perform any case-transformations!
57 	 *
58 	 * If you have strings, you can save compilation speed by using the pre-compiled
59 	 * method $(LREF fuzzyMatchesString), which accepts strings, wstring or dstrings.
60 	 *
61 	 * The $(LEF fuzzyMatchesStringCS) method is another pre-compiled version of
62 	 * this fuzzyMatchesUni function, but performs caseSensitive checks.
63 	 *
64 	 * See_Also:
65 	 * - $(LREF fuzzyMatchesRaw) - performs no unicode decoding, not usable with
66 	 *   strings, but with representations.
67 	 */
68 	bool fuzzyMatchesUni(bool caseSensitive = false, R1, R2)(scope R1 doesThis, scope R2 matchThis) @safe pure nothrow @nogc
69 		if (!(is(R1 == dstring) && is(R2 == dstring) && caseSensitive))
70 	{
71 		import std.utf : byUTF, decode, UseReplacementDchar;
72 		static if (!caseSensitive)
73 			import std.uni : toUpper;
74 
75 		if (matchThis.empty)
76 			return true;
77 
78 		size_t i = 0;
79 		dchar next = matchThis.decode!(UseReplacementDchar.yes)(i);
80 		const matchThisLength = matchThis.length;
81 		static if (!caseSensitive)
82 			next = next.toUpper;
83 		foreach (c; doesThis.byUTF!dchar)
84 		{
85 			static if (!caseSensitive)
86 				bool match = toUpper(c) == next;
87 			else
88 				bool match = c == next;
89 			if (match)
90 			{
91 				if (i >= matchThisLength)
92 					return true;
93 				next = matchThis.decode!(UseReplacementDchar.yes)(i);
94 				static if (!caseSensitive)
95 					next = next.toUpper;
96 			}
97 		}
98 		return false;
99 	}
100 
101 	/// ditto
102 	pragma(inline, true)
103 	bool fuzzyMatchesUni(bool caseSensitive = false, R1, R2)(scope R1 doesThis, scope R2 matchThis) @safe pure nothrow @nogc
104 		if (is(R1 == dstring) && is(R2 == dstring) && caseSensitive)
105 	{
106 		// fast code for simple dstring, dstring + case sensitive code path.
107 		return fuzzyMatchesRaw(doesThis, matchThis);
108 	}
109 
110 	/// ditto
111 	bool fuzzyMatchesString(scope const(char)[] doesThis, scope const(char)[] matchThis) @safe pure nothrow @nogc
112 	{
113 		return fuzzyMatchesUni(doesThis, matchThis);
114 	}
115 
116 	/// ditto
117 	bool fuzzyMatchesString(scope const(wchar)[] doesThis, scope const(wchar)[] matchThis) @safe pure nothrow @nogc
118 	{
119 		return fuzzyMatchesUni(doesThis, matchThis);
120 	}
121 
122 	/// ditto
123 	bool fuzzyMatchesString(scope const(dchar)[] doesThis, scope const(dchar)[] matchThis) @safe pure nothrow @nogc
124 	{
125 		return fuzzyMatchesUni(doesThis, matchThis);
126 	}
127 
128 	/// ditto
129 	bool fuzzyMatchesStringCS(scope const(char)[] doesThis, scope const(char)[] matchThis) @safe pure nothrow @nogc
130 	{
131 		return fuzzyMatchesUni!true(doesThis, matchThis);
132 	}
133 
134 	/// ditto
135 	bool fuzzyMatchesStringCS(scope const(wchar)[] doesThis, scope const(wchar)[] matchThis) @safe pure nothrow @nogc
136 	{
137 		return fuzzyMatchesUni!true(doesThis, matchThis);
138 	}
139 
140 	/// ditto
141 	bool fuzzyMatchesStringCS(scope const(dchar)[] doesThis, scope const(dchar)[] matchThis) @safe pure nothrow @nogc
142 	{
143 		return fuzzyMatchesUni!true(doesThis, matchThis);
144 	}
145 
146 	///
147 	@safe unittest
148 	{
149 		assert( "foo".fuzzyMatchesString(""));
150 		assert( "foo".fuzzyMatchesString("Fo"));
151 		assert(!"foo".fuzzyMatchesString("b"));
152 
153 		assert( "foo".fuzzyMatchesStringCS(""));
154 		assert(!"foo".fuzzyMatchesStringCS("Fo"));
155 		assert( "foo".fuzzyMatchesStringCS("fo"));
156 		assert(!"foo".fuzzyMatchesStringCS("b"));
157 
158 		assert( "path/to/game.txt".fuzzyMatchesString("path/to/game.txt"));
159 		assert( "path/to/game.txt".fuzzyMatchesString("path/to/game."));
160 		assert( "path/to/game.txt".fuzzyMatchesString("pathgametxt"));
161 		assert( "path/to/game.txt".fuzzyMatchesString("ptg"));
162 		assert(!"path/to/game.txt".fuzzyMatchesString("ptf"));
163 		assert("path/to/game.txt".fuzzyMatchesString("game.txt"));
164 		assert(!"path/to/game.txt".fuzzyMatchesString("work.txt"));
165 	}
166 
167 	///
168 	@safe unittest
169 	{
170 		import std.path : chainPath;
171 		assert( chainPath("path", "to", "game.txt").fuzzyMatchesUni("ptg"w));
172 		assert(!chainPath("path", "to", "game.txt").fuzzyMatchesUni("root"w));
173 	}
174 
175 	@safe unittest
176 	{
177 		assert("foo".fuzzyMatchesString(""));
178 		assert("foo"w.fuzzyMatchesString(""w));
179 		assert("foo"d.fuzzyMatchesString(""d));
180 		assert("foo".fuzzyMatchesString("Fo"));
181 		assert("foo"w.fuzzyMatchesString("Fo"w));
182 		assert("foo"d.fuzzyMatchesString("Fo"d));
183 		assert(!"foo".fuzzyMatchesString("b"));
184 		assert(!"foo"w.fuzzyMatchesString("b"w));
185 		assert(!"foo"d.fuzzyMatchesString("b"d));
186 
187 		assert("path/to/game.txt".fuzzyMatchesString("pathgametxt"));
188 		assert("path/to/game.txt".fuzzyMatchesString("ptg"));
189 		assert(!"path/to/game.txt".fuzzyMatchesString("ptf"));
190 		assert("path/to/game.txt"w.fuzzyMatchesString("pathgametxt"));
191 		assert("path/to/game.txt"w.fuzzyMatchesString("ptg"));
192 		assert(!"path/to/game.txt"w.fuzzyMatchesString("ptf"));
193 		assert("path/to/game.txt".fuzzyMatchesString("pathgametxt"w));
194 		assert("path/to/game.txt".fuzzyMatchesString("ptg"w));
195 		assert(!"path/to/game.txt".fuzzyMatchesString("ptf"w));
196 
197 		assert("path/to/game.txt".fuzzyMatchesStringCS("pathgametxt"));
198 		assert("path/to/game.txt".fuzzyMatchesStringCS("ptg"));
199 		assert(!"path/to/game.txt".fuzzyMatchesStringCS("ptf"));
200 		assert("path/to/game.txt"w.fuzzyMatchesStringCS("pathgametxt"));
201 		assert("path/to/game.txt"w.fuzzyMatchesStringCS("ptg"));
202 		assert(!"path/to/game.txt"w.fuzzyMatchesStringCS("ptf"));
203 		assert("path/to/game.txt".fuzzyMatchesStringCS("pathgametxt"w));
204 		assert("path/to/game.txt".fuzzyMatchesStringCS("ptg"w));
205 		assert(!"path/to/game.txt".fuzzyMatchesStringCS("ptf"w));
206 	}
207 }
208 
209 /**
210  * Works like $(LREF fuzzyMatchesUni), but does not do any UTF decoding, but
211  * rather just goes through the arrays element-by-element.
212  *
213  * This method works case-sensitive if dstrings are passed into it.
214  *
215  * This method has no dependency on the standard library and should work with
216  * betterC.
217  */
218 bool fuzzyMatchesRaw(R1, R2)(scope const R1 doesThis, scope const R2 matchThis) @safe pure nothrow @nogc
219 	if (!shouldBeDecoded!R1 && !shouldBeDecoded!R2)
220 {
221 	if (matchThis.empty)
222 		return true;
223 
224 	size_t i;
225 	dchar next = matchThis[i++];
226 	const matchThisLength = matchThis.length;
227 	foreach (c; doesThis)
228 	{
229 		if (c == next)
230 		{
231 			if (i >= matchThisLength)
232 				return true;
233 			next = matchThis[i++];
234 		}
235 	}
236 	return false;
237 }
238 
239 ///
240 @safe unittest
241 {
242 	assert( "foo"d.fuzzyMatchesRaw(""d));
243 	assert( "foo"d.fuzzyMatchesRaw("fo"d));
244 	assert(!"foo"d.fuzzyMatchesRaw("Fo"d));
245 	assert(!"foo"d.fuzzyMatchesRaw("b"d));
246 
247 	assert( "path/to/game.txt"d.fuzzyMatchesRaw("pathgametxt"d));
248 	assert( "path/to/game.txt"d.fuzzyMatchesRaw("ptg"d));
249 	assert(!"path/to/game.txt"d.fuzzyMatchesRaw("ptf"d));
250 
251 	assert([1, 2, 3, 4, 5].fuzzyMatchesRaw([1, 3, 5]));
252 	assert(![1, 2, 3, 4, 5].fuzzyMatchesRaw([1, 5, 3]));
253 	assert([1, 2, 3, 4, 5].fuzzyMatchesRaw([1, 5]));
254 	assert([1, 2, 3, 4, 5].fuzzyMatchesRaw([5]));
255 	assert(![1, 2, 3, 4, 5].fuzzyMatchesRaw([0]));
256 }
257