Subversion Repositories cheapmusic

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
93 - 1
<?php namespace Fuse;
2
 
3
use Fuse\Bitap\Bitap;
4
use function Fuse\Helpers\deep_value;
5
use function Fuse\Helpers\is_list;
6
 
7
class Fuse
8
{
9
    public $options;
10
 
11
    public function __construct($list, $options = [])
12
    {
13
        $this->options = array_merge([
14
            'location' => 0,
15
            'distance' => 100,
16
            'threshold' => 0.6,
17
            'maxPatternLength' => 32,
18
            'caseSensitive' => false,
19
            'tokenSeparator' => ' +',
20
            'findAllMatches' => false,
21
            'minMatchCharLength' => 1,
22
            'id' => null,
23
            'keys' => [],
24
            'shouldSort' => true,
25
            'getFn' => '\Fuse\Helpers\deep_value',
26
            'sortFn' => function ($a, $b) {
27
                if ($a['score'] === $b['score']) {
28
                    return $a['index'] === $b['index']
29
                        ? 0
30
                        : (
31
                            $a['index'] < $b['index']
32
                            ? -1
33
                            : 1
34
                        );
35
                } elseif ($a['score'] < $b['score']) {
36
                    return -1;
37
                } else {
38
                    return 1;
39
                }
40
            },
41
            'tokenize' => false,
42
            'matchAllTokens' => false,
43
            'includeMatches' => false,
44
            'includeScore' => false,
45
            'verbose' => false
46
        ], $options);
47
        $this->options['isCaseSensitive'] = $this->options['caseSensitive'];
48
 
49
        $this->setCollection($list);
50
    }
51
 
52
    public function setCollection($list)
53
    {
54
        $list = array_values($list);
55
        $this->list = $list;
56
        return $list;
57
    }
58
 
59
    public function search($pattern, $opts = [ 'limit' => false ])
60
    {
61
        $this->log("---------\nSearch pattern: \"$pattern\"");
62
 
63
        $searchers = $this->prepareSearchers($pattern);
64
 
65
        $search = $this->innerSearch($searchers['tokenSearchers'], $searchers['fullSearcher']);
66
 
67
        $this->computeScore($search['weights'], $search['results']);
68
 
69
        if ($this->options['shouldSort']) {
70
            $this->sort($search['results']);
71
        }
72
 
73
        if (is_int($opts['limit'])) {
74
            $search['results'] = array_slice($search['results'], 0, $opts['limit']);
75
        }
76
 
77
        return $this->format($search['results']);
78
    }
79
 
80
    protected function prepareSearchers($pattern = '')
81
    {
82
        $tokenSearchers = [];
83
 
84
        if ($this->options['tokenize']) {
85
            // Tokenize on the separator
86
            $tokens = mb_split($this->options['tokenSeparator'], $pattern);
87
 
88
            for ($i = 0, $len = sizeof($tokens); $i < $len; $i++) {
89
                $tokenSearchers[] = new Bitap($tokens[$i], $this->options);
90
            }
91
        }
92
 
93
        $fullSearcher = new Bitap($pattern, $this->options);
94
 
95
        return [
96
            'tokenSearchers' => $tokenSearchers,
97
            'fullSearcher' => $fullSearcher
98
        ];
99
    }
100
 
101
    protected function innerSearch($tokenSearchers = [], $fullSearcher = null)
102
    {
103
        $list = $this->list;
104
        $resultMap = [];
105
        $results = [];
106
 
107
        // Check the first item in the list, if it's a string, then we assume
108
        // that every item in the list is also a string, and thus it's a flattened array.
109
        if (is_string($list[0])) {
110
            // Iterate over every item
111
            for ($i = 0, $len = sizeof($list); $i < $len; $i++) {
112
                $this->analyze(
113
                    [
114
                        'key' => '',
115
                        'value' => $list[$i],
116
                        'record' => $i,
117
                        'index' => $i
118
                    ],
119
                    $resultMap,
120
                    $results,
121
                    $tokenSearchers,
122
                    $fullSearcher
123
                );
124
            }
125
 
126
            return [
127
                'weights' => null,
128
                'results' => $results
129
            ];
130
        }
131
 
132
        // Otherwise, the first item is an Object (hopefully), and thus the searching
133
        // is done on the values of the keys of each item.
134
        $weights = [];
135
        for ($i = 0, $len = sizeof($list); $i < $len; $i++) {
136
            $item = $list[$i];
137
            // Iterate over every key
138
            for ($j = 0, $keysLen = sizeof($this->options['keys']); $j < $keysLen; $j++) {
139
                $key = $this->options['keys'][$j];
140
                if (!is_string($key)) {
141
                    $weights[$key['name']] = [
142
                        'weight' => (1 - $key['weight']) ?: 1
143
                    ];
144
                    if ($key['weight'] <= 0 || $key['weight'] > 1) {
145
                        throw new \LogicException('Key weight has to be > 0 and <= 1');
146
                    }
147
                    $key = $key['name'];
148
                } else {
149
                    $weights[$key] = [
150
                        'weight' => 1
151
                    ];
152
                }
153
 
154
                $this->analyze(
155
                    [
156
                        'key' => $key,
157
                        'value' => $this->options['getFn']($item, $key),
158
                        'record' => $item,
159
                        'index' => $i
160
                    ],
161
                    $resultMap,
162
                    $results,
163
                    $tokenSearchers,
164
                    $fullSearcher
165
                );
166
            }
167
        }
168
 
169
        return [
170
            'weights' => $weights,
171
            'results' => $results
172
        ];
173
    }
174
 
175
    protected function analyze($query = [], &$resultMap = [], &$results = [], &$tokenSearchers = [], &$fullSearcher = null)
176
    {
177
        $query = array_merge([
178
            'key' => null,
179
            'arrayIndex' => -1,
180
            'value' => null,
181
            'record' => null,
182
            'index' => null
183
        ], $query);
184
 
185
        // Check if the texvaluet can be searched
186
        if (is_null($query['value'])) {
187
            return;
188
        }
189
 
190
        $exists = false;
191
        $averageScore = -1;
192
        $numTextMatches = 0;
193
 
194
        if (is_string($query['value'])) {
195
            $this->log("\nKey: " . ($query['key'] === '' ? '-' : $query['key']));
196
 
197
            $mainSearchResult = $fullSearcher->search($query['value']);
198
            $this->log('Full text: "' . $query['value'] . '", score: ' . $mainSearchResult['score']);
199
 
200
            if ($this->options['tokenize']) {
201
                $words = mb_split($this->options['tokenSeparator'], $query['value']);
202
                $scores = [];
203
 
204
                for ($i = 0; $i < sizeof($tokenSearchers); $i++) {
205
                    $tokenSearcher = $tokenSearchers[$i];
206
 
207
                    $this->log("\nPattern: \"{$tokenSearcher->pattern}\"");
208
 
209
                    // $tokenScores = []
210
                    $hasMatchInText = false;
211
 
212
                    for ($j = 0; $j < sizeof($words); $j++) {
213
                        $word = $words[$j];
214
                        $tokenSearchResult = $tokenSearcher->search($word);
215
                        $obj = [];
216
                        if ($tokenSearchResult['isMatch']) {
217
                            $obj[$word] = $tokenSearchResult['score'];
218
                            $exists = true;
219
                            $hasMatchInText = true;
220
                            $scores[] = $tokenSearchResult['score'];
221
                        } else {
222
                            $obj[$word] = 1;
223
                            if (!$this->options['matchAllTokens']) {
224
                                $scores[] = 1;
225
                            }
226
                        }
227
                        $this->log('Token: "' . $word . '", score: ' . $obj[$word]);
228
                        // tokenScores.push(obj)
229
                    }
230
 
231
                    if ($hasMatchInText) {
232
                        $numTextMatches += 1;
233
                    }
234
                }
235
 
236
                $scoresLen = sizeof($scores);
237
 
238
                if ($scoresLen > 0) {
239
                    $averageScore = $scores[0];
240
                    for ($i = 1; $i < $scoresLen; $i++) {
241
                        $averageScore += $scores[$i];
242
                    }
243
                    $averageScore = $averageScore / $scoresLen;
244
                } else {
245
                    $averageScore = -1;
246
                }
247
 
248
                $this->log('Token score average: ', $averageScore);
249
            }
250
 
251
            $finalScore = $mainSearchResult['score'];
252
            if ($averageScore > -1) {
253
                $finalScore = ($finalScore + $averageScore) / 2;
254
            }
255
 
256
            $this->log('Score average: ', $finalScore);
257
 
258
            $checkTextMatches = ($this->options['tokenize'] && $this->options['matchAllTokens'])
259
                ? $numTextMatches >= sizeof($tokenSearchers)
260
                : true;
261
 
262
            $this->log("\nCheck Matches: ", $checkTextMatches);
263
 
264
            // If a match is found, add the item to <rawResults>, including its score
265
            if (($exists || $mainSearchResult['isMatch']) && $checkTextMatches) {
266
                // Check if the item already exists in our results
267
                if (isset($resultMap[$query['index']])) {
268
                    $existingResult = &$resultMap[$query['index']];
269
                } else {
270
                    $existingResult = null;
271
                }
272
 
273
                if (!is_null($existingResult)) {
274
                    // Use the lowest score
275
                    // existingResult.score, bitapResult.score
276
                    $existingResult['output'][] = [
277
                        'key' => $query['key'],
278
                        'arrayIndex' => $query['arrayIndex'],
279
                        'value' => $query['value'],
280
                        'score' => $finalScore,
281
                        'matchedIndices' => $mainSearchResult['matchedIndices']
282
                    ];
283
                } else {
284
                    // Add it to the raw result list
285
                    $resultMap[$query['index']] = [
286
                        'item' => $query['record'],
287
                        'output' => [[
288
                            'key' => $query['key'],
289
                            'arrayIndex' => $query['arrayIndex'],
290
                            'value' => $query['value'],
291
                            'score' => $finalScore,
292
                            'matchedIndices' => $mainSearchResult['matchedIndices']
293
                        ]]
294
                    ];
295
 
296
                    $results[] = &$resultMap[$query['index']];
297
                }
298
            }
299
        } elseif (is_list($query['value'])) {
300
            for ($i = 0, $len = sizeof($query['value']); $i < $len; $i++) {
301
                $this->analyze(
302
                    [
303
                        'key' => $query['key'],
304
                        'arrayIndex' => $i,
305
                        'value' => $query['value'][$i],
306
                        'record' => $query['record'],
307
                        'index' => $query['index']
308
                    ],
309
                    $resultMap,
310
                    $results,
311
                    $tokenSearchers,
312
                    $fullSearcher
313
                );
314
            }
315
        }
316
    }
317
 
318
    protected function computeScore($weights, &$results)
319
    {
320
        $this->log("\n\nComputing score:\n");
321
 
322
        for ($i = 0, $len = sizeof($results); $i < $len; $i++) {
323
            $result = &$results[$i];
324
            $output = $result['output'];
325
            $scoreLen = sizeof($output);
326
 
327
            $currScore = 1;
328
            $bestScore = 1;
329
 
330
            for ($j = 0; $j < $scoreLen; $j++) {
331
                $weight = $weights
332
                    ? $weights[$output[$j]['key']]['weight']
333
                    : 1;
334
                $score = $weight === 1
335
                    ? $output[$j]['score']
336
                    : ($output[$j]['score'] ?: 0.001);
337
                $nScore = $score * $weight;
338
 
339
                if ($weight !== 1) {
340
                    $bestScore = min($bestScore, $nScore);
341
                } else {
342
                    $output[$j]['nScore'] = $nScore;
343
                    $currScore *= $nScore;
344
                }
345
            }
346
 
347
            $result['score'] = $bestScore == 1
348
                ? $currScore
349
                : $bestScore;
350
 
351
            $this->log($result);
352
        }
353
    }
354
 
355
    protected function sort(&$results)
356
    {
357
        $this->log("\n\nSorting....");
358
 
359
        $results = array_map(function ($result, $index) {
360
            $result['index'] = $index;
361
            return $result;
362
        }, array_values($results), array_keys($results));
363
 
364
        usort($results, $this->options['sortFn']);
365
 
366
        $results = array_map(function ($result) {
367
            unset($result['index']);
368
            return $result;
369
        }, $results);
370
    }
371
 
372
    protected function format(&$results)
373
    {
374
        $finalOutput = [];
375
 
376
        $this->log("\n\nOutput:\n\n", $results);
377
 
378
        $transformers = [];
379
 
380
        if ($this->options['includeMatches']) {
381
            $transformers[] = function ($result, &$data) {
382
                $output = $result['output'];
383
                $data['matches'] = [];
384
 
385
                for ($i = 0, $len = sizeof($output); $i < $len; $i++) {
386
                    $item = $output[$i];
387
 
388
                    if (sizeof($item['matchedIndices']) === 0) {
389
                        continue;
390
                    }
391
 
392
                    $obj = [
393
                        'indices' => $item['matchedIndices'],
394
                        'value' => $item['value']
395
                    ];
396
                    if ($item['key']) {
397
                        $obj['key'] = $item['key'];
398
                    }
399
                    if (isset($item['arrayIndex']) && $item['arrayIndex'] > -1) {
400
                        $obj['arrayIndex'] = $item['arrayIndex'];
401
                    }
402
                    $data['matches'][] = $obj;
403
                }
404
            };
405
        }
406
 
407
        if ($this->options['includeScore']) {
408
            $transformers[] = function ($result, &$data) {
409
                $data['score'] = $result['score'];
410
            };
411
        }
412
 
413
        for ($i = 0, $len = sizeof($results); $i < $len; $i += 1) {
414
            $result = &$results[$i];
415
 
416
            if ($this->options['id']) {
417
                $getterResult = $this->options['getFn']($result['item'], $this->options['id']);
418
                $result['item'] = $getterResult[0];
419
            }
420
 
421
            if (!sizeof($transformers)) {
422
                $finalOutput[] = $result['item'];
423
                continue;
424
            }
425
 
426
            $data = [
427
                'item' => $result['item']
428
            ];
429
 
430
            for ($j = 0, $tlen = sizeof($transformers); $j < $tlen; $j++) {
431
                $transformers[$j]($result, $data);
432
            }
433
 
434
            $finalOutput[] = $data;
435
        }
436
 
437
        return $finalOutput;
438
    }
439
 
440
    protected function log(...$args)
441
    {
442
        if ($this->options['verbose']) {
443
            echo "\n";
444
            foreach ($args as $arg) {
445
                if (is_array($arg) || is_object($arg)) {
446
                    var_dump($arg);
447
                } elseif (is_bool($arg)) {
448
                    echo($arg ? 'true' : 'false');
449
                } else {
450
                    echo $arg;
451
                }
452
            }
453
        }
454
    }
455
}