93 |
- |
1 |
<?php namespace Fuse\Bitap;
|
|
|
2 |
|
|
|
3 |
class Bitap
{
    // Kept public so the object exposes the same fields as its Fuse.js counterpart.
    public $options;
    public $pattern;
    public $patternAlphabet;

    /**
     * Prepares a matcher for a single pattern.
     *
     * Caller-supplied options are layered over the defaults below. The pattern
     * is lower-cased unless 'isCaseSensitive' is truthy, and its alphabet is
     * precomputed only when the pattern fits within 'maxPatternLength';
     * otherwise $patternAlphabet is left unset (null).
     *
     * @param string $pattern The pattern to look for.
     * @param array  $options Overrides for any of the default options.
     */
    public function __construct($pattern, $options = [])
    {
        $defaults = [
            // Rough position in the text where the pattern is expected to occur.
            'location' => 0,
            // How far from 'location' a match may sit. An exact letter match that is
            // 'distance' characters away from the location scores as a complete mismatch.
            // A distance of 0 demands a match at exactly that location; a distance of 1000
            // with a threshold of 0.8 requires a perfect match to be within 800 characters.
            'distance' => 100,
            // Cut-off at which the algorithm gives up: 0.0 demands a perfect match
            // (letters and location alike), 1.0 accepts anything.
            'threshold' => 0.6,
            // Machine word size; patterns longer than this bypass the Bitap routine.
            'maxPatternLength' => 32,
            // Whether comparisons distinguish upper case from lower case.
            'isCaseSensitive' => false,
            // Regex that splits the text into words. Only relevant when `tokenize` is `true`.
            'tokenSeparator' => '/ +/',
            // When true, scanning continues to the end of the input even after a
            // perfect match has already been found.
            'findAllMatches' => false,
            // Fewest characters that have to match for a result to count.
            'minMatchCharLength' => 1
        ];

        $this->options = array_merge($defaults, $options);

        if ($this->options['isCaseSensitive']) {
            $this->pattern = $pattern;
        } else {
            $this->pattern = mb_strtolower($pattern);
        }

        $patternLength = mb_strlen($this->pattern);
        if ($patternLength <= $this->options['maxPatternLength']) {
            $this->patternAlphabet = pattern_alphabet($this->pattern);
        }
    }

    /**
     * Searches $text for the configured pattern.
     *
     * Three paths, tried in order:
     *  1. the (case-normalised) text is identical to the pattern: an immediate
     *     perfect result spanning the whole text;
     *  2. the pattern is longer than 'maxPatternLength': delegate to regex_search();
     *  3. otherwise: delegate to the Bitap search() routine.
     *
     * @param string $text The text to search in.
     *
     * @return array For path 1, an array with 'isMatch', 'score' and
     *               'matchedIndices'; for the other paths, whatever the
     *               delegated function returns.
     */
    public function search($text)
    {
        // Normalise case the same way the constructor normalised the pattern.
        $haystack = $this->options['isCaseSensitive'] ? $text : mb_strtolower($text);

        // Identical strings: nothing to compute.
        if ($haystack === $this->pattern) {
            $lastIndex = mb_strlen($haystack) - 1;

            return [
                'isMatch' => true,
                'score' => 0,
                'matchedIndices' => [[0, $lastIndex]]
            ];
        }

        // A pattern wider than the machine word cannot go through Bitap;
        // fall back to a regex comparison.
        $patternIsTooLong = mb_strlen($this->pattern) > $this->options['maxPatternLength'];
        if ($patternIsTooLong) {
            return regex_search($haystack, $this->pattern, $this->options['tokenSeparator']);
        }

        // Hand the Bitap routine only the options it consumes, in a fixed order.
        $bitapOptions = [];
        foreach (['location', 'distance', 'threshold', 'findAllMatches', 'minMatchCharLength'] as $key) {
            $bitapOptions[$key] = $this->options[$key];
        }

        return search($haystack, $this->pattern, $this->patternAlphabet, $bitapOptions);
    }
}
|