Subversion Repositories cheapmusic

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
93 - 1
<?php namespace Fuse\Bitap;
2
 
3
function search($text, $pattern, $patternAlphabet, $options = [])
4
{
5
    $options = array_merge([
6
        'location' => 0,
7
        'distance' => 100,
8
        'threshold' => 0.6,
9
        'findAllMatches' => false,
10
        'minMatchCharLength' => 1
11
    ], $options);
12
 
13
    $expectedLocation  = $options['location'];
14
    // Set starting location at beginning text and initialize the alphabet.
15
    $textLen = mb_strlen($text);
16
    // Highest score beyond which we give up.
17
    $currentThreshold = $options['threshold'];
18
    // Is there a nearby exact match? (speedup)
19
    $bestLocation = mb_strpos($text, $pattern, $expectedLocation);
20
 
21
    $patternLen = mb_strlen($pattern);
22
 
23
    // a mask of the matches
24
    $matchMask = [];
25
    for ($i = 0; $i < $textLen; $i++) {
26
        $matchMask[$i] = 0;
27
    }
28
 
29
    if ($bestLocation !== false) {
30
        $score = score($pattern, [
31
            'errors' => 0,
32
            'currentLocation' => $bestLocation,
33
            'expectedLocation' => $expectedLocation,
34
            'distance' => $options['distance']
35
        ]);
36
        $currentThreshold = min($score, $currentThreshold);
37
 
38
        // What about in the other direction? (speed up)
39
        $bestLocation = mb_strrpos($text, $pattern, $expectedLocation + $patternLen);
40
 
41
        if ($bestLocation !== false) {
42
            $score = score($pattern, [
43
                'errors' => 0,
44
                'currentLocation' => $bestLocation,
45
                'expectedLocation' => $expectedLocation,
46
                'distance' => $options['distance']
47
            ]);
48
            $currentThreshold = min($score, $currentThreshold);
49
        }
50
    }
51
 
52
    // Reset the best location
53
    $bestLocation = -1;
54
 
55
    $lastBitArr = [];
56
    $finalScore = 1;
57
    $binMax = $patternLen + $textLen;
58
 
59
    $mask = 1 << ($patternLen - 1);
60
 
61
    for ($i = 0; $i < $patternLen; $i++) {
62
        // Scan for the best match; each iteration allows for one more error.
63
        // Run a binary search to determine how far from the match location we can stray
64
        // at this error level.
65
        $binMin = 0;
66
        $binMid = $binMax;
67
 
68
        while ($binMin < $binMid) {
69
            $score = score($pattern, [
70
                'errors' => $i,
71
                'currentLocation' => $expectedLocation + $binMid,
72
                'expectedLocation' => $expectedLocation,
73
                'distance' => $options['distance']
74
            ]);
75
 
76
            if ($score <= $currentThreshold) {
77
                $binMin = $binMid;
78
            } else {
79
                $binMax = $binMid;
80
            }
81
 
82
            $binMid = floor(($binMax - $binMin) / 2 + $binMin);
83
        }
84
 
85
        // Use the result from this iteration as the maximum for the next.
86
        $binMax = $binMid;
87
 
88
        $start = max(1, $expectedLocation - $binMid + 1);
89
        $finish = $options['findAllMatches']
90
            ? $textLen
91
            : min($expectedLocation + $binMid, $textLen) + $patternLen;
92
 
93
        // Initialize the bit array
94
        $bitArr = [];
95
 
96
        $bitArr[$finish + 1] = (1 << $i) - 1;
97
 
98
        for ($j = $finish; $j >= $start; $j -= 1) {
99
            $currentLocation = $j - 1;
100
 
101
            $offset = mb_substr($text, $currentLocation, 1);
102
            $charMatch = isset($patternAlphabet[$offset])
103
                ? $patternAlphabet[$offset]
104
                : null;
105
 
106
            if ($charMatch) {
107
                $matchMask[$currentLocation] = 1;
108
            }
109
 
110
            // First pass: exact match
111
            $bitArr[$j] = (($bitArr[$j + 1] << 1) | 1) & $charMatch;
112
 
113
            // Subsequent passes: fuzzy match
114
            if ($i !== 0) {
115
                $bitArr[$j] |= ((($lastBitArr[$j + 1] | $lastBitArr[$j]) << 1) | 1) | $lastBitArr[$j + 1];
116
            }
117
 
118
            if ($bitArr[$j] & $mask) {
119
                $finalScore = score($pattern, [
120
                    'errors' => $i,
121
                    'currentLocation' => $currentLocation,
122
                    'expectedLocation' => $expectedLocation,
123
                    'distance' => $options['distance']
124
                ]);
125
 
126
                // This match will almost certainly be better than any existing match.
127
                // But check anyway.
128
                if ($finalScore <= $currentThreshold) {
129
                    // Indeed it is
130
                    $currentThreshold = $finalScore;
131
                    $bestLocation = $currentLocation;
132
 
133
                    // Already passed `loc`, downhill from here on in.
134
                    if ($bestLocation <= $expectedLocation) {
135
                        break;
136
                    }
137
 
138
                    // When passing `bestLocation`, don't exceed our current distance from `expectedLocation`.
139
                    $start = max(1, 2 * $expectedLocation - $bestLocation);
140
                }
141
            }
142
        }
143
 
144
        // No hope for a (better) match at greater error levels.
145
        $score = score($pattern, [
146
            'errors' => $i + 1,
147
            'currentLocation' => $expectedLocation,
148
            'expectedLocation' => $expectedLocation,
149
            'distance' => $options['distance']
150
        ]);
151
 
152
        if ($score > $currentThreshold) {
153
            break;
154
        }
155
 
156
        $lastBitArr = $bitArr;
157
    }
158
 
159
    // Count exact matches (those with a score of 0) to be "almost" exact
160
    return [
161
        'isMatch' => $bestLocation >= 0,
162
        'score' => $finalScore == 0 ? 0.001 : $finalScore,
163
        'matchedIndices' => matched_indices($matchMask, $options['minMatchCharLength'])
164
    ];
165
}