Blame | Last modification | View Log | RSS feed
<?php namespace Fuse\Bitap;function search($text, $pattern, $patternAlphabet, $options = []){$options = array_merge(['location' => 0,'distance' => 100,'threshold' => 0.6,'findAllMatches' => false,'minMatchCharLength' => 1], $options);$expectedLocation = $options['location'];// Set starting location at beginning text and initialize the alphabet.$textLen = mb_strlen($text);// Highest score beyond which we give up.$currentThreshold = $options['threshold'];// Is there a nearby exact match? (speedup)$bestLocation = mb_strpos($text, $pattern, $expectedLocation);$patternLen = mb_strlen($pattern);// a mask of the matches$matchMask = [];for ($i = 0; $i < $textLen; $i++) {$matchMask[$i] = 0;}if ($bestLocation !== false) {$score = score($pattern, ['errors' => 0,'currentLocation' => $bestLocation,'expectedLocation' => $expectedLocation,'distance' => $options['distance']]);$currentThreshold = min($score, $currentThreshold);// What about in the other direction? (speed up)$bestLocation = mb_strrpos($text, $pattern, $expectedLocation + $patternLen);if ($bestLocation !== false) {$score = score($pattern, ['errors' => 0,'currentLocation' => $bestLocation,'expectedLocation' => $expectedLocation,'distance' => $options['distance']]);$currentThreshold = min($score, $currentThreshold);}}// Reset the best location$bestLocation = -1;$lastBitArr = [];$finalScore = 1;$binMax = $patternLen + $textLen;$mask = 1 << ($patternLen - 1);for ($i = 0; $i < $patternLen; $i++) {// Scan for the best match; each iteration allows for one more error.// Run a binary search to determine how far from the match location we can stray// at this error level.$binMin = 0;$binMid = $binMax;while ($binMin < $binMid) {$score = score($pattern, ['errors' => $i,'currentLocation' => $expectedLocation + $binMid,'expectedLocation' => $expectedLocation,'distance' => $options['distance']]);if ($score <= $currentThreshold) {$binMin = $binMid;} else {$binMax = $binMid;}$binMid = floor(($binMax - $binMin) / 2 + $binMin);}// Use the result from this iteration as the maximum for the next.$binMax = $binMid;$start = max(1, $expectedLocation - $binMid + 1);$finish = $options['findAllMatches']? $textLen: min($expectedLocation + $binMid, $textLen) + $patternLen;// Initialize the bit array$bitArr = [];$bitArr[$finish + 1] = (1 << $i) - 1;for ($j = $finish; $j >= $start; $j -= 1) {$currentLocation = $j - 1;$offset = mb_substr($text, $currentLocation, 1);$charMatch = isset($patternAlphabet[$offset])? $patternAlphabet[$offset]: null;if ($charMatch) {$matchMask[$currentLocation] = 1;}// First pass: exact match$bitArr[$j] = (($bitArr[$j + 1] << 1) | 1) & $charMatch;// Subsequent passes: fuzzy matchif ($i !== 0) {$bitArr[$j] |= ((($lastBitArr[$j + 1] | $lastBitArr[$j]) << 1) | 1) | $lastBitArr[$j + 1];}if ($bitArr[$j] & $mask) {$finalScore = score($pattern, ['errors' => $i,'currentLocation' => $currentLocation,'expectedLocation' => $expectedLocation,'distance' => $options['distance']]);// This match will almost certainly be better than any existing match.// But check anyway.if ($finalScore <= $currentThreshold) {// Indeed it is$currentThreshold = $finalScore;$bestLocation = $currentLocation;// Already passed `loc`, downhill from here on in.if ($bestLocation <= $expectedLocation) {break;}// When passing `bestLocation`, don't exceed our current distance from `expectedLocation`.$start = max(1, 2 * $expectedLocation - $bestLocation);}}}// No hope for a (better) match at greater error levels.$score = score($pattern, ['errors' => $i + 1,'currentLocation' => $expectedLocation,'expectedLocation' => $expectedLocation,'distance' => $options['distance']]);if ($score > $currentThreshold) {break;}$lastBitArr = $bitArr;}// Count exact matches (those with a score of 0) to be "almost" exactreturn ['isMatch' => $bestLocation >= 0,'score' => $finalScore == 0 ? 0.001 : $finalScore,'matchedIndices' => matched_indices($matchMask, $options['minMatchCharLength'])];}