Subversion Repositories cheapmusic

Rev

Blame | Last modification | View Log | RSS feed

<?php namespace Fuse\Bitap;

/**
 * Fuzzy matcher for one pattern, configured once and then run against any
 * number of texts via search().
 *
 * Construction normalises the pattern (lower-casing it unless the matcher is
 * case sensitive) and, for patterns that fit within 'maxPatternLength',
 * precomputes the pattern alphabet through pattern_alphabet().
 */
class Bitap
{
    // These stay public so the object exposes the same fields as the Fuse.js original.
    public $options;
    public $pattern;
    public $patternAlphabet;

    /**
     * @param string $pattern Pattern to look for.
     * @param array  $options Values overriding the defaults declared below;
     *                        unknown keys are kept as-is by array_merge().
     */
    public function __construct($pattern, $options = [])
    {
        $defaults = [
            // Roughly where in the text the pattern is expected to appear.
            'location' => 0,
            // How close a match must be to the fuzzy location above. An exact letter match that
            // is 'distance' characters away from the fuzzy location scores as a complete mismatch.
            // A distance of '0' requires the match to be at the exact location specified; a
            // distance of '1000' would require a perfect match to be within 800 characters of
            // the fuzzy location to be found using a 0.8 threshold.
            'distance' => 100,
            // Point at which the match algorithm gives up. '0.0' requires a perfect match
            // (of both letters and location); '1.0' would match anything.
            'threshold' => 0.6,
            // Machine word size
            'maxPatternLength' => 32,
            // Whether comparisons are case sensitive.
            'isCaseSensitive' => false,
            // Regex used to separate words when searching. Only applicable when `tokenize` is `true`.
            'tokenSeparator' => '/ +/',
            // When true, searching continues to the end of the input even if a perfect
            // match was already found earlier in that input.
            'findAllMatches' => false,
            // Minimum number of characters that must match before a result counts as a match.
            'minMatchCharLength' => 1
        ];

        $this->options = array_merge($defaults, $options);

        if ($this->options['isCaseSensitive']) {
            $this->pattern = $pattern;
        } else {
            $this->pattern = mb_strtolower($pattern);
        }

        // The alphabet is only prepared for patterns within the length limit;
        // search() sends longer patterns to regex_search(), which is not given it.
        $patternLength = mb_strlen($this->pattern);
        if ($patternLength <= $this->options['maxPatternLength']) {
            $this->patternAlphabet = pattern_alphabet($this->pattern);
        }
    }

    /**
     * Matches the configured pattern against $text.
     *
     * @param string $text Text to search; lower-cased first unless the matcher is case sensitive.
     *
     * @return mixed For an exact match, an array with 'isMatch' => true, 'score' => 0 and a single
     *               'matchedIndices' range spanning the whole text. Otherwise whatever
     *               regex_search() or search() returns.
     */
    public function search($text)
    {
        $haystack = $this->options['isCaseSensitive']
            ? $text
            : mb_strtolower($text);

        // Exact match: report the full text as the single matched range.
        if ($haystack === $this->pattern) {
            $lastIndex = mb_strlen($haystack) - 1;

            return [
                'isMatch' => true,
                'score' => 0,
                'matchedIndices' => [[0, $lastIndex]]
            ];
        }

        // When the pattern is longer than the machine word length, just do a regex comparison.
        $patternLength = mb_strlen($this->pattern);
        if ($patternLength > $this->options['maxPatternLength']) {
            return regex_search($haystack, $this->pattern, $this->options['tokenSeparator']);
        }

        // Otherwise use the Bitap algorithm, handing over only the options it is given upstream.
        $searchOptions = [];
        foreach (['location', 'distance', 'threshold', 'findAllMatches', 'minMatchCharLength'] as $key) {
            $searchOptions[$key] = $this->options[$key];
        }

        return search($haystack, $this->pattern, $this->patternAlphabet, $searchOptions);
    }
}