Subversion Repositories cheapmusic

Rev

Blame | Last modification | View Log | RSS feed

<?php namespace Fuse;

use Fuse\Bitap\Bitap;
use function Fuse\Helpers\deep_value;
use function Fuse\Helpers\is_list;

class Fuse
{
    public $options;

    public function __construct($list, $options = [])
    {
        $this->options = array_merge([
            'location' => 0,
            'distance' => 100,
            'threshold' => 0.6,
            'maxPatternLength' => 32,
            'caseSensitive' => false,
            'tokenSeparator' => ' +',
            'findAllMatches' => false,
            'minMatchCharLength' => 1,
            'id' => null,
            'keys' => [],
            'shouldSort' => true,
            'getFn' => '\Fuse\Helpers\deep_value',
            'sortFn' => function ($a, $b) {
                if ($a['score'] === $b['score']) {
                    return $a['index'] === $b['index']
                        ? 0
                        : (
                            $a['index'] < $b['index']
                            ? -1
                            : 1
                        );
                } elseif ($a['score'] < $b['score']) {
                    return -1;
                } else {
                    return 1;
                }
            },
            'tokenize' => false,
            'matchAllTokens' => false,
            'includeMatches' => false,
            'includeScore' => false,
            'verbose' => false
        ], $options);
        $this->options['isCaseSensitive'] = $this->options['caseSensitive'];

        $this->setCollection($list);
    }

    public function setCollection($list)
    {
        $list = array_values($list);
        $this->list = $list;
        return $list;
    }

    public function search($pattern, $opts = [ 'limit' => false ])
    {
        $this->log("---------\nSearch pattern: \"$pattern\"");

        $searchers = $this->prepareSearchers($pattern);

        $search = $this->innerSearch($searchers['tokenSearchers'], $searchers['fullSearcher']);

        $this->computeScore($search['weights'], $search['results']);

        if ($this->options['shouldSort']) {
            $this->sort($search['results']);
        }

        if (is_int($opts['limit'])) {
            $search['results'] = array_slice($search['results'], 0, $opts['limit']);
        }

        return $this->format($search['results']);
    }

    protected function prepareSearchers($pattern = '')
    {
        $tokenSearchers = [];

        if ($this->options['tokenize']) {
            // Tokenize on the separator
            $tokens = mb_split($this->options['tokenSeparator'], $pattern);

            for ($i = 0, $len = sizeof($tokens); $i < $len; $i++) {
                $tokenSearchers[] = new Bitap($tokens[$i], $this->options);
            }
        }

        $fullSearcher = new Bitap($pattern, $this->options);

        return [
            'tokenSearchers' => $tokenSearchers,
            'fullSearcher' => $fullSearcher
        ];
    }

    protected function innerSearch($tokenSearchers = [], $fullSearcher = null)
    {
        $list = $this->list;
        $resultMap = [];
        $results = [];

        // Check the first item in the list, if it's a string, then we assume
        // that every item in the list is also a string, and thus it's a flattened array.
        if (is_string($list[0])) {
            // Iterate over every item
            for ($i = 0, $len = sizeof($list); $i < $len; $i++) {
                $this->analyze(
                    [
                        'key' => '',
                        'value' => $list[$i],
                        'record' => $i,
                        'index' => $i
                    ],
                    $resultMap,
                    $results,
                    $tokenSearchers,
                    $fullSearcher
                );
            }

            return [
                'weights' => null,
                'results' => $results
            ];
        }

        // Otherwise, the first item is an Object (hopefully), and thus the searching
        // is done on the values of the keys of each item.
        $weights = [];
        for ($i = 0, $len = sizeof($list); $i < $len; $i++) {
            $item = $list[$i];
            // Iterate over every key
            for ($j = 0, $keysLen = sizeof($this->options['keys']); $j < $keysLen; $j++) {
                $key = $this->options['keys'][$j];
                if (!is_string($key)) {
                    $weights[$key['name']] = [
                        'weight' => (1 - $key['weight']) ?: 1
                    ];
                    if ($key['weight'] <= 0 || $key['weight'] > 1) {
                        throw new \LogicException('Key weight has to be > 0 and <= 1');
                    }
                    $key = $key['name'];
                } else {
                    $weights[$key] = [
                        'weight' => 1
                    ];
                }

                $this->analyze(
                    [
                        'key' => $key,
                        'value' => $this->options['getFn']($item, $key),
                        'record' => $item,
                        'index' => $i
                    ],
                    $resultMap,
                    $results,
                    $tokenSearchers,
                    $fullSearcher
                );
            }
        }

        return [
            'weights' => $weights,
            'results' => $results
        ];
    }

    protected function analyze($query = [], &$resultMap = [], &$results = [], &$tokenSearchers = [], &$fullSearcher = null)
    {
        $query = array_merge([
            'key' => null,
            'arrayIndex' => -1,
            'value' => null,
            'record' => null,
            'index' => null
        ], $query);

        // Check if the texvaluet can be searched
        if (is_null($query['value'])) {
            return;
        }

        $exists = false;
        $averageScore = -1;
        $numTextMatches = 0;

        if (is_string($query['value'])) {
            $this->log("\nKey: " . ($query['key'] === '' ? '-' : $query['key']));

            $mainSearchResult = $fullSearcher->search($query['value']);
            $this->log('Full text: "' . $query['value'] . '", score: ' . $mainSearchResult['score']);

            if ($this->options['tokenize']) {
                $words = mb_split($this->options['tokenSeparator'], $query['value']);
                $scores = [];

                for ($i = 0; $i < sizeof($tokenSearchers); $i++) {
                    $tokenSearcher = $tokenSearchers[$i];

                    $this->log("\nPattern: \"{$tokenSearcher->pattern}\"");

                    // $tokenScores = []
                    $hasMatchInText = false;

                    for ($j = 0; $j < sizeof($words); $j++) {
                        $word = $words[$j];
                        $tokenSearchResult = $tokenSearcher->search($word);
                        $obj = [];
                        if ($tokenSearchResult['isMatch']) {
                            $obj[$word] = $tokenSearchResult['score'];
                            $exists = true;
                            $hasMatchInText = true;
                            $scores[] = $tokenSearchResult['score'];
                        } else {
                            $obj[$word] = 1;
                            if (!$this->options['matchAllTokens']) {
                                $scores[] = 1;
                            }
                        }
                        $this->log('Token: "' . $word . '", score: ' . $obj[$word]);
                        // tokenScores.push(obj)
                    }

                    if ($hasMatchInText) {
                        $numTextMatches += 1;
                    }
                }

                $scoresLen = sizeof($scores);

                if ($scoresLen > 0) {
                    $averageScore = $scores[0];
                    for ($i = 1; $i < $scoresLen; $i++) {
                        $averageScore += $scores[$i];
                    }
                    $averageScore = $averageScore / $scoresLen;
                } else {
                    $averageScore = -1;
                }

                $this->log('Token score average: ', $averageScore);
            }

            $finalScore = $mainSearchResult['score'];
            if ($averageScore > -1) {
                $finalScore = ($finalScore + $averageScore) / 2;
            }

            $this->log('Score average: ', $finalScore);

            $checkTextMatches = ($this->options['tokenize'] && $this->options['matchAllTokens'])
                ? $numTextMatches >= sizeof($tokenSearchers)
                : true;

            $this->log("\nCheck Matches: ", $checkTextMatches);

            // If a match is found, add the item to <rawResults>, including its score
            if (($exists || $mainSearchResult['isMatch']) && $checkTextMatches) {
                // Check if the item already exists in our results
                if (isset($resultMap[$query['index']])) {
                    $existingResult = &$resultMap[$query['index']];
                } else {
                    $existingResult = null;
                }

                if (!is_null($existingResult)) {
                    // Use the lowest score
                    // existingResult.score, bitapResult.score
                    $existingResult['output'][] = [
                        'key' => $query['key'],
                        'arrayIndex' => $query['arrayIndex'],
                        'value' => $query['value'],
                        'score' => $finalScore,
                        'matchedIndices' => $mainSearchResult['matchedIndices']
                    ];
                } else {
                    // Add it to the raw result list
                    $resultMap[$query['index']] = [
                        'item' => $query['record'],
                        'output' => [[
                            'key' => $query['key'],
                            'arrayIndex' => $query['arrayIndex'],
                            'value' => $query['value'],
                            'score' => $finalScore,
                            'matchedIndices' => $mainSearchResult['matchedIndices']
                        ]]
                    ];

                    $results[] = &$resultMap[$query['index']];
                }
            }
        } elseif (is_list($query['value'])) {
            for ($i = 0, $len = sizeof($query['value']); $i < $len; $i++) {
                $this->analyze(
                    [
                        'key' => $query['key'],
                        'arrayIndex' => $i,
                        'value' => $query['value'][$i],
                        'record' => $query['record'],
                        'index' => $query['index']
                    ],
                    $resultMap,
                    $results,
                    $tokenSearchers,
                    $fullSearcher
                );
            }
        }
    }

    protected function computeScore($weights, &$results)
    {
        $this->log("\n\nComputing score:\n");

        for ($i = 0, $len = sizeof($results); $i < $len; $i++) {
            $result = &$results[$i];
            $output = $result['output'];
            $scoreLen = sizeof($output);

            $currScore = 1;
            $bestScore = 1;

            for ($j = 0; $j < $scoreLen; $j++) {
                $weight = $weights
                    ? $weights[$output[$j]['key']]['weight']
                    : 1;
                $score = $weight === 1
                    ? $output[$j]['score']
                    : ($output[$j]['score'] ?: 0.001);
                $nScore = $score * $weight;

                if ($weight !== 1) {
                    $bestScore = min($bestScore, $nScore);
                } else {
                    $output[$j]['nScore'] = $nScore;
                    $currScore *= $nScore;
                }
            }

            $result['score'] = $bestScore == 1
                ? $currScore
                : $bestScore;

            $this->log($result);
        }
    }

    protected function sort(&$results)
    {
        $this->log("\n\nSorting....");

        $results = array_map(function ($result, $index) {
            $result['index'] = $index;
            return $result;
        }, array_values($results), array_keys($results));

        usort($results, $this->options['sortFn']);

        $results = array_map(function ($result) {
            unset($result['index']);
            return $result;
        }, $results);
    }

    protected function format(&$results)
    {
        $finalOutput = [];

        $this->log("\n\nOutput:\n\n", $results);

        $transformers = [];

        if ($this->options['includeMatches']) {
            $transformers[] = function ($result, &$data) {
                $output = $result['output'];
                $data['matches'] = [];

                for ($i = 0, $len = sizeof($output); $i < $len; $i++) {
                    $item = $output[$i];

                    if (sizeof($item['matchedIndices']) === 0) {
                        continue;
                    }

                    $obj = [
                        'indices' => $item['matchedIndices'],
                        'value' => $item['value']
                    ];
                    if ($item['key']) {
                        $obj['key'] = $item['key'];
                    }
                    if (isset($item['arrayIndex']) && $item['arrayIndex'] > -1) {
                        $obj['arrayIndex'] = $item['arrayIndex'];
                    }
                    $data['matches'][] = $obj;
                }
            };
        }

        if ($this->options['includeScore']) {
            $transformers[] = function ($result, &$data) {
                $data['score'] = $result['score'];
            };
        }

        for ($i = 0, $len = sizeof($results); $i < $len; $i += 1) {
            $result = &$results[$i];

            if ($this->options['id']) {
                $getterResult = $this->options['getFn']($result['item'], $this->options['id']);
                $result['item'] = $getterResult[0];
            }

            if (!sizeof($transformers)) {
                $finalOutput[] = $result['item'];
                continue;
            }

            $data = [
                'item' => $result['item']
            ];

            for ($j = 0, $tlen = sizeof($transformers); $j < $tlen; $j++) {
                $transformers[$j]($result, $data);
            }

            $finalOutput[] = $data;
        }

        return $finalOutput;
    }

    protected function log(...$args)
    {
        if ($this->options['verbose']) {
            echo "\n";
            foreach ($args as $arg) {
                if (is_array($arg) || is_object($arg)) {
                    var_dump($arg);
                } elseif (is_bool($arg)) {
                    echo($arg ? 'true' : 'false');
                } else {
                    echo $arg;
                }
            }
        }
    }
}