Subversion Repositories cheapmusic

Rev

Rev 58 | Blame | Compare with Previous | Last modification | View Log | RSS feed

<?php
require_once('php/XmlStreamer.php');
require_once('php/tools.php');
require_once('php/clsLibGTIN.php');
require_once('php/constants.php');
// Raise PHP's memory limit to 256M for this run.
ini_set("memory_limit", "256M");

// Number of <release> nodes handled so far; incremented inside
// ReleasesXmlStreamer::processNode() and echoed once parsing has finished.
$cnt = 0;

/**
 * Turns each <release> node of a Discogs releases dump into rows of the
 * delimited load files opened by the surrounding script.
 *
 * The class relies on globals set up outside of it: the output handles $fh,
 * $fhTracks, $fhArtists, $fhLabels and $fhBarcodes, and the counter $cnt.
 */
class ReleasesXmlStreamer extends XmlStreamer
{
        /**
         * Handle one node handed over by the streamer.
         *
         * Nodes other than <release> are ignored. A release produces at most one
         * row in the tracks file, one row per label, one row per artist, one row
         * in the barcodes file (only when a valid barcode was found) and exactly
         * one row in the releases file.
         *
         * @param string $xmlString   Raw XML of the node.
         * @param string $elementName Name of the node's element.
         * @param mixed  $nodeIndex   Index of the node as supplied by the streamer;
         *                            only used in the parse-failure log message.
         * @return bool Always true. NOTE(review): presumably this tells XmlStreamer
         *              to keep parsing - confirm against the parent class.
         */
        public function processNode($xmlString, $elementName, $nodeIndex)
        {
                global $cnt;
                global $fh;
                global $fhTracks;
                global $fhArtists;
                global $fhLabels;
                global $fhBarcodes;

                // Check the element name first so other nodes are never parsed.
                if ($elementName != 'release') {
                        return true;
                }

                // simplexml_load_string() returns false for malformed XML; skip the
                // node instead of dying on the first method call below.
                $xml = simplexml_load_string($xmlString, 'simple_xml_extended');
                if ($xml === false) {
                        error_log("Skipping release node $nodeIndex: XML could not be parsed");
                        return true;
                }

                ++$cnt;

                $id = $xml->Attribute('id');
                $master_id = !empty($xml->master_id) ? $xml->master_id : 0;
                $title = $xml->title;
                $country = $xml->country;
                // <notes> is not exported; the original extraction was disabled and
                // flagged "bugbug" by its author.

                // Only the first four characters of <released> are kept; anything
                // that is empty or not numeric is written as the literal 'NULL'.
                $released = substr((string) $xml->released, 0, 4);
                if (empty($released) || !is_numeric($released)) {
                        $released = 'NULL';
                }

                $barcode = $this->releaseBarcode($xml);

                $trackStr = $this->releaseTrackHtml($xml);
                if (!empty($trackStr)) {
                        fputcsv2($fhTracks, [$id, $trackStr], ',', '"');
                }

                list($quantity, $formatName, $formatExt, $formats) = $this->releaseFormats($xml);

                if (isset($xml->labels)) {
                        foreach ($xml->labels->label as $label) {
                                fputcsv2($fhLabels, [$id, $label->Attribute('id'), $label->Attribute('catno')], ',', '"');
                        }
                }

                $genres = [];
                if (isset($xml->genres)) {
                        foreach ($xml->genres->genre as $genre) {
                                $genres[] = $genre;
                        }
                }

                $styles = [];
                if (isset($xml->styles)) {
                        foreach ($xml->styles->style as $style) {
                                $styles[] = $style;
                        }
                }

                if (isset($xml->artists)) {
                        foreach ($xml->artists->artist as $artist) {
                                fputcsv2($fhArtists, [$id, $artist->id], ',', '"');
                        }
                }

                // The barcode file only receives releases that carry a valid barcode.
                if (!empty($barcode)) {
                        fputcsv2(
                                $fhBarcodes,
                                [$id, $master_id, $country, $released, $barcode, $quantity, $formatName, $formatExt],
                                ',',
                                '"'
                        );
                }

                fputcsv2(
                        $fh,
                        [
                                $id,
                                $master_id,
                                $title,
                                $country,
                                $released,
                                $barcode,
                                $quantity,
                                $formatName,
                                $formatExt,
                                join(", ", $formats),
                                join(", ", $genres),
                                join(", ", $styles),
                        ],
                        ',',
                        '"'
                );

                return true;
        }

        /**
         * Find the release's barcode among its <identifier> children.
         *
         * Every identifier of type "Barcode" is reduced to its digits and run
         * through clsLibGTIN::GTINCheck(); when several pass, the last one wins.
         *
         * @param SimpleXMLElement $xml Parsed <release> node.
         * @return string Digits of the accepted barcode, or "" when there is none.
         */
        private function releaseBarcode($xml)
        {
                $barcode = "";
                if (!isset($xml->identifiers)) {
                        return $barcode;
                }

                foreach ($xml->identifiers->identifier as $identifier) {
                        if ($identifier->Attribute('type') == 'Barcode') {
                                $digits = preg_replace('/[^0-9]/', '', (string) $identifier->Attribute('value'));
                                if (clsLibGTIN::GTINCheck($digits, false, 1) !== false) {
                                        $barcode = $digits;
                                }
                        }
                }

                return $barcode;
        }

        /**
         * Render the release's track list as an HTML <ul> fragment.
         *
         * NOTE(review): titles are inserted without HTML escaping - confirm that
         * the consumers of the tracks file expect raw markup.
         *
         * @param SimpleXMLElement $xml Parsed <release> node.
         * @return string The markup, or "" when the release has no <tracklist>.
         */
        private function releaseTrackHtml($xml)
        {
                if (!isset($xml->tracklist)) {
                        return "";
                }

                $html = '<ul class="small list-unstyled">';
                foreach ($xml->tracklist->track as $track) {
                        if (isset($track->sub_tracks)) {
                                // The parent of a sub-track group is only printed when it
                                // has a position or a duration of its own.
                                if (!empty($track->position) || !empty($track->duration)) {
                                        $html .= '<span class="font-italic">';
                                        $html .= processTrack($track, true);
                                        $html .= '</span>';
                                }
                                foreach ($track->sub_tracks->track as $subtrack) {
                                        $html .= '<ul class="list-unstyled">';
                                        $html .= processTrack($subtrack, true);
                                        $html .= "</ul>";
                                }
                        } elseif (
                                isset($track->position) && empty($track->position)
                                && isset($track->duration) && empty($track->duration)
                        ) {
                                // Position and duration are both present but empty: the
                                // entry is rendered as a bold heading line.
                                $html .= "<li class=\"font-weight-bold\">" . $track->title . "</li>";
                        } else {
                                $html .= processTrack($track, true);
                        }
                }
                $html .= "</ul>";

                return rtrim($html);
        }

        /**
         * Summarise the release's <formats> section.
         *
         * With exactly one <format>, its qty/name/text attributes are used
         * directly (a qty of 1000 or more is ignored and the default of 1 kept)
         * and its descriptions become the format list. Otherwise the name is
         * "Various", the quantity is the sum over all formats, and the format
         * list holds one "qty x name text" summary per format followed by that
         * format's descriptions.
         *
         * @param SimpleXMLElement $xml Parsed <release> node.
         * @return array [quantity, format name, format text, list of format strings]
         */
        private function releaseFormats($xml)
        {
                $formats = [];
                $quantity = 1;
                $formatName = "";
                $formatExt = "";

                if (!isset($xml->formats)) {
                        return [$quantity, $formatName, $formatExt, $formats];
                }

                if ($xml->formats->format->count() == 1) {
                        $single = $xml->formats->format;
                        $qty = $single->Attribute('qty');
                        if ($qty < 1000) {
                                $quantity = $qty;
                        }
                        $formatName = $single->Attribute('name');
                        $formatExt = $single->Attribute('text');
                        if (isset($single->descriptions)) {
                                foreach ($single->descriptions->description as $description) {
                                        $formats[] = $description;
                                }
                        }
                } else {
                        $quantity = 0;
                        $formatName = "Various";
                        foreach ($xml->formats->format as $format) {
                                $partQuantity = $format->Attribute('qty');
                                $partName = $format->Attribute('name');
                                $partText = $format->Attribute('text');

                                // Cast before adding: an empty or non-numeric qty string
                                // would make "int + string" throw a TypeError on PHP 8.
                                $quantity += (int) $partQuantity;

                                $summary = "";
                                if (!empty($partQuantity)) {
                                        $summary .= $partQuantity . " x ";
                                }
                                if (!empty($partName)) {
                                        $summary .= $partName;
                                }
                                if (!empty($partText)) {
                                        $summary .= " " . $partText;
                                }
                                if (!empty($summary)) {
                                        $formats[] = $summary;
                                }

                                if (isset($format->descriptions)) {
                                        foreach ($format->descriptions->description as $description) {
                                                $formats[] = $description;
                                        }
                                }
                        }
                }

                return [$quantity, $formatName, $formatExt, $formats];
        }
}

// Input: the gzipped releases dump for the configured dump date.
$xmlfile = "./in/discogs_" . XMLFILEDATE . "_releases.xml.gz";
//$xmlfile = "./in/releases.xml.gz";

// Output load files; ReleasesXmlStreamer::processNode() writes to these
// handles through `global`, so the variable names must stay as they are.
$fh = fopen("out/releases.load", "w+");
$fhTracks = fopen("out/releaseTracks.load", "w+");
$fhArtists = fopen("out/releaseArtists.load", "w+");
$fhLabels = fopen("out/releaseLabels.load", "w+");
$fhBarcodes = fopen("out/barcodes.load", "w+");

// Stop right away when an output file cannot be opened, instead of failing on
// the first write somewhere in the middle of the dump.
$outputs = [
        "out/releases.load" => $fh,
        "out/releaseTracks.load" => $fhTracks,
        "out/releaseArtists.load" => $fhArtists,
        "out/releaseLabels.load" => $fhLabels,
        "out/barcodes.load" => $fhBarcodes,
];
foreach ($outputs as $outputPath => $outputHandle) {
        if ($outputHandle === false) {
                echo "Couldn't open $outputPath for writing" . PHP_EOL;
                exit(1);
        }
}

// Read the dump through the zlib stream wrapper so it is decompressed on the fly.
$xmlstream = "compress.zlib://$xmlfile";
// zcat in/discogs_CCYYMMDD_releases.xml.gz | wc -c
$xmlfileSize = RELEASEXMLFILESIZE;
//$xmlfileSize = 9524162;

$streamer = new ReleasesXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
        echo "Finished $cnt releases." . PHP_EOL;
} else {
        echo "Couldn't find root node" . PHP_EOL;
}

fclose($fh);
fclose($fhTracks);
fclose($fhArtists);
fclose($fhLabels);
// The barcode file was opened above but never closed in the original script.
fclose($fhBarcodes);

/**
 * Render a single track (or sub-track) as one HTML <li> element.
 *
 * Layout: "<li>[position. ]title[ - artist, artist][ [duration]]</li>".
 * The position prefix is emitted only when $posFlag is truthy, the position
 * is non-empty, and it neither starts with a letter followed by a digit nor
 * consists of exactly one letter. A trailing "(digits)" suffix is stripped
 * from every artist name. Values are inserted without HTML escaping.
 *
 * @param SimpleXMLElement $track   Track node; its position, title, artists
 *                                  and duration children are read.
 * @param bool             $posFlag Whether a position prefix may be emitted.
 * @return string The <li> markup for the track.
 */
function processTrack($track, $posFlag)
{
        $prefix = "";
        if ($posFlag && !empty($track->position)) {
                $position = (string) $track->position;
                $letterLed = preg_match("/^[a-zA-Z][0-9]/", $position) || preg_match("/^[a-zA-Z]$/", $position);
                if (!$letterLed) {
                        $prefix = $position . '. ';
                }
        }

        $credits = "";
        if (isset($track->artists)) {
                $names = [];
                foreach ($track->artists->artist as $credit) {
                        $names[] = trim(preg_replace('/\([0-9]+\)$/', "", (string) $credit->name));
                }
                if (count($names)) {
                        $credits = " - " . join(", ", $names);
                }
        }

        $length = empty($track->duration) ? "" : " [" . $track->duration . "]";

        return "<li>" . $prefix . $track->title . $credits . $length . "</li>";
}