Subversion Repositories cheapmusic

Rev

Rev 58 | Blame | Compare with Previous | Last modification | View Log | RSS feed

<?php
require_once('php/XmlStreamer.php');
require_once('php/tools.php');
require_once('php/constants.php');
// Raise PHP's memory limit to 256M for this script run.
ini_set("memory_limit", "256M");

// Running count of <artist> nodes handled; incremented inside
// ArtistsXmlStreamer::processNode() via `global $cnt` and reported at the end.
$cnt = 0;

/**
 * Streams artist nodes and writes their data to a set of CSV "load" files.
 *
 * The output file handles ($fh, $fhVariations, $fhAlias, $fhGroups,
 * $fhMembers, $fhUrls, $fhProfiles) and the $cnt counter are read from the
 * global scope; they must be set up by the calling script before parse()
 * is invoked.
 */
class ArtistsXmlStreamer extends XmlStreamer
{
        /**
         * Handles one XML node handed over by the streamer.
         *
         * For an `artist` node this writes:
         *  - one row (id, cleaned name, realname) to $fh,
         *  - one row (id, profile) to $fhProfiles when the profile is non-empty,
         *  - one row (id, cleaned name) per name variation / alias / group,
         *  - one row (id, member id) per member id,
         *  - one row (id, url) per non-empty url.
         * Nodes with any other element name are ignored.
         *
         * @param string $xmlString   XML source of the node.
         * @param string $elementName Name of the node's element.
         * @param mixed  $nodeIndex   Index supplied by the streamer; only used in the error message.
         *
         * @return bool Always true (presumably tells the parent streamer to keep going - confirm in XmlStreamer).
         */
        public function processNode($xmlString, $elementName, $nodeIndex)
        {
                global $cnt;
                global $fh;
                global $fhVariations;
                global $fhAlias;
                global $fhGroups;
                global $fhMembers;
                global $fhUrls;
                global $fhProfiles;

                // Only artist nodes are of interest; skip the XML parse for everything else.
                if ($elementName != 'artist') {
                        return true;
                }

                $xml = simplexml_load_string($xmlString, 'simple_xml_extended');
                if ($xml === false) {
                        // A malformed node must not produce a junk row; report it and move on.
                        error_log("Skipping unparsable artist node (index $nodeIndex)");
                        return true;
                }

                ++$cnt;

                $id = $xml->id;
                $name = cleanName($xml->name);
                $realname = $xml->realname;

                if (isset($xml->profile)) {
                        // Drop carriage returns and trailing whitespace before storing the profile.
                        $profile = rtrim(str_replace("\r", '', (string) $xml->profile));
                        // Compare against '' rather than empty() so a profile of "0" is kept.
                        if ($profile !== '') {
                                fputcsv2($fhProfiles, array($id, $profile), ',', '"');
                        }
                }

                if (isset($xml->namevariations)) {
                        $this->writeNameRows($fhVariations, $id, $xml->namevariations->name);
                }

                if (isset($xml->aliases)) {
                        $this->writeNameRows($fhAlias, $id, $xml->aliases->name);
                }

                if (isset($xml->members)) {
                        // Iterate the <id> children: looping over $xml->members itself
                        // visits only the container element and yields just the first id.
                        foreach ($xml->members->id as $memberId) {
                                if ((string) $memberId !== '') {
                                        fputcsv2($fhMembers, array($id, $memberId), ',', '"');
                                }
                        }
                }

                if (isset($xml->groups)) {
                        $this->writeNameRows($fhGroups, $id, $xml->groups->name);
                }

                if (isset($xml->urls)) {
                        foreach ($xml->urls->url as $url) {
                                if (!empty($url)) {
                                        fputcsv2($fhUrls, array($id, $url), ',', '"');
                                }
                        }
                }

                fputcsv2($fh, array($id, $name, $realname), ',', '"');

                return true;
        }

        /**
         * Writes one (artist id, cleaned name) row for every name that is
         * still non-empty after cleanName().
         *
         * @param resource $handle Output file handle to write to.
         * @param mixed    $id     Artist id placed in the first column.
         * @param iterable $names  Name elements to clean and write.
         *
         * @return void
         */
        private function writeNameRows($handle, $id, $names)
        {
                foreach ($names as $rawName) {
                        $cleaned = cleanName($rawName);
                        // String comparison instead of empty(): a name of "0" is valid.
                        // NOTE(review): assumes cleanName() returns a string (or null/false) - confirm in php/tools.php.
                        if ((string) $cleaned !== '') {
                                fputcsv2($handle, array($id, $cleaned), ',', '"');
                        }
                }
        }
}

// Gzipped artists dump to import; XMLFILEDATE is not defined in this file
// (presumably it comes from php/constants.php - confirm).
$xmlfile = "./in/discogs_" . XMLFILEDATE . "_artists.xml.gz";

// Fail early with a clear message instead of streaming from a missing file.
if (!is_readable($xmlfile)) {
        echo "Couldn't read input file $xmlfile" . PHP_EOL;
        exit(1);
}

// Output files; ArtistsXmlStreamer::processNode() picks these handles up
// through `global`, so the variable names must stay as they are.
$fh = fopen("out/artists.load", "w+");
$fhVariations = fopen("out/artistVariations.load", "w+");
$fhAlias = fopen("out/artistAliases.load", "w+");
$fhGroups = fopen("out/artistGroups.load", "w+");
$fhMembers = fopen("out/artistMembers.load", "w+");
$fhUrls = fopen("out/artistUrls.load", "w+");
$fhProfiles = fopen("out/artistProfiles.load", "w+");

$outputHandles = array($fh, $fhVariations, $fhAlias, $fhGroups, $fhMembers, $fhUrls, $fhProfiles);

// fopen() returns false on failure (and emits a warning naming the path);
// stop here rather than passing false to every later write and fclose().
if (in_array(false, $outputHandles, true)) {
        echo "Couldn't open one or more output files in out/" . PHP_EOL;
        exit(1);
}

// Read the gzip file through PHP's zlib stream wrapper.
$xmlstream = "compress.zlib://$xmlfile";
$xmlfileSize = gzfilesize($xmlfile);

$streamer = new ArtistsXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
        echo "Finished $cnt artists." . PHP_EOL;
} else {
        echo "Couldn't find root node" . PHP_EOL;
}

foreach ($outputHandles as $handle) {
        fclose($handle);
}