Rev 58 | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php
require_once('php/XmlStreamer.php');
require_once('php/tools.php');
require_once('php/constants.php');
// Set PHP's memory_limit to 256M for this script.
ini_set('memory_limit', '256M');

// Running count of <artist> nodes; ArtistsXmlStreamer::processNode() bumps it via `global $cnt`.
$cnt = 0;
/**
 * Streamer callback class that turns each <artist> node into rows in several
 * load files: the artist itself, its profile, name variations, aliases,
 * members, groups and URLs.
 *
 * The output file handles ($fh, $fhVariations, $fhAlias, $fhGroups,
 * $fhMembers, $fhUrls, $fhProfiles) and the $cnt counter are read from the
 * global scope; the surrounding script is expected to have set them up
 * before parse() is called.
 */
class ArtistsXmlStreamer extends XmlStreamer
{
    /**
     * Handles a single node handed over by the streamer.
     *
     * @param string $xmlString   Raw XML of the node.
     * @param string $elementName Name of the node's element; only 'artist' is processed.
     * @param int    $nodeIndex   Not used by this implementation.
     *
     * @return bool Always true. NOTE(review): presumably this tells the parent
     *              streamer to keep going — confirm against XmlStreamer.
     */
    public function processNode($xmlString, $elementName, $nodeIndex)
    {
        global $cnt;
        global $fh;
        global $fhVariations;
        global $fhAlias;
        global $fhGroups;
        global $fhMembers;
        global $fhUrls;
        global $fhProfiles;

        // Nothing is written for other elements, so don't pay for parsing them.
        if ($elementName !== 'artist') {
            return true;
        }

        $xml = simplexml_load_string($xmlString, 'simple_xml_extended');
        if ($xml === false) {
            // Unparsable chunk: skip it rather than emit a row of empty values.
            return true;
        }

        ++$cnt;
        $id = $xml->id;
        $name = cleanName($xml->name);
        $realname = $xml->realname;

        // Profile: strip carriage returns and trailing whitespace, skip if nothing is left.
        // A missing <profile> casts to '' and is skipped the same way.
        $profile = rtrim(str_replace("\r", '', (string) $xml->profile));
        if ($profile !== '') {
            fputcsv2($fhProfiles, array($id, $profile), ',', '"');
        }

        if (isset($xml->namevariations)) {
            $this->writeArtistNameRows($fhVariations, $id, $xml->namevariations->name);
        }

        if (isset($xml->aliases)) {
            $this->writeArtistNameRows($fhAlias, $id, $xml->aliases->name);
        }

        if (isset($xml->members)) {
            // The member ids are the <id> children of <members>. Looping over
            // $xml->members alone only visits the wrapper element, which made
            // every artist emit at most its first member id.
            foreach ($xml->members as $memberList) {
                foreach ($memberList->id as $memberId) {
                    if ((string) $memberId !== '') {
                        fputcsv2($fhMembers, array($id, $memberId), ',', '"');
                    }
                }
            }
        }

        if (isset($xml->groups)) {
            $this->writeArtistNameRows($fhGroups, $id, $xml->groups->name);
        }

        if (isset($xml->urls)) {
            foreach ($xml->urls->url as $url) {
                if ((string) $url !== '') {
                    fputcsv2($fhUrls, array($id, $url), ',', '"');
                }
            }
        }

        fputcsv2($fh, array($id, $name, $realname), ',', '"');

        return true;
    }

    /**
     * Writes one (artist id, cleaned name) row for every non-empty name in $names.
     *
     * @param resource $handle Output file handle to write to.
     * @param mixed    $id     Artist id placed in the first column.
     * @param iterable $names  The <name> elements to run through cleanName().
     *
     * @return void
     */
    private function writeArtistNameRows($handle, $id, $names)
    {
        foreach ($names as $rawName) {
            $clean = cleanName($rawName);
            // Compare against '' instead of using empty(): empty() is also true
            // for the string "0", which would silently drop a name of "0".
            if ((string) $clean !== '') {
                fputcsv2($handle, array($id, $clean), ',', '"');
            }
        }
    }
}
// Driver: stream the gzipped artists dump through ArtistsXmlStreamer and
// write the resulting rows into the out/*.load files.
$xmlfile = "./in/discogs_" . XMLFILEDATE . "_artists.xml.gz";

// Check the input before touching any output file: "w+" truncates, so a
// missing dump would otherwise wipe the previous run's load files for nothing.
if (!is_readable($xmlfile)) {
    echo "Couldn't read $xmlfile" . PHP_EOL;
    exit(1);
}

// These variable names are part of the contract with
// ArtistsXmlStreamer::processNode(), which pulls them in via `global`.
$fh = fopen("out/artists.load", "w+");
$fhVariations = fopen("out/artistVariations.load", "w+");
$fhAlias = fopen("out/artistAliases.load", "w+");
$fhGroups = fopen("out/artistGroups.load", "w+");
$fhMembers = fopen("out/artistMembers.load", "w+");
$fhUrls = fopen("out/artistUrls.load", "w+");
$fhProfiles = fopen("out/artistProfiles.load", "w+");

$outputs = array(
    "out/artists.load" => $fh,
    "out/artistVariations.load" => $fhVariations,
    "out/artistAliases.load" => $fhAlias,
    "out/artistGroups.load" => $fhGroups,
    "out/artistMembers.load" => $fhMembers,
    "out/artistUrls.load" => $fhUrls,
    "out/artistProfiles.load" => $fhProfiles,
);

// fopen() returns false on failure (e.g. the out/ directory is missing).
// Stop now with one clear message instead of a warning per written row.
foreach ($outputs as $path => $handle) {
    if ($handle === false) {
        echo "Couldn't open $path for writing" . PHP_EOL;
        exit(1);
    }
}

// Read the dump through the zlib stream wrapper so it is decompressed on the fly.
$xmlstream = "compress.zlib://$xmlfile";
// gzfilesize() is not defined in this file — presumably provided by php/tools.php.
$xmlfileSize = gzfilesize($xmlfile);

$streamer = new ArtistsXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
    echo "Finished $cnt artists." . PHP_EOL;
} else {
    echo "Couldn't find root node" . PHP_EOL;
}

foreach ($outputs as $handle) {
    fclose($handle);
}