Rev 18 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php

/**
 * Streams a gzipped Discogs releases XML dump and writes delimited load files:
 *
 *   out/releases.load        one row per release
 *   out/releaseTracks.load   release id + pre-rendered HTML track list
 *   out/releaseArtists.load  release id + artist id
 *   out/releaseLabels.load   release id + label id + catalogue number
 *   out/barcodes.load        releases that carry a barcode accepted by clsLibGTIN
 *
 * XmlStreamer, fputcsv2(), clsLibGTIN and the 'simple_xml_extended' SimpleXML
 * subclass (which provides Attribute()) are not defined in this file; they are
 * presumably supplied by the three required files below.
 */

require_once('php/XmlStreamer.php');
require_once('php/tools.php');
require_once('php/clsLibGTIN.php');

ini_set("memory_limit", "256M");

// Number of <release> nodes handled so far; incremented in processNode()
// and reported once parsing has finished.
$cnt = 0;

/**
 * XmlStreamer subclass that turns every <release> node into load-file rows.
 *
 * The output file handles and the release counter are shared with the driver
 * code further down through globals ($fh, $fhTracks, $fhArtists, $fhLabels,
 * $fhBarcodes, $cnt).
 */
class ReleasesXmlStreamer extends XmlStreamer
{
    /**
     * Handle one XML fragment handed over by the streamer.
     *
     * @param string $xmlString   XML source of the node.
     * @param string $elementName Name of the node's element; only 'release' is processed.
     * @param mixed  $nodeIndex   Not used by this implementation.
     *
     * @return bool Always true.
     */
    public function processNode($xmlString, $elementName, $nodeIndex)
    {
        global $cnt;
        global $fh;
        global $fhTracks;
        global $fhArtists;
        global $fhLabels;
        global $fhBarcodes;

        $xml = simplexml_load_string($xmlString, 'simple_xml_extended');

        if ($elementName == 'release') {
            ++$cnt;

            $id = $xml->Attribute('id');
            $master_id = !empty($xml->master_id) ? $xml->master_id : 0;
            $title = $xml->title;
            $country = $xml->country;
            // bugbug: notes are currently not extracted -> $notes = $xml->notes;

            // Only the first four characters of <released> are kept; anything
            // empty or non-numeric is written as the literal string 'NULL'.
            $released = substr($xml->released, 0, 4);
            if (empty($released) || !is_numeric($released)) {
                $released = 'NULL';
            }

            // Barcode: strip every non-digit and keep the value only when
            // clsLibGTIN::GTINCheck() does not return false. If several
            // identifiers qualify, the last one wins.
            $barcode = "";
            if (isset($xml->identifiers)) {
                foreach ($xml->identifiers->identifier as $n) {
                    if ($n->Attribute('type') == 'Barcode') {
                        $barcodeNumbers = preg_replace('/[^0-9]/', '', $n->Attribute('value'));
                        if (clsLibGTIN::GTINCheck($barcodeNumbers, false, 1) !== false) {
                            $barcode = $barcodeNumbers;
                        }
                    }
                }
            }

            // Track list, rendered as an HTML fragment.
            // NOTE(review): titles/artists are inserted without HTML escaping;
            // confirm whether the consumer of releaseTracks.load escapes them.
            $trackStr = "";
            if (isset($xml->tracklist)) {
                $trackStr .= '<ul class="small list-unstyled">';
                foreach ($xml->tracklist->track as $track) {
                    if (isset($track->sub_tracks)) {
                        // Parent of sub-tracks: rendered (in italics) only when
                        // it has a position or a duration of its own.
                        if (!empty($track->position) || !empty($track->duration)) {
                            $trackStr .= '<span class="font-italic">';
                            $trackStr .= processTrack($track, true);
                            $trackStr .= '</span>';
                        }
                        foreach ($track->sub_tracks->track as $subtrack) {
                            $trackStr .= '<ul class="list-unstyled">';
                            $trackStr .= processTrack($subtrack, true);
                            $trackStr .= "</ul>";
                        }
                    } else {
                        // Position and duration elements present but both empty:
                        // the entry is rendered as a bold heading line.
                        if (isset($track->position) && empty($track->position) && isset($track->duration) && empty($track->duration)) {
                            $trackStr .= "<li class=\"font-weight-bold\">" . $track->title . "</li>";
                        } else {
                            $trackStr .= processTrack($track, true);
                        }
                    }
                }
                $trackStr .= "</ul>";
            }
            $trackStr = rtrim($trackStr);
            if (!empty($trackStr)) {
                fputcsv2($fhTracks, array($id, $trackStr), ',', '"');
            }

            // Formats: a single <format> is reported directly, several are
            // summarised as "Various" with their quantities added up.
            $formats = [];
            $quantity = 1;
            $formatName = "";
            $formatExt = "";
            if (isset($xml->formats)) {
                if ($xml->formats->format->count() == 1) {
                    $single = $xml->formats->format;
                    // Quantities of 1000 and above are ignored (default 1 is kept).
                    if ($single->Attribute('qty') < 1000) {
                        $quantity = $single->Attribute('qty');
                    }
                    $formatName = $single->Attribute('name');
                    $formatExt = $single->Attribute('text');
                    if (isset($single->descriptions)) {
                        foreach ($single->descriptions->description as $description) {
                            $formats[] = $description;
                        }
                    }
                } else {
                    $quantity = 0;
                    $formatName = "Various";
                    $formatExt = "";
                    foreach ($xml->formats->format as $format) {
                        $tempQuantity = $format->Attribute('qty');
                        $quantity += $tempQuantity;
                        $tempName = $format->Attribute('name');
                        $tempText = $format->Attribute('text');

                        // Builds "<qty> x <name> <text>" from whichever parts exist.
                        $str = "";
                        if (!empty($tempQuantity)) {
                            $str .= $tempQuantity . " x ";
                        }
                        if (!empty($tempName)) {
                            $str .= $tempName;
                        }
                        if (!empty($tempText)) {
                            $str .= " " . $tempText;
                        }
                        if (!empty($str)) {
                            $formats[] = $str;
                        }

                        if (isset($format->descriptions)) {
                            foreach ($format->descriptions->description as $description) {
                                $formats[] = $description;
                            }
                        }
                    }
                }
            }

            // One row per label: release id, label id, catalogue number.
            if (isset($xml->labels)) {
                foreach ($xml->labels->label as $label) {
                    fputcsv2($fhLabels, array($id, $label->Attribute('id'), $label->Attribute('catno')), ',', '"');
                }
            }

            $genres = [];
            if (isset($xml->genres)) {
                foreach ($xml->genres->genre as $n) {
                    $genres[] = $n;
                }
            }

            $styles = [];
            if (isset($xml->styles)) {
                foreach ($xml->styles->style as $n) {
                    $styles[] = $n;
                }
            }

            // One row per release artist: release id, artist id.
            if (isset($xml->artists)) {
                foreach ($xml->artists->artist as $n) {
                    fputcsv2($fhArtists, array($id, $n->id), ',', '"');
                }
            }

            // Releases with an accepted barcode additionally get a barcode row.
            if (!empty($barcode)) {
                $arr = array(
                    $id,
                    $master_id,
                    $country,
                    $released,
                    $barcode,
                    $quantity,
                    $formatName,
                    $formatExt,
                );
                fputcsv2($fhBarcodes, $arr, ',', '"');
            }

            // Main release row.
            $arr = array(
                $id,
                $master_id,
                $title,
                $country,
                $released,
                $barcode,
                $quantity,
                $formatName,
                $formatExt,
                implode(", ", $formats),
                implode(", ", $genres),
                implode(", ", $styles),
            );
            fputcsv2($fh, $arr, ',', '"');
        }

        return true;
    }
}

$xmlfile = "./in/discogs_20190701_releases.xml.gz";
// Alternative input: $xmlfile = "./in/releases.xml.gz";

$fh = fopen("out/releases.load", "w+");
$fhTracks = fopen("out/releaseTracks.load", "w+");
$fhArtists = fopen("out/releaseArtists.load", "w+");
$fhLabels = fopen("out/releaseLabels.load", "w+");
$fhBarcodes = fopen("out/barcodes.load", "w+");

// Read the gzip file through PHP's zlib stream wrapper.
$xmlstream = "compress.zlib://$xmlfile";

// Size passed to the streamer, obtained with:
//   zcat in/discogs_CCYYMMDD_releases.xml.gz | wc -c
$xmlfileSize = 45602358358;
// Size for the alternative input: $xmlfileSize = 9524162;

$streamer = new ReleasesXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
    echo "Finished $cnt releases." . PHP_EOL;
} else {
    echo "Couldn't find root node" . PHP_EOL;
}

fclose($fh);
fclose($fhTracks);
fclose($fhArtists);
fclose($fhLabels);
// The barcode file was opened above but previously never closed.
fclose($fhBarcodes);

/**
 * Render one track (or sub-track) as an HTML "<li>…</li>" item.
 *
 * Layout: "[position. ]title[ - artist, artist][ [duration]]".
 *
 * @param object $track   Track node exposing position, title, artists and duration.
 * @param bool   $posFlag When true, a non-empty position is prefixed, unless it
 *                        starts with a letter followed by a digit (e.g. "A1")
 *                        or is a single letter.
 *
 * @return string The rendered list item.
 */
function processTrack($track, $posFlag)
{
    $str = "<li>";

    if ($posFlag && !empty($track->position)) {
        if (!preg_match("/^[a-zA-Z][0-9]/", $track->position) && !preg_match("/^[a-zA-Z]$/", $track->position)) {
            $str .= $track->position . '. ';
        }
    }

    $str .= $track->title;

    $trackArtists = [];
    if (isset($track->artists)) {
        foreach ($track->artists->artist as $artist) {
            // Drop a trailing "(<digits>)" suffix from the artist name.
            $trackArtists[] = trim(preg_replace('/\([0-9]+\)$/', "", $artist->name));
        }
        if (count($trackArtists)) {
            $str .= " - " . implode(", ", $trackArtists);
        }
    }

    if (!empty($track->duration)) {
        $str .= " [" . $track->duration . "]";
    }

    $str .= "</li>";

    return $str;
}