Rev 18 | Rev 58 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php
require_once('php/XmlStreamer.php');
require_once('php/tools.php');
require_once('php/clsLibGTIN.php');
// Raise PHP's memory ceiling for this run.
ini_set('memory_limit', '256M');

// Running total of <release> nodes handled; incremented inside
// ReleasesXmlStreamer::processNode() and reported once parsing finishes.
$cnt = 0;
/**
 * Flattens each <release> element of the streamed XML into rows of the
 * various ".load" output files.
 *
 * The output handles and the release counter are not injected; they are read
 * from the globals $fh, $fhTracks, $fhArtists, $fhLabels, $fhBarcodes and
 * $cnt, which the surrounding script must initialise before parsing starts.
 */
class ReleasesXmlStreamer extends XmlStreamer
{
    /**
     * Handle one XML fragment handed over by the streamer.
     *
     * Only fragments whose element name is "release" are processed; anything
     * else is ignored. For a release this writes:
     *  - one row per release to $fh,
     *  - one row with the rendered track list HTML to $fhTracks (if any),
     *  - one row per label to $fhLabels,
     *  - one row per release artist to $fhArtists,
     *  - one row to $fhBarcodes when a valid barcode was found.
     *
     * @param string $xmlString   Raw XML of the node.
     * @param string $elementName Name of the node's element.
     * @param int    $nodeIndex   Index of the node (unused here).
     *
     * @return bool Always true. NOTE(review): presumably this tells the
     *              streamer to keep going - confirm against XmlStreamer.
     */
    public function processNode($xmlString, $elementName, $nodeIndex)
    {
        global $cnt, $fh, $fhTracks, $fhArtists, $fhLabels, $fhBarcodes;

        // Skip foreign nodes before paying for a parse.
        if ($elementName != 'release') {
            return true;
        }

        $xml = simplexml_load_string($xmlString, 'simple_xml_extended');
        if ($xml === false) {
            // A fragment that does not parse would otherwise cause a fatal
            // "call to a member function on bool" and abort the whole run.
            // Skip just this release instead.
            return true;
        }

        ++$cnt;

        $id = $xml->Attribute('id');
        $master_id = !empty($xml->master_id) ? $xml->master_id : 0;
        $title = $xml->title;
        $country = $xml->country;
        // bugbug $notes = $xml->notes;

        // Keep only the leading four characters (the year); anything that is
        // missing or not numeric is written as the literal string NULL.
        $released = substr((string) $xml->released, 0, 4);
        if (empty($released) || !is_numeric($released)) {
            $released = 'NULL';
        }

        $barcode = $this->releaseBarcode($xml);

        $trackStr = rtrim($this->releaseTrackListHtml($xml));
        if (!empty($trackStr)) {
            fputcsv2($fhTracks, [$id, $trackStr], ',', '"');
        }

        list($quantity, $formatName, $formatExt, $formats) = $this->releaseFormatSummary($xml);

        if (isset($xml->labels)) {
            foreach ($xml->labels->label as $label) {
                fputcsv2($fhLabels, [$id, $label->Attribute('id'), $label->Attribute('catno')], ',', '"');
            }
        }

        $genres = $this->releaseChildTexts($xml, 'genres', 'genre');
        $styles = $this->releaseChildTexts($xml, 'styles', 'style');

        if (isset($xml->artists)) {
            foreach ($xml->artists->artist as $artist) {
                fputcsv2($fhArtists, [$id, $artist->id], ',', '"');
            }
        }

        if (!empty($barcode)) {
            fputcsv2(
                $fhBarcodes,
                [
                    $id,
                    $master_id,
                    $country,
                    $released,
                    $barcode,
                    $quantity,
                    $formatName,
                    $formatExt,
                ],
                ',',
                '"'
            );
        }

        fputcsv2(
            $fh,
            [
                $id,
                $master_id,
                $title,
                $country,
                $released,
                $barcode,
                $quantity,
                $formatName,
                $formatExt,
                join(", ", $formats),
                join(", ", $genres),
                join(", ", $styles),
            ],
            ',',
            '"'
        );

        return true;
    }

    /**
     * Return the digits of the last "Barcode" identifier that passes
     * clsLibGTIN::GTINCheck(), or "" when the release has none.
     */
    private function releaseBarcode($xml)
    {
        $barcode = "";
        if (!isset($xml->identifiers)) {
            return $barcode;
        }

        foreach ($xml->identifiers->identifier as $identifier) {
            if ($identifier->Attribute('type') == 'Barcode') {
                // Strip spaces, dashes and any other non-digit decoration.
                $digits = preg_replace('/[^0-9]/', '', (string) $identifier->Attribute('value'));
                if (clsLibGTIN::GTINCheck($digits, false, 1) !== false) {
                    $barcode = $digits;
                }
            }
        }

        return $barcode;
    }

    /**
     * Render the release's track list as an HTML <ul>, or "" when the release
     * has no <tracklist> element.
     *
     * Tracks with sub-tracks get an italic parent row (only when the parent
     * has a position or duration) followed by one nested list per sub-track.
     * A track that carries empty position and duration elements is treated
     * as a heading row and rendered bold.
     */
    private function releaseTrackListHtml($xml)
    {
        if (!isset($xml->tracklist)) {
            return "";
        }

        $html = '<ul class="small list-unstyled">';
        foreach ($xml->tracklist->track as $track) {
            if (isset($track->sub_tracks)) {
                if (!empty($track->position) || !empty($track->duration)) {
                    $html .= '<span class="font-italic">' . processTrack($track, true) . '</span>';
                }
                foreach ($track->sub_tracks->track as $subtrack) {
                    $html .= '<ul class="list-unstyled">' . processTrack($subtrack, true) . "</ul>";
                }
            } elseif (
                isset($track->position) && empty($track->position)
                && isset($track->duration) && empty($track->duration)
            ) {
                // The title comes straight from the XML; escape it so a
                // literal "&" or "<" cannot break or inject markup.
                $heading = htmlspecialchars((string) $track->title, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
                $html .= "<li class=\"font-weight-bold\">" . $heading . "</li>";
            } else {
                $html .= processTrack($track, true);
            }
        }
        $html .= "</ul>";

        return $html;
    }

    /**
     * Summarise the <formats> element.
     *
     * With exactly one <format> the quantity, name and text come from that
     * element and its descriptions are listed. Otherwise the name is
     * "Various", the quantity is the sum over all formats, and every format
     * contributes a "qty x name text" entry followed by its descriptions.
     *
     * @return array [quantity, formatName, formatExt, list of description strings]
     */
    private function releaseFormatSummary($xml)
    {
        $quantity = 1;
        $formatName = "";
        $formatExt = "";
        $formats = [];

        if (!isset($xml->formats)) {
            return [$quantity, $formatName, $formatExt, $formats];
        }

        if ($xml->formats->format->count() == 1) {
            $only = $xml->formats->format;
            $qty = $only->Attribute('qty');
            // Quantities of 1000 and above are ignored and the default of 1 is kept.
            if ($qty < 1000) {
                $quantity = $qty;
            }
            $formatName = $only->Attribute('name');
            $formatExt = $only->Attribute('text');
            if (isset($only->descriptions)) {
                foreach ($only->descriptions->description as $description) {
                    $formats[] = (string) $description;
                }
            }

            return [$quantity, $formatName, $formatExt, $formats];
        }

        $quantity = 0;
        $formatName = "Various";
        foreach ($xml->formats->format as $format) {
            $tempQuantity = $format->Attribute('qty');
            $tempName = $format->Attribute('name');
            $tempText = $format->Attribute('text');

            // Explicit cast: a missing or empty qty must count as 0 rather
            // than depend on implicit string-to-number coercion.
            $quantity += (int) (string) $tempQuantity;

            $entry = "";
            if (!empty($tempQuantity)) {
                $entry .= $tempQuantity . " x ";
            }
            if (!empty($tempName)) {
                $entry .= $tempName;
            }
            if (!empty($tempText)) {
                $entry .= " " . $tempText;
            }
            if (!empty($entry)) {
                $formats[] = $entry;
            }

            if (isset($format->descriptions)) {
                foreach ($format->descriptions->description as $description) {
                    $formats[] = (string) $description;
                }
            }
        }

        return [$quantity, $formatName, $formatExt, $formats];
    }

    /**
     * Collect the text of every <$child> element below <$container>
     * (e.g. genres/genre), or an empty array when the container is absent.
     */
    private function releaseChildTexts($xml, $container, $child)
    {
        $texts = [];
        if (isset($xml->{$container})) {
            foreach ($xml->{$container}->{$child} as $node) {
                $texts[] = (string) $node;
            }
        }

        return $texts;
    }
}
// Gzipped releases dump to read.
$xmlfile = "./in/discogs_20190701_releases.xml.gz";
//$xmlfile = "./in/releases.xml.gz";

// Output files. The handles are picked up as globals by
// ReleasesXmlStreamer::processNode().
$fh = fopen("out/releases.load", "w+");
$fhTracks = fopen("out/releaseTracks.load", "w+");
$fhArtists = fopen("out/releaseArtists.load", "w+");
$fhLabels = fopen("out/releaseLabels.load", "w+");
$fhBarcodes = fopen("out/barcodes.load", "w+");

// Stop right away if any output file could not be opened, instead of
// failing on every single row later on.
$outputHandles = [
    "out/releases.load" => $fh,
    "out/releaseTracks.load" => $fhTracks,
    "out/releaseArtists.load" => $fhArtists,
    "out/releaseLabels.load" => $fhLabels,
    "out/barcodes.load" => $fhBarcodes,
];
foreach ($outputHandles as $outputPath => $outputHandle) {
    if ($outputHandle === false) {
        echo "Couldn't open $outputPath for writing" . PHP_EOL;
        exit(1);
    }
}

// Read the dump through the zlib stream wrapper so it is decompressed on the fly.
$xmlstream = "compress.zlib://$xmlfile";

// Uncompressed size of the dump in bytes, obtained with:
// zcat in/discogs_CCYYMMDD_releases.xml.gz | wc -c
$xmlfileSize = 45602358358;
//$xmlfileSize = 9524162;

$streamer = new ReleasesXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
    echo "Finished $cnt releases." . PHP_EOL;
} else {
    echo "Couldn't find root node" . PHP_EOL;
}

// Close every output handle, including the barcode file that was
// previously left open.
fclose($fh);
fclose($fhTracks);
fclose($fhArtists);
fclose($fhLabels);
fclose($fhBarcodes);
/**
 * Render one track element as an HTML "<li>…</li>" fragment.
 *
 * Layout: "<li>[position. ]title[ - artist, artist][ [duration]]</li>".
 * All text taken from the XML is HTML-escaped ("&", "<" and ">"; quotes are
 * left alone because the values only ever appear as element content).
 *
 * @param SimpleXMLElement $track   Track node; its position, title, artists
 *                                  and duration children are read.
 * @param bool             $posFlag When true, a non-empty position is
 *                                  prefixed, unless it is a single letter or
 *                                  starts with a letter followed by a digit
 *                                  (e.g. "A" or "B2").
 *
 * @return string The rendered list item.
 */
function processTrack($track, $posFlag)
{
    $escape = static function ($value) {
        // ENT_SUBSTITUTE keeps the rest of the text when a value contains an
        // invalid byte sequence instead of collapsing it to an empty string.
        return htmlspecialchars((string) $value, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    $str = "<li>";

    if ($posFlag && !empty($track->position)) {
        $position = (string) $track->position;
        // One pattern for both excluded shapes: a lone letter, or a letter
        // immediately followed by a digit.
        if (!preg_match('/^[a-zA-Z](?:[0-9]|$)/', $position)) {
            $str .= $escape($position) . '. ';
        }
    }

    $str .= $escape($track->title);

    if (isset($track->artists)) {
        $trackArtists = [];
        foreach ($track->artists->artist as $artist) {
            // Drop a trailing numeric suffix such as "(2)" from the name.
            $name = trim(preg_replace('/\([0-9]+\)$/', "", (string) $artist->name));
            $trackArtists[] = $escape($name);
        }
        if (count($trackArtists)) {
            $str .= " - " . join(", ", $trackArtists);
        }
    }

    if (!empty($track->duration)) {
        $str .= " [" . $escape($track->duration) . "]";
    }

    $str .= "</li>";

    return $str;
}