Subversion Repositories cheapmusic

Rev

Rev 44 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
18 - 1
<?php
2
require_once('php/XmlStreamer.php');
3
require_once('php/tools.php');
4
require_once('php/clsLibGTIN.php');
5
// Set PHP's memory_limit to 256M for this run.
ini_set("memory_limit", "256M");
6
 
7
// Number of <release> nodes handled so far; ReleasesXmlStreamer::processNode()
// increments it through `global`, and it is echoed once parsing finishes.
$cnt = 0;
8
 
9
/**
 * Streams <release> nodes and writes them out as rows of four load files:
 * releases ($fh), track lists ($fhTracks), release/artist pairs ($fhArtists)
 * and release/label pairs ($fhLabels). The handles and the running counter
 * $cnt are script-level globals.
 */
class ReleasesXmlStreamer extends XmlStreamer
{
	/**
	 * Export a single streamed node.
	 *
	 * Nodes other than <release> are ignored. A release node that cannot be
	 * parsed is reported through error_log() and skipped, so one bad node
	 * does not abort the whole run.
	 *
	 * Release row columns, in order: id, master_id, title, country,
	 * release year (first four characters of <released>, 0 when empty),
	 * barcode, quantity, format name, format text, format details, genres,
	 * styles (the last three joined with ", ").
	 *
	 * @param string $xmlString   XML text of the node
	 * @param string $elementName Name of the node's element
	 * @param int    $nodeIndex   Index of the node; only used in the error message
	 * @return bool Always true
	 */
	public function processNode($xmlString, $elementName, $nodeIndex)
	{
		global $cnt;
		global $fh;
		global $fhTracks;
		global $fhArtists;
		global $fhLabels;

		// Check the element name first so nothing but releases gets parsed.
		if ($elementName != 'release') {
			return true;
		}

		$xml = simplexml_load_string($xmlString, 'simple_xml_extended');
		if ($xml === false) {
			// simplexml_load_string() returns false on malformed XML; calling
			// methods on that value would be a fatal error.
			error_log("Skipping release node #$nodeIndex: XML could not be parsed");
			return true;
		}

		++$cnt;

		$id = $xml->Attribute('id');
		$master_id = !empty($xml->master_id) ? $xml->master_id : 0;
		$title = $xml->title;
		$country = $xml->country;
// bugbug		$notes = $xml->notes;
		$released = substr($xml->released, 0, 4);
		if (empty($released)) {
			$released = 0;
		}

		$barcode = $this->extractBarcode($xml);

		$trackStr = $this->renderTrackList($xml);
		if ($trackStr !== "") {
			fputcsv2($fhTracks, array($id, $trackStr), ',', '"', true);
		}

		list($quantity, $formatName, $formatText, $formats) = $this->summarizeFormats($xml);

		if (isset($xml->labels)) {
			foreach ($xml->labels->label as $label) {
				fputcsv2($fhLabels, array($id, $label->Attribute('id'), $label->Attribute('catno')), ',', '"', true);
			}
		}

		$genres = $this->collectTexts($xml, 'genres', 'genre');
		$styles = $this->collectTexts($xml, 'styles', 'style');

		if (isset($xml->artists)) {
			foreach ($xml->artists->artist as $n) {
				fputcsv2($fhArtists, array($id, $n->id), ',', '"', true);
			}
		}

		$arr = array(
			$id,
			$master_id,
			$title,
			$country,
			$released,
			$barcode,
			$quantity,
			$formatName,
			$formatText,
			join(", ", $formats),
			join(", ", $genres),
			join(", ", $styles)
		);

		fputcsv2($fh, $arr, ',', '"', true);

		return true;
	}

	/**
	 * Return the digits of the last "Barcode" identifier that passes
	 * clsLibGTIN::GTINCheck(), or "" when the release has none.
	 */
	private function extractBarcode($xml)
	{
		$barcode = "";
		if (!isset($xml->identifiers)) {
			return $barcode;
		}

		foreach ($xml->identifiers->identifier as $identifier) {
			if ($identifier->Attribute('type') != 'Barcode') {
				continue;
			}
			// Keep digits only; the value may contain spaces or other separators.
			$digits = preg_replace('/[^0-9]/', '', (string)$identifier->Attribute('value'));
			if (clsLibGTIN::GTINCheck($digits, false, 1) !== false) {
				$barcode = $digits;
			}
		}

		return $barcode;
	}

	/**
	 * Build the HTML track list of a release, or "" when it has no <tracklist>.
	 *
	 * Three kinds of <track> are distinguished:
	 *  - a track with <sub_tracks>: rendered in italics (only when it has a
	 *    position or a duration), followed by each sub track in its own list;
	 *  - a track whose <position> and <duration> both exist but are empty:
	 *    rendered as a bold heading showing only the title;
	 *  - any other track: rendered by processTrack().
	 */
	private function renderTrackList($xml)
	{
		if (!isset($xml->tracklist)) {
			return "";
		}

		$html = '<ul class="small list-unstyled">';
		foreach ($xml->tracklist->track as $track) {
			if (isset($track->sub_tracks)) {
				if (!empty($track->position) || !empty($track->duration)) {
					$html .= '<span class="font-italic">';
					$html .= processTrack($track, true);
					$html .= '</span>';
				}
				foreach ($track->sub_tracks->track as $subtrack) {
					$html .= '<ul class="list-unstyled">';
					$html .= processTrack($subtrack, true);
					$html .= "</ul>";
				}
			} elseif (isset($track->position) && empty($track->position) && isset($track->duration) && empty($track->duration)) {
				// The title comes from the dump, so escape it before it goes into markup.
				$heading = htmlspecialchars((string)$track->title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
				$html .= "<li class=\"font-weight-bold\">" . $heading . "</li>";
			} else {
				$html .= processTrack($track, true);
			}
		}
		$html .= "</ul>";

		return $html;
	}

	/**
	 * Summarise the <formats> of a release.
	 *
	 * With exactly one <format>, its qty/name/text attributes are returned
	 * as-is and the details are its descriptions. Otherwise the name is
	 * "Various", the quantity is the sum of all qty attributes, and the
	 * details list holds one "qty x name text" entry per format followed by
	 * that format's descriptions. Without <formats> the defaults are
	 * quantity 1, empty name/text and no details.
	 *
	 * @return array [quantity, format name, format text, list of detail strings]
	 */
	private function summarizeFormats($xml)
	{
		$quantity = 1;
		$formatName = "";
		$formatText = "";
		$details = [];

		if (!isset($xml->formats)) {
			return array($quantity, $formatName, $formatText, $details);
		}

		if ($xml->formats->format->count() == 1) {
			$only = $xml->formats->format;
			$quantity = $only->Attribute('qty');
			$formatName = $only->Attribute('name');
			$formatText = $only->Attribute('text');
			if (isset($only->descriptions)) {
				foreach ($only->descriptions->description as $description) {
					$details[] = (string)$description;
				}
			}

			return array($quantity, $formatName, $formatText, $details);
		}

		$quantity = 0;
		$formatName = "Various";
		foreach ($xml->formats->format as $format) {
			$tempQuantity = $format->Attribute('qty');
			// Cast before adding: an empty or non-numeric qty would otherwise
			// raise a TypeError on PHP 8 (or a warning on PHP 7).
			$quantity += (int)$tempQuantity;
			$tempName = $format->Attribute('name');
			$tempText = $format->Attribute('text');

			$str = "";
			if (!empty($tempQuantity)) {
				$str .= $tempQuantity . " x ";
			}
			if (!empty($tempName)) {
				$str .= $tempName;
			}
			if (!empty($tempText)) {
				$str .= " " . $tempText;
			}
			if (!empty($str)) {
				$details[] = $str;
			}

			if (isset($format->descriptions)) {
				foreach ($format->descriptions->description as $description) {
					$details[] = (string)$description;
				}
			}
		}

		return array($quantity, $formatName, $formatText, $details);
	}

	/**
	 * Return the text of every <$itemName> child of <$listName>, or an
	 * empty array when the release has no <$listName> element.
	 */
	private function collectTexts($xml, $listName, $itemName)
	{
		$texts = [];
		if (isset($xml->{$listName})) {
			foreach ($xml->{$listName}->{$itemName} as $node) {
				$texts[] = (string)$node;
			}
		}

		return $texts;
	}
}
171
 
172
// Gzipped releases dump to convert; it is read through the zlib stream wrapper.
$xmlfile = "./in/discogs_20190601_releases.xml.gz";
//$xmlfile = "./in/releases.xml.gz";

// Output load files. ReleasesXmlStreamer::processNode() reaches these handles
// through `global`, so the variable names must stay as they are.
$fh = fopen("out/releases.load", "w+");
$fhTracks = fopen("out/releaseTracks.load", "w+");
$fhArtists = fopen("out/releaseArtists.load", "w+");
$fhLabels = fopen("out/releaseLabels.load", "w+");

// fopen() returns false on failure (e.g. missing or unwritable out/ directory).
// Stop here instead of streaming the whole dump into invalid handles.
foreach (array(
	"out/releases.load" => $fh,
	"out/releaseTracks.load" => $fhTracks,
	"out/releaseArtists.load" => $fhArtists,
	"out/releaseLabels.load" => $fhLabels,
) as $outPath => $outHandle) {
	if ($outHandle === false) {
		echo "Couldn't open $outPath for writing" . PHP_EOL;
		exit(1);
	}
}

$xmlstream = "compress.zlib://$xmlfile";
// Uncompressed size of the dump in bytes, obtained with:
// zcat in/discogs_CCYYMMDD_releases.xml.gz | wc -c
$xmlfileSize = 45092265603;
//$xmlfileSize = 9524162;

$streamer = new ReleasesXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
	echo "Finished $cnt releases." . PHP_EOL;
} else {
	echo "Couldn't find root node" . PHP_EOL;
}

fclose($fh);
fclose($fhTracks);
fclose($fhArtists);
fclose($fhLabels);
196
 
197
/**
 * Render one <track> node as an HTML list item.
 *
 * Output shape: <li>[position. ]title[ - artist, artist...][ [duration]]</li>
 *
 * All text taken from the node is HTML-escaped, because it is placed inside
 * markup and may contain characters such as "&" or "<".
 *
 * @param SimpleXMLElement $track   Track node; reads position, title,
 *                                  artists/artist/name and duration
 * @param bool             $posFlag When true, prefix the title with the position
 * @return string The <li>...</li> fragment
 */
function processTrack($track, $posFlag)
{
	$escape = function ($text) {
		return htmlspecialchars((string)$text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
	};

	// Cast once and compare against '' rather than using empty(), which
	// would also treat the string "0" as missing.
	$position = (string)$track->position;
	$duration = (string)$track->duration;

	$str = "<li>";
	if ($posFlag && $position !== '') {
		// Positions that are a single letter, or start with a letter followed
		// by a digit, are not shown (presumably vinyl/cassette side markers
		// such as "A" or "B2" -- confirm against the data).
		if (!preg_match("/^[a-zA-Z][0-9]/", $position) && !preg_match("/^[a-zA-Z]$/", $position)) {
			$str .= $escape($position) . '. ';
		}
	}

	$str .= $escape($track->title);

	if (isset($track->artists)) {
		$trackArtists = [];
		foreach ($track->artists->artist as $artist) {
			// Drop a trailing "(123)"-style numeric suffix from the artist name.
			$name = trim(preg_replace('/\([0-9]+\)$/', "", (string)$artist->name));
			$trackArtists[] = $escape($name);
		}
		if (count($trackArtists)) {
			$str .= " - " . join(", ", $trackArtists);
		}
	}

	if ($duration !== '') {
		$str .= " [" . $escape($duration) . "]";
	}

	$str .= "</li>";

	return $str;
}