Subversion Repositories cheapmusic

Rev

Rev 44 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
18 - 1
<?php
2
require_once('php/XmlStreamer.php');
3
require_once('php/tools.php');
4
require_once('php/clsLibGTIN.php');
5
// Raise PHP's memory ceiling to 256M for this run.
ini_set('memory_limit', '256M');

// Running total of <release> nodes handled; incremented in ReleasesXmlStreamer::processNode().
$cnt = 0;
8
 
9
/**
 * XML streamer that flattens every <release> node into CSV rows.
 *
 * Rows are written through the global file handles $fh, $fhTracks,
 * $fhArtists, $fhLabels and $fhBarcodes, which the surrounding script opens;
 * the global $cnt counts the releases that were processed.
 */
class ReleasesXmlStreamer extends XmlStreamer
{
	/**
	 * Handles one node handed over by the streamer.
	 *
	 * Nodes other than <release> are ignored. A release whose XML cannot be
	 * parsed is logged and skipped instead of aborting the whole import.
	 *
	 * @param string $xmlString   Raw XML of the node.
	 * @param string $elementName Name of the node's element.
	 * @param mixed  $nodeIndex   Node index supplied by the streamer; only used in the skip message.
	 * @return bool Always true. NOTE(review): presumably tells the streamer to continue - confirm against XmlStreamer.
	 */
	public function processNode($xmlString, $elementName, $nodeIndex)
	{
		global $cnt;
		global $fh;
		global $fhTracks;
		global $fhArtists;
		global $fhLabels;
		global $fhBarcodes;

		// Only releases are exported, so do not even parse anything else.
		if ($elementName !== 'release') {
			return true;
		}

		$xml = simplexml_load_string($xmlString, 'simple_xml_extended');
		if ($xml === false) {
			// simplexml_load_string() returns false on malformed input; calling
			// methods on it would be a fatal error.
			error_log("Skipping release node $nodeIndex: XML could not be parsed");
			return true;
		}

		++$cnt;

		$id = $xml->Attribute('id');
		$master_id = !empty($xml->master_id) ? $xml->master_id : 0;
		$title = $xml->title;
		$country = $xml->country;
// bugbug		$notes = $xml->notes;

		// Keep the first four characters as the year; anything that is not
		// numeric is exported as the literal string NULL.
		$released = substr($xml->released, 0, 4);
		if (empty($released) || !is_numeric($released)) {
			$released = 'NULL';
		}

		$barcode = $this->findValidBarcode($xml);

		$trackStr = rtrim($this->renderTrackListHtml($xml));
		if (!empty($trackStr)) {
			fputcsv2($fhTracks, [$id, $trackStr], ',', '"');
		}

		list($quantity, $formatName, $formatExt, $formats) = $this->summarizeFormats($xml);

		if (isset($xml->labels)) {
			foreach ($xml->labels->label as $label) {
				fputcsv2($fhLabels, [$id, $label->Attribute('id'), $label->Attribute('catno')], ',', '"');
			}
		}

		$genres = [];
		if (isset($xml->genres)) {
			foreach ($xml->genres->genre as $genre) {
				$genres[] = $genre;
			}
		}

		$styles = [];
		if (isset($xml->styles)) {
			foreach ($xml->styles->style as $style) {
				$styles[] = $style;
			}
		}

		if (isset($xml->artists)) {
			foreach ($xml->artists->artist as $artist) {
				fputcsv2($fhArtists, [$id, $artist->id], ',', '"');
			}
		}

		// Releases with a valid barcode additionally get a row in the barcode file.
		if (!empty($barcode)) {
			fputcsv2(
				$fhBarcodes,
				[
					$id,
					$master_id,
					$country,
					$released,
					$barcode,
					$quantity,
					$formatName,
					$formatExt,
				],
				',',
				'"'
			);
		}

		fputcsv2(
			$fh,
			[
				$id,
				$master_id,
				$title,
				$country,
				$released,
				$barcode,
				$quantity,
				$formatName,
				$formatExt,
				join(", ", $formats),
				join(", ", $genres),
				join(", ", $styles),
			],
			',',
			'"'
		);

		return true;
	}

	/**
	 * Returns the digits of the last "Barcode" identifier accepted by
	 * clsLibGTIN::GTINCheck(), or an empty string when there is none.
	 *
	 * @param SimpleXMLElement $xml Parsed <release> node.
	 * @return string
	 */
	private function findValidBarcode($xml)
	{
		$barcode = "";
		if (!isset($xml->identifiers)) {
			return $barcode;
		}

		foreach ($xml->identifiers->identifier as $identifier) {
			// Loose comparison kept on purpose: the return type of Attribute() is not visible here.
			if ($identifier->Attribute('type') == 'Barcode') {
				// Strip everything but digits before validating.
				$digits = preg_replace('/[^0-9]/', '', $identifier->Attribute('value'));
				if (clsLibGTIN::GTINCheck($digits, false, 1) !== false) {
					$barcode = $digits;
				}
			}
		}

		return $barcode;
	}

	/**
	 * Builds the HTML track list of a release, or an empty string when the
	 * release has no <tracklist> element.
	 *
	 * @param SimpleXMLElement $xml Parsed <release> node.
	 * @return string
	 */
	private function renderTrackListHtml($xml)
	{
		if (!isset($xml->tracklist)) {
			return "";
		}

		$html = '<ul class="small list-unstyled">';
		foreach ($xml->tracklist->track as $track) {
			if (isset($track->sub_tracks)) {
				// The parent of a sub-track group is only rendered when it
				// carries a position or a duration of its own.
				if (!empty($track->position) || !empty($track->duration)) {
					$html .= '<span class="font-italic">';
					$html .= processTrack($track, true);
					$html .= '</span>';
				}
				foreach ($track->sub_tracks->track as $subtrack) {
					$html .= '<ul class="list-unstyled">';
					$html .= processTrack($subtrack, true);
					$html .= "</ul>";
				}
			} elseif (isset($track->position) && empty($track->position) && isset($track->duration) && empty($track->duration)) {
				// Position and duration both present but empty: rendered as a bold heading row.
				// The title is data, not markup, so it is escaped before being embedded.
				$html .= "<li class=\"font-weight-bold\">"
					. htmlspecialchars((string) $track->title, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8')
					. "</li>";
			} else {
				$html .= processTrack($track, true);
			}
		}
		$html .= "</ul>";

		return $html;
	}

	/**
	 * Condenses the <formats> element of a release.
	 *
	 * With exactly one <format>, its qty/name/text attributes are used
	 * directly (a qty of 1000 or more is ignored and the default of 1 kept).
	 * Otherwise the name is "Various", the quantities are summed and each
	 * format is listed as "<qty> x <name> <text>" in the description list.
	 *
	 * @param SimpleXMLElement $xml Parsed <release> node.
	 * @return array [quantity, format name, format text, list of descriptions]
	 */
	private function summarizeFormats($xml)
	{
		$formats = [];
		$quantity = 1;
		$formatName = "";
		$formatExt = "";

		if (!isset($xml->formats)) {
			return [$quantity, $formatName, $formatExt, $formats];
		}

		if ($xml->formats->format->count() == 1) {
			$format = $xml->formats->format;
			if ($format->Attribute('qty') < 1000) {
				$quantity = $format->Attribute('qty');
			}
			$formatName = $format->Attribute('name');
			$formatExt = $format->Attribute('text');
			if (isset($format->descriptions)) {
				foreach ($format->descriptions->description as $description) {
					$formats[] = $description;
				}
			}

			return [$quantity, $formatName, $formatExt, $formats];
		}

		$quantity = 0;
		$formatName = "Various";
		$formatExt = "";
		foreach ($xml->formats->format as $format) {
			$tempQuantity = $format->Attribute('qty');
			$quantity += $tempQuantity;
			$tempName = $format->Attribute('name');
			$tempText = $format->Attribute('text');

			$str = "";
			if (!empty($tempQuantity)) {
				$str .= $tempQuantity . " x ";
			}
			if (!empty($tempName)) {
				$str .= $tempName;
			}
			if (!empty($tempText)) {
				$str .= " " . $tempText;
			}
			if (!empty($str)) {
				$formats[] = $str;
			}

			if (isset($format->descriptions)) {
				foreach ($format->descriptions->description as $description) {
					$formats[] = $description;
				}
			}
		}

		return [$quantity, $formatName, $formatExt, $formats];
	}
}
189
 
58 - 190
// Gzipped releases dump to import.
$xmlfile = "./in/discogs_20190801_releases.xml.gz";
//$xmlfile = "./in/releases.xml.gz";

// Output files; ReleasesXmlStreamer::processNode() writes to them through these globals.
$fh = fopen("out/releases.load", "w+");
$fhTracks = fopen("out/releaseTracks.load", "w+");
$fhArtists = fopen("out/releaseArtists.load", "w+");
$fhLabels = fopen("out/releaseLabels.load", "w+");
$fhBarcodes = fopen("out/barcodes.load", "w+");

// fopen() returns false on failure; stop before any row is written to a bad handle.
foreach ([$fh, $fhTracks, $fhArtists, $fhLabels, $fhBarcodes] as $handle) {
	if ($handle === false) {
		echo "Couldn't open one of the output files under out/" . PHP_EOL;
		exit(1);
	}
}

// Read the dump through the zlib stream wrapper so it is decompressed on the fly.
$xmlstream = "compress.zlib://$xmlfile";
// Uncompressed size in bytes, obtained with:
// zcat in/discogs_CCYYMMDD_releases.xml.gz | wc -c
$xmlfileSize = 45602358358;
//$xmlfileSize = 9524162;

$streamer = new ReleasesXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
	echo "Finished $cnt releases." . PHP_EOL;
} else {
	echo "Couldn't find root node" . PHP_EOL;
}

fclose($fh);
fclose($fhTracks);
fclose($fhArtists);
fclose($fhLabels);
// The barcode file was previously left open; close it like the other handles.
fclose($fhBarcodes);
215
 
216
/**
 * Renders one track (or sub-track) as an HTML <li> element.
 *
 * The result has the shape "<li>[position. ]title[ - artist, artist][ [duration]]</li>".
 * All text taken from the XML is HTML-escaped so that characters such as
 * "&" or "<" in a title cannot break or inject markup.
 *
 * @param SimpleXMLElement $track   Track node with optional position, title, artists and duration children.
 * @param bool             $posFlag Whether the position may be shown at all.
 * @return string The rendered list item.
 */
function processTrack($track, $posFlag)
{
	// The values end up as element text, never inside attributes, so quotes
	// can stay literal; only &, < and > need escaping.
	$escape = function ($value) {
		return htmlspecialchars((string) $value, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
	};

	$str = "<li>";

	if ($posFlag && !empty($track->position)) {
		$position = (string) $track->position;
		// Positions that start with a letter followed by a digit (e.g. "A1")
		// or consist of a single letter are not shown.
		$startsWithLetterDigit = preg_match("/^[a-zA-Z][0-9]/", $position);
		$isSingleLetter = preg_match("/^[a-zA-Z]$/", $position);
		if (!$startsWithLetterDigit && !$isSingleLetter) {
			$str .= $escape($position) . '. ';
		}
	}

	$str .= $escape($track->title);

	if (isset($track->artists)) {
		$trackArtists = [];
		foreach ($track->artists->artist as $artist) {
			// Drop a trailing "(<digits>)" suffix from the artist name.
			$name = trim(preg_replace('/\([0-9]+\)$/', "", (string) $artist->name));
			$trackArtists[] = $escape($name);
		}
		if (count($trackArtists)) {
			$str .= " - " . join(", ", $trackArtists);
		}
	}

	if (!empty($track->duration)) {
		$str .= " [" . $escape($track->duration) . "]";
	}

	$str .= "</li>";

	return $str;
}