Subversion Repositories cheapmusic

Rev

Rev 58 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
18 - 1
<?php
2
require_once('php/XmlStreamer.php');
3
require_once('php/tools.php');
4
require_once('php/clsLibGTIN.php');
78 - 5
require_once('php/constants.php');
18 - 6
// Raise PHP's memory limit to 256M for the duration of this script.
ini_set("memory_limit", "256M");
7
 
8
// Running count of <release> nodes handled; incremented inside
// ReleasesXmlStreamer::processNode() and echoed once parsing has finished.
$cnt = 0;
9
 
10
class ReleasesXmlStreamer extends XmlStreamer
{
	/**
	 * Convert one streamed XML node into rows of the output load files.
	 *
	 * Only nodes named "release" are processed. For each one this writes:
	 *  - one row to $fhTracks (release id + tracklist rendered as HTML), if a tracklist exists
	 *  - one row per label to $fhLabels (release id, label id, catalogue number)
	 *  - one row per artist to $fhArtists (release id, artist id)
	 *  - one row to $fhBarcodes, only when a barcode passed the GTIN check
	 *  - one row to $fh with the release summary
	 *
	 * The file handles and the $cnt counter are script-level globals.
	 *
	 * @param string $xmlString   Raw XML of the node.
	 * @param string $elementName Name of the node's element.
	 * @param mixed  $nodeIndex   Index supplied by the streamer; only used in the parse-failure log message.
	 * @return bool Always true.
	 */
	public function processNode($xmlString, $elementName, $nodeIndex)
	{
		global $cnt;
		global $fh;
		global $fhTracks;
		global $fhArtists;
		global $fhLabels;
		global $fhBarcodes;

		// Nothing is done for other element types, so skip them before paying for a parse.
		if ($elementName != 'release') {
			return true;
		}

		$xml = simplexml_load_string($xmlString, 'simple_xml_extended');
		if ($xml === false) {
			// simplexml_load_string() returns false on malformed XML; calling methods on
			// it would be a fatal error. Log and return true like the normal path does.
			error_log("Skipping release node $nodeIndex: XML could not be parsed");
			return true;
		}

		++$cnt;

		$id = $xml->Attribute('id');
		$master_id = !empty($xml->master_id) ? $xml->master_id : 0;
		$title = $xml->title;
		$country = $xml->country;
		// Release notes ($xml->notes) are not exported; the original line was disabled and tagged "bugbug".

		// Keep only the first four characters of the release date; anything that is
		// empty or not numeric is written as the literal string NULL.
		$released = substr($xml->released, 0, 4);
		if (empty($released) || !is_numeric($released)) {
			$released = 'NULL';
		}

		$barcode = $this->extractBarcode($xml);

		$trackStr = $this->buildTracklistHtml($xml);
		if ($trackStr !== "") {
			fputcsv2($fhTracks, array($id, $trackStr), ',', '"');
		}

		list($quantity, $formatName, $formatExt, $formats) = $this->collectFormats($xml);

		if (isset($xml->labels)) {
			foreach ($xml->labels->label as $label) {
				fputcsv2($fhLabels, array($id, $label->Attribute('id'), $label->Attribute('catno')), ',', '"');
			}
		}

		$genres = isset($xml->genres) ? $this->nodesToArray($xml->genres->genre) : [];
		$styles = isset($xml->styles) ? $this->nodesToArray($xml->styles->style) : [];

		if (isset($xml->artists)) {
			foreach ($xml->artists->artist as $artist) {
				fputcsv2($fhArtists, array($id, $artist->id), ',', '"');
			}
		}

		if (!empty($barcode)) {
			$barcodeRow = array(
				$id,
				$master_id,
				$country,
				$released,
				$barcode,
				$quantity,
				$formatName,
				$formatExt,
			);

			fputcsv2($fhBarcodes, $barcodeRow, ',', '"');
		}

		$releaseRow = array(
			$id,
			$master_id,
			$title,
			$country,
			$released,
			$barcode,
			$quantity,
			$formatName,
			$formatExt,
			join(", ", $formats),
			join(", ", $genres),
			join(", ", $styles)
		);

		fputcsv2($fh, $releaseRow, ',', '"');

		return true;
	}

	/**
	 * Return the digits of the last "Barcode" identifier that passes
	 * clsLibGTIN::GTINCheck(), or "" when the release has none.
	 */
	private function extractBarcode($xml)
	{
		$barcode = "";
		if (!isset($xml->identifiers)) {
			return $barcode;
		}

		foreach ($xml->identifiers->identifier as $identifier) {
			if ($identifier->Attribute('type') != 'Barcode') {
				continue;
			}
			// Strip everything that is not a digit (spaces, dashes, stray text) before validating.
			$digits = preg_replace('/[^0-9]/', '', $identifier->Attribute('value'));
			if (clsLibGTIN::GTINCheck($digits, false, 1) !== false) {
				$barcode = $digits;
			}
		}

		return $barcode;
	}

	/**
	 * Render the release's tracklist as a nested HTML list, or "" when the
	 * release has no <tracklist> element.
	 */
	private function buildTracklistHtml($xml)
	{
		if (!isset($xml->tracklist)) {
			return "";
		}

		$html = '<ul class="small list-unstyled">';
		foreach ($xml->tracklist->track as $track) {
			if (isset($track->sub_tracks)) {
				// A parent entry is only printed (in italics) when it carries a position or a duration.
				if (!empty($track->position) || !empty($track->duration)) {
					$html .= '<span class="font-italic">';
					$html .= processTrack($track, true);
					$html .= '</span>';
				}
				foreach ($track->sub_tracks->track as $subtrack) {
					$html .= '<ul class="list-unstyled">';
					$html .= processTrack($subtrack, true);
					$html .= "</ul>";
				}
			} elseif (isset($track->position) && empty($track->position) && isset($track->duration) && empty($track->duration)) {
				// Position and duration both present but empty: the entry is rendered as a bold heading.
				// The title comes from external XML, so escape &, < and > before embedding it in markup.
				$html .= "<li class=\"font-weight-bold\">"
					. htmlspecialchars((string) $track->title, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8')
					. "</li>";
			} else {
				$html .= processTrack($track, true);
			}
		}
		$html .= "</ul>";

		return $html;
	}

	/**
	 * Summarise the release's <formats>.
	 *
	 * @return array [quantity, formatName, formatExt, formats] where formats is
	 *               the list of description strings joined later by the caller.
	 */
	private function collectFormats($xml)
	{
		$quantity = 1;
		$formatName = "";
		$formatExt = "";
		$formats = [];

		if (!isset($xml->formats)) {
			return array($quantity, $formatName, $formatExt, $formats);
		}

		if ($xml->formats->format->count() == 1) {
			$format = $xml->formats->format;
			// Quantities of 1000 or more are ignored and the default of 1 is kept
			// (presumably to discard bogus values — confirm).
			if ($format->Attribute('qty') < 1000) {
				$quantity = $format->Attribute('qty');
			}
			$formatName = $format->Attribute('name');
			$formatExt = $format->Attribute('text');
			if (isset($format->descriptions)) {
				foreach ($format->descriptions->description as $description) {
					$formats[] = $description;
				}
			}

			return array($quantity, $formatName, $formatExt, $formats);
		}

		// More than one format: report "Various", sum the quantities and list each
		// format as "<qty> x <name> <text>" followed by its descriptions.
		// NOTE(review): a <formats> element with no <format> children also lands here,
		// giving "Various" with quantity 0 — confirm that is intended.
		$quantity = 0;
		$formatName = "Various";
		$formatExt = "";
		foreach ($xml->formats->format as $format) {
			$tempQuantity = $format->Attribute('qty');
			$quantity += $tempQuantity;
			$tempName = $format->Attribute('name');
			$tempText = $format->Attribute('text');

			$str = "";
			if (!empty($tempQuantity)) {
				$str .= $tempQuantity . " x ";
			}
			if (!empty($tempName)) {
				$str .= $tempName;
			}
			if (!empty($tempText)) {
				$str .= " " . $tempText;
			}
			if (!empty($str)) {
				$formats[] = $str;
			}

			if (isset($format->descriptions)) {
				foreach ($format->descriptions->description as $description) {
					$formats[] = $description;
				}
			}
		}

		return array($quantity, $formatName, $formatExt, $formats);
	}

	/**
	 * Copy the elements yielded by iterating $nodes into a plain array.
	 */
	private function nodesToArray($nodes)
	{
		$items = [];
		foreach ($nodes as $node) {
			$items[] = $node;
		}

		return $items;
	}
}
190
 
78 - 191
// Input: the gzipped releases dump for the configured date, read through
// PHP's zlib stream wrapper so it is decompressed while being read.
$xmlfile = "./in/discogs_" . XMLFILEDATE . "_releases.xml.gz";
//$xmlfile = "./in/releases.xml.gz";
$xmlstream = "compress.zlib://$xmlfile";

// Size handed to the streamer alongside the stream. Per the original note it is
// the uncompressed byte count, obtained with:
//   zcat in/discogs_CCYYMMDD_releases.xml.gz | wc -c
$xmlfileSize = RELEASEXMLFILESIZE;
//$xmlfileSize = 9524162;

// Output files. The handle variables are read as globals by
// ReleasesXmlStreamer::processNode(), so their names must not change.
$fh = fopen("out/releases.load", "w+");
$fhTracks = fopen("out/releaseTracks.load", "w+");
$fhArtists = fopen("out/releaseArtists.load", "w+");
$fhLabels = fopen("out/releaseLabels.load", "w+");
$fhBarcodes = fopen("out/barcodes.load", "w+");

$outputHandles = [
	"out/releases.load" => $fh,
	"out/releaseTracks.load" => $fhTracks,
	"out/releaseArtists.load" => $fhArtists,
	"out/releaseLabels.load" => $fhLabels,
	"out/barcodes.load" => $fhBarcodes,
];

// fopen() returns false on failure (e.g. missing out/ directory). Stop before
// parsing rather than failing on every single write; close whatever did open.
foreach ($outputHandles as $path => $handle) {
	if ($handle === false) {
		echo "Couldn't open $path for writing" . PHP_EOL;
		foreach ($outputHandles as $opened) {
			if ($opened !== false) {
				fclose($opened);
			}
		}
		exit(1);
	}
}

$streamer = new ReleasesXmlStreamer($xmlstream, $xmlfileSize);
if ($streamer->parse()) {
	echo "Finished $cnt releases." . PHP_EOL;
} else {
	echo "Couldn't find root node" . PHP_EOL;
}

// Close every output file, including barcodes.load, which was previously left open.
foreach ($outputHandles as $handle) {
	fclose($handle);
}
216
 
217
/**
 * Render one track (or sub-track) as an HTML <li> element.
 *
 * Output shape: <li>[position. ]title[ - artist, artist][ [duration]]</li>
 *
 * The position prefix is emitted only when $posFlag is truthy, the position
 * is non-empty, and it neither starts with a letter followed by a digit
 * (e.g. "A1") nor consists of a single letter (e.g. "B").
 * A trailing "(digits)" suffix is stripped from each artist name.
 *
 * All text taken from the XML is HTML-escaped (&, < and >), because it is
 * external data being embedded in markup.
 *
 * @param SimpleXMLElement $track   Track element with optional position, title, artists and duration children.
 * @param bool             $posFlag Whether the position prefix may be shown.
 * @return string The rendered <li> element.
 */
function processTrack($track, $posFlag)
{
	// Text-content escaping only: quotes are left alone since nothing lands inside an attribute.
	$escape = function ($value) {
		return htmlspecialchars((string) $value, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
	};

	$str = "<li>";

	if ($posFlag && !empty($track->position)) {
		$position = (string) $track->position;
		$isLetterPosition = preg_match("/^[a-zA-Z][0-9]/", $position) || preg_match("/^[a-zA-Z]$/", $position);
		if (!$isLetterPosition) {
			$str .= $escape($position) . '. ';
		}
	}

	$str .= $escape($track->title);

	if (isset($track->artists)) {
		$trackArtists = [];
		foreach ($track->artists->artist as $artist) {
			// Drop a trailing "(123)"-style numeric suffix from the artist name.
			$name = trim(preg_replace('/\([0-9]+\)$/', "", (string) $artist->name));
			$trackArtists[] = $escape($name);
		}
		if ($trackArtists) {
			$str .= " - " . join(", ", $trackArtists);
		}
	}

	if (!empty($track->duration)) {
		$str .= " [" . $escape($track->duration) . "]";
	}

	return $str . "</li>";
}