| 91 |
- |
1 |
<?php
|
|
|
2 |
// Report every PHP error level (errors, warnings, notices, ...) for this script.
error_reporting(E_ALL);
|
|
|
3 |
|
|
|
4 |
/**
 * Scrape Amazon offer listings for the products matching a search query.
 *
 * Steps:
 *  1. Fetch the Amazon search result page for $query and collect every link
 *     whose href starts with "/gp/offer-listing/B".
 *  2. For each link, fetch the product detail page and read the first entry of
 *     the "productDetailsTable" list to determine the format.
 *  3. For at most five products that have such a table, and only when the
 *     format starts with "Audio CD", "Vinyl", "Hardcover" or "Paperback",
 *     fetch the offer-listing page and turn every offer into one result row.
 *
 * Depends on getUrl() and getCountryCode(), which are defined outside this
 * function.
 *
 * @param string $query           Search terms; URL-encoded before being sent.
 * @param mixed  $searchCondition Not used by this function; kept so existing
 *                                callers keep working.
 *
 * @return array List of associative arrays, one per offer, with the keys
 *               Merchant, Condition, Title, Barcode, BarcodeType, Image, URL,
 *               MediaType, DetailCondition, Country, BestOffer, TimeLeft,
 *               Price, Currency, ListingType, Location, Zip, FeedbackScore,
 *               FeedbackPercent, SellerName, HandlingTime, ShippingCost,
 *               ShippingEstimated, ShippingCurrency, FreeShippingCap, Show.
 *               Empty when the search page could not be loaded or nothing
 *               matched.
 */
function get_amazon_scrape($query, $searchCondition) {
    $maxProducts = 5;
    $userAgent = "Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/70.0";
    $arr = [];

    // Real-world HTML is rarely valid; collect parser errors instead of emitting warnings.
    libxml_use_internal_errors(true);

    // Parse an HTML string into a DOMXPath, or return null when there is nothing to parse.
    // DOMDocument::loadHTML() rejects an empty source, so that case is filtered out first.
    $loadXPath = function ($html) {
        if (!is_string($html) || trim($html) === '') {
            return null;
        }
        $dom = new DOMDocument;
        $dom->loadHTML($html);
        // Drop the parse errors collected for this document so the buffer does not grow.
        libxml_clear_errors();
        return new DOMXPath($dom);
    };

    // XPath predicate matching elements whose class attribute contains the given token.
    $hasClass = function ($class) {
        return 'contains(concat(" ", normalize-space(@class), " "), " ' . $class . ' ")';
    };

    // Trimmed text of the first node matching $expression, or '' when nothing matches.
    $firstText = function ($xpath, $expression, $context = null) {
        $found = ($context === null ? $xpath->query($expression) : $xpath->query($expression, $context));
        if ($found === false || $found->length < 1) {
            return '';
        }
        return trim($found->item(0)->nodeValue);
    };

    $xpath = $loadXPath(getUrl("https://www.amazon.com/s?k=" . rawurlencode($query) . "&sf=qz&unfiltered=1&ref=nb_sb_noss"));
    if ($xpath === null) {
        return $arr;
    }

    // Collect the offer-listing links found on the search result page.
    $products = [];
    foreach ($xpath->query('//a/@href') as $href) {
        if (strpos($href->nodeValue, "/gp/offer-listing/B") === 0) {
            $products[] = $href->nodeValue;
        }
    }

    $productCnt = 0;
    foreach ($products as $product) {
        // Stop as soon as the cap is reached instead of downloading pages that would be discarded.
        if ($productCnt >= $maxProducts) {
            break;
        }

        // Append the extra parameters with the separator that matches the href's shape.
        $separator = (strpos($product, '?') === false ? '?' : '&');
        $url = "https://www.amazon.com" . $product . $separator . "tag=uj024-20&language=en_US";

        // "/gp/offer-listing/<ASIN>/..." -> the fourth segment is the ASIN; drop any query string glued to it.
        $asin = explode('/', $product)[3];
        $queryStart = strpos($asin, '?');
        if ($queryStart !== false) {
            $asin = substr($asin, 0, $queryStart);
        }

        $xpath = $loadXPath(getUrl("https://www.amazon.com/dp/" . $asin, $userAgent));
        if ($xpath === null) {
            continue;
        }

        $details = $xpath->query('//table[@id="productDetailsTable"]//ul/li');
        if ($details->length < 1) {
            continue;
        }
        // Only products that expose a details table count towards the cap.
        $productCnt++;

        // The first detail entry holds the format, optionally followed by " (...)".
        $str = trim($details->item(0)->nodeValue);
        $p = strpos($str, " (");
        $format = ($p > 0 ? substr($str, 0, $p) : $str);

        if (strpos($format, "Audio CD") !== 0 &&
            strpos($format, "Vinyl") !== 0 &&
            strpos($format, "Hardcover") !== 0 &&
            strpos($format, "Paperback") !== 0) {
            continue;
        }

        if (strpos($format, "Audio CD") !== false) {
            $mediaType = "CD";
        } else if (strpos($format, "Vinyl") !== false) {
            $mediaType = "Record";
        } else {
            // Only Hardcover and Paperback formats can reach this point.
            $mediaType = "Book";
        }

        $xpath = $loadXPath(getUrl("https://www.amazon.com" . $product, $userAgent));
        if ($xpath === null) {
            continue;
        }

        $images = $xpath->query('//div[@id="olpProductImage"]//img');
        $pic = ($images->length > 0 ? $images->item(0)->getAttribute("src") : "");

        $title = $firstText($xpath, '//div[@id="olpProductDetails"]/h1');

        $bylines = $xpath->query('//div[@id="olpProductByline"]');
        if ($bylines->length > 0) {
            $artists = trim($bylines->item(0)->nodeValue);
            $title .= " " . $artists;
        }

        $priceColumn = './/div[' . $hasClass('olpPriceColumn') . ']';
        $listings = $xpath->query('//div[' . $hasClass('olpOffer') . ']');

        foreach ($listings as $listing) {
            // An offer without a textual seller name is treated as sold by Amazon itself.
            $str = $firstText($xpath, './/h3[' . $hasClass('olpSellerName') . ']', $listing);
            $sellerName = ($str === '' ? "Amazon" : $str);
            $merchantName = "Amazon";
            $feedbackPercent = -1;
            $feedbackScore = -1;

            if ($sellerName !== "Amazon") {
                $merchantName .= " Marketplace";
                $ratings = $xpath->query('.//div[' . $hasClass('olpSellerColumn') . ']//p', $listing);
                if ($ratings->length > 0) {
                    $str = trim($ratings->item(0)->nodeValue);
                    // Skip the first 17 characters, then expect exactly three numbers:
                    // the first is used as the percentage, the third as the rating count.
                    $sellerrating = (string) substr($str, 17);
                    $num = preg_match_all('/((?:[0-9]+,)*[0-9]+(?:\.[0-9]+)?)/', $sellerrating, $matches);
                    if ($num === 3) {
                        $feedbackPercent = (int)$matches[0][0];
                        $feedbackScore = (int)str_replace(',', '', $matches[0][2]);
                    }
                }
            }

            // Condition text is either "<condition>" or "<condition> - <detail>".
            $str = $firstText($xpath, './/span[' . $hasClass('olpCondition') . ']', $listing);
            $pos = strpos($str, " - ");
            if ($pos !== false) {
                $condition = trim(substr($str, 0, $pos));
                $detailCondition = trim(substr($str, $pos + 3));
            } else {
                $condition = $str;
                $detailCondition = $str;
            }
            if ($condition === "Collectible" || $condition === "Refurbished") {
                $condition = 'Used';
            }

            // Drop the leading currency symbol from the displayed price.
            $price = substr($firstText($xpath, $priceColumn . '//span[' . $hasClass('olpOfferPrice') . ']', $listing), 1);
            $currency = 'USD';

            $shippingCurrency = 'USD';
            $shipping = $xpath->query($priceColumn . '//span[' . $hasClass('olpShippingPrice') . ']', $listing);
            if ($shipping->length > 0) {
                $shippingCost = substr(trim($shipping->item(0)->nodeValue), 1);
                $freeShippingCap = 0;
            } else {
                $shippingCost = 0.00;
                // Always reset per offer so a value from a previous offer cannot leak through.
                $freeShippingCap = 0.00;
                $str = $firstText($xpath, $priceColumn . '//p[' . $hasClass('olpShippingInfo') . ']', $listing);
                if (strpos($str, "on orders over") !== false) {
                    $freeShippingCap = 25.00;
                }
            }

            $country = 'US';
            $deliveryItems = $xpath->query('.//div[' . $hasClass('olpDeliveryColumn') . ']//ul/li', $listing);
            foreach ($deliveryItems as $node) {
                $str = trim($node->nodeValue);
                if (strpos($str, "Ships from") === 0) {
                    // Location is the text after "Ships from " up to the first period, if there is one.
                    $p = strpos($str, ".");
                    $location = ($p === false ? substr($str, 11) : substr($str, 11, $p - 11));
                    $country = getCountryCode($location);
                }
            }

            $primeIcons = $xpath->query($priceColumn . '//i[' . $hasClass('a-icon-prime') . ']', $listing);
            if ($primeIcons->length > 0) {
                $sellerName .= " Prime";
            }

            $arr[] = [
                "Merchant" => $merchantName,
                "Condition" => $condition,
                "Title" => $title,
                "Barcode" => "",
                "BarcodeType" => "",
                "Image" => $pic,
                "URL" => $url,
                "MediaType" => $mediaType,
                "DetailCondition" => $detailCondition,
                "Country" => $country,
                "BestOffer" => false,
                "TimeLeft" => 0,
                "Price" => $price,
                "Currency" => $currency,
                "ListingType" => "Fixed",
                "Location" => "US",
                "Zip" => "",
                "FeedbackScore" => $feedbackScore,
                "FeedbackPercent" => $feedbackPercent,
                "SellerName" => $sellerName,
                "HandlingTime" => 1,
                "ShippingCost" => $shippingCost,
                "ShippingEstimated" => false,
                "ShippingCurrency" => $shippingCurrency,
                "FreeShippingCap" => $freeShippingCap,
                "Show" => true,
            ];
        }
    }

    return $arr;
}
|