Rev 93 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php

error_reporting(E_ALL);

/**
 * Builds the XPath predicate that is true for an element carrying $class as
 * one of the whitespace-separated tokens of its class attribute.
 *
 * @param string $class Class token to look for.
 * @return string XPath predicate (without the surrounding brackets).
 */
function amazon_scrape_class_test($class)
{
    return 'contains(concat(" ", normalize-space(@class), " "), " ' . $class . ' ")';
}

/**
 * Parses an HTML page into a DOMXPath, or returns null when there is nothing
 * to parse.
 *
 * @param mixed $html Page body as returned by getUrl().
 * @return DOMXPath|null
 */
function amazon_scrape_xpath($html)
{
    // DOMDocument::loadHTML() rejects an empty source (warning on PHP 7,
    // ValueError on PHP 8), so a failed download is reported as null instead.
    if (!is_string($html) || $html === '') {
        return null;
    }

    $dom = new DOMDocument;
    $dom->loadHTML($html);
    // Parse errors are collected internally (see libxml_use_internal_errors in
    // the caller); drop them so the buffer does not grow with every page.
    libxml_clear_errors();

    return new DOMXPath($dom);
}

/**
 * Returns the trimmed text of the first node of a query result, or '' when
 * the query matched nothing.
 *
 * @param DOMNodeList|false $nodes Result of DOMXPath::query().
 * @return string
 */
function amazon_scrape_first_text($nodes)
{
    if ($nodes === false || $nodes->length < 1) {
        return '';
    }

    return trim($nodes->item(0)->nodeValue);
}

/**
 * Maps the product format line of a detail page to a media type.
 *
 * @param string $format Format text, e.g. "Audio CD".
 * @return string|null "CD", "Record" or "Book"; null when the format does not
 *                     start with one of the supported prefixes.
 */
function amazon_scrape_media_type($format)
{
    $supported = false;
    foreach (array("Audio CD", "Vinyl", "Hardcover", "Paperback") as $prefix) {
        if (strpos($format, $prefix) === 0) {
            $supported = true;
            break;
        }
    }
    if (!$supported) {
        return null;
    }

    if (strpos($format, "Audio CD") !== false) {
        return "CD";
    }
    if (strpos($format, "Vinyl") !== false) {
        return "Record";
    }

    // Only the Hardcover / Paperback prefixes remain at this point.
    return "Book";
}

/**
 * Get Amazon listings: searches amazon.com for $query and scrapes the offer
 * listings of the first matching products.
 *
 * For every search hit that links to an offer-listing page, the product
 * detail page is fetched to read the format. Products whose format starts
 * with "Audio CD", "Vinyl", "Hardcover" or "Paperback" then have their
 * offer-listing page scraped, one result row per offer. At most five products
 * that have a details table are considered.
 *
 * Relies on getUrl() and getCountryCode(), which are defined outside this
 * block. NOTE(review): getUrl() is assumed to return the page body as a
 * string; anything else (or an empty string) is treated as a failed download.
 *
 * @param string $query           Search terms.
 * @param mixed  $searchCondition Accepted for interface compatibility; not
 *                                used by this function.
 * @return array List of associative arrays with the keys Merchant, Condition,
 *               Title, Barcode, BarcodeType, Image, URL, MediaType,
 *               DetailCondition, Country, BestOffer, TimeLeft, Price,
 *               Currency, ListingType, Location, Zip, FeedbackScore,
 *               FeedbackPercent, SellerName, HandlingTime, ShippingCost,
 *               ShippingEstimated, ShippingCurrency, FreeShippingCap, Show.
 */
function get_amazon_scrape($query, $searchCondition)
{
    $arr = [];
    $maxProducts = 5;
    $browserAgent = "Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/70.0";

    // Real-world markup is rarely valid; keep libxml from emitting a warning
    // for every parse error.
    libxml_use_internal_errors(true);

    $search = amazon_scrape_xpath(
        getUrl("https://www.amazon.com/s?k=" . rawurlencode($query) . "&sf=qz&unfiltered=1&ref=nb_sb_noss")
    );
    if ($search === null) {
        return $arr;
    }

    // Collect every link that points at an offer-listing page.
    $products = [];
    foreach ($search->query('//a/@href') as $href) {
        if (strpos($href->nodeValue, "/gp/offer-listing/B") === 0) {
            $products[] = $href->nodeValue;
        }
    }

    $productCnt = 0;
    foreach ($products as $product) {
        // Stop before downloading anything once the limit is reached; nothing
        // after this point could contribute to the result.
        if ($productCnt >= $maxProducts) {
            break;
        }

        // Append the tracking parameters with the separator that matches
        // whether the href already carries a query string.
        $separator = (strpos($product, '?') === false ? '?' : '&');
        $url = "https://www.amazon.com" . $product . $separator . "tag=uj024-20&language=en_US";

        // "/gp/offer-listing/<ASIN>/..." -> segment 3 is the ASIN; drop a
        // query string that may be glued to it.
        $asin = explode('?', explode('/', $product)[3])[0];

        $detail = amazon_scrape_xpath(getUrl("https://www.amazon.com/dp/" . $asin, $browserAgent));
        if ($detail === null) {
            continue;
        }
        $detailItems = $detail->query('//table[@id="productDetailsTable"]//ul/li');
        if ($detailItems->length < 1) {
            continue;
        }
        // Only products that expose a details table count towards the limit.
        $productCnt++;

        // The first detail line holds the format, optionally followed by
        // " (...)" which is cut off.
        $str = trim($detailItems->item(0)->nodeValue);
        $p = strpos($str, " (");
        $format = ($p > 0 ? substr($str, 0, $p) : $str);

        $mediaType = amazon_scrape_media_type($format);
        if ($mediaType === null) {
            continue;
        }

        $offers = amazon_scrape_xpath(getUrl("https://www.amazon.com" . $product, $browserAgent));
        if ($offers === null) {
            continue;
        }

        $imageNodes = $offers->query('//div[@id="olpProductImage"]//img');
        $pic = ($imageNodes->length > 0 ? $imageNodes->item(0)->getAttribute("src") : "");

        $title = amazon_scrape_first_text($offers->query('//div[@id="olpProductDetails"]/h1'));
        $bylineNodes = $offers->query('//div[@id="olpProductByline"]');
        if ($bylineNodes->length > 0) {
            $artists = trim($bylineNodes->item(0)->nodeValue);
            $title .= " " . $artists;
        }

        $priceColumn = './/div[' . amazon_scrape_class_test('olpPriceColumn') . ']';
        $listings = $offers->query('//div[' . amazon_scrape_class_test('olpOffer') . ']');
        foreach ($listings as $listing) {
            // Seller: an empty name cell means the offer is sold by Amazon.
            $str = amazon_scrape_first_text(
                $offers->query('.//h3[' . amazon_scrape_class_test('olpSellerName') . ']', $listing)
            );
            $sellerName = (empty($str) ? "Amazon" : $str);
            $merchantName = "Amazon";
            $feedbackPercent = -1;
            $feedbackScore = -1;
            if ($sellerName != "Amazon") {
                $merchantName .= " Marketplace";
                $ratingNodes = $offers->query(
                    './/div[' . amazon_scrape_class_test('olpSellerColumn') . ']//p',
                    $listing
                );
                if ($ratingNodes->length > 0) {
                    $str = trim($ratingNodes->item(0)->nodeValue);
                    // Skip the leading star-rating text, then expect exactly
                    // three numbers: first = percent, third = rating count.
                    $sellerrating = (string) substr($str, 17);
                    $num = preg_match_all('/((?:[0-9]+,)*[0-9]+(?:\.[0-9]+)?)/', $sellerrating, $matches);
                    if ($num == 3) {
                        $feedbackPercent = (int)$matches[0][0];
                        $feedbackScore = (int)str_replace(',', '', $matches[0][2]);
                    }
                }
            }

            // Condition: "<condition> - <detail>" or just "<condition>".
            $str = amazon_scrape_first_text(
                $offers->query('.//span[' . amazon_scrape_class_test('olpCondition') . ']', $listing)
            );
            $pos = strpos($str, " - ");
            if ($pos !== false) {
                $condition = trim(substr($str, 0, $pos));
                $detailCondition = trim(substr($str, $pos + 3));
            } else {
                $condition = $str;
                $detailCondition = $str;
            }
            if ($condition == "Collectible" || $condition == "Refurbished") {
                $condition = 'Used';
            }

            // Price: drop the leading currency symbol.
            $price = (string) substr(
                amazon_scrape_first_text(
                    $offers->query(
                        $priceColumn . '//span[' . amazon_scrape_class_test('olpOfferPrice') . ']',
                        $listing
                    )
                ),
                1
            );
            $currency = 'USD';

            // Shipping: either an explicit price or a free-shipping note.
            $shippingCurrency = 'USD';
            $shippingNodes = $offers->query(
                $priceColumn . '//span[' . amazon_scrape_class_test('olpShippingPrice') . ']',
                $listing
            );
            if ($shippingNodes->length > 0) {
                $shippingCost = substr(trim($shippingNodes->item(0)->nodeValue), 1);
                $freeShippingCap = 0;
            } else {
                $shippingCost = 0.00;
                // Always start from a defined value so a listing never
                // inherits the cap of the previous one.
                $freeShippingCap = 0.00;
                $str = amazon_scrape_first_text(
                    $offers->query(
                        $priceColumn . '//p[' . amazon_scrape_class_test('olpShippingInfo') . ']',
                        $listing
                    )
                );
                if (strpos($str, "on orders over") !== false) {
                    $freeShippingCap = 25.00;
                }
            }

            // Origin: "Ships from <place>." -> country code of <place>.
            $country = 'US';
            $deliveryItems = $offers->query(
                './/div[' . amazon_scrape_class_test('olpDeliveryColumn') . ']//ul/li',
                $listing
            );
            foreach ($deliveryItems as $node) {
                $str = trim($node->nodeValue);
                if (strpos($str, "Ships from") === 0) {
                    $p = strpos($str, ".");
                    // Without a usable period take the rest of the line
                    // instead of slicing with a negative length.
                    $place = ($p === false || $p < 11)
                        ? substr($str, 11)
                        : substr($str, 11, $p - 11);
                    $country = getCountryCode((string) $place);
                }
            }

            $primeNodes = $offers->query(
                $priceColumn . '//i[' . amazon_scrape_class_test('a-icon-prime') . ']',
                $listing
            );
            if ($primeNodes->length > 0) {
                $sellerName .= " Prime";
            }

            $arr[] = array(
                "Merchant" => $merchantName,
                "Condition" => $condition,
                "Title" => $title,
                "Barcode" => "",
                "BarcodeType" => "",
                "Image" => $pic,
                "URL" => $url,
                "MediaType" => $mediaType,
                "DetailCondition" => $detailCondition,
                "Country" => $country,
                "BestOffer" => false,
                "TimeLeft" => 0,
                "Price" => $price,
                "Currency" => $currency,
                "ListingType" => "Fixed",
                "Location" => "US",
                "Zip" => "",
                "FeedbackScore" => $feedbackScore,
                "FeedbackPercent" => $feedbackPercent,
                "SellerName" => $sellerName,
                "HandlingTime" => 1,
                "ShippingCost" => $shippingCost,
                "ShippingEstimated" => false,
                "ShippingCurrency" => $shippingCurrency,
                "FreeShippingCap" => $freeShippingCap,
                "Show" => true,
            );
        }
    }

    return ($arr);
}