Rev 97 | Rev 101 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php

error_reporting(E_ALL);

/**
 * Build the XPath predicate that matches an element whose class attribute
 * contains $class as a whole, space-separated token.
 *
 * @param string $class CSS class token to look for
 * @return string predicate text, to be placed between "[" and "]"
 */
function amazon_scrape_has_class($class)
{
    return 'contains(concat(" ", normalize-space(@class), " "), " ' . $class . ' ")';
}

/**
 * Return the trimmed text of the first node matched by an XPath expression.
 *
 * @param DOMXPath     $xpath      evaluator bound to the document
 * @param string       $expression XPath expression
 * @param DOMNode|null $context    optional context node for relative expressions
 * @return string trimmed node text, or "" when nothing matches
 */
function amazon_scrape_first_text(DOMXPath $xpath, $expression, $context = null)
{
    $nodes = ($context === null ? $xpath->query($expression) : $xpath->query($expression, $context));
    if ($nodes === false || $nodes->length < 1) {
        return "";
    }
    return trim($nodes->item(0)->nodeValue);
}

/**
 * Fetch a page through the "amazon_scrape" search cache: return the cached
 * copy when getSearchCache() has one, otherwise download it with getUrl()
 * and store it with saveSearchCache().
 *
 * @param string      $cacheKey        key identifying the page in the cache
 * @param string      $searchCondition second cache discriminator ("" for product pages)
 * @param string      $url             address to download on a cache miss
 * @param string|null $userAgent       forwarded to getUrl() as its second argument when given
 * @return mixed whatever getSearchCache()/getUrl() returned
 */
function amazon_scrape_fetch($cacheKey, $searchCondition, $url, $userAgent = null)
{
    $html = getSearchCache("amazon_scrape", $cacheKey, $searchCondition);
    if ($html === false) {
        $html = ($userAgent === null ? getUrl($url) : getUrl($url, $userAgent));
        saveSearchCache("amazon_scrape", $cacheKey, $searchCondition, $html);
    }
    return $html;
}

/**
 * Parse an HTML string and return an XPath evaluator for it.
 *
 * @param mixed $html markup to parse
 * @return DOMXPath|null null when $html is not a non-empty string
 */
function amazon_scrape_load($html)
{
    // DOMDocument::loadHTML() rejects an empty source, so bail out early.
    if (!is_string($html) || trim($html) === '') {
        return null;
    }
    $dom = new DOMDocument;
    $dom->loadHTML($html);
    // Parse errors are buffered by libxml_use_internal_errors(true); drop
    // them so the buffer does not grow with every scraped page.
    libxml_clear_errors();
    return new DOMXPath($dom);
}

/**
 * Append a child element with a text value to a SimpleXML node.
 *
 * SimpleXMLElement::addChild() does not escape "&" in the value, so raw URLs
 * and titles containing an ampersand would be mangled; the value is escaped
 * here first.
 *
 * @param SimpleXMLElement $parent node receiving the child
 * @param string           $name   element name
 * @param mixed            $value  text content (cast to string)
 * @return SimpleXMLElement the new child
 */
function amazon_scrape_add_child(SimpleXMLElement $parent, $name, $value)
{
    return $parent->addChild($name, htmlspecialchars((string)$value, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8'));
}

/**
 * Turn one "olpOffer" element of an offer-listing page into a listing record.
 *
 * @param DOMXPath $xpath     evaluator bound to the offer-listing document
 * @param DOMNode  $listing   the offer element
 * @param string   $fullTitle title (including artists when known) for the record
 * @param string   $pic       product image URL
 * @param string   $url       product URL for the record
 * @param string   $mediaType media type label for the record
 * @return array listing record
 */
function amazon_scrape_parse_offer(DOMXPath $xpath, $listing, $fullTitle, $pic, $url, $mediaType)
{
    $priceColumn = './/div[' . amazon_scrape_has_class('olpPriceColumn') . ']';

    // Seller: an empty seller heading is treated as Amazon itself.
    $str = amazon_scrape_first_text($xpath, './/h3[' . amazon_scrape_has_class('olpSellerName') . ']', $listing);
    $sellerName = (empty($str) ? "Amazon" : $str);
    $merchantName = "Amazon";
    $feedbackPercent = -1;
    $feedbackScore = -1;
    if ($sellerName != "Amazon") {
        $merchantName .= " Marketplace";
        $str = amazon_scrape_first_text($xpath, './/div[' . amazon_scrape_has_class('olpSellerColumn') . ']//p', $listing);
        // The first 17 bytes of the rating paragraph are skipped before
        // extracting numbers; exactly three numbers are expected, of which
        // the first is used as percent and the third as score.
        $sellerRating = (string)substr($str, 17);
        $num = preg_match_all('/((?:[0-9]+,)*[0-9]+(?:\.[0-9]+)?)/', $sellerRating, $matches);
        if ($num == 3) {
            $feedbackPercent = (int)$matches[0][0];
            $feedbackScore = (int)str_replace(',', '', $matches[0][2]);
        }
    }

    // Condition: "<condition> - <detail>" when a separator is present.
    $str = amazon_scrape_first_text($xpath, './/span[' . amazon_scrape_has_class('olpCondition') . ']', $listing);
    $pos = strpos($str, " - ");
    if ($pos !== false) {
        $condition = trim(substr($str, 0, $pos));
        $detailCondition = trim(substr($str, $pos + 3));
    } else {
        $condition = $str;
        $detailCondition = $str;
    }
    if ($condition == "Collectible" || $condition == "Refurbished") {
        $condition = 'Used';
    }

    // Price: drop the leading currency symbol.
    $price = substr(
        amazon_scrape_first_text($xpath, $priceColumn . '//span[' . amazon_scrape_has_class('olpOfferPrice') . ']', $listing),
        1
    );
    $currency = 'USD';

    // Shipping. $freeShippingCap always starts from a defined value so it can
    // neither be undefined nor leak from a previously parsed offer.
    $shippingCurrency = 'USD';
    $freeShippingCap = 0;
    $nodes = $xpath->query($priceColumn . '//span[' . amazon_scrape_has_class('olpShippingPrice') . ']', $listing);
    if ($nodes->length > 0) {
        $shippingCost = substr(trim($nodes->item(0)->nodeValue), 1);
    } else {
        $shippingCost = 0.00;
        $str = amazon_scrape_first_text($xpath, $priceColumn . '//p[' . amazon_scrape_has_class('olpShippingInfo') . ']', $listing);
        if (strpos($str, "FREE Shipping") !== false) {
            $freeShippingCap = 0.00;
        }
        if (strpos($str, "on orders over") !== false) {
            $freeShippingCap = 25.00;
        }
    }

    // Country: taken from a "Ships from <place>." delivery line when present.
    $country = 'US';
    $nodes = $xpath->query('.//div[' . amazon_scrape_has_class('olpDeliveryColumn') . ']//ul/li', $listing);
    foreach ($nodes as $node) {
        $str = trim($node->nodeValue);
        if (strpos($str, "Ships from") === 0) {
            $dot = strpos($str, ".");
            $origin = ($dot === false ? substr($str, 11) : substr($str, 11, $dot - 11));
            $country = getCountryCode($origin);
        }
    }

    $nodes = $xpath->query($priceColumn . '//i[' . amazon_scrape_has_class('a-icon-prime') . ']', $listing);
    if ($nodes->length > 0) {
        $sellerName .= " Prime";
    }

    return array(
        "Merchant" => $merchantName,
        "Condition" => $condition,
        "Title" => $fullTitle,
        "Barcode" => "",
        "BarcodeType" => "",
        "Image" => $pic,
        "URL" => $url,
        "MediaType" => $mediaType,
        "DetailCondition" => $detailCondition,
        "Country" => $country,
        "BestOffer" => false,
        "TimeLeft" => 0,
        "Price" => $price,
        "Currency" => $currency,
        "ListingType" => "Fixed",
        "Location" => "US",
        "Zip" => "",
        "FeedbackScore" => $feedbackScore,
        "FeedbackPercent" => $feedbackPercent,
        "SellerName" => $sellerName,
        "HandlingTime" => 1,
        "ShippingCost" => $shippingCost,
        "ShippingEstimated" => false,
        "ShippingCurrency" => $shippingCurrency,
        "FreeShippingCap" => $freeShippingCap,
        "Show" => true,
    );
}

/**
 * Get Amazon listings by scraping the search, product-detail and
 * offer-listing pages.
 *
 * Every search-result link starting with "/gp/offer-listing/B" is treated as
 * a product. Products whose detail table names a supported format (Audio CD,
 * Vinyl, Sheet, Hardcover, Paperback) contribute up to the configured
 * 'numListings' offers each. When $_SESSION["discogs"] is empty on entry it
 * is rebuilt here: startMatches(), one addMatch_scrape() per CD/vinyl
 * product, then endMatches(), or "" when no product matched.
 *
 * @param string $query           search text
 * @param string $searchCondition cache discriminator for the search page
 * @return array list of listing records (see amazon_scrape_parse_offer())
 */
function get_amazon_scrape($query, $searchCondition)
{
    $vendors = Vendors::getInstance();
    $config = $vendors->getVendor(Vendors::AMAZON);
    $numListings = $config['numListings'];

    $needMatches = empty($_SESSION["discogs"]);
    if ($needMatches) {
        $_SESSION["discogs"] = startMatches();
    }

    $arr = [];
    $cnt = 0;
    $userAgent = "Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/70.0";
    $formats = ["Audio CD", "Vinyl", "Sheet", "Hardcover", "Paperback"];

    libxml_use_internal_errors(true);

    // Search page: collect the offer-listing links.
    $products = [];
    $searchXpath = amazon_scrape_load(amazon_scrape_fetch(
        $query,
        $searchCondition,
        "https://www.amazon.com/s?k=" . rawurlencode($query) . "&sf=qz&unfiltered=1&ref=nb_sb_noss"
    ));
    if ($searchXpath !== null) {
        foreach ($searchXpath->query('//a/@href') as $href) {
            if (strpos($href->nodeValue, "/gp/offer-listing/B") === 0) {
                $products[] = $href->nodeValue;
            }
        }
    }

    foreach ($products as $product) {
        $url = "https://www.amazon.com" . $product . "&tag=uj024-20&language=en_US";
        // "/gp/offer-listing/<ASIN>/..." => the ASIN is the fourth piece.
        $asin = explode('/', $product)[3];

        // Product detail page: determine format and release date.
        $xpathPrd = amazon_scrape_load(
            amazon_scrape_fetch($asin, "", "https://www.amazon.com/dp/" . $asin, $userAgent)
        );
        if ($xpathPrd === null) {
            continue;
        }
        $details = $xpathPrd->query('//table[@id="productDetailsTable"]//ul/li');
        if ($details->length < 1) {
            continue;
        }

        $format = "";
        $releaseDate = "";
        foreach ($details as $node) {
            $str = trim($node->nodeValue);
            foreach ($formats as $prefix) {
                if (strpos($str, $prefix) === 0) {
                    // "<format> (<release date>)"; the parenthesised part is optional.
                    $p = strpos($str, " (");
                    $format = ($p > 0 ? substr($str, 0, $p) : $str);
                    $releaseDate = ($p > 0 ? substr($str, $p + 2, strlen($str) - $p - 3) : "");
                    break;
                }
            }
        }
        // $format is only ever set from a line starting with a supported
        // prefix, so an empty value means the product is not a supported
        // format. (The original re-tested "Sheet" against the last detail
        // line instead of $format.)
        if ($format === "") {
            continue;
        }

        if (strpos($format, "Audio CD") !== false) {
            $mediaType = "CD";
        } else if (strpos($format, "Vinyl") !== false) {
            $mediaType = "Record";
        } else {
            $mediaType = "Book";
        }

        // Offer-listing page, cached under the path without its "/ref=" tail.
        $refPos = strpos($product, "/ref=");
        $cacheKey = ($refPos === false ? $product : substr($product, 0, $refPos));
        $xpath = amazon_scrape_load(
            amazon_scrape_fetch($cacheKey, "", "https://www.amazon.com" . $product, $userAgent)
        );
        if ($xpath === null) {
            continue;
        }

        $pic = "";
        $nodes = $xpath->query('//div[@id="olpProductImage"]//img');
        if ($nodes->length > 0) {
            $pic = $nodes->item(0)->getAttribute("src");
        }

        $title = amazon_scrape_first_text($xpath, '//div[@id="olpProductDetails"]/h1');
        $fullTitle = $title;

        // Reset per product so a missing byline cannot reuse the previous artist.
        $artists = "";
        $nodes = $xpath->query('//div[@id="olpProductByline"]');
        if ($nodes->length > 0) {
            $artists = trim($nodes->item(0)->nodeValue);
            $artists = str_replace(" (Artist)", "", $artists);
            if (strpos($artists, "~ ") === 0) {
                $artists = substr($artists, 2);
            }
            $fullTitle = $title . " by " . $artists;
        }

        if ($needMatches && (strpos($format, "Audio CD") === 0 || strpos($format, "Vinyl") === 0)) {
            addMatch_scrape($xpathPrd, ++$cnt, $title, $artists, $format, $releaseDate, $asin, $url, $pic);
        }

        $listings = $xpath->query('//div[' . amazon_scrape_has_class('olpOffer') . ']');
        $listingCnt = 0;
        foreach ($listings as $listing) {
            // Stop once the per-product cap is reached; nothing after it is kept.
            if (++$listingCnt > $numListings) {
                break;
            }
            $arr[] = amazon_scrape_parse_offer($xpath, $listing, $fullTitle, $pic, $url, $mediaType);
        }
    }

    if ($needMatches) {
        // Comparison, not assignment: with no matches the session entry is
        // cleared instead of being closed with endMatches().
        if ($cnt === 0) {
            $_SESSION["discogs"] = "";
        } else {
            $_SESSION["discogs"] .= endMatches();
        }
    }

    return ($arr);
}

/**
 * Build an <item> XML record for one product from its detail page and append
 * the result of addMatch() for it to $_SESSION["discogs"].
 *
 * Does nothing when the page has no "productDetailsTable" list entries.
 * "Performer:" and "Original Release Date:" detail lines override the
 * $artists and $releaseDate arguments.
 *
 * @param DOMXPath $xpath       evaluator bound to the product detail page
 * @param int      $cnt         match counter, forwarded to addMatch()
 * @param string   $title       product title
 * @param string   $artists     artist text
 * @param string   $mediaType   media type label, stored and forwarded to addMatch()
 * @param string   $releaseDate release date text
 * @param string   $asin        Amazon product identifier
 * @param string   $url         product URL
 * @param string   $pic         product image URL
 * @return void
 */
function addMatch_scrape($xpath, $cnt, $title, $artists, $mediaType, $releaseDate, $asin, $url, $pic)
{
    $nodes = $xpath->query('//table[@id="productDetailsTable"]//ul/li');
    if ($nodes->length < 1) {
        return;
    }

    $runTime = "";
    $noDiscs = "";
    $label = "";
    $genre = "";
    // Edition, SPARS code and format are collected in page order and joined,
    // so no part overwrites another.
    $editionParts = [];

    foreach ($nodes as $node) {
        $str = trim($node->nodeValue);

        // Each value starts one character after its label (label + space).
        if (strpos($str, "Run Time:") === 0) {
            $runTime = (string)substr($str, 10);
        }
        if (strpos($str, "Number of Discs:") === 0) {
            $noDiscs = (string)substr($str, 17);
        }
        if (strpos($str, "Label:") === 0) {
            $label = (string)substr($str, 7);
        }
        if (strpos($str, "Edition:") === 0) {
            $editionParts[] = (string)substr($str, 9);
        }
        if (strpos($str, "SPARS Code:") === 0) {
            $editionParts[] = (string)substr($str, 12);
        }
        if (strpos($str, "Format:") === 0) {
            $editionParts[] = (string)substr($str, 8);
        }
        if (strpos($str, "Performer:") === 0) {
            $artists = (string)substr($str, 11);
        }
        if (strpos($str, "Original Release Date:") === 0) {
            $releaseDate = (string)substr($str, 23);
        }

        if (strpos($str, "Amazon Best Sellers Rank:") === 0) {
            $genres = [];
            foreach (explode("\n", $str) as $piece) {
                $piece = trim($piece);
                $p1 = strpos($piece, "in ");
                $p2 = strpos($piece, " (CDs & Vinyl)");
                if ($p1 === 0 && $p2 > 0) {
                    // NOTE(review): offset 4 skips one byte more than "in ";
                    // kept as in the original, verify against live markup.
                    $genres[] = substr($piece, 4, $p2 - 4);
                }
            }
            $genre = join(", ", $genres);
        }
    }
    $edition = join(", ", $editionParts);

    $item = new SimpleXMLElement("<item></item>");
    amazon_scrape_add_child($item, 'ASIN', $asin);
    amazon_scrape_add_child($item, 'DetailPageURL', $url);
    $image = $item->addChild('MediumImage');
    amazon_scrape_add_child($image, 'URL', $pic);

    // Track list: prefer the linked track titles, fall back to player rows.
    $tracks = $xpath->query(
        '//table[@id="dmusic_tracklist_content"]//div[' . amazon_scrape_has_class('a-section') . ']'
        . '//a[' . amazon_scrape_has_class('TitleLink') . ']'
    );
    if ($tracks->length > 0) {
        $disc = $item->addChild('Tracks')->addChild('Disc', '1');
        foreach ($tracks as $node) {
            $line = trim(preg_replace("/[\n\r]/", "", $node->nodeValue));
            amazon_scrape_add_child($disc, 'Track', $line);
        }
    } else {
        $tracks = $xpath->query(
            '//div[@id="dmusic_tracklist_player"]//div[' . amazon_scrape_has_class('a-row') . ']'
        );
        if ($tracks->length > 0) {
            $disc = $item->addChild('Tracks')->addChild('Disc', '1');
            foreach ($tracks as $node) {
                $line = trim($node->nodeValue);
                // On single-disc releases, rows starting with "Disc" are skipped.
                if ($noDiscs == 1 && strpos($line, "Disc") === 0) {
                    continue;
                }
                $line = trim(preg_replace("/[\n\r]/", "", $line));
                // Strip a leading UTF-8 non-breaking space (bytes C2 A0).
                if (bin2hex(substr($line, 0, 2)) == "c2a0") {
                    $line = trim(substr($line, 2));
                }
                amazon_scrape_add_child($disc, 'Track', $line);
            }
        }
    }

    $attributes = $item->addChild('ItemAttributes');
    amazon_scrape_add_child($attributes, 'Title', $title);
    amazon_scrape_add_child($attributes, 'Artist', $artists);
    amazon_scrape_add_child($attributes, 'Edition', $edition);
    amazon_scrape_add_child($attributes, 'Genre', $genre);
    amazon_scrape_add_child($attributes, 'Label', $label);
    amazon_scrape_add_child($attributes, 'MediaType', $mediaType);
    amazon_scrape_add_child($attributes, 'NumberOfDiscs', $noDiscs);
    amazon_scrape_add_child($attributes, 'ReleaseDate', $releaseDate);
    amazon_scrape_add_child($attributes, 'RunningTime', (int)$runTime);

    $_SESSION["discogs"] .= addMatch($item, $cnt, $mediaType);
}