<?php
/*
miniProxy - A simple PHP web proxy. <https://github.com/joshdick/miniProxy>
Written and maintained by Joshua Dick <http://joshdick.net>.
miniProxy is licensed under the GNU GPL v3 <http://www.gnu.org/licenses/gpl.html>.
*/

/****************************** START CONFIGURATION ******************************/

// To allow proxying any URL, set $whitelistPatterns to an empty array (the default).
// To only allow proxying of specific URLs (whitelist), add corresponding regular expressions
// to the $whitelistPatterns array. Enter the most specific patterns possible, to prevent possible abuse.
// You can optionally use the "getHostnamePattern()" helper function to build a regular expression that
// matches all URLs for a given hostname.
$whitelistPatterns = [
    // Usage example: To support any URL at example.net, including sub-domains, uncomment the
    // line below (which is equivalent to
    // [ @^https?://([a-z0-9-]+\.)*example\.net(?::\d+)?(?:[/?#]|$)@i ]):
    // getHostnamePattern("example.net")
];

// To enable CORS (cross-origin resource sharing) for proxied sites, set $forceCORS to true.
$forceCORS = false;

// Set to false to report the client machine's IP address to proxied sites via the HTTP `x-forwarded-for` header.
// Setting to false may improve compatibility with some sites, but also exposes more information about end users to proxied sites.
$anonymize = true;

// Start/default URL that will be proxied when miniProxy is first loaded in a browser/accessed directly with no URL to proxy.
// If empty, miniProxy will show its own landing page.
$startURL = "";

// When no $startURL is configured above, miniProxy will show its own landing page with a URL form field
// and the configured example URL. The example URL appears in the instructional text on the miniProxy landing page,
// and is proxied when pressing the 'Proxy It!' button on the landing page if its URL form is left blank.
$landingExampleURL = "https://example.net";

/****************************** END CONFIGURATION ******************************/

ob_start("ob_gzhandler");

if (version_compare(PHP_VERSION, "5.4.7", "<")) {
    die("miniProxy requires PHP version 5.4.7 or later.");
}

// "json" is needed by toJavaScriptString(); it is always available on PHP 8+.
$requiredExtensions = ['curl', 'json', 'mbstring', 'xml'];
foreach ($requiredExtensions as $requiredExtension) {
    if (!extension_loaded($requiredExtension)) {
        die("miniProxy requires PHP's \"" . $requiredExtension . "\" extension. Please install/enable it on your server and try again.");
    }
}

/**
 * Helper function for use inside $whitelistPatterns.
 * Returns a regex that matches all HTTP[S] URLs for a given hostname and its sub-domains.
 *
 * The hostname is escaped with preg_quote() and must be followed by an optional
 * numeric port and then "/", "?", "#" or the end of the URL. Without that terminator
 * "example.net.evil.com" or "example.net:x@evil.com" would also match.
 *
 * @param string $hostname Hostname to allow, e.g. "example.net".
 * @return string A PCRE pattern delimited by "@" with the "i" modifier.
 */
function getHostnamePattern($hostname) {
    return "@^https?://([a-z0-9-]+\.)*" . preg_quote($hostname, "@") . '(?::\d+)?(?:[/?#]|$)@i';
}

/**
 * Removes (unsets) keys from an associative array using case-insensitive matching.
 *
 * @param array $assoc       Array to modify in place.
 * @param array $keys2remove Key names to remove, in any letter case.
 * @return array The keys that were actually removed, in their original letter case.
 */
function removeKeys(&$assoc, $keys2remove) {
    // Map each lower-cased key to the spelling actually used in $assoc.
    $originalKeyByLowercase = [];
    foreach (array_keys($assoc) as $originalKey) {
        $originalKeyByLowercase[strtolower($originalKey)] = $originalKey;
    }

    $removedKeys = [];
    foreach ($keys2remove as $candidate) {
        $lookup = strtolower($candidate);
        if (!isset($originalKeyByLowercase[$lookup])) {
            continue;
        }
        unset($assoc[$originalKeyByLowercase[$lookup]]);
        $removedKeys[] = $originalKeyByLowercase[$lookup];
    }
    return $removedKeys;
}

if (!function_exists("getallheaders")) {
    /**
     * Fallback for SAPIs that do not provide getallheaders().
     * Adapted from http://www.php.net/manual/en/function.getallheaders.php#99814
     *
     * @return array Request headers rebuilt from the HTTP_* entries of $_SERVER,
     *               keyed by header name in "Word-Word" capitalization.
     */
    function getallheaders() {
        $result = [];
        foreach ($_SERVER as $key => $value) {
            if (substr($key, 0, 5) == "HTTP_") {
                // HTTP_ACCEPT_LANGUAGE -> "Accept-Language"
                $key = str_replace(" ", "-", ucwords(strtolower(str_replace("_", " ", substr($key, 5)))));
                $result[$key] = $value;
            }
        }
        return $result;
    }
}

// $_SERVER["HTTPS"] is set to "off" by some servers (e.g. IIS) for plain HTTP requests,
// so checking isset() alone is not enough.
$isHttps = !empty($_SERVER["HTTPS"]) && strtolower($_SERVER["HTTPS"]) !== "off";
// SERVER_PORT is a string; cast it so the strict comparisons below can actually succeed.
$serverPort = (int) $_SERVER["SERVER_PORT"];
$usingDefaultPort = (!$isHttps && $serverPort === 80) || ($isHttps && $serverPort === 443);
$prefixPort = $usingDefaultPort ? "" : ":" . $serverPort;
// Use HTTP_HOST to support client-configured DNS (instead of SERVER_NAME), but remove the port if one is present.
$prefixHost = $_SERVER["HTTP_HOST"];
$prefixHost = strpos($prefixHost, ":") ? implode(":", explode(":", $_SERVER["HTTP_HOST"], -1)) : $prefixHost;

define("PROXY_PREFIX", "http" . ($isHttps ? "s" : "") . "://" . $prefixHost . $prefixPort . $_SERVER["SCRIPT_NAME"] . "?");

/**
 * Makes an HTTP request via cURL, using request data that was passed directly to this script.
 *
 * @param string $url The URL to request.
 * @return array Associative array with the keys:
 *               "headers"      => raw response header text (all header blocks cURL received),
 *               "body"         => response body,
 *               "responseInfo" => result of curl_getinfo(),
 *               "error"        => cURL error message if the transfer failed, null otherwise.
 */
function makeRequest($url) {
    global $anonymize;

    // Tell cURL to make the request using the browser's user-agent if there is one, or a fallback user-agent otherwise.
    $userAgent = isset($_SERVER["HTTP_USER_AGENT"]) ? $_SERVER["HTTP_USER_AGENT"] : "";
    if (empty($userAgent)) {
        $userAgent = "Mozilla/5.0 (compatible; miniProxy)";
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);

    // Get ready to proxy the browser's request headers...
    $browserRequestHeaders = getallheaders();

    // ...but let cURL set some headers on its own.
    $removedHeaders = removeKeys($browserRequestHeaders, [
        "Accept-Encoding", // Throw away the browser's Accept-Encoding header if any and let cURL make the request using gzip if possible.
        "Content-Length",
        "Host",
        "Origin"
    ]);
    // removeKeys() returns the removed names as values, in their original case;
    // normalize them so the "origin" lookup below is case-insensitive.
    $removedHeaders = array_map("strtolower", $removedHeaders);

    curl_setopt($ch, CURLOPT_ENCODING, "");

    // Transform the associative array from getallheaders() into an
    // indexed array of header strings to be passed to cURL.
    $curlRequestHeaders = [];
    foreach ($browserRequestHeaders as $name => $value) {
        $curlRequestHeaders[] = $name . ": " . $value;
    }
    if (!$anonymize) {
        $curlRequestHeaders[] = "X-Forwarded-For: " . $_SERVER["REMOTE_ADDR"];
    }

    // Any `origin` header sent by the browser will refer to the proxy itself.
    // If an `origin` header is present in the request, rewrite it to point to the correct origin.
    if (in_array("origin", $removedHeaders, true)) {
        $urlParts = parse_url($url);
        if (is_array($urlParts) && isset($urlParts["scheme"], $urlParts["host"])) {
            $port = isset($urlParts["port"]) ? $urlParts["port"] : "";
            $curlRequestHeaders[] = "Origin: " . $urlParts["scheme"] . "://" . $urlParts["host"] . (empty($port) ? "" : ":" . $port);
        }
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, $curlRequestHeaders);

    // Proxy any received GET/POST/PUT data.
    $putStream = null;
    switch ($_SERVER["REQUEST_METHOD"]) {
        case "POST":
            curl_setopt($ch, CURLOPT_POST, true);
            // For some reason, $HTTP_RAW_POST_DATA isn't working as documented at
            // http://php.net/manual/en/reserved.variables.httprawpostdata.php
            // but the php://input method works. This is likely to be flaky
            // across different server environments.
            // More info here: http://stackoverflow.com/questions/8899239/http-raw-post-data-not-being-populated-after-upgrade-to-php-5-3
            // If the miniProxyFormAction field appears in the POST data, remove it so the destination server doesn't receive it.
            $postData = [];
            parse_str((string) file_get_contents("php://input"), $postData);
            unset($postData["miniProxyFormAction"]);
            curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postData));
            break;
        case "PUT":
            $putStream = fopen("php://input", "r");
            curl_setopt($ch, CURLOPT_PUT, true);
            curl_setopt($ch, CURLOPT_INFILE, $putStream);
            break;
    }

    // Other cURL options.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    // Set the request URL.
    curl_setopt($ch, CURLOPT_URL, $url);

    // Make the request.
    $response = curl_exec($ch);
    $error = ($response === false) ? curl_error($ch) : null;
    $responseInfo = curl_getinfo($ch);
    $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    curl_close($ch);
    if (is_resource($putStream)) {
        fclose($putStream);
    }

    // curl_exec() returns false on failure; normalize so the substr() calls below get a string.
    $response = (string) $response;

    // Setting CURLOPT_HEADER to true above forces the response headers and body
    // to be output together--separate them.
    $responseHeaders = (string) substr($response, 0, $headerSize);
    $responseBody = (string) substr($response, $headerSize);

    return [
        "headers" => $responseHeaders,
        "body" => $responseBody,
        "responseInfo" => $responseInfo,
        "error" => $error
    ];
}

/**
 * Converts relative URLs to absolute ones, given a base URL.
 * Modified version of code found at http://nashruddin.com/PHP_Script_for_Converting_Relative_to_Absolute_URL
 *
 * @param string $rel  URL to resolve. An empty value is treated as ".".
 * @param string $base Absolute URL of the document that $rel appeared in.
 * @return string $rel unchanged if it already has a scheme or starts with "//";
 *                otherwise $rel resolved against $base.
 */
function rel2abs($rel, $base) {
    if (empty($rel)) {
        $rel = ".";
    }
    // Return if already an absolute (or protocol-relative) URL.
    if (parse_url($rel, PHP_URL_SCHEME) != "" || strpos($rel, "//") === 0) {
        return $rel;
    }
    // Queries and anchors are simply appended to the base.
    if ($rel[0] == "#" || $rel[0] == "?") {
        return $base . $rel;
    }

    $baseParts = parse_url($base);
    if (!is_array($baseParts)) {
        // The base could not be parsed, so there is nothing to resolve against.
        return $rel;
    }
    $scheme = isset($baseParts["scheme"]) ? $baseParts["scheme"] : "";
    $host = isset($baseParts["host"]) ? $baseParts["host"] : "";

    // Remove the non-directory element from the base path.
    $path = isset($baseParts["path"]) ? preg_replace('#/[^/]*$#', "", $baseParts["path"]) : "/";
    // Destroy the path if the relative URL points to the root.
    if ($rel[0] == "/") {
        $path = "";
    }

    $port = (isset($baseParts["port"]) && $baseParts["port"] != 80) ? ":" . $baseParts["port"] : "";

    $auth = "";
    if (isset($baseParts["user"])) {
        $auth = $baseParts["user"];
        if (isset($baseParts["pass"])) {
            $auth .= ":" . $baseParts["pass"];
        }
        $auth .= "@";
    }

    // Dirty absolute URL.
    $abs = "$auth$host$port$path/$rel";

    // Replace '//' or '/./' or '/foo/../' with '/' until nothing more changes.
    do {
        $abs = preg_replace(['#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'], "/", $abs, -1, $replacements);
    } while ($replacements > 0);

    // Absolute URL is ready.
    return $scheme . "://" . $abs;
}

/**
 * Proxifies the contents of url() references in blocks of CSS text.
 *
 * @param string $css     CSS text (a stylesheet, a <style> body or a "style" attribute).
 * @param string $baseURL URL that relative references in $css are resolved against.
 * @return string The CSS with every non-"data:" url() pointing back at the proxy.
 */
function proxifyCSS($css, $baseURL) {
    // Add a "url()" wrapper to any CSS @import rules that only specify a URL without the wrapper,
    // so that they're proxified when searching for "url()" wrappers below.
    $normalizedLines = [];
    foreach (explode("\n", $css) as $line) {
        if (preg_match('/@import\s+url/i', $line)) {
            $normalizedLines[] = $line;
            continue;
        }
        $normalizedLines[] = preg_replace_callback(
            '/(@import\s+)([^;\s]+)([\s;])/i',
            function ($matches) {
                return $matches[1] . "url(" . $matches[2] . ")" . $matches[3];
            },
            $line
        );
    }
    $normalizedCSS = implode("\n", $normalizedLines);

    return preg_replace_callback(
        '/url\((.*?)\)/i',
        function ($matches) use ($baseURL) {
            // Whitespace is allowed between the parentheses and the URL.
            $url = trim($matches[1]);
            // Remove any surrounding single or double quotes from the URL so it can be passed to rel2abs - the quotes are optional in CSS.
            // Assume that if there is a leading quote then there should be a trailing quote, so just use trim() to remove them.
            if (strpos($url, "'") === 0) {
                $url = trim($url, "'");
            }
            if (strpos($url, "\"") === 0) {
                $url = trim($url, "\"");
            }
            // The URL isn't an HTTP URL but is actual binary data. Don't proxify it.
            if (stripos($url, "data:") === 0) {
                return "url(" . $url . ")";
            }
            return "url(" . PROXY_PREFIX . rel2abs($url, $baseURL) . ")";
        },
        $normalizedCSS
    );
}

/**
 * Proxifies "srcset" attributes (normally associated with <img> tags).
 *
 * Candidates are split on commas, so image URLs that themselves contain a comma
 * are not supported.
 *
 * @param string $srcset  Raw "srcset" attribute value.
 * @param string $baseURL URL that relative image URLs are resolved against.
 * @return string The "srcset" value with every image URL pointing back at the proxy.
 */
function proxifySrcset($srcset, $baseURL) {
    $proxifiedSources = [];
    foreach (explode(",", $srcset) as $source) {
        $source = trim($source);
        if ($source === "") {
            // Tolerate empty candidates, e.g. from a trailing comma.
            continue;
        }
        // A candidate is an image URL optionally followed by whitespace and a descriptor ("2x", "480w").
        $components = preg_split('/\s+/', $source, 2);
        $proxified = PROXY_PREFIX . rel2abs($components[0], $baseURL);
        if (isset($components[1]) && $components[1] !== "") {
            $proxified .= " " . $components[1];
        }
        $proxifiedSources[] = $proxified;
    }
    // Recombine the sources into a single "srcset".
    return implode(", ", $proxifiedSources);
}

/**
 * Encodes a value as a JavaScript string literal that is safe to embed in an inline <script>.
 *
 * @param string $value Text to encode.
 * @return string A double-quoted literal with <, >, &, ' and " hex-escaped, or '""' if encoding fails.
 */
function toJavaScriptString($value) {
    $encoded = json_encode((string) $value, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
    return $encoded === false ? '""' : $encoded;
}

/**
 * Builds the JavaScript that wraps window.XMLHttpRequest.prototype.open in order to make
 * all AJAX request URLs absolute and point back to the proxy.
 *
 * The rel2abs() JavaScript function serves the same purpose as the server-side one in this file,
 * but is used in the browser to ensure all AJAX request URLs are absolute and not relative.
 * Uses code from these sources:
 * http://stackoverflow.com/questions/7775767/javascript-overriding-xmlhttprequest-open
 * https://gist.github.com/1088850
 * TODO: This is obviously only useful for browsers that use XMLHttpRequest but
 * it's better than nothing.
 *
 * @param string $url URL of the page being proxied; used as the base for relative AJAX URLs.
 * @return string JavaScript source text.
 */
function buildAjaxShim($url) {
    return '(function() {

  if (window.XMLHttpRequest) {

    function parseURI(url) {
      var m = String(url).replace(/^\s+|\s+$/g, "").match(/^([^:\/?#]+:)?(\/\/(?:[^:@]*(?::[^:@]*)?@)?(([^:\/?#]*)(?::(\d*))?))?([^?#]*)(\?[^#]*)?(#[\s\S]*)?/);
      // authority = "//" + user + ":" + pass "@" + hostname + ":" port
      return (m ? {
        href : m[0] || "",
        protocol : m[1] || "",
        authority: m[2] || "",
        host : m[3] || "",
        hostname : m[4] || "",
        port : m[5] || "",
        pathname : m[6] || "",
        search : m[7] || "",
        hash : m[8] || ""
      } : null);
    }

    function rel2abs(base, href) { // RFC 3986

      function removeDotSegments(input) {
        var output = [];
        input.replace(/^(\.\.?(\/|$))+/, "")
          .replace(/\/(\.(\/|$))+/g, "/")
          .replace(/\/\.\.$/, "/../")
          .replace(/\/?[^\/]*/g, function (p) {
            if (p === "/..") {
              output.pop();
            } else {
              output.push(p);
            }
          });
        return output.join("").replace(/^\//, input.charAt(0) === "/" ? "/" : "");
      }

      href = parseURI(href || "");
      base = parseURI(base || "");

      return !href || !base ? null : (href.protocol || base.protocol) +
      (href.protocol || href.authority ? href.authority : base.authority) +
      removeDotSegments(href.protocol || href.authority || href.pathname.charAt(0) === "/" ? href.pathname : (href.pathname ? ((base.authority && !base.pathname ? "/" : "") + base.pathname.slice(0, base.pathname.lastIndexOf("/") + 1) + href.pathname) : base.pathname)) +
      (href.protocol || href.authority || href.pathname ? href.search : (href.search || base.search)) +
      href.hash;

    }

    var proxied = window.XMLHttpRequest.prototype.open;
    window.XMLHttpRequest.prototype.open = function() {
      if (arguments[1] !== null && arguments[1] !== undefined) {
        var url = arguments[1];
        url = rel2abs(' . toJavaScriptString($url) . ', url);
        url = ' . toJavaScriptString(PROXY_PREFIX) . ' + url;
        arguments[1] = url;
      }
      return proxied.apply(this, [].slice.call(arguments));
    };

  }

})();';
}

/**
 * Rewrites an HTML document so that forms, links, resources, inline CSS and
 * XMLHttpRequest calls are routed back through the proxy.
 *
 * @param string $html Non-empty HTML source as received from the remote server.
 * @param string $url  URL the HTML was fetched from; used as the base for relative URLs.
 * @return string The proxified HTML document.
 */
function proxifyHTML($html, $url) {
    // Attempt to normalize character encoding.
    $detectedEncoding = mb_detect_encoding($html, "UTF-8, ISO-8859-1");
    if ($detectedEncoding) {
        $html = mb_convert_encoding($html, "HTML-ENTITIES", $detectedEncoding);
    }

    // Parse the DOM. Real-world markup is rarely valid, so collect libxml's
    // parse errors internally (and discard them) instead of emitting warnings.
    $doc = new DOMDocument();
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);
    $xpath = new DOMXPath($doc);

    // Rewrite forms so that their actions point back to the proxy.
    foreach ($xpath->query("//form") as $form) {
        $action = $form->getAttribute("action");
        // If the form doesn't have an action, the action is the page itself.
        // Otherwise, change an existing action to an absolute version.
        $action = empty($action) ? $url : rel2abs($action, $url);
        // Rewrite the form action to point back at the proxy.
        $form->setAttribute("action", rtrim(PROXY_PREFIX, "?"));
        // Add a hidden form field that the proxy can later use to retrieve the original form action.
        $actionInput = $doc->createElement("input");
        $actionInput->setAttribute("type", "hidden");
        $actionInput->setAttribute("name", "miniProxyFormAction");
        $actionInput->setAttribute("value", $action);
        $form->appendChild($actionInput);
    }

    // Proxify <meta> tags with an 'http-equiv="refresh"' attribute.
    foreach ($xpath->query("//meta[@http-equiv]") as $element) {
        if (strcasecmp($element->getAttribute("http-equiv"), "refresh") !== 0) {
            continue;
        }
        $content = $element->getAttribute("content");
        if (empty($content)) {
            continue;
        }
        // Split on the first "=" only ("5; url=..."), so that "=" characters
        // inside the target URL's query string are preserved.
        $splitContent = explode("=", $content, 2);
        if (isset($splitContent[1])) {
            $element->setAttribute("content", $splitContent[0] . "=" . PROXY_PREFIX . rel2abs(trim($splitContent[1]), $url));
        }
    }

    // Proxify <style> tags.
    foreach ($xpath->query("//style") as $style) {
        $style->nodeValue = proxifyCSS($style->nodeValue, $url);
    }

    // Proxify tags with a "style" attribute.
    foreach ($xpath->query("//*[@style]") as $element) {
        $element->setAttribute("style", proxifyCSS($element->getAttribute("style"), $url));
    }

    // Proxify "srcset" attributes in <img> tags.
    foreach ($xpath->query("//img[@srcset]") as $element) {
        $element->setAttribute("srcset", proxifySrcset($element->getAttribute("srcset"), $url));
    }

    // Proxify any of these attributes appearing in any tag.
    $proxifyAttributes = ["href", "src"];
    foreach ($proxifyAttributes as $attrName) {
        // For every element with the given attribute...
        foreach ($xpath->query("//*[@" . $attrName . "]") as $element) {
            $attrContent = $element->getAttribute($attrName);
            // These schemes don't refer to HTTP resources; the proxy would reject them, so leave them untouched.
            if (preg_match("/^(about|data|javascript|magnet|mailto):/i", $attrContent)) {
                continue;
            }
            $element->setAttribute($attrName, PROXY_PREFIX . rel2abs($attrContent, $url));
        }
    }

    // Attempt to force AJAX requests to be made through the proxy (see buildAjaxShim()).
    // Only bother trying to apply this hack if the DOM has a <head> or <body> element;
    // insert some JavaScript at the top of whichever is available first.
    // Protects against cases where the server sends a Content-Type of "text/html" when
    // what's coming back is most likely not actually HTML.
    // TODO: Do this check before attempting to do any sort of DOM parsing?
    $head = $xpath->query("//head")->item(0);
    $body = $xpath->query("//body")->item(0);
    $prependElem = $head !== null ? $head : $body;
    if ($prependElem !== null) {
        // createElement() does not escape its value argument, so attach the
        // script source as a text node instead.
        $scriptElem = $doc->createElement("script");
        $scriptElem->appendChild($doc->createTextNode(buildAjaxShim($url)));
        $scriptElem->setAttribute("type", "text/javascript");
        $prependElem->insertBefore($scriptElem, $prependElem->firstChild);
    }

    return "<!-- Proxified page constructed by miniProxy -->\n" . $doc->saveHTML();
}

// Extract and sanitize the requested URL, handling cases where forms have been rewritten to point to the proxy.
if (isset($_POST["miniProxyFormAction"])) {
    $url = $_POST["miniProxyFormAction"];
    unset($_POST["miniProxyFormAction"]);
} else {
    $queryParams = [];
    parse_str($_SERVER["QUERY_STRING"], $queryParams);
    // If the miniProxyFormAction field appears in the query string, make $url start with its value, and rebuild the query string without it.
    if (isset($queryParams["miniProxyFormAction"])) {
        $formAction = $queryParams["miniProxyFormAction"];
        unset($queryParams["miniProxyFormAction"]);
        $url = $formAction . "?" . http_build_query($queryParams);
    } else {
        $url = substr($_SERVER["REQUEST_URI"], strlen($_SERVER["SCRIPT_NAME"]) + 1);
    }
}

if (empty($url)) {
    if (empty($startURL)) {
        die(
            "<html><head><title>miniProxy</title></head><body><h1>Welcome to miniProxy!</h1>miniProxy can be directly invoked like this: <a href=\""
            . PROXY_PREFIX . $landingExampleURL
            . "\">"
            . PROXY_PREFIX . $landingExampleURL
            . "</a><br /><br />Or, you can simply enter a URL below:<br /><br /><form onsubmit=\"if (document.getElementById('site').value) { window.location.href='"
            . PROXY_PREFIX
            . "' + document.getElementById('site').value; return false; } else { window.location.href='"
            . PROXY_PREFIX . $landingExampleURL
            . "'; return false; }\" autocomplete=\"off\"><input id=\"site\" type=\"text\" size=\"50\" /><input type=\"submit\" value=\"Proxy It!\" /></form></body></html>"
        );
    } else {
        $url = $startURL;
    }
} else if (strpos($url, ":/") !== strpos($url, "://")) {
    // Work around the fact that some web servers (e.g. IIS 8.5) change double slashes appearing in the URL to a single slash.
    // See https://github.com/joshdick/miniProxy/pull/14
    $pos = strpos($url, ":/");
    $url = substr_replace($url, "://", $pos, strlen(":/"));
}

$scheme = parse_url($url, PHP_URL_SCHEME);
if (empty($scheme)) {
    // Assume that any supplied URLs starting with // are HTTP URLs.
    if (strpos($url, "//") === 0) {
        $url = "http:" . $url;
    }
} else if (!preg_match("/^https?$/i", $scheme)) {
    die('Error: Detected a "' . $scheme . '" URL. miniProxy exclusively supports http[s] URLs.');
}

// Validate the requested URL against the whitelist.
$urlIsValid = count($whitelistPatterns) === 0;
foreach ($whitelistPatterns as $pattern) {
    if (preg_match($pattern, $url)) {
        $urlIsValid = true;
        break;
    }
}
if (!$urlIsValid) {
    die("Error: The requested URL was disallowed by the server administrator.");
}

$response = makeRequest($url);
if ($response["error"] !== null) {
    // The upstream request failed outright; report it instead of sending an empty response.
    http_response_code(502);
    die("Error: The requested URL could not be retrieved (" . htmlspecialchars($response["error"]) . ").");
}
$rawResponseHeaders = $response["headers"];
$responseBody = $response["body"];
$responseInfo = $response["responseInfo"];

// If CURLOPT_FOLLOWLOCATION landed the proxy at a different URL than
// what was requested, explicitly redirect the proxy there.
$responseURL = $responseInfo["url"];
if ($responseURL !== $url) {
    header("Location: " . PROXY_PREFIX . $responseURL, true);
    exit(0);
}

// A regex that indicates which server response headers should be stripped out of the proxified response.
$header_blacklist_pattern = "/^Content-Length|^Transfer-Encoding|^Content-Encoding.*gzip/i";

// cURL can make multiple requests internally (for example, if CURLOPT_FOLLOWLOCATION is enabled), and reports
// headers for every request it makes. Only proxy the last set of received response headers,
// corresponding to the final request made by cURL for any given call to makeRequest().
$responseHeaderBlocks = array_filter(explode("\r\n\r\n", $rawResponseHeaders));
$lastHeaderBlock = (string) end($responseHeaderBlocks);
$headerLines = explode("\r\n", $lastHeaderBlock);
foreach ($headerLines as $header) {
    $header = trim($header);
    if ($header === "") {
        continue;
    }
    if (!preg_match($header_blacklist_pattern, $header)) {
        header($header, false);
    }
}
// Prevent robots from indexing proxified pages.
header("X-Robots-Tag: noindex, nofollow", true);

if ($forceCORS) {
    // This logic is based on code found at: http://stackoverflow.com/a/9866124/278810
    // CORS headers sent below may conflict with CORS headers from the original response,
    // so these headers are sent after the original response headers to ensure their values
    // are the ones that actually end up getting sent to the browser.
    // Explicit [ $replace = true ] is used for these headers even though this is PHP's default behavior.

    // Allow access from any origin.
    header("Access-Control-Allow-Origin: *", true);
    header("Access-Control-Allow-Credentials: true", true);

    // Handle CORS headers received during OPTIONS requests.
    if ($_SERVER["REQUEST_METHOD"] == "OPTIONS") {
        if (isset($_SERVER["HTTP_ACCESS_CONTROL_REQUEST_METHOD"])) {
            header("Access-Control-Allow-Methods: GET, POST, OPTIONS", true);
        }
        if (isset($_SERVER["HTTP_ACCESS_CONTROL_REQUEST_HEADERS"])) {
            header("Access-Control-Allow-Headers: {$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']}", true);
        }
        // No further action is needed for OPTIONS requests.
        exit(0);
    }
}

$contentType = "";
if (isset($responseInfo["content_type"])) {
    $contentType = $responseInfo["content_type"];
}

if (stripos($contentType, "text/html") !== false && trim($responseBody) !== "") {
    // This is presumably a web page, so attempt to proxify the DOM.
    // (Empty bodies are excluded: DOMDocument::loadHTML() rejects an empty string.)
    echo proxifyHTML($responseBody, $url);
} else if (stripos($contentType, "text/css") !== false) {
    // This is CSS, so proxify url() references.
    echo proxifyCSS($responseBody, $url);
} else {
    // This isn't a web page or CSS, so serve unmodified through the proxy with the correct headers (images, JavaScript, etc.)
    header("Content-Length: " . strlen($responseBody), true);
    echo $responseBody;
}