Subversion Repositories cheapmusic

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
103 - 1
<?php
2
 
3
/***************************************************************************
4
 *   Copyright (C) 2009-2011 by Geo Varghese(www.seopanel.in)  	           *
5
 *   sendtogeo@gmail.com   												   *
6
 *                                                                         *
7
 *   This program is free software; you can redistribute it and/or modify  *
8
 *   it under the terms of the GNU General Public License as published by  *
9
 *   the Free Software Foundation; either version 2 of the License, or     *
10
 *   (at your option) any later version.                                   *
11
 *                                                                         *
12
 *   This program is distributed in the hope that it will be useful,       *
13
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 *   GNU General Public License for more details.                          *
16
 *                                                                         *
17
 *   You should have received a copy of the GNU General Public License     *
18
 *   along with this program; if not, write to the                         *
19
 *   Free Software Foundation, Inc.,                                       *
20
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
21
 ***************************************************************************/
22
 
23
include_once(SP_CTRLPATH."/proxy.ctrl.php");
24
 
25
/**
 * HTTP spider built around one reusable cURL handle.
 *
 * The public `_CURLOPT_*` properties hold the options applied by getContent();
 * callers may change them between requests. Besides fetching pages the class
 * offers helpers to normalise urls, extract links from fetched html, test
 * proxies and inspect response headers.
 */
class Spider{

	# settings of the spider
	public $_CURL_RESOURCE = null;
	public $_CURLOPT_FAILONERROR = false;
	public $_CURLOPT_FOLLOWLOCATION = true;
	public $_CURLOPT_RETURNTRANSFER = true;
	public $_CURLOPT_MAXREDIRS = 4; //Don't get caught in redirect loop
	public $_CURLOPT_TIMEOUT = 15;
	public $_CURLOPT_POST = true;
	public $_CURLOPT_POSTFIELDS = null;
	public $_CURLOPT_USERAGENT = "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))";
	public $_CURLOPT_USERPWD = null;
	public $_CURLOPT_COOKIEJAR = '';
	public $_CURLOPT_COOKIEFILE = '';
	public $_CURLOPT_REFERER = "";
	public $_CURL_sleep = 1;
	public $_CURLOPT_COOKIE = "";
	public $_CURLOPT_HEADER = 0;
	public $_CURL_HTTPHEADER = array();
	public $userAgentList = array();
	public $effectiveUrl = null;

	/**
	 * Creates the cURL handle, prefixes the cookie paths with SP_TMPPATH and
	 * fills the user agent list.
	 *
	 * The default user agent is SP_USER_AGENT when that constant is defined,
	 * otherwise the visitor's own user agent (if any), otherwise the built-in one.
	 */
	function __construct()	{
		$this->_CURLOPT_COOKIEJAR = SP_TMPPATH . '/' . $this->_CURLOPT_COOKIEJAR;
		$this->_CURLOPT_COOKIEFILE = SP_TMPPATH . '/' . $this->_CURLOPT_COOKIEFILE;
		$this->_CURL_RESOURCE = curl_init();

		if (!empty($_SERVER['HTTP_USER_AGENT'])) {
			$this->_CURLOPT_USERAGENT = $_SERVER['HTTP_USER_AGENT'];
		}

		// user agents
		$this->userAgentList['google'] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0";
		$this->userAgentList['bing'] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0";
		$this->userAgentList['default'] = defined('SP_USER_AGENT') ? SP_USER_AGENT : $this->_CURLOPT_USERAGENT;
	}

	/**
	 * Normalises a url: drops double quotes, collapses repeated "/", "&" and "#"
	 * and removes one trailing slash. The scheme is re-added in lower case.
	 *
	 * Every occurrence of "http://" / "https://" is removed from the url, not
	 * only a leading one.
	 *
	 * @param string $url
	 * @return string
	 */
	public static function formatUrl($url){
		$scheme = "";
		if (stristr($url, 'http://')) {
			$scheme = "http://";
		} elseif (stristr($url, 'https://')) {
			$scheme = "https://";
		}

		// the detection above ignores case, so the removal has to ignore it as well;
		// otherwise "HTTPS://x" ended up as "https://HTTPS:/x"
		$url = str_ireplace(array('http://', 'https://', '"'), '', $url);
		$url = preg_replace('/\/{2,}/', '/', $url);
		$url = preg_replace('/&{2,}/', '&', $url);
		$url = preg_replace('/#{2,}/', '#', $url);

		return $scheme . self::removeTrailingSlash($url);
	}

	/**
	 * Returns the directory part of a url path, used as prefix for relative
	 * links found in that page.
	 *
	 * - path ending in a known script extension: everything before the last "/"
	 * - path ending in "/": the path without that slash
	 * - anything else: the path itself
	 *
	 * @param string $relativeUrl url or path of the page
	 * @return string path without trailing slash; '' when the url has no path
	 */
	function getRelativeUrl($relativeUrl) {
		$path = (string) parse_url($relativeUrl, PHP_URL_PATH);

		// if link contains script names (the dot is escaped so "/foohtm" is not treated as a script)
		if (preg_match('/\.(htm|html|php|pl|jsp|asp|aspx|do|cgi|cfm)$/i', $path)) {
			if (preg_match('/(.*)\//', $path, $matches)) {
				return $matches[1];
			}

			// script name without any directory part
			return '';
		}

		if (preg_match('/\/$/', $path)) {
			return self::removeTrailingSlash($path);
		}

		return $path;
	}

	/**
	 * Fetches a page and collects its meta data and link statistics.
	 *
	 * The result starts from WebsiteController::crawlMetaData() and always
	 * contains the counters 'total_links' and 'external'. With $returnUrls the
	 * links themselves are added under 'external_links' and 'site_links'; each
	 * entry has 'link_url', 'link_anchor', 'nofollow' and 'link_title'.
	 *
	 * Links whose href contains "mailto:", "javascript:" or ";" are ignored.
	 * Relative links are resolved against, in this order: the domain host (href
	 * starting with "/"), the <base> tag, $domainUrl (when $url is the domain
	 * itself) and the page directory (only if SP_RELATIVE_LINK_CRAWL is on).
	 * Relative links that can not be resolved are not counted.
	 *
	 * @param string $url        page to crawl
	 * @param string $domainUrl  url of the website the page belongs to
	 * @param bool   $returnUrls whether to return the details of each link
	 * @return array
	 */
	function getPageInfo($url, $domainUrl, $returnUrls=false){

		$ret = $this->getContent(Spider::addTrailingSlash($url));
		$pageInfo = array(
			'external' => 0,
			'total_links' => 0,
		);

		// NOTE(review): formatUrl() here is a function defined outside this file,
		// presumably the project wide helper and not Spider::formatUrl() - confirm
		$checkUrl = formatUrl($domainUrl);

		// if relative links of a page needs to be checked
		$relativeUrl = "";
		if (SP_RELATIVE_LINK_CRAWL) {
			$relativeUrl = $domainUrl . $this->getRelativeUrl($url);
		}

		// find main domain host link, kept without trailing slash because it is
		// only prepended to hrefs that start with "/" themselves
		$domainHostInfo = parse_url($domainUrl);
		$domainScheme = isset($domainHostInfo['scheme']) ? $domainHostInfo['scheme'] : '';
		$domainHost = isset($domainHostInfo['host']) ? $domainHostInfo['host'] : '';
		$domainHostLink = $domainScheme . "://" . $domainHost;

		if (empty($ret['page'])) {
			return $pageInfo;
		}

		// remove real line breaks (the single quoted '\r' used before only removed literal backslash sequences)
		$string = str_replace(array("\r\n", "\n", "\r"), "", $ret['page']);

		// keep the link counters even though the meta data does not provide them
		$metaInfo = WebsiteController::crawlMetaData($url, '', $string, true);
		if (is_array($metaInfo)) {
			$pageInfo = array_merge($pageInfo, $metaInfo);
		}

		// check whether base url tag is there
		$baseTagUrl = "";
		if (preg_match("/<base (.*?)>/is", $string, $match)) {
			$baseHref = (string) $this->__getTagParam("href", $match[1]);

			// a <base> tag without href (e.g. only a target) must not become the base "/"
			if ($baseHref !== '') {
				$baseTagUrl = self::addTrailingSlash($baseHref);
			}
		}

		$pattern = "/<a(.*?)>(.*?)<\/a>/is";
		preg_match_all($pattern, $string, $matches, PREG_PATTERN_ORDER);

		// loop through matches
		$linkCount = count($matches[1]);
		for ($i = 0; $i < $linkCount; $i++) {
			$tagAttributes = $matches[1][$i];
			$tagContent = $matches[2][$i];

			// check links found valid or not
			$href = (string) $this->__getTagParam("href", $tagAttributes);
			if (empty($href) && empty($tagContent)) {
				continue;
			}

			if (preg_match('/mailto:|javascript:|;/', $href)) {
				continue;
			}

			// find external links
			$external = 0;
			if (stristr($href, 'http://') || stristr($href, 'https://')) {

				if (!preg_match("/^" . preg_quote($checkUrl, '/') . "/", formatUrl($href))) {
					$external = 1;
				}

			} else {

				// if url starts with / then append with base url of site
				if (preg_match('/^\//', $href)) {
					$href = $domainHostLink . $href;
				} elseif (!empty($baseTagUrl)) {
					$href = $baseTagUrl . $href;
				} elseif ($url == $domainUrl) {
					$href = $domainUrl . "/" . $href;
				} elseif (SP_RELATIVE_LINK_CRAWL) {
					$href = $relativeUrl . "/" . $href;
				} else {
					// relative link that can not be resolved, do not count it
					continue;
				}

				// if contains back directory operator: drop the last directory in front of the first "/../"
				if (stristr($href, '/../')) {
					$hrefParts = explode('/../', $href);
					if (preg_match('/.*\//', $hrefParts[0], $matchpart)) {
						$href = $matchpart[0] . $hrefParts[1];
					}
				}
			}

			$pageInfo['total_links'] += 1;
			$pageInfo['external'] += $external;

			// if details of urls to be checked
			if ($returnUrls) {
				$linkInfo = array();
				$linkInfo['link_url'] = $href;

				// image links use the alt text of the image as anchor
				if (stristr($tagContent, '<img')) {
					$linkInfo['link_anchor'] = $this->__getTagParam("alt", $tagContent);
				} else {
					$linkInfo['link_anchor'] = strip_tags($tagContent);
				}

				$linkInfo['nofollow'] = stristr($tagAttributes, 'nofollow') ? 1 : 0;
				$linkInfo['link_title'] = $this->__getTagParam("title", $tagAttributes);

				if ($external) {
					$pageInfo['external_links'][] = $linkInfo;
				} else {
					$pageInfo['site_links'][] = $linkInfo;
				}
			}
		}

		return $pageInfo;
	}

	/**
	 * Removes one trailing slash from the url, if there is one.
	 *
	 * @param string $url
	 * @return string
	 */
	public static function removeTrailingSlash($url) {
		return preg_replace('/\/$/', '', $url);
	}

	/**
	 * Adds a trailing slash to the url.
	 *
	 * Nothing is added when the url contains "?" or "#", already ends with "/"
	 * or ends with ".something" without a slash after the dot. The last rule is
	 * meant for file names but also applies to a bare domain such as
	 * "http://example.com", which is therefore returned unchanged.
	 *
	 * @param string $url
	 * @return string
	 */
	public static function addTrailingSlash($url) {
		$url = (string) $url;

		if (stristr($url, '?') || stristr($url, '#')) {
			return $url;
		}

		if (preg_match("/\.([^\/]+)$/", $url) || preg_match('/\/$/', $url)) {
			return $url;
		}

		return $url . "/";
	}

	/**
	 * Fetches a page and returns the href of its links.
	 *
	 * The page is lower cased before it is parsed, so the returned urls are
	 * lower case. Despite the name duplicates are not removed. Hrefs that are
	 * empty, "#", "/" or contain "mailto:", "javascript:" or ";" are skipped.
	 *
	 * @param string $url
	 * @return array list of href values
	 */
	function getUniqueUrls($url){

		$ret = $this->getContent($url);
		$urlList = array();

		if (empty($ret['page'])) {
			return $urlList;
		}

		$string = strtolower($ret['page']);
		$string = str_replace("\n", "", $string);

		$pattern = "/<a (.*)>(.*\n*.*|.*\n*)<\/a>/U";
		preg_match_all($pattern, $string, $matches, PREG_PATTERN_ORDER);

		foreach ($matches[1] as $tagAttributes) {

			// the helper is called __getTagParam(); getTagParam() does not exist in this class
			$href = (string) $this->__getTagParam("href", $tagAttributes);
			$href = preg_replace('/\/{3}/', '/', $href);

			if (empty($href) || ($href == "#") || ($href == "/")) {
				continue;
			}

			if (preg_match('/mailto:|javascript:|;/', $href)) {
				continue;
			}

			$urlList[] = $href;
		}

		return $urlList;
	}

	/**
	 * Returns the value of an attribute of a html tag.
	 *
	 * Tries a double quoted value first, then a single quoted one and finally
	 * an unquoted one. The name is matched case insensitively and anywhere in
	 * the tag, so "href" is also found inside "data-href".
	 *
	 * @param string $param attribute name
	 * @param string $tag   attribute part of the tag
	 * @return string|null trimmed value, null when the attribute has no value
	 */
	function __getTagParam($param, $tag){
		$name = preg_quote($param, '/');
		$tag = (string) $tag;

		preg_match('/' . $name . '="(.*?)"/is', $tag, $matches);
		if (empty($matches[1])) {
			preg_match('/' . $name . "='(.*?)'/is", $tag, $matches);
			if (empty($matches[1])) {
				// unquoted value: runs up to the next whitespace, quote or end of the tag,
				// so it is also found when it is the last attribute
				preg_match('/' . $name . '=([^\s"\'>]+)/is', $tag, $matches);
			}
		}

		return isset($matches[1]) ? trim($matches[1]) : null;
	}

	/**
	 * Returns the user agent registered for the key.
	 *
	 * @param string|bool $key key of userAgentList, empty for the default one
	 * @return string the default user agent is used for unknown keys
	 */
	function getUserAgent($key = false) {
		$userAgentKey = !empty($key) ? $key : 'default';

		if (isset($this->userAgentList[$userAgentKey])) {
			return $this->userAgentList[$userAgentKey];
		}

		return isset($this->userAgentList['default']) ? $this->userAgentList['default'] : $this->_CURLOPT_USERAGENT;
	}

	/**
	 * Adds browser like headers to _CURL_HTTPHEADER when
	 * SP_SEND_CUSTOM_HEADER_IN_CURL is enabled.
	 *
	 * Headers of the same name that are already in the list are replaced, so
	 * calling this before every request does not pile up duplicates.
	 */
	function setCustomHeaders() {

		// if sending custom header with curl is enabled
		if (!SP_SEND_CUSTOM_HEADER_IN_CURL) {
			return;
		}

		// session_regenerate_id() only returns a boolean, so it can not provide
		// the id when no session has been started
		$sessionId = session_id();
		if (empty($sessionId) && function_exists('session_create_id')) {
			$sessionId = session_create_id();
		}

		if (empty($sessionId)) {
			$sessionId = md5(uniqid('', true));
		}

		$this->__replaceHttpHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
		$this->__replaceHttpHeader("Connection", "keep-alive");
		$this->__replaceHttpHeader("Cache-Control", "max-age=0");
		$this->__replaceHttpHeader("Cookie", "PHPSESSID=" . $sessionId);
		$this->__replaceHttpHeader("User-Agent", $this->_CURLOPT_USERAGENT);
	}

	/**
	 * Puts "name: value" into _CURL_HTTPHEADER after removing the entries that
	 * already set the same header.
	 */
	private function __replaceHttpHeader($name, $value) {
		$prefix = $name . ':';
		$prefixLength = strlen($prefix);
		$headerList = array();

		foreach ($this->_CURL_HTTPHEADER as $header) {
			if (is_string($header) && (strncasecmp($header, $prefix, $prefixLength) === 0)) {
				continue;
			}

			$headerList[] = $header;
		}

		$headerList[] = $name . ": " . $value;
		$this->_CURL_HTTPHEADER = $headerList;
	}

	/**
	 * Fetches a url with the options currently set on this object.
	 *
	 * Certificate verification is switched off. When proxies are enabled a
	 * random proxy is used; if the request fails the proxy can be deactivated
	 * (PROXY_DEACTIVATE_CRAWL) and the request repeated with another proxy
	 * (CHECK_WITH_ANOTHER_PROXY_IF_FAILED). The final url after redirects is
	 * stored in $this->effectiveUrl.
	 *
	 * @param string $url
	 * @param bool   $enableProxy use a proxy when SP_ENABLE_PROXY is on
	 * @param bool   $logCrawl    store the request in the crawl log
	 * @return array 'page' (content or false), 'error' (curl error number),
	 *               'errmsg' and, when logged, 'log_id'
	 */
	function getContent( $url, $enableProxy=true, $logCrawl = true)	{
		$ch = $this->_CURL_RESOURCE;

		curl_setopt($ch, CURLOPT_URL, $url);
		curl_setopt($ch, CURLOPT_FAILONERROR, $this->_CURLOPT_FAILONERROR);
		curl_setopt($ch, CURLOPT_MAXREDIRS, $this->_CURLOPT_MAXREDIRS);
		@curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $this->_CURLOPT_FOLLOWLOCATION);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, $this->_CURLOPT_RETURNTRANSFER);
		curl_setopt($ch, CURLOPT_TIMEOUT, $this->_CURLOPT_TIMEOUT);
		curl_setopt($ch, CURLOPT_COOKIEJAR, $this->_CURLOPT_COOKIEJAR);
		curl_setopt($ch, CURLOPT_COOKIEFILE, $this->_CURLOPT_COOKIEFILE);
		curl_setopt($ch, CURLOPT_HEADER, $this->_CURLOPT_HEADER);

		// to fix the ssl related issues
		curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

		// user agent assignment, if the url is not the main website
		if (stristr($url, SP_MAIN_SITE)) {
			$this->_CURLOPT_USERAGENT = "";
		} else {
			$ugKey = false;
			if (stristr($url, 'google.')) {
				$ugKey = 'google';
			} elseif (stristr($url, 'bing.')) {
				$ugKey = 'bing';
			}

			$this->_CURLOPT_USERAGENT = $this->getUserAgent($ugKey);
			if (strlen((string) $this->_CURLOPT_USERAGENT) > 0) {
				curl_setopt($ch, CURLOPT_USERAGENT, $this->_CURLOPT_USERAGENT);
			}
		}

		// set custom headers for google domains
		if (stristr($url, 'google.')) {
			$this->setCustomHeaders();
		}

		// to add the curl http headers
		if (!empty($this->_CURL_HTTPHEADER)) {
			curl_setopt($ch, CURLOPT_HTTPHEADER, $this->_CURL_HTTPHEADER);
		}

		if (!empty($this->_CURLOPT_COOKIE)) {
			curl_setopt($ch, CURLOPT_COOKIE, $this->_CURLOPT_COOKIE);
		}

		if (!empty($this->_CURLOPT_REFERER)) {
			curl_setopt($ch, CURLOPT_REFERER, $this->_CURLOPT_REFERER);
		}

		// both options are null by default, so they are normalised before their length is checked
		$postFields = is_scalar($this->_CURLOPT_POSTFIELDS) ? (string) $this->_CURLOPT_POSTFIELDS : '';
		if (strlen($postFields) > 1) {
			curl_setopt($ch, CURLOPT_POST, $this->_CURLOPT_POST);
			curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
		}

		$userPwd = is_scalar($this->_CURLOPT_USERPWD) ? (string) $this->_CURLOPT_USERPWD : '';
		if (strlen($userPwd) > 2) {
			curl_setopt($ch, CURLOPT_USERPWD, $userPwd);
		}

		// to use proxy if proxy enabled
		$proxyCtrler = null;
		$proxyInfo = array();
		if (SP_ENABLE_PROXY && $enableProxy) {
			$proxyCtrler = new ProxyController();
			$proxyInfo = $proxyCtrler->getRandomProxy();

			if ($proxyInfo) {
				curl_setopt($ch, CURLOPT_PROXY, $proxyInfo['proxy'] . ":" . $proxyInfo['port']);

				if (CURLOPT_HTTPPROXYTUNNEL_VAL) {
					curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, CURLOPT_HTTPPROXYTUNNEL_VAL);
				}

				if (!empty($proxyInfo['proxy_auth'])) {
					curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyInfo['proxy_username'] . ":" . $proxyInfo['proxy_password']);
				}

			} else {
				showErrorMsg("No active proxies found!! Please check your proxy settings from Admin Panel.");
			}
		}

		// 0 when the request did not go through a proxy
		$proxyId = !empty($proxyInfo['id']) ? $proxyInfo['id'] : 0;

		$ret = [];
		$ret['page'] = curl_exec($ch);
		$ret['error'] = curl_errno($ch);
		$ret['errmsg'] = curl_error($ch);

		$this->effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

		// update crawl log in database for future reference
		if ($logCrawl) {
			$crawlLogCtrl = new CrawlLogController();
			$crawlInfo = [];
			$crawlInfo['crawl_status'] = $ret['error'] ? 0 : 1;
			$crawlInfo['ref_id'] = $crawlInfo['crawl_link'] = addslashes((string) $this->effectiveUrl);
			$crawlInfo['crawl_referer'] = addslashes((string) $this->_CURLOPT_REFERER);
			$crawlInfo['crawl_cookie'] = addslashes((string) $this->_CURLOPT_COOKIE);
			$crawlInfo['crawl_post_fields'] = addslashes($postFields);
			$crawlInfo['crawl_useragent'] = addslashes((string) $this->_CURLOPT_USERAGENT);
			$crawlInfo['proxy_id'] = intval($proxyId);
			$crawlInfo['log_message'] = addslashes($ret['errmsg']);
			$ret['log_id'] = $crawlLogCtrl->createCrawlLog($crawlInfo);
		}

		// disable proxy if not working
		if (SP_ENABLE_PROXY && $enableProxy && !empty($ret['error']) && !empty($proxyId)) {

			// deactivate proxy
			if (PROXY_DEACTIVATE_CRAWL) {
				$proxyCtrler->__changeStatus($proxyId, 0);
			}

			// check with another proxy, keeping the logging choice of the caller
			if (CHECK_WITH_ANOTHER_PROXY_IF_FAILED) {
				$ret = $this->getContent($url, $enableProxy, $logCrawl);
			}
		}

		// debug run time if enabled
		$this->debugRunTime($ret);

		return $ret;
	}

	/**
	 * Prints the crawl result when the request has a non empty "debug"
	 * parameter. With debug_format=html only the page source is shown.
	 *
	 * @param array $ret result of getContent() or checkProxy()
	 */
	function debugRunTime($ret) {

		// check debug request is enabled
		if (empty($_GET['debug']) && empty($_POST['debug'])) {
			return;
		}

		$showHtml = (isset($_GET['debug_format']) && ($_GET['debug_format'] == 'html'))
			|| (isset($_POST['debug_format']) && ($_POST['debug_format'] == 'html'));
		?>
			<div style="width: 760px; margin-top: 30px; padding: 14px; height: 900px; overflow: auto; border: 1px solid #B0C2CC;">
				<?php
				if ($showHtml) {
					highlight_string($ret['page']);
				} else {
					debugVar($ret, false);
				}
				?>
			</div>
			<?php
	}

	/**
	 * Extracts the PHP session id from a text containing "PHPSESSID=...;".
	 *
	 * @param string $page
	 * @return string|false the id, false when the text has no session cookie
	 */
	function getSessionId($page){
		if (preg_match('/PHPSESSID=(.*?);/', $page, $result)) {
			return $result[1];
		}

		return false;
	}

	/**
	 * Checks a proxy by running a google search for "twitter" through it.
	 *
	 * @param array $proxyInfo keys 'proxy', 'port' and, when 'proxy_auth' is
	 *                         set, 'proxy_username' and 'proxy_password'
	 * @return array 'page', 'error' and 'errmsg'; 'error' is empty if the proxy
	 *               works, otherwise the curl error number or a message
	 */
	function checkProxy($proxyInfo) {
		$this->_CURLOPT_USERAGENT = $this->getUserAgent();

		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_PROXY, $proxyInfo['proxy'] . ":" . $proxyInfo['port']);
		curl_setopt($ch, CURLOPT_HEADER, 1);
		curl_setopt($ch, CURLOPT_USERAGENT, $this->_CURLOPT_USERAGENT);
		curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);

		if (CURLOPT_HTTPPROXYTUNNEL_VAL) {
			curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, CURLOPT_HTTPPROXYTUNNEL_VAL);
		}

		if (!empty($proxyInfo['proxy_auth'])) {
			curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyInfo['proxy_username'] . ":" . $proxyInfo['proxy_password']);
		}

		// set custom headers
		$this->setCustomHeaders();

		// to add the curl http headers
		if (!empty($this->_CURL_HTTPHEADER)) {
			curl_setopt($ch, CURLOPT_HTTPHEADER, $this->_CURL_HTTPHEADER);
		}

		// to fix the ssl related issues
		curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

		curl_setopt($ch, CURLOPT_URL, "http://www.google.com/search?q=twitter");

		$ret = array();
		$ret['page'] = curl_exec($ch);
		$ret['error'] = curl_errno($ch);
		$ret['errmsg'] = curl_error($ch);
		curl_close($ch);

		// if no error check whether the output contains twitter keyword
		if (empty($ret['error'])) {

			// is captcha found in search results
			if (SearchEngineController::isCaptchInSearchResults($ret['page'])) {
				$ret['error'] = "Capctha found in the results";
				$ret['errmsg'] = strtok($ret['page'], "\n");
			} elseif (!stristr($ret['page'], 'twitter')) {
				$ret['error'] = "Page not contains twitter keyword";
				$ret['errmsg'] = strtok($ret['page'], "\n");
			}
		}

		// debug run time if enabled
		$this->debugRunTime($ret);

		return $ret;
	}

	/**
	 * Returns the response headers of a url using a HEAD request.
	 *
	 * @param string $url
	 * @param bool   $followRedirects follow up to 4 redirects; the headers of
	 *                                every hop are part of the result
	 * @return string|false raw headers, false when the request failed
	 */
	public static function getHeader($url, $followRedirects = true){
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_URL, $url);
		curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
		curl_setopt($ch, CURLOPT_USERAGENT, SP_USER_AGENT);

		if ($followRedirects) {
			curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
		}

		curl_setopt($ch, CURLOPT_MAXREDIRS, 4);

		// Only calling the head. CURLOPT_NOBODY makes curl send HEAD and not wait for
		// a body; setting only the request name to 'HEAD' keeps curl waiting for one
		curl_setopt($ch, CURLOPT_HEADER, true); // header will be at output
		curl_setopt($ch, CURLOPT_NOBODY, true); // HTTP request is 'HEAD'

		// to fix the ssl related issues
		curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
		curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

		$content = curl_exec($ch);
		curl_close($ch);

		return $content;
	}

	/**
	 * Checks whether the url (after redirects) answers with "404 Not Found".
	 *
	 * @param string $url
	 * @return bool|int true when broken, 0 otherwise
	 */
	public static function isLInkBrocken($url) {
		$header = Spider::getHeader($url);

		return stristr((string) $header, '404 Not Found') ? true : 0;
	}

	/**
	 * Checks whether the url itself answers with a permanent redirect (301 or 308).
	 *
	 * @param string $url
	 * @return bool|int true when it is a redirect, 0 otherwise
	 */
	public static function isLinkRedirect($url) {

		// don't follow with cURL as we need that info.
		// static method: there is no $this, getHeader() has to be called statically
		$header = (string) self::getHeader($url, false);

		if (stristr($header, '301 Moved Permanently') || stristr($header, '308 Permanent Redirect')) {
			return true;
		}

		return 0;
	}
}
526
?>