Subversion Repositories cheapmusic

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
103 - 1
<?php
2
 
3
/***************************************************************************
4
 *   Copyright (C) 2009-2011 by Geo Varghese(www.seopanel.in)  	   *
5
 *   sendtogeo@gmail.com   												   *
6
 *                                                                         *
7
 *   This program is free software; you can redistribute it and/or modify  *
8
 *   it under the terms of the GNU General Public License as published by  *
9
 *   the Free Software Foundation; either version 2 of the License, or     *
10
 *   (at your option) any later version.                                   *
11
 *                                                                         *
12
 *   This program is distributed in the hope that it will be useful,       *
13
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 *   GNU General Public License for more details.                          *
16
 *                                                                         *
17
 *   You should have received a copy of the GNU General Public License     *
18
 *   along with this program; if not, write to the                         *
19
 *   Free Software Foundation, Inc.,                                       *
20
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
21
 ***************************************************************************/
22
 
23
# class defines all sitemap controller functions
24
class SitemapController extends Controller{
25
 
26
	var $smLimit = 50000;			# max number of urls written to one xml sitemap file
	var $baseUrl;					# base url of the site; crawled links are rewritten onto it
	var $smType = 'xml';			# sitemap type; used as file extension and to pick the <type>SitemapFile() generator
	var $urlList;					# urls found by the crawler: url => array('visit' => 0|1)
	var $hostName;					# hostname of the site, compared with link hosts after 'www.' is removed
	var $spider;					# spider object used to fetch and format urls
	var $sleep = 0;					# seconds to sleep between two page crawls
	var $excludeUrl = "";			# crawled urls containing this text are skipped
	var $changefreq = "always";		# <changefreq> value written for every url
	var $priority = 0.5;			# <priority> value written for every url
	var $lastmod;				 	# <lastmod> value; xmlSitemapFile() sets it to the current date
	var $smheader;					# text written before the url entries of a sitemap file
	var $smfooter;					# text written after the url entries of a sitemap file
	var $smfile = "";				# name of the sitemap file that is written next
	var $section = "";              # file name prefix of the sitemap files, derived from the project name
	var $sitemapDir = "";			# sub directory of SP_TMPPATH where sitemap files are stored
	var $loc;						# url currently being written; declared here because the
									# generators assign it and createUrlXmlText() reads it
42
 
43
	# Show the sitemap generator interface.
	#
	# Collects the site auditor projects that have at least one crawled link and
	# hands them to the 'sitemap/showsitemap' view as 'projectList'. Non-admin
	# users get a user_id condition added to the project query.
	function showSitemapGenerator() {

		$userId = isLoggedIn();
		$auditCtrler = $this->createController('SiteAuditor');

		# admins query without a user condition
		if (isAdmin()) {
			$condition = "";
		} else {
			$condition = " and w.user_id=$userId";
		}

		# keep only projects for which links were crawled
		$projectList = array();
		foreach ($auditCtrler->getAllProjects($condition) as $project) {
			$project['total_links'] = $auditCtrler->getCountcrawledLinks($project['id']);
			if (!($project['total_links'] > 0)) continue;
			$projectList[] = $project;
		}

		# nothing to build a sitemap from
		if (empty($projectList)) {
			$auditTexts = $this->getLanguageTexts('siteauditor', $_SESSION['lang_code']);
			showErrorMsg($auditTexts['No active projects found'].'!');
		}

		$this->set('projectList', $projectList);
		$this->render('sitemap/showsitemap');
	}
66
 
67
	# Generate the sitemap file(s) of a site auditor project.
	#
	# $sitemapInfo - request values; reads 'project_id', 'sm_type', 'exclude_url'
	#                and the optional 'freq' and 'priority'.
	#
	# NOTE(review): the project is loaded by id only; no ownership check is
	# visible here - confirm that callers restrict project_id to the current user.
	function generateSitemapFile($sitemapInfo){

		$projectId = intval($sitemapInfo['project_id']);

		# no usable project id given
		if(empty($projectId)){
			hideDiv('message');
			showErrorMsg("No Website Found!");
			return;
		}

		# check whether the sitemap directory is writable
		# (no return after the message, as before; presumably showErrorMsg() ends the request - TODO confirm)
		$targetDir = SP_TMPPATH ."/".$this->sitemapDir;
		if(!is_writable($targetDir)){
			hideDiv('message');
			showErrorMsg("Directory '<b>".$targetDir."</b>' is not <b>writable</b>. Please change its <b>permission</b> !");
		}

		# the formatted project name becomes the sitemap file name prefix
		$auditCtrler = $this->createController('SiteAuditor');
		$project = $auditCtrler->__getProjectInfo($projectId);
		$this->section = formatFileName($project['name']);

		# take over the generator settings from the request
		$this->smType = $sitemapInfo['sm_type'];
		$this->excludeUrl = $sitemapInfo['exclude_url'];
		if(!empty($sitemapInfo['freq'])){
			$this->changefreq = $sitemapInfo['freq'];
		}
		if(!empty($sitemapInfo['priority'])){
			$this->priority = $sitemapInfo['priority'];
		}

		# collect the report pages of the project, leaving out excluded links
		$auditor = $this->createComponent('AuditorComponent');
		$sitemapUrls = array();
		foreach ($auditor->getAllreportPages(" and project_id=".$projectId) as $page) {
			$pageUrl = Spider::addTrailingSlash($page['page_url']);
			if (!$auditor->isExcludeLink($pageUrl, trim($sitemapInfo['exclude_url']))) {
				$sitemapUrls[] = $pageUrl;
			}
		}

		$this->createSitemap($this->smType, $sitemapUrls);
	}
101
 
102
 	# Create new sitemap files for the given urls and list them for download.
	#
	# $smType  - sitemap type ('xml', 'txt', 'html', ...); when empty the current
	#            $this->smType is used. The type selects the <type>SitemapFile() method.
	# $urlList - array of urls; an empty value is treated as an empty list.
	#
	# Existing sitemap files of the current section and type are deleted first.
	function createSitemap($smType="", $urlList="") {

		if(!empty($smType)){
			$this->smType = $smType;
		}

		# The type comes from the request and is turned into a method name, so only
		# plain alphanumeric types with an existing generator method are accepted.
		# 'create' is refused because it would reach the createSitemapFile() helper.
		$function = is_string($this->smType) ? $this->smType ."SitemapFile" : "";
		$validType = is_string($this->smType)
			&& preg_match('/^[a-z0-9]+$/i', $this->smType)
			&& (strcasecmp($this->smType, 'create') != 0)
			&& method_exists($this, $function);
		if(!$validType){
			hideDiv('message');
			showErrorMsg("Invalid sitemap type!");
			return;
		}

		# the default "" (or any other non-array) must not reach count()/foreach
		if(!is_array($urlList)){
			$urlList = empty($urlList) ? array() : array($urlList);
		}

		print("<p class=\"note noteleft\">".$_SESSION['text']['common']['Found']." <a>".count($urlList)."</a> Sitemap Urls</p>");
		$this->deleteSitemapFiles();
		$this->$function($urlList);
		$this->showSitemapFiles();

	}
116
 
117
	# Collect the sitemap urls of a site: resets $this->urlList and crawls
	# recursively, starting at $this->baseUrl.
	function getSitemapUrls(){
		$startUrl = $this->baseUrl;
		$this->urlList = array();
		$this->crawlSitemapUrls($startUrl, true);
	}
122
 
123
	# Crawl one page and register the links found on it in $this->urlList.
	#
	# $baseUrl   - url of the page to crawl
	# $recursive - when true every newly found url is crawled as well
	#
	# Links to archive/image/audio files, links to other hosts and links that
	# contain $this->excludeUrl are skipped. Entries of $this->urlList are
	# url => array('visit' => 0|1), where 1 marks a page that has been crawled.
	function crawlSitemapUrls($baseUrl, $recursive=false){

		# every page is crawled only once
		if(isset($this->urlList[$baseUrl]['visit']) && ($this->urlList[$baseUrl]['visit'] == 1)) return;
		$this->urlList[$baseUrl]['visit'] = 1;

		$urlList = $this->spider->getUniqueUrls($baseUrl);
		if(empty($urlList)) return;
		$hostName = $this->hostName;

		foreach($urlList as $href){
			if(preg_match('/\.zip$|\.gz$|\.tar$|\.png$|\.jpg$|\.jpeg$|\.gif$|\.mp3$/i', $href)) continue;

			# parse_url() returns false for malformed urls and omits 'host' for relative ones
			$urlInfo = @parse_url($href);
			$urlHost = (is_array($urlInfo) && isset($urlInfo['host'])) ? $urlInfo['host'] : '';

			$urlHostName = str_replace('www.', '', $urlHost);
			if(empty($urlHostName)){
				# relative link: prefix it with the site base url
				$href = $this->baseUrl.$href;
			}else{
				if($urlHostName != $hostName){
					continue;
				}
			}

			$href = $this->spider->formatUrl($href);

			# Rewrite scheme and host onto the base url. https links are handled too,
			# and only the first match is replaced so that a url embedded in the
			# query string stays untouched.
			$href = preg_replace('/https?:\/\/.*?\//i', $this->baseUrl, $href, 1);
			if(!empty( $this->excludeUrl) && stristr($href, $this->excludeUrl)) continue;
			if(!isset($this->urlList[$href]['visit']) && !isset($this->urlList[$href.'/']['visit'])){
				$this->urlList[$href]['visit'] = 0;
				if($recursive){
					sleep($this->sleep);
					$this->crawlSitemapUrls($href,true);
				}
			}
		}
	}
157
 
158
	# Create the text sitemap file: one url per line, no header or footer.
	# The file is named <section>_sitemap1.<smType>.
	#
	# $urlList - array of urls
	function txtSitemapFile($urlList) {
		$lines = array();
		foreach($urlList as $url){
			$this->loc = $url;		# keep track of the url being written
			$lines[] = $url ."\n";
		}

		$this->smheader = '';
		$this->smfooter = '';
		$this->smfile = $this->section ."_sitemap1.".$this->smType;
		$this->createSitemapFile(implode('', $lines));
	}
169
 
170
	# Create the html sitemap file: one link per url, separated by <br>,
	# no header or footer. The file is named <section>_sitemap1.<smType>.
	#
	# $urlList - array of urls
	function htmlSitemapFile($urlList) {
		$this->smheader = '';
		$this->smfooter = '';
		$smxml = "";
		foreach($urlList as $this->loc){
			# urls are crawled from a site, so escape them before they are
			# placed into the attribute and the link text
			$safeLoc = htmlspecialchars($this->loc, ENT_QUOTES, 'UTF-8');
			$smxml .= "<a href='$safeLoc'>$safeLoc</a><br>";
		}
		$this->smfile = $this->section ."_sitemap1.".$this->smType;
		$this->createSitemapFile($smxml);
	}
181
 
182
 
183
	# Create the xml sitemap file(s).
	#
	# Urls are written in chunks of $this->smLimit entries; every chunk goes to
	# its own file <section>_sitemap<index>.<smType>, index starting at 1.
	# Nothing is written for an empty url list.
	#
	# $urlList - array of urls
	function xmlSitemapFile($urlList) {
		$this->lastmod = date("Y-m-d");
		$this->smheader = '<?xml version="1.0" encoding="UTF-8"?>
		<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
		http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"><!-- created with Seo Panel:www.seopanel.in -->';
		$this->smfooter = '</urlset>';
		$index = 1;
		$rowcount = 0;
		$smxml = "";

		foreach($urlList as $this->loc){
			$smxml .= $this->createUrlXmlText();
			$rowcount++;
			if($rowcount == $this->smLimit){

				# create sitemap file when tot url count equal max count
				$this->smfile = $this->section ."_sitemap". $index . ".".$this->smType;
				$this->createSitemapFile($smxml);
				$rowcount = 0;
				$smxml = "";
				$index++;
			}
		}

		# Create the sitemap file with the rest of the urls. It uses the same
		# extension as the full files, so showSitemapFiles() and
		# deleteSitemapFiles() find every part of the sitemap.
		if(!empty($smxml)){
			$this->smfile = $this->section ."_sitemap". $index . ".".$this->smType;
			$this->createSitemapFile($smxml);
		}
	}
215
 
216
	# Print a download link for every sitemap file of the current section and type
	# found in the sitemap directory.
	function showSitemapFiles(){
		if ($handle = opendir(SP_TMPPATH ."/".$this->sitemapDir)) {

			# Section and type are quoted and the pattern is anchored, so only files
			# named exactly <section>_sitemap<number>.<type> are listed.
			$pattern = "/^".preg_quote($this->section, '/')."_sitemap\d+\.".preg_quote($this->smType, '/')."$/D";

		    while (false !== ($file = readdir($handle))) {
		        if ( ($file == ".") || ($file == "..") ) continue;
		        if (!preg_match($pattern, $file)) continue;

		        $fileLabel = htmlspecialchars($file, ENT_QUOTES, 'UTF-8');
		        echo "<p class=\"note noteleft\">
		        				".$this->spTextSitemap['Download sitemap file from'].":
		        				<a href='".SP_WEBPATH."/download.php?filesec=sitemap&filetype=$this->smType&file=".urlencode($file)."' target='_blank'>$fileLabel</a>
		        			</p>";
		    }
		    closedir($handle);
		}
	}
231
 
232
	# Delete every sitemap file of the current section and type from the
	# sitemap directory.
	function deleteSitemapFiles(){
		if ($handle = opendir(SP_TMPPATH ."/".$this->sitemapDir)) {

			# The pattern is anchored: without the anchors the files of another
			# section whose name merely ends with this section name were deleted too.
			$pattern = "/^".preg_quote($this->section, '/')."_sitemap\d+\.".preg_quote($this->smType, '/')."$/D";

		    while (false !== ($file = readdir($handle))) {
		        if ( ($file == ".") || ($file == "..") ) continue;
		        if (preg_match($pattern, $file)) {
		        	unlink(SP_TMPPATH ."/".$this->sitemapDir."/$file");
		        }
		    }
		    closedir($handle);
		}
	}
244
 
245
	# Create the <url> xml entry of the current url.
	#
	# Reads $this->loc, $this->lastmod, $this->changefreq and $this->priority.
	# Returns the xml text of the entry.
	function createUrlXmlText() {

		# a ']]>' inside the url would end the CDATA section early, so it is
		# split over two CDATA sections
		$loc = str_replace(']]>', ']]]]><![CDATA[>', $this->loc);

		# changefreq and priority can come from the request, so they are
		# entity encoded; valid values are not changed by this
		$changefreq = htmlspecialchars($this->changefreq, ENT_QUOTES, 'UTF-8');
		$priority = htmlspecialchars($this->priority, ENT_QUOTES, 'UTF-8');

		$xmltext =
		'
		<url>
			<loc><![CDATA['.$loc.']]></loc>
		   	<lastmod>'.$this->lastmod.'</lastmod>
		    <changefreq>'.$changefreq.'</changefreq>
		    <priority>'.$priority.'</priority>
	 	</url>
	 	';
	 	return $xmltext;
	}
258
 
259
	# Write a sitemap file.
	#
	# The file $this->smfile is created in the sitemap directory and filled with
	# $this->smheader, the given content and $this->smfooter.
	#
	# $smxml - content to be written between header and footer
	function createSitemapFile($smxml) {
		$filePath = SP_TMPPATH ."/".$this->sitemapDir."/" .$this->smfile;
		$fp = fopen($filePath, 'w');

		# fopen() returns false when the file can not be created; writing to
		# that result would only raise follow-up errors
		if($fp === false){
			hideDiv('message');
			showErrorMsg("Unable to create sitemap file '<b>".$filePath."</b>' !");
			return;
		}

		$smxml = $this->smheader . $smxml . $this->smfooter;
		fwrite($fp, $smxml);
		fclose($fp);
	}
266
 
267
 
268
	# Create the encoded url for a sitemap.
	#
	# The characters & ' " > < are replaced by their xml entities and every
	# space is replaced by an underscore.
	#
	# $url - url to encode
	# Returns the encoded url.
	function getEncodedUrl($url){

		# '&' comes first, so the '&' of the entities added afterwards is not encoded again
		$replacements = array(
			'&' => '&amp;',
			"'" => '&apos;',
			'"' => '&quot;',
			'>' => '&gt;',
			'<' => '&lt;',
			" " => '_',
		);

		return str_replace(array_keys($replacements), array_values($replacements), $url);
	}
275
 
276
}
277
?>