Subversion Repositories cheapmusic

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
103 - 1
<?php
2
 
3
/***************************************************************************
4
 *   Copyright (C) 2009-2011 by Geo Varghese(www.seopanel.in)  	   		   *
5
 *   sendtogeo@gmail.com   												   *
6
 *                                                                         *
7
 *   This program is free software; you can redistribute it and/or modify  *
8
 *   it under the terms of the GNU General Public License as published by  *
9
 *   the Free Software Foundation; either version 2 of the License, or     *
10
 *   (at your option) any later version.                                   *
11
 *                                                                         *
12
 *   This program is distributed in the hope that it will be useful,       *
13
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 *   GNU General Public License for more details.                          *
16
 *                                                                         *
17
 *   You should have received a copy of the GNU General Public License     *
18
 *   along with this program; if not, write to the                         *
19
 *   Free Software Foundation, Inc.,                                       *
20
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
21
 ***************************************************************************/
22
 
23
# class defines all site auditor controller functions
24
/**
 * Site auditor component.
 *
 * Crawls the pages of an audit project through the Spider class, stores one
 * row per page in `auditorreports`, optionally stores the links found on each
 * page in `auditorpagelinks`, and maintains the page scores and the project
 * score kept in `auditorprojects`.
 *
 * SQL is built by string concatenation because the prepared-statement
 * capabilities of $this->db are not visible from this file; string values are
 * escaped with addslashes() and ids are cast with intval() before being
 * interpolated.
 */
class AuditorComponent extends Controller{

    // Formatted comments produced by the last countReportPageScore() call,
    // keyed by the report column each comment refers to.
    var $commentInfo = array();

    /**
     * Insert a new row into auditorreports or update an existing one.
     *
     * Values are interpolated into the statement as given: the CALLER is
     * responsible for escaping them (runReport() does so with addslashes()).
     *
     * @param array  $reportInfo column => value map; for 'update' it must contain 'id'
     * @param string $action     'create' (default) or 'update'; any other value is ignored
     */
    function saveReportInfo($reportInfo, $action='create') {

        // nothing to write: avoids building an invalid statement
        if (empty($reportInfo) || !is_array($reportInfo)) {
            return;
        }

        if ($action == 'create') {
            $dateTime = date('Y-m-d H:i:s');
            $columns = implode(',', array_keys($reportInfo));
            $values = implode("','", array_values($reportInfo));
            $sql = "insert into auditorreports($columns, updated) values('$values', '$dateTime')";
        } elseif ($action == 'update') {

            // an update without a target row id can not be executed
            if (empty($reportInfo['id'])) {
                return;
            }

            $assignments = array();
            foreach ($reportInfo as $col => $value) {
                if ($col != 'id') {
                    $assignments[] = "$col='$value'";
                }
            }

            // only the id was passed: there is nothing to set
            if (empty($assignments)) {
                return;
            }

            $sql = "Update auditorreports set ".implode(',', $assignments)." where id=".intval($reportInfo['id']);
        } else {
            // unknown action: previously this ran a query with an undefined statement
            return;
        }

        $this->db->query($sql);
    }

    /**
     * Crawl one page of a project and store everything learned about it.
     *
     * Steps, in order:
     *  1. look up the report row of $reportUrl (nothing is done when it does not exist);
     *  2. fetch the page through Spider and resolve redirects (see inline comments);
     *  3. save page meta data and, depending on the project settings, rank,
     *     backlink, index and broken-link information;
     *  4. store the links found in the page and queue new site links as
     *     report rows while $totalLinks is below the project's 'max_links';
     *  5. recalculate the page score and the project score.
     *
     * @param string $reportUrl   url of the page to crawl
     * @param array  $projectInfo project row; keys read here: id, url, check_pr,
     *                            check_backlinks, check_indexed, check_brocken,
     *                            store_links_in_page, max_links, exclude_links
     * @param int    $totalLinks  number of links already saved for the project
     * @return string the url that was crawled (the redirect target when the
     *                report row was moved to it), the redirect target when it
     *                was already crawled, or "Error: External Link Found" when
     *                the page redirects to another domain
     */
    function runReport($reportUrl, $projectInfo, $totalLinks) {
        $spider = new Spider();
        $projectId = intval($projectInfo['id']);

        $rInfo = $this->getReportInfo(" and project_id=$projectId and page_url='".addslashes($reportUrl)."'");
        if (!$rInfo) {
            return $reportUrl;
        }

        $pageInfo = $spider->getPageInfo($reportUrl, $projectInfo['url'], true);

        // handle redirects
        if (!empty($spider->effectiveUrl)) {
            $effectiveUrl = rtrim($spider->effectiveUrl, '/'); // remove trailing slash
            $reportId = intval($rInfo['id']);

            if ($effectiveUrl != $reportUrl) { // redirect occurred. Could be simply www vs. no www
                $parse = parse_url($effectiveUrl);
                $effectiveDomain = str_replace("www.", '', isset($parse['host']) ? $parse['host'] : '');
                $parse = parse_url($projectInfo['url']);
                $projectDomain = str_replace("www.", '', isset($parse['host']) ? $parse['host'] : '');

                if ($effectiveDomain != $projectDomain) {
                    // external link -- delete it from report
                    $this->db->query("delete from auditorreports where id=$reportId");
                    return "Error: External Link Found";
                }

                // still on same domain: check if we already have an entry for the effective URL
                $escEffectiveUrl = addslashes($effectiveUrl);
                $rInfoForEffectiveUrl = $this->getReportInfo(" and project_id=$projectId and page_url='$escEffectiveUrl'");

                if ($rInfoForEffectiveUrl) {

                    // the row of the requested url is a duplicate of the existing one, remove it
                    $this->db->query("delete from auditorreports where id=$reportId");

                    // existing entry already crawled: nothing more to do
                    if ($rInfoForEffectiveUrl['crawled'] != 0) {
                        return $effectiveUrl; // Redirected to existing URL
                    }

                    // existing entry not crawled yet: store the crawl details in it
                    // NOTE(review): $reportUrl keeps the pre-redirect url in this branch, so the
                    // rank/backlink checks below and the return value use it - confirm intended
                    $rInfo = $rInfoForEffectiveUrl;
                } else {
                    // no entry for the effective url yet: move this one to it
                    $this->db->query("update auditorreports set page_url='$escEffectiveUrl' where id=$reportId");
                    $reportUrl = $effectiveUrl;
                }
            }
        }

        $reportInfo = array();
        $reportInfo['id'] = $rInfo['id'];
        $reportInfo['page_title'] = addslashes($pageInfo['page_title']);
        $reportInfo['page_description'] = addslashes($pageInfo['page_description']);
        $reportInfo['page_keywords'] = addslashes($pageInfo['page_keywords']);
        $reportInfo['total_links'] = intval($pageInfo['total_links']);
        $reportInfo['external_links'] = intval($pageInfo['external']);
        $reportInfo['crawled'] = 1;

        // rank check (values are taken from the Moz controller)
        if ($projectInfo['check_pr']) {
            $mozCtrler = $this->createController('Moz');
            $mozRankList = $mozCtrler->__getMozRankInfo(array($reportUrl));
            $reportInfo['pagerank'] = !empty($mozRankList[0]['moz_rank']) ? $mozRankList[0]['moz_rank'] : 0;
            $reportInfo['page_authority'] = !empty($mozRankList[0]['page_authority']) ? $mozRankList[0]['page_authority'] : 0;
        }

        // backlinks page check
        if ($projectInfo['check_backlinks']) {
            $backlinkCtrler = $this->createController('Backlink');
            $backlinkCtrler->url = Spider::addTrailingSlash($reportUrl);
            $reportInfo['bing_backlinks'] = $backlinkCtrler->__getBacklinks('msn');
            $reportInfo['google_backlinks'] = $backlinkCtrler->__getBacklinks('google');
        }

        // indexed page check
        if ($projectInfo['check_indexed']) {
            $saturationCtrler = $this->createController('SaturationChecker');
            $saturationCtrler->url = Spider::addTrailingSlash($reportUrl);
            $reportInfo['bing_indexed'] = $saturationCtrler->__getSaturationRank('msn');
            $reportInfo['google_indexed'] = $saturationCtrler->__getSaturationRank('google');
        }

        // broken link check of the crawled page itself
        // (the original code read $linkInfo here, which is not defined before the loops below)
        if ($projectInfo['check_brocken']) {
            $reportInfo['brocken'] = Spider::isLInkBrocken($reportUrl);
        }

        $this->saveReportInfo($reportInfo, 'update');

        // number of page links stored so far; the first stored link also
        // triggers the removal of the links stored by a previous crawl
        $i = 0;

        // to store site links in page and links reports
        if (!empty($pageInfo['site_links']) && is_array($pageInfo['site_links'])) {

            // loop through site links
            foreach ($pageInfo['site_links'] as $linkInfo) {

                // if store links
                if ($projectInfo['store_links_in_page']) {
                    $delete = $i++ ? false : true;
                    $linkInfo['report_id'] = $rInfo['id'];
                    $this->storePagelLinks($linkInfo, $delete);
                }

                // new report pages are only queued while below the max links allowed for a project
                if ($totalLinks >= $projectInfo['max_links']) continue;

                // check whether valid html serving link
                if (preg_match('/\.zip$|\.gz$|\.tar$|\.png$|\.jpg$|\.jpeg$|\.gif$|\.mp3$|\.flv$|\.pdf$|\.m4a$|#$/i', $linkInfo['link_url'])) continue;

                // format the link and skip it when nothing but white space remains
                $linkInfo['link_url'] = Spider::formatUrl($linkInfo['link_url']);
                if (!preg_match('/\S+/', $linkInfo['link_url'])) continue;

                // check whether url needs to be excluded
                if ($this->isExcludeLink($linkInfo['link_url'], $projectInfo['exclude_links'])) continue;

                // save links for the project report
                $escLinkUrl = addslashes($linkInfo['link_url']);
                if (!$this->getReportInfo(" and project_id=$projectId and page_url='$escLinkUrl'")) {
                    $repInfo = array();
                    $repInfo['page_url'] = $escLinkUrl;
                    $repInfo['project_id'] = $projectId;
                    $this->saveReportInfo($repInfo);
                    $totalLinks++;
                }
            }
        }

        // to store external links in page
        if ($projectInfo['store_links_in_page']) {
            if (!empty($pageInfo['external_links']) && is_array($pageInfo['external_links'])) {
                foreach ($pageInfo['external_links'] as $linkInfo) {
                    $delete = $i++ ? false : true;
                    $linkInfo['report_id'] = $rInfo['id'];
                    $linkInfo['extrenal'] = 1;
                    $this->storePagelLinks($linkInfo, $delete);
                }
            }
        }

        // calculate score of the page and update it
        $this->updateReportPageScore($rInfo['id']);

        // calculate score of the project and update it
        $this->updateProjectPageScore($projectInfo['id']);

        return $reportUrl;
    }

    /**
     * Fetch a single row of auditorreports.
     *
     * @param string $where extra conditions appended verbatim after "where 1=1";
     *                      must start with " and " and be escaped by the caller
     * @return array|false the row, or false when the result has no 'id'
     */
    function getReportInfo($where) {
        $sql = "SELECT * FROM auditorreports where 1=1 $where";

        // second argument true: the result is used as one row here
        $listInfo = $this->db->select($sql, true);
        return empty($listInfo['id']) ? false : $listInfo;
    }

    /**
     * Store one link found in a page into auditorpagelinks.
     *
     * All values are escaped with addslashes() here, callers pass raw values.
     *
     * @param array $linkInfo column => value map of the link, including 'report_id'
     * @param bool  $delete   when true, all links already stored for the same
     *                        report_id are removed before the insert
     */
    function storePagelLinks($linkInfo, $delete=false) {

        foreach ($linkInfo as $col => $val) {
            $linkInfo[$col] = addslashes($val);
        }

        if ($delete) {
            $sql = "Delete from auditorpagelinks where report_id=".intval($linkInfo['report_id']);
            $this->db->query($sql);
        }

        $linkKeys = array_keys($linkInfo);
        $linkValues = array_values($linkInfo);
        $sql = "insert into auditorpagelinks(".implode(',', $linkKeys).") values('".implode("','", $linkValues)."')";
        $this->db->query($sql);
    }

    /**
     * Check whether a link matches one of the exclude patterns of a project.
     *
     * A pattern matches when it occurs anywhere in the link, case-insensitively.
     * Empty patterns (e.g. caused by a trailing comma in the list) are skipped:
     * since PHP 8 an empty needle matches every string, which would exclude
     * all links.
     *
     * @param string $link        link to check
     * @param string $excludeList comma separated list of patterns, may be empty
     * @return bool true when the link should be excluded
     */
    function isExcludeLink($link, $excludeList) {
        if (empty($excludeList)) {
            return false;
        }

        foreach (explode(',', $excludeList) as $exUrl) {
            $exUrl = trim($exUrl);
            if ($exUrl === '') {
                continue;
            }

            if (stripos($link, $exUrl) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Recalculate the score of a report page and save it in auditorreports.
     *
     * Nothing is done when no report row exists for the id.
     *
     * @param int $reportId id of the report row
     */
    function updateReportPageScore($reportId) {
        $reportId = intval($reportId);
        $reportInfo = $this->getReportInfo(" and id=$reportId");

        // no such report: there is nothing to score or to update
        if (!$reportInfo) {
            return;
        }

        $scoreInfo = $this->countReportPageScore($reportInfo);
        $score = array_sum($scoreInfo);
        $sql = "update auditorreports set score=$score where id=$reportId";
        $this->db->query($sql);
    }

    /**
     * Calculate the score parts of a report page.
     *
     * Side effect: $this->commentInfo is reset and filled with a formatted
     * message per checked column (error messages, plus success messages for
     * the rank and backlink checks).
     *
     * @param array $reportInfo row of auditorreports
     * @return array score part per column; the page score is the sum of the values
     */
    function countReportPageScore($reportInfo) {
        $scoreInfo = array();
        $this->commentInfo = array();
        $spTextSA = $this->getLanguageTexts('siteauditor', $_SESSION['lang_code']);

        // check page title length
        $lengTitle = strlen($reportInfo['page_title']);
        if ( ($lengTitle <= SA_TITLE_MAX_LENGTH) && ($lengTitle >= SA_TITLE_MIN_LENGTH) ) {
            $scoreInfo['page_title'] = 1;
        } else {
            $scoreInfo['page_title'] = -1;
            $msg = $spTextSA["The page title length is not between"]." ".SA_TITLE_MIN_LENGTH." & ".SA_TITLE_MAX_LENGTH;
            $this->commentInfo['page_title'] = formatErrorMsg($msg, 'error', '');
        }

        // check meta description length
        $lengDes = strlen($reportInfo['page_description']);
        if ( ($lengDes <= SA_DES_MAX_LENGTH) && ($lengDes >= SA_DES_MIN_LENGTH) ) {
            $scoreInfo['page_description'] = 1;
        } else {
            $scoreInfo['page_description'] = -1;
            $msg = $spTextSA["The page description length is not between"]." ".SA_DES_MIN_LENGTH." and ".SA_DES_MAX_LENGTH;
            $this->commentInfo['page_description'] = formatErrorMsg($msg, 'error', '');
        }

        // check meta keywords length
        $lengKey = strlen($reportInfo['page_keywords']);
        if ( ($lengKey <= SA_KEY_MAX_LENGTH) && ($lengKey >= SA_KEY_MIN_LENGTH) ) {
            $scoreInfo['page_keywords'] = 1;
        } else {
            $scoreInfo['page_keywords'] = -1;
            $msg = $spTextSA["The page keywords length is not between"]." ".SA_KEY_MIN_LENGTH." and ".SA_KEY_MAX_LENGTH;
            $this->commentInfo['page_keywords'] = formatErrorMsg($msg, 'error', '');
        }

        // if link brocken
        if (!empty($reportInfo['brocken'])) {
            $scoreInfo['brocken'] = -1;
            $msg = $spTextSA["The page is brocken"];
            $this->commentInfo['brocken'] = formatErrorMsg($msg, 'error', '');
        }

        // if the page has too many links
        if ($reportInfo['total_links'] >= SA_TOTAL_LINKS_MAX) {
            $scoreInfo['total_links'] = -1;
            $msg = $spTextSA["The total number of links in page is greater than"]." ".SA_TOTAL_LINKS_MAX;

            // stored under its own key; it used to overwrite the 'page_keywords' comment
            $this->commentInfo['total_links'] = formatErrorMsg($msg, 'error', '');
        }

        // check pagerank
        if ($reportInfo['pagerank'] >= SA_PR_CHECK_LEVEL_SECOND) {
            $scoreInfo['pagerank'] = $reportInfo['pagerank'] * 3;
            $msg = $spTextSA["The page is having exellent pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } else if ($reportInfo['pagerank'] >= SA_PR_CHECK_LEVEL_FIRST) {
            $scoreInfo['pagerank'] = $reportInfo['pagerank'] * 2;
            $msg = $spTextSA["The page is having very good pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } else if ($reportInfo['pagerank']) {
            $scoreInfo['pagerank'] = 1;
            $msg = $spTextSA["The page is having good pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } else {
            $scoreInfo['pagerank'] = 0;
            $msg = $spTextSA["The page is having poor pagerank"];
            $this->commentInfo['pagerank'] = formatErrorMsg($msg, 'error', '');
        }

        // check page authority value
        if ($reportInfo['page_authority'] >= SA_PA_CHECK_LEVEL_SECOND) {
            $scoreInfo['page_authority'] = 6;
            $msg = $spTextSA["The page is having excellent page authority value"];
            $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } else if ($reportInfo['page_authority'] >= SA_PA_CHECK_LEVEL_FIRST) {
            $scoreInfo['page_authority'] = 3;
            $msg = $spTextSA["The page is having very good page authority value"];
            $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } else if ($reportInfo['page_authority']) {
            $scoreInfo['page_authority'] = 1;
            $msg = $spTextSA["The page is having good page authority value"];
            $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } else {
            $scoreInfo['page_authority'] = 0;
            $msg = $spTextSA["The page is having poor page authority value"];
            $this->commentInfo['page_authority'] = formatErrorMsg($msg, 'error', '');
        }

        // check backlinks
        $seArr = array('google', 'bing');
        foreach ($seArr as $se) {
            $label = $se.'_backlinks';
            if ($reportInfo[$label] >= SA_BL_CHECK_LEVEL) {
                $scoreInfo[$label] = 2;
                $msg = $spTextSA["The page is having exellent number of backlinks in"]." ".$se;
                $this->commentInfo[$label] = formatSuccessMsg($msg);
            } elseif ($reportInfo[$label]) {
                $scoreInfo[$label] = 1;
                $msg = $spTextSA["The page is having good number of backlinks in"]." ".$se;
                $this->commentInfo[$label] = formatSuccessMsg($msg);
            } else {
                $scoreInfo[$label] = 0;
                $msg = $spTextSA["The page is not having backlinks in"]." ".$se;
                $this->commentInfo[$label] = formatErrorMsg($msg, 'error', '');
            }
        }

        // check whether indexed or not
        foreach ($seArr as $se) {
            $label = $se.'_indexed';
            if ($reportInfo[$label]) {
                $scoreInfo[$label] = 1;
            } else {
                $scoreInfo[$label] = -1;
                $msg = $spTextSA["The page is not indexed in"]." ".$se;
                $this->commentInfo[$label] = formatErrorMsg($msg, 'error', '');
            }
        }

        return $scoreInfo;
    }

    /**
     * Recalculate the score of a project, the average score of its crawled
     * report pages, and save it in auditorprojects.
     *
     * @param int $projectId id of the project
     */
    function updateProjectPageScore($projectId) {
        $projectId = intval($projectId);
        $sql = "select sum(score)/count(*) as avgscore from auditorreports where crawled=1 and project_id=$projectId";
        $listInfo = $this->db->select($sql, true);

        // no crawled pages (or an average of zero) gives a score of 0
        $score = empty($listInfo['avgscore']) ? 0 : $listInfo['avgscore'];

        $sql = "update auditorprojects set score=$score where id=$projectId";
        $this->db->query($sql);
    }

    /**
     * Get all links stored for a report page.
     *
     * @param int $reportId id of the report row
     * @return mixed whatever $this->db->select() returns for the rows of auditorpagelinks
     */
    function getAllLinksPage($reportId) {
        $sql = "select * from auditorpagelinks where report_id=".intval($reportId);
        return $this->db->select($sql);
    }

    /**
     * Count the distinct non-empty values of a column that are shared by more
     * than one report page of a project.
     *
     * @param int    $projectId   id of the project
     * @param string $col         column to check; interpolated verbatim into the
     *                            statement, so it must never come from user input
     * @param bool   $statusCheck when true only pages with crawled=$statusVal are counted
     * @param int    $statusVal   value of the crawled column to filter on
     * @return int number of duplicated values
     */
    function getDuplicateMetaInfoCount($projectId, $col='page_title', $statusCheck=false, $statusVal=1) {
        $projectId = intval($projectId);
        $crawled = $statusCheck ? " and crawled=".intval($statusVal) : "";
        $sql = "select $col,count(*) as count from auditorreports where project_id=$projectId and $col!='' $crawled group by $col having count>1";
        $list = $this->db->select($sql);

        // one result row per duplicated value
        return is_array($list) ? count($list) : 0;
    }

    /**
     * Get report pages matching a condition.
     *
     * @param string $where extra conditions appended verbatim after "where 1=1";
     *                      must be escaped by the caller
     * @param string $cols  column list to select, '*' by default
     * @return mixed whatever $this->db->select() returns for the matching rows
     */
    function getAllreportPages($where='', $cols='*') {
        $sql = "SELECT $cols FROM auditorreports where 1=1 $where";
        return $this->db->select($sql);
    }

}