Blame | Last modification | View Log | RSS feed
<?php

/***************************************************************************
 *   Copyright (C) 2009-2011 by Geo Varghese(www.seopanel.in)              *
 *   sendtogeo@gmail.com                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

/**
 * Class defines all site auditor controller functions.
 *
 * Works on three tables through $this->db: `auditorreports` (one row per
 * audited page), `auditorpagelinks` (links found on a page) and
 * `auditorprojects` (per-project average score).
 *
 * NOTE(review): SQL is built by string interpolation throughout, following
 * the existing convention of this file (values escaped with addslashes()).
 * Moving to parameterised queries would need support from the db layer,
 * which is not visible from here.
 */
class AuditorComponent extends Controller
{
    /**
     * Comments explaining the score of the page last evaluated by
     * countReportPageScore(), keyed by check name.
     */
    public $commentInfo = array();

    /**
     * Insert or update a row of `auditorreports`.
     *
     * Column names and values are interpolated as-is: callers must pass
     * values that are already escaped for SQL.
     *
     * @param array  $reportInfo column => value map; must contain 'id' when updating
     * @param string $action     'create' (insert, also sets `updated` to now) or 'update'
     */
    public function saveReportInfo($reportInfo, $action = 'create')
    {
        if ($action === 'create') {
            $dateTime = date('Y-m-d H:i:s');
            $columns = implode(',', array_keys($reportInfo));
            $values = implode("','", array_values($reportInfo));
            $sql = "insert into auditorreports($columns, updated) values('$values', '$dateTime')";
        } elseif ($action === 'update') {
            $sql = "Update auditorreports set ";
            foreach ($reportInfo as $col => $value) {
                if ($col != 'id') {
                    $sql .= "$col='$value',";
                }
            }

            // drop the comma left behind by the last assignment
            $sql = rtrim($sql, ',');
            $sql .= " where id=" . $reportInfo['id'];
        } else {
            // unknown action: there is no statement to run
            return;
        }

        $this->db->query($sql);
    }

    /**
     * Crawl one page of a project, store its details and score, and queue
     * the site links found on it as new report rows.
     *
     * Nothing is done unless a report row already exists for $reportUrl in
     * the project.
     *
     * NOTE(review): $reportUrl is interpolated into the lookup query as
     * passed in; confirm that callers supply a value that is safe for SQL.
     *
     * @param string $reportUrl   url of the page to audit
     * @param array  $projectInfo project settings (id, url, max_links, exclude_links, check_* flags, store_links_in_page)
     * @param int    $totalLinks  number of links already saved for the project
     *
     * @return string the (possibly redirected) page url, or
     *                "Error: External Link Found" when the page redirects to another domain
     */
    public function runReport($reportUrl, $projectInfo, $totalLinks)
    {
        $spider = new Spider();
        $rInfo = $this->getReportInfo(" and project_id={$projectInfo['id']} and page_url='$reportUrl'");
        if (!$rInfo) {
            return $reportUrl;
        }

        $pageInfo = $spider->getPageInfo($reportUrl, $projectInfo['url'], true);

        // handle redirects
        if (!empty($spider->effectiveUrl)) {
            $effectiveUrl = rtrim($spider->effectiveUrl, '/'); // remove trailing slash
            $reportId = $rInfo['id'];

            // redirect occurred. Could be simply www vs. no www
            if ($effectiveUrl !== $reportUrl) {
                $effectiveDomain = $this->getHostWithoutWww($effectiveUrl);
                $projectDomain = $this->getHostWithoutWww($projectInfo['url']);

                if ($effectiveDomain !== $projectDomain) {
                    // external link -- delete it from report
                    $this->db->query("delete from auditorreports where id=$reportId");
                    return "Error: External Link Found";
                }

                // still on same domain; the redirect target comes from the
                // crawl, so escape it before it goes into SQL
                $effectiveUrlSql = addslashes($effectiveUrl);
                $rInfoForEffectiveUrl = $this->getReportInfo(
                    " and project_id={$projectInfo['id']} and page_url='$effectiveUrlSql'"
                );

                if ($rInfoForEffectiveUrl) {
                    // An entry for the effective url already exists, so this one is a duplicate
                    $this->db->query("delete from auditorreports where id=$reportId");

                    // if the existing entry is already crawled there is nothing more to do
                    if ($rInfoForEffectiveUrl['crawled'] != 0) {
                        return $effectiveUrl; // Redirected to existing URL
                    }

                    // otherwise continue the crawl on the existing entry
                    $rInfo = $rInfoForEffectiveUrl;
                    $reportId = $rInfo['id'];
                } else {
                    // no entry for the effective url yet, so repoint this one
                    $this->db->query("update auditorreports set page_url='$effectiveUrlSql' where id=$reportId");
                    $reportUrl = $effectiveUrl;
                }
            }
        }

        $reportInfo = array();
        $reportInfo['id'] = $rInfo['id'];
        $reportInfo['page_title'] = addslashes($pageInfo['page_title']);
        $reportInfo['page_description'] = addslashes($pageInfo['page_description']);
        $reportInfo['page_keywords'] = addslashes($pageInfo['page_keywords']);
        $reportInfo['total_links'] = intval($pageInfo['total_links']);
        $reportInfo['external_links'] = intval($pageInfo['external']);
        $reportInfo['crawled'] = 1;

        // moz rank and page authority check
        if ($projectInfo['check_pr']) {
            $mozCtrler = $this->createController('Moz');
            $mozRankList = $mozCtrler->__getMozRankInfo(array($reportUrl));
            $reportInfo['pagerank'] = !empty($mozRankList[0]['moz_rank']) ? $mozRankList[0]['moz_rank'] : 0;
            $reportInfo['page_authority'] = !empty($mozRankList[0]['page_authority']) ? $mozRankList[0]['page_authority'] : 0;
        }

        // backlinks page check
        if ($projectInfo['check_backlinks']) {
            $backlinkCtrler = $this->createController('Backlink');
            $backlinkCtrler->url = Spider::addTrailingSlash($reportUrl);
            $reportInfo['bing_backlinks'] = $backlinkCtrler->__getBacklinks('msn');
            $reportInfo['google_backlinks'] = $backlinkCtrler->__getBacklinks('google');
        }

        // indexed page check
        if ($projectInfo['check_indexed']) {
            $saturationCtrler = $this->createController('SaturationChecker');
            $saturationCtrler->url = Spider::addTrailingSlash($reportUrl);
            $reportInfo['bing_indexed'] = $saturationCtrler->__getSaturationRank('msn');
            $reportInfo['google_indexed'] = $saturationCtrler->__getSaturationRank('google');
        }

        // broken link check: test the audited page itself (no $linkInfo
        // exists yet at this point of the method)
        if ($projectInfo['check_brocken']) {
            $reportInfo['brocken'] = Spider::isLInkBrocken($reportUrl);
        }

        $this->saveReportInfo($reportInfo, 'update');

        // number of page links stored so far; the first stored link also
        // clears the links previously saved for this report
        $i = 0;

        // to store sitelinks in page and links reports
        if (!empty($pageInfo['site_links'])) {
            // loop through site links
            foreach ($pageInfo['site_links'] as $linkInfo) {
                // if store links
                if ($projectInfo['store_links_in_page']) {
                    $delete = ($i++ === 0);
                    $linkInfo['report_id'] = $rInfo['id'];
                    $this->storePagelLinks($linkInfo, $delete);
                }

                // only while total links saved is less than max links allowed for a project
                if ($totalLinks >= $projectInfo['max_links']) {
                    continue;
                }

                // check whether valid html serving link
                if (preg_match('/\.zip$|\.gz$|\.tar$|\.png$|\.jpg$|\.jpeg$|\.gif$|\.mp3$|\.flv$|\.pdf$|\.m4a$|#$/i', $linkInfo['link_url'])) {
                    continue;
                }

                // skip links that are blank after formatting
                $linkInfo['link_url'] = Spider::formatUrl($linkInfo['link_url']);
                if (!preg_match('/\S+/', $linkInfo['link_url'])) {
                    continue;
                }

                // check whether url needs to be excluded
                if ($this->isExcludeLink($linkInfo['link_url'], $projectInfo['exclude_links'])) {
                    continue;
                }

                // save link for the project report, unless it is already there;
                // the url comes from the crawled page, so escape it for SQL
                $linkUrlSql = addslashes($linkInfo['link_url']);
                if (!$this->getReportInfo(" and project_id={$projectInfo['id']} and page_url='$linkUrlSql'")) {
                    $repInfo = array(
                        'page_url' => $linkUrlSql,
                        'project_id' => $projectInfo['id'],
                    );
                    $this->saveReportInfo($repInfo);
                    $totalLinks++;
                }
            }
        }

        // to store external links in page
        if ($projectInfo['store_links_in_page'] && !empty($pageInfo['external_links'])) {
            foreach ($pageInfo['external_links'] as $linkInfo) {
                $delete = ($i++ === 0);
                $linkInfo['report_id'] = $rInfo['id'];
                $linkInfo['extrenal'] = 1; // column name is spelled this way in the insert
                $this->storePagelLinks($linkInfo, $delete);
            }
        }

        // calculate score of the page and update it
        $this->updateReportPageScore($rInfo['id']);

        // recalculate the score of the project and update it
        $this->updateProjectPageScore($projectInfo['id']);

        return $reportUrl;
    }

    /**
     * Host part of a url with every "www." occurrence removed.
     *
     * @param string $url
     *
     * @return string empty string when the url has no host part
     */
    private function getHostWithoutWww($url)
    {
        $parts = parse_url($url);
        $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';

        return str_replace("www.", '', $host);
    }

    /**
     * Fetch a single row of `auditorreports`.
     *
     * @param string $where extra SQL conditions, each starting with " and "; used as-is
     *
     * @return array|false the row, or false when the result has no id
     */
    public function getReportInfo($where)
    {
        $sql = "SELECT * FROM auditorreports where 1=1 $where";
        $listInfo = $this->db->select($sql, true);

        return empty($listInfo['id']) ? false : $listInfo;
    }

    /**
     * Store one link of a page in `auditorpagelinks`.
     *
     * @param array $linkInfo column => value map including 'report_id'; values are escaped here
     * @param bool  $delete   when true, first remove all links stored for the report
     */
    public function storePagelLinks($linkInfo, $delete = false)
    {
        // escape every value; keys are kept
        $linkInfo = array_map('addslashes', $linkInfo);

        if ($delete) {
            $sql = "Delete from auditorpagelinks where report_id=" . $linkInfo['report_id'];
            $this->db->query($sql);
        }

        $columns = implode(',', array_keys($linkInfo));
        $values = implode("','", array_values($linkInfo));
        $sql = "insert into auditorpagelinks($columns) values('$values')";
        $this->db->query($sql);
    }

    /**
     * Check whether a link should be excluded from the audit.
     *
     * @param string $link        link to test
     * @param string $excludeList comma separated list of url fragments
     *
     * @return bool true when the link contains one of the fragments (case-insensitive)
     */
    public function isExcludeLink($link, $excludeList)
    {
        if (empty($excludeList)) {
            return false;
        }

        foreach (explode(',', $excludeList) as $exUrl) {
            $exUrl = trim($exUrl);

            // An empty fragment (e.g. from a trailing comma) must not match:
            // on PHP 8 an empty needle is found in every string.
            if ($exUrl === '') {
                continue;
            }

            if (stripos($link, $exUrl) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Recalculate the score of a report page and save it.
     *
     * @param int $reportId id of the `auditorreports` row
     */
    public function updateReportPageScore($reportId)
    {
        $reportInfo = $this->getReportInfo(" and id=$reportId");

        // no such report: nothing to score or update
        if (!$reportInfo) {
            return;
        }

        $score = array_sum($this->countReportPageScore($reportInfo));
        $sql = "update auditorreports set score=$score where id=$reportId";
        $this->db->query($sql);
    }

    /**
     * Work out the score of each check for a report page.
     *
     * As a side effect $this->commentInfo is reset and filled with the
     * formatted message explaining each check result. The thresholds are
     * the SA_* constants, which are defined outside this file.
     *
     * @param array $reportInfo row of `auditorreports`
     *
     * @return array check name => score
     */
    public function countReportPageScore($reportInfo)
    {
        $scoreInfo = array();
        $this->commentInfo = array();
        $spTextSA = $this->getLanguageTexts('siteauditor', $_SESSION['lang_code']);

        // check page title length
        $lengTitle = strlen($reportInfo['page_title']);
        if (($lengTitle <= SA_TITLE_MAX_LENGTH) && ($lengTitle >= SA_TITLE_MIN_LENGTH)) {
            $scoreInfo['page_title'] = 1;
        } else {
            $scoreInfo['page_title'] = -1;
            $msg = $spTextSA["The page title length is not between"] . " " . SA_TITLE_MIN_LENGTH . " & " . SA_TITLE_MAX_LENGTH;
            $this->commentInfo['page_title'] = formatErrorMsg($msg, 'error', '');
        }

        // check meta description length
        $lengDes = strlen($reportInfo['page_description']);
        if (($lengDes <= SA_DES_MAX_LENGTH) && ($lengDes >= SA_DES_MIN_LENGTH)) {
            $scoreInfo['page_description'] = 1;
        } else {
            $scoreInfo['page_description'] = -1;
            $msg = $spTextSA["The page description length is not between"] . " " . SA_DES_MIN_LENGTH . " and " . SA_DES_MAX_LENGTH;
            $this->commentInfo['page_description'] = formatErrorMsg($msg, 'error', '');
        }

        // check meta keywords length
        $lengKey = strlen($reportInfo['page_keywords']);
        if (($lengKey <= SA_KEY_MAX_LENGTH) && ($lengKey >= SA_KEY_MIN_LENGTH)) {
            $scoreInfo['page_keywords'] = 1;
        } else {
            $scoreInfo['page_keywords'] = -1;
            $msg = $spTextSA["The page keywords length is not between"] . " " . SA_KEY_MIN_LENGTH . " and " . SA_KEY_MAX_LENGTH;
            $this->commentInfo['page_keywords'] = formatErrorMsg($msg, 'error', '');
        }

        // if link is broken
        if ($reportInfo['brocken']) {
            $scoreInfo['brocken'] = -1;
            $msg = $spTextSA["The page is brocken"];
            $this->commentInfo['brocken'] = formatErrorMsg($msg, 'error', '');
        }

        // if the page has too many links
        if ($reportInfo['total_links'] >= SA_TOTAL_LINKS_MAX) {
            $scoreInfo['total_links'] = -1;
            $msg = $spTextSA["The total number of links in page is greater than"] . " " . SA_TOTAL_LINKS_MAX;

            // stored under its own key so it does not overwrite the keywords comment
            $this->commentInfo['total_links'] = formatErrorMsg($msg, 'error', '');
        }

        // check pagerank
        if ($reportInfo['pagerank'] >= SA_PR_CHECK_LEVEL_SECOND) {
            $scoreInfo['pagerank'] = $reportInfo['pagerank'] * 3;
            $msg = $spTextSA["The page is having exellent pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } elseif ($reportInfo['pagerank'] >= SA_PR_CHECK_LEVEL_FIRST) {
            $scoreInfo['pagerank'] = $reportInfo['pagerank'] * 2;
            $msg = $spTextSA["The page is having very good pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } elseif ($reportInfo['pagerank']) {
            $scoreInfo['pagerank'] = 1;
            $msg = $spTextSA["The page is having good pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } else {
            $scoreInfo['pagerank'] = 0;
            $msg = $spTextSA["The page is having poor pagerank"];
            $this->commentInfo['pagerank'] = formatErrorMsg($msg, 'error', '');
        }

        // check page authority value
        if ($reportInfo['page_authority'] >= SA_PA_CHECK_LEVEL_SECOND) {
            $scoreInfo['page_authority'] = 6;
            $msg = $spTextSA["The page is having excellent page authority value"];
            $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } elseif ($reportInfo['page_authority'] >= SA_PA_CHECK_LEVEL_FIRST) {
            $scoreInfo['page_authority'] = 3;
            $msg = $spTextSA["The page is having very good page authority value"];
            $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } elseif ($reportInfo['page_authority']) {
            $scoreInfo['page_authority'] = 1;
            $msg = $spTextSA["The page is having good page authority value"];
            $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } else {
            $scoreInfo['page_authority'] = 0;
            $msg = $spTextSA["The page is having poor page authority value"];
            $this->commentInfo['page_authority'] = formatErrorMsg($msg, 'error', '');
        }

        $seArr = array('google', 'bing');

        // check backlinks
        foreach ($seArr as $se) {
            $label = $se . '_backlinks';
            if ($reportInfo[$label] >= SA_BL_CHECK_LEVEL) {
                $scoreInfo[$label] = 2;
                $msg = $spTextSA["The page is having exellent number of backlinks in"] . " " . $se;
                $this->commentInfo[$label] = formatSuccessMsg($msg);
            } elseif ($reportInfo[$label]) {
                $scoreInfo[$label] = 1;
                $msg = $spTextSA["The page is having good number of backlinks in"] . " " . $se;
                $this->commentInfo[$label] = formatSuccessMsg($msg);
            } else {
                $scoreInfo[$label] = 0;
                $msg = $spTextSA["The page is not having backlinks in"] . " " . $se;
                $this->commentInfo[$label] = formatErrorMsg($msg, 'error', '');
            }
        }

        // check whether indexed or not
        foreach ($seArr as $se) {
            $label = $se . '_indexed';
            if ($reportInfo[$label]) {
                $scoreInfo[$label] = 1;
            } else {
                $scoreInfo[$label] = -1;
                $msg = $spTextSA["The page is not indexed in"] . " " . $se;
                $this->commentInfo[$label] = formatErrorMsg($msg, 'error', '');
            }
        }

        return $scoreInfo;
    }

    /**
     * Recalculate the score of a project as the average score of its
     * crawled pages and save it in `auditorprojects`.
     *
     * @param int $projectId
     */
    public function updateProjectPageScore($projectId)
    {
        $sql = "select sum(score)/count(*) as avgscore from auditorreports where crawled=1 and project_id=$projectId";
        $listInfo = $this->db->select($sql, true);

        // no crawled pages (or a zero average) is stored as 0
        $score = empty($listInfo['avgscore']) ? 0 : $listInfo['avgscore'];
        $sql = "update auditorprojects set score=$score where id=$projectId";
        $this->db->query($sql);
    }

    /**
     * Get all links stored for a report page.
     *
     * @param int $reportId
     *
     * @return mixed result of the db layer for the `auditorpagelinks` rows
     */
    public function getAllLinksPage($reportId)
    {
        $sql = "select * from auditorpagelinks where report_id=$reportId";

        return $this->db->select($sql);
    }

    /**
     * Count the distinct non-empty values of a column that are shared by
     * more than one report page of a project.
     *
     * @param int    $projectId
     * @param string $col         column to check, interpolated as-is
     * @param bool   $statusCheck when true, only pages with crawled=$statusVal are considered
     * @param int    $statusVal
     *
     * @return int number of duplicated values
     */
    public function getDuplicateMetaInfoCount($projectId, $col = 'page_title', $statusCheck = false, $statusVal = 1)
    {
        $crawled = $statusCheck ? " and crawled=$statusVal" : "";
        $sql = "select $col,count(*) as count from auditorreports where project_id=$projectId and $col!='' $crawled group by $col having count>1";
        $list = $this->db->select($sql);

        // one result row per duplicated value
        // NOTE(review): assumes the db layer returns an array of rows -- confirm
        return is_array($list) ? count($list) : 0;
    }

    /**
     * Get report pages of `auditorreports`.
     *
     * @param string $where extra SQL conditions, each starting with " and "; used as-is
     * @param string $cols  column list to select
     *
     * @return mixed result of the db layer
     */
    public function getAllreportPages($where = '', $cols = '*')
    {
        $sql = "SELECT $cols FROM auditorreports where 1=1 $where";

        return $this->db->select($sql);
    }
}