Subversion Repositories cheapmusic

Rev

Blame | Last modification | View Log | RSS feed

<?php

/***************************************************************************
 *   Copyright (C) 2009-2011 by Geo Varghese(www.seopanel.in)                      *
 *   sendtogeo@gmail.com                                                                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

# class defines all site auditor controller functions
class AuditorComponent extends Controller{
    
    var $commentInfo = array(); // to store the details about the score of each page
    
    // function to save report info
    function saveReportInfo($reportInfo, $action='create') {
        if ($action == 'create') {
                        $dateTime = date('Y-m-d H:i:s');
            $reportKeys = array_keys($reportInfo);
            $reportValues = array_values($reportInfo);
            $sql = "insert into auditorreports(".implode(',', $reportKeys).", updated) values('".implode("','", $reportValues)."', '$dateTime')";
        } elseif($action == 'update') {
            $sql = "Update auditorreports set ";
            foreach ($reportInfo as $col => $value) {
                if ($col != 'id') {
                    $sql .= "$col='$value',";    
                }
            }
            $sql = preg_replace('/\,$/', '', $sql);
            $sql .= " where id=".$reportInfo['id'];
        }
        $this->db->query($sql);
    }
    
    // func to run report for a project
    function runReport($reportUrl, $projectInfo, $totalLinks) {        
        $spider = new Spider();

        if ($rInfo = $this->getReportInfo(" and project_id={$projectInfo['id']} and page_url='$reportUrl'") ) {
                
                $pageInfo = $spider->getPageInfo($reportUrl, $projectInfo['url'], true);
                
            // handle redirects
            if(!empty($spider->effectiveUrl)) {
                $effectiveUrl = rtrim($spider->effectiveUrl, '/'); //remove trailing slash
                $reportId = $rInfo['id'];

                if ($effectiveUrl != $reportUrl){ //redirect occurred. Could be simply www vs. no www
                                        $parse = parse_url($effectiveUrl);
                                        $effectiveDomain = str_replace("www.", '', $parse['host']);
                        $parse = parse_url($projectInfo['url']);
                        $projectDomain = str_replace("www.", '', $parse['host']);
                  
                        if ($effectiveDomain == $projectDomain) { //still on same domain
                        
                        // check if we already have an entry for the effective URL
                                                if ($rInfoForEffectiveUrl = $this->getReportInfo(" and project_id={$projectInfo['id']} and page_url='$effectiveUrl'")){

                                                        // If we already have an entry then we can delete this new one and not continue running tests on it as it's a duplicate 
                                                        $this->db->query("delete from auditorreports where id=$reportId");
                        
                                // if already existing effective url is not crawled, continue with the page crawl details and save
                                if ($rInfoForEffectiveUrl['crawled'] == 0) {                            
                                                        $rInfo = $rInfoForEffectiveUrl;
                                        $reportId = $rInfo['id'];
                                } else {
                                        return $effectiveUrl; //Redirected to existing URL
                                }
                        
                                                } else { //if we don't already have an entry, update this one
                                $this->db->query("update auditorreports set page_url='$effectiveUrl' where id=$reportId");
                                $reportUrl = $effectiveUrl;
                        }
                      
                                        } else { //external link -- delete it from report                       
                                                $this->db->query("delete from auditorreports where id=$reportId");
                        return "Error: External Link Found";
                                        }
                  
                                }
            }
            
            $reportInfo['id'] = $rInfo['id'];
            $reportInfo['page_title'] = addslashes($pageInfo['page_title']);
            $reportInfo['page_description'] = addslashes($pageInfo['page_description']);
            $reportInfo['page_keywords'] = addslashes($pageInfo['page_keywords']);
            $reportInfo['total_links'] = intval($pageInfo['total_links']);
            $reportInfo['external_links'] = intval($pageInfo['external']);
            $reportInfo['crawled'] = 1;
        
            // gooogle pagerank check
            if ($projectInfo['check_pr']) {
                $mozCtrler = $this->createController('Moz');
                $mozRankList = $mozCtrler->__getMozRankInfo(array($reportUrl));
                $reportInfo['pagerank'] = !empty($mozRankList[0]['moz_rank']) ? $mozRankList[0]['moz_rank'] : 0;
                $reportInfo['page_authority'] = !empty($mozRankList[0]['page_authority']) ? $mozRankList[0]['page_authority'] : 0;
            }
            
            // backlinks page check
            if ($projectInfo['check_backlinks']) {
                $backlinkCtrler = $this->createController('Backlink');
                $backlinkCtrler->url = Spider::addTrailingSlash($reportUrl);
                $reportInfo['bing_backlinks'] = $backlinkCtrler->__getBacklinks('msn');
                $reportInfo['google_backlinks'] = $backlinkCtrler->__getBacklinks('google');
            }
            
            // indexed page check
            if ($projectInfo['check_indexed']) {
                $saturationCtrler = $this->createController('SaturationChecker');
                $saturationCtrler->url = Spider::addTrailingSlash($reportUrl);
                $reportInfo['bing_indexed'] = $saturationCtrler->__getSaturationRank('msn');
                $reportInfo['google_indexed'] = $saturationCtrler->__getSaturationRank('google');
            }
        
            if ($projectInfo['check_brocken']) {
                $reportInfo['brocken'] = Spider::isLInkBrocken($linkInfo['link_url']);    
            }
            
            $this->saveReportInfo($reportInfo, 'update');
            
            // to store sitelinks in page and links reports
            $i = 0;
            if (count($pageInfo['site_links']) > 0) {
                
                // loo through site links
                foreach ($pageInfo['site_links'] as $linkInfo) {
                    // if store links 
                    if ($projectInfo['store_links_in_page']) {
                        $delete = $i++ ? false : true;
                        $linkInfo['report_id'] = $rInfo['id'];
                        $this->storePagelLinks($linkInfo, $delete);
                    }
                    
                    // if total links saved less than max links allowed for a project
                    if ($totalLinks < $projectInfo['max_links']) { 
                        
                        // check whether valid html serving link
                        if(preg_match('/\.zip$|\.gz$|\.tar$|\.png$|\.jpg$|\.jpeg$|\.gif$|\.mp3$|\.flv$|\.pdf$|\.m4a$|#$/i', $linkInfo['link_url'])) continue;
                        
                        // if found any space in the link
                        $linkInfo['link_url'] = Spider::formatUrl($linkInfo['link_url']);
                        if (!preg_match('/\S+/', $linkInfo['link_url'])) continue;                        
                        
                        // check whether url needs to be excluded
                        if ($this->isExcludeLink($linkInfo['link_url'], $projectInfo['exclude_links'])) continue;
                        
                        // save links for the project report
                        if (!$this->getReportInfo(" and project_id={$projectInfo['id']} and page_url='{$linkInfo['link_url']}'")) {
                                    $repInfo['page_url'] = $linkInfo['link_url'];
                                    $repInfo['project_id'] = $projectInfo['id'];
                                    $this->saveReportInfo($repInfo);
                                    $totalLinks++;            
                        }
                    }
                }
            }
            
            // to store external links in page
            if ($projectInfo['store_links_in_page']) {
                if (count($pageInfo['external_links']) > 0) {
                    foreach ($pageInfo['external_links'] as $linkInfo) {
                        $delete = $i++ ? false : true;
                        $linkInfo['report_id'] = $rInfo['id'];
                        $linkInfo['extrenal'] = 1;
                        $this->storePagelLinks($linkInfo, $delete);
                    }
                }                
            }

            // calculate score of each page and update it
            $this->updateReportPageScore($rInfo['id']);
            
            // calculate score of each page and update it
            $this->updateProjectPageScore($projectInfo['id']);
        }
        
        return $reportUrl;                 
    }
    
    // function to get report info
    function getReportInfo($where) {        
            $sql = "SELECT * FROM auditorreports where 1=1 $where";
                $listInfo = $this->db->select($sql, true);
                return empty($listInfo['id']) ? false : $listInfo;
        }
    
    // function to store link of page
    function storePagelLinks($linkInfo, $delete=false) {
        
        foreach ($linkInfo as $col => $val) {
            $linkInfo[$col] = addslashes($val);
        }
        
        if ($delete) {
            $sql = "Delete from auditorpagelinks where report_id=".$linkInfo['report_id'];
            $this->db->query($sql);
        }
        
        $linkKeys = array_keys($linkInfo);
        $linkValues = array_values($linkInfo);
        $sql = "insert into auditorpagelinks(".implode(',', $linkKeys).") values('".implode("','", $linkValues)."')";
        $this->db->query($sql);
    }
    
    // function to check whether link should be excluded or not
    function isExcludeLink($link, $excludeList) {
        $exclude =  false;
        if (!empty($excludeList)) {
            $excludeList = explode(',', $excludeList);
            foreach ($excludeList as $exUrl) {
                if (stristr($link, trim($exUrl))) {
                    $exclude = true;
                    break;    
                }    
            }
        }
        return $exclude;
    }
    
    // function to find the score of a report page
    function updateReportPageScore($reportId) {
        $reportInfo = $this->getReportInfo(" and id=$reportId");
        $scoreInfo = $this->countReportPageScore($reportInfo);
        $score =  array_sum($scoreInfo);
        $sql = "update auditorreports set score=$score where id=$reportId";
        $this->db->query($sql);
    }
    
    // function to count report page score
    function countReportPageScore($reportInfo) {
        $scoreInfo = array();
        $this->commentInfo = array();
        $spTextSA = $this->getLanguageTexts('siteauditor', $_SESSION['lang_code']);
        
        // check page title length
        $lengTitle = strlen($reportInfo['page_title']);
        if ( ($lengTitle <= SA_TITLE_MAX_LENGTH) && ($lengTitle >= SA_TITLE_MIN_LENGTH) ) {
            $scoreInfo['page_title'] = 1;        
        } else {
            $scoreInfo['page_title'] = -1;
            $msg = $spTextSA["The page title length is not between"]." ".SA_TITLE_MIN_LENGTH." & ".SA_TITLE_MAX_LENGTH;
            $this->commentInfo['page_title'] = formatErrorMsg($msg, 'error', '');
        }
        
        // check meta description length
        $lengDes = strlen($reportInfo['page_description']);
        if ( ($lengDes <= SA_DES_MAX_LENGTH) && ($lengDes >= SA_DES_MIN_LENGTH) ) {
            $scoreInfo['page_description'] = 1;
        } else {
            $scoreInfo['page_description'] = -1;
            $msg = $spTextSA["The page description length is not between"]." ".SA_DES_MIN_LENGTH." and ".SA_DES_MAX_LENGTH;
            $this->commentInfo['page_description'] = formatErrorMsg($msg, 'error', '');
        }
        
        // check meta keywords length
        $lengKey = strlen($reportInfo['page_keywords']);
        if ( ($lengKey <= SA_KEY_MAX_LENGTH) && ($lengKey >= SA_KEY_MIN_LENGTH) ) {
            $scoreInfo['page_keywords'] = 1;
        } else {
            $scoreInfo['page_keywords'] = -1;
            $msg = $spTextSA["The page keywords length is not between"]." ".SA_KEY_MIN_LENGTH." and ".SA_KEY_MAX_LENGTH;
            $this->commentInfo['page_keywords'] = formatErrorMsg($msg, 'error', '');
        }
        
        // if link brocken
        if ($reportInfo['brocken']) {
            $scoreInfo['brocken'] = -1;
            $msg = $spTextSA["The page is brocken"];
            $this->commentInfo['brocken'] = formatErrorMsg($msg, 'error', '');
        }

        // if total links of a page
        if ($reportInfo['total_links'] >= SA_TOTAL_LINKS_MAX) {
            $scoreInfo['total_links'] = -1;
            $msg = $spTextSA["The total number of links in page is greater than"]." ".SA_TOTAL_LINKS_MAX;
            $this->commentInfo['page_keywords'] = formatErrorMsg($msg, 'error', '');
        }
        
        // check google pagerank
        if ($reportInfo['pagerank'] >= SA_PR_CHECK_LEVEL_SECOND) {
            $scoreInfo['pagerank'] = $reportInfo['pagerank'] * 3;
            $msg = $spTextSA["The page is having exellent pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } else if ($reportInfo['pagerank'] >= SA_PR_CHECK_LEVEL_FIRST) {
            $scoreInfo['pagerank'] = $reportInfo['pagerank'] * 2;
            $msg = $spTextSA["The page is having very good pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } else if ($reportInfo['pagerank']) {
            $scoreInfo['pagerank'] = 1;
            $msg = $spTextSA["The page is having good pagerank"];
            $this->commentInfo['pagerank'] = formatSuccessMsg($msg);
        } else {
            $scoreInfo['pagerank'] = 0;
            $msg = $spTextSA["The page is having poor pagerank"];
            $this->commentInfo['pagerank'] = formatErrorMsg($msg, 'error', '');
        }
        
        // check page authority value
        if ($reportInfo['page_authority'] >= SA_PA_CHECK_LEVEL_SECOND) {
                $scoreInfo['page_authority'] = 6;
                $msg = $spTextSA["The page is having excellent page authority value"];
                $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } else if ($reportInfo['page_authority'] >= SA_PA_CHECK_LEVEL_FIRST) {
                $scoreInfo['page_authority'] = 3;
                $msg = $spTextSA["The page is having very good page authority value"];
                $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } else if ($reportInfo['page_authority']) {
                $scoreInfo['page_authority'] = 1;
                $msg = $spTextSA["The page is having good page authority value"];
                $this->commentInfo['page_authority'] = formatSuccessMsg($msg);
        } else {
                $scoreInfo['page_authority'] = 0;
                $msg = $spTextSA["The page is having poor page authority value"];
                $this->commentInfo['page_authority'] = formatErrorMsg($msg, 'error', '');
        }
        
        // check backlinks
        $seArr = array('google', 'bing');
        foreach ($seArr as $se) {
            $label = $se.'_backlinks';
            if ($reportInfo[$label] >= SA_BL_CHECK_LEVEL) {                
                $scoreInfo[$label] = 2;
                $msg = $spTextSA["The page is having exellent number of backlinks in"]." ".$se;
                $this->commentInfo[$label] = formatSuccessMsg($msg);
            } elseif($reportInfo[$label]) {
                $scoreInfo[$label] = 1;
                $msg = $spTextSA["The page is having good number of backlinks in"]." ".$se;
                $this->commentInfo[$label] = formatSuccessMsg($msg);                
            } else {
                $scoreInfo[$label] = 0;
                $msg = $spTextSA["The page is not having backlinks in"]." ".$se;
                $this->commentInfo[$label] = formatErrorMsg($msg, 'error', '');
            }     
        }
        
        // check whether indexed or not    
        foreach ($seArr as $se) {
            $label = $se.'_indexed';
            if($reportInfo[$label]) {
                $scoreInfo[$label] = 1;                
            } else {
                $scoreInfo[$label] = -1;
                $msg = $spTextSA["The page is not indexed in"]." ".$se;
                $this->commentInfo[$label] = formatErrorMsg($msg, 'error', '');
            }   
        }
        
        return $scoreInfo;
    }
    
    // function to find the score of a project
    function updateProjectPageScore($projectId) {        
        $sql = "select sum(score)/count(*) as avgscore from auditorreports where crawled=1 and project_id=$projectId";
        $listInfo = $this->db->select($sql, true);
                $score = empty($listInfo['avgscore']) ? 0 : $listInfo['avgscore'];
        
        $sql = "update auditorprojects set score=$score where id=$projectId";
        $this->db->query($sql); 
    }
    
    // function to get all links of a page
    function getAllLinksPage($reportId) {
        $sql = "select * from auditorpagelinks where report_id=$reportId";
        $linkList = $this->db->select($sql);
        return $linkList;        
    }
    
    // function to get duplicate meta contents info
    function getDuplicateMetaInfoCount($projectId, $col='page_title', $statusCheck=false, $statusVal=1) {
        $crawled = $statusCheck ? " and crawled=$statusVal" : "";
        $sql = "select $col,count(*) as count from auditorreports where project_id=$projectId and $col!='' $crawled group by $col having count>1";
        $list = $this->db->select($sql);
        $total = 0;
        foreach ($list as $info) {
            $total++;
        }
        return $total;
    }
    
    // function to get all report pages of a project
    function getAllreportPages($where='', $cols='*') {              
            $sql = "SELECT $cols FROM auditorreports where 1=1 $where";
                $list = $this->db->select($sql);
                return $list;
    }
    
}