Blame | Last modification | View Log | RSS feed
<?php
/***************************************************************************
* Copyright (C) 2009-2011 by Geo Varghese(www.seopanel.in) *
* sendtogeo@gmail.com *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
# class defines all sitemap controller functions
# Controller used to build sitemap files for a site auditor project.
# It collects the crawled page urls of a project, writes them to one or more
# files (xml, txt or html) inside SP_TMPPATH/<sitemapDir> and prints download
# links for the generated files.
class SitemapController extends Controller{

    var $smLimit = 50000;           # max number of urls written to a single xml sitemap file
    var $baseUrl;                   # base url of page
    var $smType = 'xml';            # the type of sitemap file should be created
    var $urlList;                   # the list of urls crawled from a site (url => array('visit' => 0|1))
    var $hostName;                  # hostname of the site
    var $spider;                    # spider object
    var $sleep = 0;                 # sleep b/w the page crawl in seconds
    var $excludeUrl = "";           # url to be excluded
    var $changefreq = "always";     # page modification frequency
    var $priority = 0.5;            # priority of a page
    var $lastmod;                   # page last modification date
    var $smheader;                  # sitemap header
    var $smfooter;                  # sitemap footer
    var $smfile = "";               # sitemap file
    var $section = "";              # sitemap website
    var $sitemapDir = "";           # sitemap directory where sitemap is created
    var $loc = "";                  # url currently being written to the sitemap
    var $supportedTypes = array('xml', 'txt', 'html');  # sitemap types having a <type>SitemapFile() writer

    # func to show sitemap generator interface
    # Lists the projects having at least one crawled link and renders the
    # 'sitemap/showsitemap' view with them.
    function showSitemapGenerator() {
        $userId = isLoggedIn();
        $saCtrler = $this->createController('SiteAuditor');

        # the user id becomes part of a query condition, so force it to be an integer
        $where = isAdmin() ? "" : " and w.user_id=".intval($userId);
        $pList = $saCtrler->getAllProjects($where);

        # keep only the projects which already have crawled links
        $projectList = array();
        foreach($pList as $pInfo) {
            $pInfo['total_links'] = $saCtrler->getCountcrawledLinks($pInfo['id']);
            if ($pInfo['total_links'] > 0) {
                $projectList[] = $pInfo;
            }
        }

        if(empty($projectList)) {
            $spTextSA = $this->getLanguageTexts('siteauditor', $_SESSION['lang_code']);
            showErrorMsg($spTextSA['No active projects found'].'!');
        }

        $this->set('projectList', $projectList);
        $this->render('sitemap/showsitemap');
    }

    # func to generate sitemap
    # $sitemapInfo keys read here: project_id, sm_type, exclude_url, freq, priority
    function generateSitemapFile($sitemapInfo){
        $projectId = isset($sitemapInfo['project_id']) ? intval($sitemapInfo['project_id']) : 0;
        if (empty($projectId)) {
            hideDiv('message');
            showErrorMsg("No Website Found!");
            return;
        }

        # check whether the sitemap directory is writable
        $sitemapPath = SP_TMPPATH ."/".$this->sitemapDir;
        if (!is_writable($sitemapPath)) {
            hideDiv('message');
            showErrorMsg("Directory '<b>".$sitemapPath."</b>' is not <b>writable</b>. Please change its <b>permission</b> !");
            # do not try to write files when showErrorMsg() returns control
            return;
        }

        $saCtrler = $this->createController('SiteAuditor');
        $projectInfo = $saCtrler->__getProjectInfo($projectId);
        if (empty($projectInfo)) {
            # unknown project id, nothing to build a sitemap from
            hideDiv('message');
            showErrorMsg("No Website Found!");
            return;
        }

        $this->section = formatFileName($projectInfo['name']);
        $this->excludeUrl = isset($sitemapInfo['exclude_url']) ? $sitemapInfo['exclude_url'] : "";
        # an empty type keeps the current one, createSitemap() validates the value
        if(!empty($sitemapInfo['sm_type'])) $this->smType = $sitemapInfo['sm_type'];
        if(!empty($sitemapInfo['freq'])) $this->changefreq = $sitemapInfo['freq'];
        if(!empty($sitemapInfo['priority'])) $this->priority = $sitemapInfo['priority'];

        $auditorComp = $this->createComponent('AuditorComponent');
        $pageList = $auditorComp->getAllreportPages(" and project_id=".$projectId);
        $excludePattern = trim($this->excludeUrl);

        # collect the report pages which are not excluded
        $urlList = array();
        foreach ($pageList as $pageInfo) {
            $pageUrl = Spider::addTrailingSlash($pageInfo['page_url']);
            if ($auditorComp->isExcludeLink($pageUrl, $excludePattern)) continue;
            $urlList[] = $pageUrl;
        }

        $this->createSitemap($this->smType, $urlList);
    }

    # Create new sitemap files and show the links to download them
    # $smType  - sitemap type (xml, txt or html), empty to use the current type
    # $urlList - array of urls to be written to the sitemap
    function createSitemap($smType="", $urlList="") {
        if(!empty($smType)){
            $this->smType = $smType;
        }

        # the type selects the writer method and the file extension, so only
        # the known types are accepted
        if (!in_array($this->smType, $this->supportedTypes, true)) {
            hideDiv('message');
            showErrorMsg("Invalid sitemap type!");
            return;
        }

        # the default value of $urlList is a string, count() and foreach need an array
        if (!is_array($urlList)) {
            $urlList = array();
        }

        print("<p class=\"note noteleft\">".$_SESSION['text']['common']['Found']." <a>".count($urlList)."</a> Sitemap Urls</p>");
        $function = $this->smType ."SitemapFile";
        $this->deleteSitemapFiles();
        $this->$function($urlList);
        $this->showSitemapFiles();
    }

    # func to get a sitemap urls of a site
    # Resets the url list and crawls recursively starting from the base url.
    function getSitemapUrls(){
        $this->urlList = array();
        $this->crawlSitemapUrls($this->baseUrl, true);
    }

    # func to crawl sitemap urls
    # $baseUrl   - the page to be crawled
    # $recursive - true to crawl each newly found url also
    function crawlSitemapUrls($baseUrl, $recursive=false){

        # skip the page if it is already crawled
        if (!empty($this->urlList[$baseUrl]['visit'])) return;
        $this->urlList[$baseUrl]['visit'] = 1;

        $urlList = $this->spider->getUniqueUrls($baseUrl);
        if (empty($urlList) || !is_array($urlList)) return;

        $hostName = $this->hostName;
        foreach($urlList as $href){

            # archives, images and audio files are not added to the sitemap
            if(preg_match('/\.zip$|\.gz$|\.tar$|\.png$|\.jpg$|\.jpeg$|\.gif$|\.mp3$/i', $href)) continue;

            # parse_url() gives false for a malformed url and no 'host' key for a relative url
            $urlInfo = parse_url($href);
            $urlHostName = (is_array($urlInfo) && isset($urlInfo['host'])) ? str_replace('www.', '', $urlInfo['host']) : '';
            if(empty($urlHostName)){
                $href = $this->baseUrl.$href;
            }else{
                # links to other hosts are ignored
                if($urlHostName != $hostName){
                    continue;
                }
            }

            $href = $this->spider->formatUrl($href);
            # NOTE(review): only http:// prefixes are rewritten to the base url here, confirm whether https is needed
            $href = preg_replace('/http:\/\/.*?\//i', $this->baseUrl, $href);
            if(!empty( $this->excludeUrl) && stristr($href, $this->excludeUrl)) continue;

            # add the url if it is not known with or without trailing slash
            if(!isset($this->urlList[$href]['visit']) && !isset($this->urlList[$href.'/']['visit'])){
                $this->urlList[$href]['visit'] = 0;
                if($recursive){
                    sleep($this->sleep);
                    $this->crawlSitemapUrls($href,true);
                }
            }
        }
    }

    # create text sitemap file, one url per line
    function txtSitemapFile($urlList) {
        $this->smheader = '';
        $this->smfooter = '';
        $smxml = "";
        foreach($urlList as $url){
            $this->loc = $url;
            $smxml .= $this->loc ."\n";
        }
        $this->smfile = $this->section ."_sitemap1.".$this->smType;
        $this->createSitemapFile($smxml);
    }

    # create Html sitemap file, one link per url
    function htmlSitemapFile($urlList) {
        $this->smheader = '';
        $this->smfooter = '';
        $smxml = "";
        foreach($urlList as $url){
            $this->loc = $url;
            # urls come from crawled pages, escape them for the attribute and the link text
            $safeUrl = $this->escapeMarkup($this->loc);
            $smxml .= "<a href='$safeUrl'>$safeUrl</a><br>";
        }
        $this->smfile = $this->section ."_sitemap1.".$this->smType;
        $this->createSitemapFile($smxml);
    }

    # create xml sitemap file
    # A new file <section>_sitemap<index>.<type> is started after every $smLimit urls.
    function xmlSitemapFile($urlList) {
        $this->lastmod = date("Y-m-d");
        $this->smheader = '<?xml version="1.0" encoding="UTF-8"?>' . "\n"
            . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' . "\n"
            . 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n"
            . 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9' . "\n"
            . 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"><!-- created with Seo Panel:www.seopanel.in -->';
        $this->smfooter = '</urlset>';

        $index = 1;
        $rowcount = 0;
        $smxml = "";
        foreach($urlList as $url){
            $this->loc = $url;
            $smxml .= $this->createUrlXmlText();
            $rowcount++;

            # create sitemap file when tot url count equal max count
            if ($rowcount == $this->smLimit) {
                $this->smfile = $this->section ."_sitemap". $index . ".".$this->smType;
                $this->createSitemapFile($smxml);
                $rowcount = 0;
                $smxml = "";
                $index++;
            }
        }

        # to create sitemap file with rest of urls
        if(!empty($smxml)){
            $this->smfile = $this->section ."_sitemap". $index . ".".$this->smType;
            $this->createSitemapFile($smxml);
        }
    }

    # func to get the names of the sitemap files of the current section and type
    # The section and the type are quoted and the pattern is anchored, so only
    # files named exactly <section>_sitemap<number>.<type> are returned.
    function getSitemapFileList() {
        $fileList = array();
        $dirPath = SP_TMPPATH ."/".$this->sitemapDir;
        if (!is_dir($dirPath)) return $fileList;

        $pattern = '/^'.preg_quote($this->section, '/').'_sitemap\d+\.'.preg_quote($this->smType, '/').'$/';
        if ($handle = opendir($dirPath)) {
            while (false !== ($file = readdir($handle))) {
                if (preg_match($pattern, $file)) {
                    $fileList[] = $file;
                }
            }
            closedir($handle);
        }
        return $fileList;
    }

    # func to show the download links of the sitemap files of the current section
    # spTextSitemap is not assigned anywhere in this class, it is expected to be
    # provided by the caller before this function is used.
    function showSitemapFiles(){
        foreach ($this->getSitemapFileList() as $file) {
            echo "<p class=\"note noteleft\">\n"
                .$this->spTextSitemap['Download sitemap file from'].":\n"
                ."<a href='".SP_WEBPATH."/download.php?filesec=sitemap&filetype=$this->smType&file=".urlencode($file)."' target='_blank'>$file</a>\n"
                ."</p>";
        }
    }

    # func to delete the existing sitemap files of the current section and type
    function deleteSitemapFiles(){
        # the names are collected first, so files are not removed while the directory is being read
        foreach ($this->getSitemapFileList() as $file) {
            unlink(SP_TMPPATH ."/".$this->sitemapDir."/$file");
        }
    }

    # create url xml text for the current url ($this->loc)
    function createUrlXmlText() {
        # a "]]>" inside the url would close the CDATA section, so split it over two sections
        $loc = str_replace(']]>', ']]]]><![CDATA[>', $this->loc);
        $xmltext = "\n<url>\n"
            . '<loc><![CDATA['.$loc.']]></loc>' . "\n"
            . '<lastmod>'.$this->lastmod.'</lastmod>' . "\n"
            . '<changefreq>'.$this->escapeMarkup($this->changefreq).'</changefreq>' . "\n"
            . '<priority>'.$this->escapeMarkup($this->priority).'</priority>' . "\n"
            . "</url>\n";
        return $xmltext;
    }

    # create sitemap file
    # Writes header + $smxml + footer to SP_TMPPATH/<sitemapDir>/<smfile>.
    # Returns true when the file is written, false when it can not be opened.
    function createSitemapFile($smxml) {
        $fp = fopen(SP_TMPPATH ."/".$this->sitemapDir."/" .$this->smfile, 'w');
        if ($fp === false) {
            showErrorMsg("Unable to create sitemap file '<b>".$this->smfile."</b>' !");
            return false;
        }
        $smxml = $this->smheader . $smxml . $this->smfooter;
        fwrite($fp, $smxml);
        fclose($fp);
        return true;
    }

    # function to create encoded url for sitemap
    # The xml special chars are converted to entities and spaces to underscores.
    function getEncodedUrl($url){
        # convert url to entity encoded, '&' is replaced first so that the
        # entities added by the later replacements are not encoded again
        $url = str_replace(array('&',"'",'"','>','<'," "), array('&amp;','&apos;','&quot;','&gt;','&lt;','_'), $url);
        return $url;
    }

    # func to escape a value before it is written to html or xml markup
    function escapeMarkup($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }
}
?>