<?php

//	Simple Map - XML, text or HTML Site Map  Generator
//	A free open source Scriptol program,  by (c) Denis G. Sureau
//	http://www.scriptol.com/
include_once("path.php");
include_once("libphp.php");
include_once("tools.php");
include_once("options.php");
include_once("url.php");
include_once("page.php");
// Global state shared by the functions of this file.
$siteList=array();      // URLs retained for the site map (filled by addLink)
$siteTime=array();      // URL => formatted last modification date (filled by addLink)
$checkedList=array();   // pages recorded by addFileList
$skipped=array();       // not referenced in this file; presumably used by an included script - TODO confirm
$defaultList=array();   // not referenced in this file; presumably used by an included script - TODO confirm
$pageText="";           // content of the page being parsed (set by getLinks, read by extractLink)
$siteMap=array();       // lines of the generated map, appended by output()
$counter=0;             // number of regular files met by scanLocal
// Append one line of text to the global $siteMap buffer.
//   $t   the line to store (written to the map file later by SimpleMap)
// Returns nothing.
function output($t)
{
   global $siteMap;
   $siteMap[] = $t;
}

// Get the value of an attribute, thus the URL, from the global $pageText.
//   $off   offset in $pageText just after the attribute name (e.g. after "href")
// The '=' sign and blanks following the name are skipped, as well as the
// opening quote or double-quote if any. Characters are then collected up to
// the next quote, double-quote or '>'.
// Returns the trimmed value, or "" when nothing can be read at this offset.
function extractLink($off)
{
   global $pageText;
   $length=strlen($pageText);

   // Move to the first character of the value.
   while($off<$length)
   {
      $c=$pageText[$off];
      if($c==="\""||$c==='\'')
      {
         // Opening delimiter: the value starts just after it.
         $off+=1;
         break;
      }
      if($c!=='='&&$c!==' ')
      {
         // Unquoted value: it starts right here.
         break;
      }
      $off+=1;
   }

   // Collect the value up to the closing delimiter or the end of the tag.
   $link="";
   while($off<$length)
   {
      $c=$pageText[$off];
      if($c==='\''||$c==="\""||$c==='>')
      {
         break;
      }
      $link.=$c;
      $off+=1;
   }
   return trim($link);
}

// Tell whether a page is already present in the global $checkedList.
//   $page   URL of the page; when it has no protocol it is first converted
//           with createLinkFromRelative()
// Returns true if the resulting URL is in the list, false otherwise.
function inCheckedList($page)
{
   global $checkedList;
   $target = hasProtocol($page) ? $page : createLinkFromRelative($page);
   return in_array($target, $checkedList);
}

// Tell whether a page is already present in the global $siteList.
//   $page   URL of the page; when it has no protocol it is first converted
//           with createLinkFromRelative()
// Returns true if the resulting URL is in the list, false otherwise.
function inSiteMap($page)
{
   global $siteList;
   $target = hasProtocol($page) ? $page : createLinkFromRelative($page);
   return in_array($target, $siteList);
}

// Record a page in the global $checkedList, unless it is already there.
//   $page   URL of the page; when it has no protocol it is first converted
//           with createLinkFromRelative()
// Returns nothing.
function addFileList($page)
{
   global $checkedList;
   $target = hasProtocol($page) ? $page : createLinkFromRelative($page);
   if(!in_array($target, $checkedList))
   {
      $checkedList[] = $target;
   }
}

// Add a page to the global $siteList.
//   $page   URL or path of the page, passed through setURL(); when the result
//           has no protocol it is merged with the global $siteURL
//   $lmod   last modification time, converted to an integer and formatted with
//           the global $dateFormat when the $LASTMOD option is set
// Returns false if the URL was already in the list, true once it is added.
function addLink($page,$lmod)
{
   global $siteURL, $siteList, $siteTime, $dateFormat, $LASTMOD, $VERBOSE;

   $url = setURL($page);
   if(!hasProtocol($url))
   {
      $url = Path::merge($siteURL, $url);
   }
   if(in_array($url, $siteList))
   {
      return false;
   }

   $siteList[] = $url;
   if($LASTMOD)
   {
      $siteTime[$url] = date($dateFormat, intval($lmod));
   }
   if($VERBOSE)
   {
      display("Added link $url");
   }
   return true;
}

// Collect the links of a page that point to pages of the site.
//   $page   path of the file to read
// The content of the file is stored into the global $pageText (each line
// right-trimmed, lines glued together), then every "href" and "frame src"
// occurrence is visited and its value read with extractLink().
// A link is kept when:
//   - it is not empty and is not an anchor (#...),
//   - it is not already in $siteList nor in the result,
//   - it has a valid extension or is a directory,
//   - it does not hold "../",
//   - if absolute, getURL() gives back the global $siteURL,
//   - if relative, the file (or the default page of the directory) exists.
// Returns the array of links kept, empty when the file can't be read.
function getLinks($page)
{
   global $DEBUG,$VERBOSE,$WINDOWS,$pageText,$siteList,$siteURL;

   $lines=file($page);
   if($DEBUG)
   {
      echo "Getting links from ", " ", getcwd(), " ", $page, "\n";
   }
   $pageLinks=array();
   if($lines===false)
   {
      // Unreadable page: nothing to parse.
      $pageText="";
      return $pageLinks;
   }
   $pageText=implode("",array_map('rtrim',$lines));

   $shifting=0;
   while(true)
   {
      // strpos() returns false, not -1, when nothing is found.
      $offset=strpos($pageText,"href",$shifting);
      $srcoff=strpos($pageText,"frame src",$shifting);
      if($offset===false&&$srcoff===false)
      {
         break;
      }
      // Process the nearest occurrence, and restart after the attribute name
      // (4 characters for "href", 9 for "frame src").
      if($srcoff===false||($offset!==false&&$offset<$srcoff))
      {
         $shifting=$offset+4;
      }
      else
      {
         $shifting=$srcoff+9;
      }

      $link=extractLink($shifting);
      $shifting+=strlen($link);

      if($link===""||$link[0]==='#')
      {
         continue;
      }
      if(in_array($link,$siteList)||in_array($link,$pageLinks))
      {
         continue;
      }
      if(!validExtension($link)&&!isDirectory($link))
      {
         continue;
      }
      if(strpos($link,"../")!==false)
      {
         display(" ../ such path is not valid, use absolute URL instead (in ".$link.").");
         continue;
      }
      if(hasProtocol($link))
      {
         // Absolute URL: kept only when it belongs to the site.
         if(getURL($link)===$siteURL)
         {
            array_push($pageLinks,$link);
         }
         continue;
      }

      // Relative link: the target must exist locally. The link is stored as
      // written in the page, not as the file name used for the check.
      $realLink=$link;
      if(isDirectory($link))
      {
         $link=findDefault($link);
      }
      if($WINDOWS)
      {
         $link=setWindows($link);
      }
      if(file_exists($link))
      {
         array_push($pageLinks,$realLink);
      }
      else
      {
         if($DEBUG||$VERBOSE)
         {
            display("Broken link: $link not in ".getcwd());
         }
      }
   }
   return $pageLinks;
}

// Output an XML element on one line, indented by five spaces.
//   $tagname   name of the element
//   $value     content of the element; nothing is output when it is
//              exactly the empty string
// Returns nothing.
function buildTag($tagname,$value)
{
   if($value !== "")
   {
      output("     <{$tagname}>{$value}</{$tagname}>");
   }
}

// From the list of pages in the global $siteList, build the lines of the
// XML site map (stored through output()).
// For each URL, a <url> element is made with <loc> and, depending on the
// $PRIORITY, $LASTMOD / $LASTMODSHORT and $FREQUENCY options, the <priority>,
// <lastmod> and <changefreq> elements.
// NOTE(review): the value of <loc> is not XML-escaped here, unless
// textToUTF8() does it - to be confirmed.
function buildTheXmlFile()
{
   global $siteList,$siteTime;
   global $PRIORITY,$DEFAULT_PRIORITY;
   global $LASTMOD,$LASTMODSHORT;
   global $FREQUENCY,$DEFAULT_FREQUENCY;

   output("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
   output("<urlset xmlns=\"http://www.google.com/schemas/sitemap/0.84\" ");
   output("xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ");
   output("xsi:schemaLocation=\"http://www.google.com/schemas/sitemap/0.84 ");
   output("http://www.google.com/schemas/sitemap/0.84/sitemap.xsd\">");

   foreach($siteList as $name)
   {
      output("  <url>");
      buildTag("loc",textToUTF8($name));
      if($PRIORITY===true)
      {
         buildTag("priority",$DEFAULT_PRIORITY);
      }
      if(($LASTMOD===true)||($LASTMODSHORT===true))
      {
         // A date is recorded by addLink() only when $LASTMOD is set, so the
         // entry may be missing: no tag in that case.
         $lmod=isset($siteTime[$name])?$siteTime[$name]:"";
         if($lmod!="")
         {
            buildTag("lastmod",$lmod);
         }
      }
      if($FREQUENCY===true)
      {
         buildTag("changefreq",$DEFAULT_FREQUENCY);
      }
      output("  </url>");
   }
   output("</urlset>");
   return;
}

// Length of the site URL, assigned by buildTheHtmlTree()
$levelOffset=0;
// Create a list item holding an anchor, content UTF-8 compatible.
//   $link   URL of the page; it is converted with textToUTF8(), and the
//           second part returned by Path::splitFile() is used as the label
// The URL is put between double-quotes and both the URL and the label are
// HTML-escaped, so that blanks, '&', quotes or '>' in a file name do not
// break the markup. Existing entities are not encoded twice.
// Returns nothing.
function buildLink($link)
{
   $link=textToUTF8($link);
   $parts=Path::splitFile($link);
   reset($parts);
   $name=next($parts);

   $flags=ENT_QUOTES|ENT_SUBSTITUTE;
   $href=htmlspecialchars($link,$flags,"UTF-8",false);
   $label=htmlspecialchars((string)$name,$flags,"UTF-8",false);

   output("<li>");
   output("<a href=\"".$href."\">".$label."</a>");
   output("</li>");
   return;
}

// Output the opening of a sub-directory section: a line break, the name of
// the directory as a level 4 heading, and the start of an unordered list.
//   $dirname   name displayed in the heading
// Returns nothing.
function openSubdir($dirname)
{
   $lines = array("<br>", "<h4>", $dirname, "</h4>", "<ul>");
   foreach($lines as $line)
   {
      output($line);
   }
}

// Output the end of the unordered list started by openSubdir().
// Returns nothing.
function closeSubdir()
{
   $closing = "</ul>";
   output($closing);
}

// Remove a leading base from a path.
//   $linkpath   the full path or URL
//   $base       the base to remove, with or without a trailing "/"
// The base is removed, along with the "/" that separates it from the rest,
// only when it is followed by that separator: "a/foo" is not considered as
// a base of "a/foobar/x".
// Returns the remainder of the path, or $linkpath unchanged when it does not
// start with the base.
function removeBase($linkpath,$base)
{
   $prefix=$base;
   if(substr($base,-1)!=="/")
   {
      $prefix.="/";
   }
   $l=strlen($prefix);
   if($l<strlen($linkpath)&&strncmp($linkpath,$prefix,$l)===0)
   {
      return substr($linkpath,$l);
   }
   return $linkpath;
}

// Test if the local part of the URL holds a directory.
//   $name   the local part to test
// Returns true when $name is at least two characters long and holds a "/"
// at any position other than the first one, false otherwise.
function hasDir($name)
{
   return strlen($name) >= 2 && strpos($name, "/", 1) !== false;
}

// Get the first directory of a path.
//   $name   the path
// Returns the part of $name before the first "/", or "" when $name is empty,
// holds no "/" or starts with it.
function getMainDir($name)
{
   if($name === "")
   {
      return "";
   }
   $slash = strpos($name, "/");
   $found = ($slash !== false && $slash >= 1);
   return $found ? substr($name, 0, $slash) : "";
}

// Output the pages located under a base URL, then recursively the pages of
// each sub-directory.
//   $base   URL of the directory to process, with or without a trailing "/"
// First pass: each page of the global $siteList located directly in $base is
// output with buildLink() and its entry is replaced by "" to mark it as done.
// Second pass: for each sub-directory still holding pages, a section is
// opened, the sub-directory is processed, and the section is closed.
// Only pages whose URL starts with $base followed by "/" are considered:
// pages of other directories are left to the calls made for these
// directories.
// Returns nothing.
function processDirs($base)
{
   global $siteList;
   $prefix=rtrim($base,"/")."/";
   $plen=strlen($prefix);
   $total=count($siteList);

   // get all the files of this directory, display them
   $empty=true;
   for($i=0;$i<$total;$i++)
   {
      $page=$siteList[$i];
      if($page===""||strncmp($page,$prefix,$plen)!==0)
      {
         continue;
      }
      $name=(string)substr($page,$plen);
      if(!Path::hasDir($name))
      {
         buildLink($page);
         $siteList[$i]="";
      }
      else
      {
         $empty=false;
      }
   }
   if($empty===true)
   {
      return;
   }

   // remaining pages are in sub-directories
   $currdir="";
   for($i=0;$i<$total;$i++)
   {
      $page=$siteList[$i];
      if($page===""||strncmp($page,$prefix,$plen)!==0)
      {
         continue;
      }
      $thisdir=getMainDir((string)substr($page,$plen));
      // An empty name would make the call below process $base again.
      if($thisdir!==""&&$thisdir!==$currdir)
      {
         openSubdir($thisdir);
         processDirs($prefix.$thisdir);
         closeSubdir();
         $currdir=$thisdir;
      }
   }
   return;
}

// Scan the list of links.
// Output the site URL as a level 1 heading, then make an entry for each page
// of the global $siteList whose local part, given by splitURL(), holds no
// directory. The entry of such a page is replaced by "" in the list.
// Pages in sub-directories are then handled by processDirs().
// Nothing but the heading is output when the list is empty.
// Returns nothing.
function processRoot()
{
   global $siteURL,$siteList;
   output("<br>");
   output("<h1>");
   output($siteURL);
   output("</h1>");

   // Iterating on the keys allows to update the entries inside the loop.
   foreach(array_keys($siteList) as $key)
   {
      $link=$siteList[$key];
      $parts=splitURL($link);
      reset($parts);
      $name=next($parts);
      if(!Path::hasDir($name))
      {
         buildLink($link);
         $siteList[$key]="";
      }
   }
   processDirs($siteURL);
   return;
}

// From the list of pages, build the lines of an HTML page (stored through
// output()): the opening tags, the tree made by processRoot(), and the
// closing tags.
// The global $levelOffset is set to the length of the global $siteURL before
// the tree is made.
// Returns nothing.
function buildTheHtmlTree()
{
   global $levelOffset, $siteURL;

   foreach(array("<html>", "<head>", "</head>", "<body>") as $opening)
   {
      output($opening);
   }

   $levelOffset = strlen($siteURL);
   processRoot();

   foreach(array("</body>", "</html>") as $closing)
   {
      output($closing);
   }
}

// Check if the meta tag robots allows to index the page.
//   $fullpath   path of the page, read with get_meta_tags()
// Returns false when the content of the robots tag holds "noindex" or
// "none" (case-insensitive, at any position including the first one).
// Returns true otherwise, also when the tag is absent or when the meta tags
// can't be read.
function isIndexable($fullpath)
{
   $tags=get_meta_tags($fullpath);
   if(!is_array($tags)||!array_key_exists('robots',$tags))
   {
      return true;
   }
   $robots=$tags['robots'];
   // stripos() returns 0 for a match at the start: compare strictly to false.
   if(stripos($robots,"noindex")!==false)
   {
      return false;
   }
   if(stripos($robots,"none")!==false)
   {
      return false;
   }
   return true;
}

// Scan repository, build the list of files to add to the site map.
//   $locpath   local path of the directory to scan
//   $locdir    path of the same directory relative to the site
// Files of the directory are processed first, unless "$locdir/*" (as merged
// by Path::merge) is in the global $excludedDirs: each regular file is
// counted in the global $counter, and added with addLink() when it has a
// valid extension, its size reaches $MINFILESIZE and it is indexable.
// Entries starting with a dot and entries listed in $exclusions are ignored.
// Sub-directories are then scanned recursively, except those whose path
// relative to the global $localRoot is in $excludedDirs.
// Returns nothing.
function scanLocal($locpath,$locdir)
{
   global $excludedDirs,$exclusions,$siteURL,$localRoot;
   global $VERBOSE,$MINFILESIZE,$counter;

   $content=scandir($locpath);
   if(empty($content))
   {
      return;
   }

   if(in_array(Path::merge($locdir,"*"),$excludedDirs))
   {
      display($locdir." content skipped.");
   }
   else
   {
      foreach($content as $name)
      {
         if($name[0]===".")
         {
            if($VERBOSE)
            {
               display("$locdir/$name skipped");
            }
            continue;
         }
         if(in_array($name,$exclusions))
         {
            continue;
         }
         $src=Path::merge($locpath,$name);
         $url=Path::merge($siteURL,Path::merge($locdir,$name));
         if($VERBOSE)
         {
            display("Processing $src to $url");
         }
         if(filetype($src)!=="file")
         {
            continue;
         }
         $counter+=1;
         if(!validExtension($src))
         {
            continue;
         }
         if(filesize($src)<$MINFILESIZE)
         {
            continue;
         }
         if(!isIndexable($src))
         {
            continue;
         }
         addLink($url,filemtime($src));
      }
   }

   // Length of the root, to get the path relative to the repository.
   $sulen=strlen($localRoot);
   foreach($content as $name)
   {
      if($name[0]==='.')
      {
         continue;
      }
      $src=Path::merge($locpath,$name);
      if(in_array(substr($src,$sulen),$excludedDirs))
      {
         display($src." skipped.");
         continue;
      }
      if(filetype($src)==="dir")
      {
         scanLocal($src,Path::merge($locdir,$name));
      }
   }
   return;
}

// Main Simple Map function.
//   $num    number of arguments, passed to options()
//   $args   the arguments, passed to options()
// Steps: read the options, scan the local repository from the global
// $localRoot, sort the list of URLs, compute the local path of the map
// ($localMap) and its URL ($mapname), then write the map according to the
// global $mapType:
//   0   XML map made by buildTheXmlFile()
//   1   plain list of the URLs, one per line
//   2   HTML tree made by buildTheHtmlTree()
// For any type other than 1, the lines stored by output() are written to
// the map file. The log is also saved to $logname when $GRAPHICAL is set.
// Returns 0, or 1 when the map file could not be written.
function SimpleMap($num,$args)
{
   options($num,$args);

   global $version,$siteURL,$localRoot,$WINDOWS,$VERBOSE,$GRAPHICAL;
   global $siteList,$siteMap,$mapname,$localMap,$mapType;
   global $logfile,$logname,$counter;

   $status=0;
   echo "\n";
   echo $version, "\n";
   display("Web adress: ".$siteURL);
   display("Local repository: ".$localRoot);
   if(strstr($localRoot,"\\"))
   {
      $WINDOWS=true;
   }
   scanLocal($localRoot,"/");
   display("");
   sort($siteList);

   if(!hasProtocol($mapname))
   {
      $localMap=Path::merge($localRoot,$mapname);
      $mapname=Path::merge($siteURL,$mapname);
   }
   else
   {
      $localMap=str_replace($siteURL,$localRoot,$mapname);
   }
   if($VERBOSE===true)
   {
      display("Creating ".$localMap);
   }

   if($mapType===0)
   {
      buildTheXmlFile();
   }
   elseif($mapType===1)
   {
      if(file_put_contents($localMap,implode("\n",$siteList))===false)
      {
         display("Error: unable to write ".$localMap);
         $status=1;
      }
   }
   elseif($mapType===2)
   {
      buildTheHtmlTree();
   }

   if($mapType!=1)
   {
      $f=fopen($localMap,"w");
      if($f===false)
      {
         // Without this test, fwrite() would be called on an invalid handle.
         display("Error: unable to write ".$localMap);
         $status=1;
      }
      else
      {
         foreach($siteMap as $line)
         {
            fwrite($f,$line."\n");
         }
         fclose($f);
      }
   }
   if($GRAPHICAL)
   {
      file_put_contents($logname,implode("\n",$logfile));
   }
   display(strval($counter)." files found.");
   display($mapname." will have  ".strval(count($siteList))." links.");
   return $status;
}


?>
