<?php
	/*
		Ein Crawler, der eine Sitemap generiert

		Aktion: PHP Scripte für die armen dieser Welt
		Der Erlös geht für mein Pausenbrot drauf 
		
		Copyright (c) 2006 by Phillip 'Firebird' Berndt
	*/
?>
<h1>Sitemap Generator</h1>
<form method="post">
	<p>
		URL:
		<input type="text" name="url" />
		<input type="submit" />
	</p>
</form>
<hr/>
<ul>
<?php
	// Push output to the browser as it is produced, so the list grows while
	// the crawl is still running.
	ob_implicit_flush();

	// The visited-URL registry has to exist BEFORE the crawl starts:
	// crawlURL() reads and writes it through $GLOBALS['urlList'].
	// Initialising it after the call (as was done before) meant the first
	// count() inside crawlURL() received null.
	$urlList = array();

	// Only start a crawl when the form delivered a plain string; a request
	// like url[]=x would otherwise hand an array to crawlURL().
	if(isset($_POST['url']) && is_string($_POST['url']))
		crawlURL($_POST['url']);

	/**
	 * Recursively crawls a page and prints it, together with the pages it
	 * links to on the same host, as nested HTML list items.
	 *
	 * Output is written directly with echo. A page crawled for the first time
	 * is printed as <li id="aN"> followed by a nested <ul> of its links; a page
	 * that was already crawled is printed as a short link to that earlier
	 * entry. Visited pages are tracked in $GLOBALS['urlList'], which maps
	 * URL => array(sequence number, page title).
	 *
	 * Only http:// and https:// URLs are fetched. The start URL comes straight
	 * from user input, and file_get_contents() would otherwise also open local
	 * files and other stream wrappers.
	 *
	 * NOTE(review): requests to hosts on the internal network are still
	 * possible, and there is no limit on crawl depth or page count.
	 *
	 * @param string $url Absolute http(s) URL of the page to crawl.
	 * @return void
	 */
	function crawlURL($url)
	{
		// Create the registry on demand so the function does not depend on the
		// caller having initialised it first.
		if(!isset($GLOBALS['urlList']) || !is_array($GLOBALS['urlList']))
			$GLOBALS['urlList'] = array();

		// Validate before anything else: reject non-strings, URLs without a
		// host and every scheme other than http/https.
		$dirs = is_string($url) ? parse_url($url) : false;
		$scheme = (is_array($dirs) && isset($dirs['scheme'])) ? strtolower($dirs['scheme']) : '';
		if(!is_array($dirs) || !isset($dirs['host']) || ($scheme !== 'http' && $scheme !== 'https'))
		{
			echo('<li><em>Invalid URL:</em> '.htmlspecialchars(is_string($url) ? $url : '').'</li>');
			return;
		}

		// Already crawled: only print a reference to the existing entry.
		// The stored title comes from a remote page, so it must be escaped.
		if(isset($GLOBALS['urlList'][$url]))
		{
			echo('<li><a href="#a'.$GLOBALS['urlList'][$url][0].'">'.htmlspecialchars($GLOBALS['urlList'][$url][1]).'</a></li>');
			return;
		}

		// scheme://host[:port] - prefix for links that start with "/".
		$basedir = $scheme.'://'.$dirs['host'];
		if(isset($dirs['port']))
			$basedir .= ':'.$dirs['port'];

		// Path of the current page and the directory it lives in. $dir always
		// ends with "/" so that relative links can simply be appended.
		$path = isset($dirs['path']) ? $dirs['path'] : '/';
		$slash = strrpos($path, '/');
		$dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);

		$ctr = count($GLOBALS['urlList']);

		// An unreachable page is still listed, just without title and links.
		$data = file_get_contents($url);
		if($data === false)
			$data = '';

		if(preg_match('/<title[^>]*>(.+?)<\/title>/si', $data, $title))
			$title = trim(strip_tags($title[1]));
		else
			$title = basename($url);

		// Register before recursing so that links back to this page are
		// recognised as already visited.
		$GLOBALS['urlList'][$url] = array($ctr, $title);

		echo('<li id="a'.$ctr.'"><a href="'.htmlspecialchars($url).'">'.htmlspecialchars($title).'</a>');
		echo('<ul>');
		// href does not have to be the first attribute of the <a> tag.
		if(preg_match_all('/<a\s[^>]*?href\s*=\s*(?:"|\')([^"\']+)/si', $data, $urls))
		{
			foreach($urls[1] as $suburl)
			{
				// Attribute values are HTML-encoded in the page source.
				$suburl = trim(html_entity_decode($suburl));

				// A fragment only addresses a position inside a page.
				$hash = strpos($suburl, '#');
				if($hash !== false)
					$suburl = substr($suburl, 0, $hash);
				if($suburl === '')
					continue;

				// Protocol-relative link: inherit the scheme of this page.
				if(substr($suburl, 0, 2) === '//')
					$suburl = $scheme.':'.$suburl;

				if(preg_match('/^[a-z][a-z0-9+.\-]*:/i', $suburl))
				{
					// Absolute link.
					$target = parse_url($suburl);
					$targetScheme = (is_array($target) && isset($target['scheme'])) ? strtolower($target['scheme']) : '';

					// mailto:, javascript:, ftp: ... are neither crawlable nor
					// safe to print as a clickable link.
					if($targetScheme !== 'http' && $targetScheme !== 'https')
						continue;

					// A different host is external; the same host is crawled
					// like any other internal page.
					if(!isset($target['host']) || strcasecmp($target['host'], $dirs['host']) != 0)
					{
						echo('<li><em>Extern link:</em> <a href="'.htmlspecialchars($suburl).'">'.htmlspecialchars($suburl).'</a></li>');
						continue;
					}
				}
				else
				{
					// Relative link: build path (+ query) relative to this page.
					if($suburl[0] == '/')
						$rel = $suburl;
					elseif($suburl[0] == '?')
						$rel = $path.$suburl;
					else
						$rel = $dir.$suburl;

					// Collapse "." and ".." in the path part only; otherwise a
					// link like "../a/x" yields an ever-growing, never-repeating URL.
					$query = '';
					$mark = strpos($rel, '?');
					if($mark !== false)
					{
						$query = substr($rel, $mark);
						$rel = substr($rel, 0, $mark);
					}
					$suburl = $basedir.crawlNormalizePath($rel).$query;
				}

				crawlURL($suburl);
			}
		}
		echo('</ul></li>');
	}

	/**
	 * Helper for crawlURL(): removes "." and ".." segments from an absolute
	 * URL path, e.g. "/a/../b/./c.html" becomes "/b/c.html".
	 *
	 * @param string $path URL path starting with "/".
	 * @return string The path without dot segments.
	 */
	function crawlNormalizePath($path)
	{
		$segments = explode('/', $path);
		$last = count($segments) - 1;
		$clean = array();
		foreach($segments as $i => $segment)
		{
			if($segment === '.' || $segment === '..')
			{
				// Never pop the empty leading segment that stands for the root.
				if($segment === '..' && count($clean) > 1)
					array_pop($clean);
				// A trailing dot segment names a directory: keep the final "/".
				if($i === $last)
					$clean[] = '';
			}
			else
				$clean[] = $segment;
		}
		return implode('/', $clean);
	}
?>
</ul>
