#!/usr/bin/perl
#######################################################
#
# doc 0.2 by Thomas Linden
# doc is a small perl script to create html-pages
# from plain text files
# 
# before beginning it searches for a config file.
# first /etc/doc.conf and then ~/.doc
# if nothing found, default values will be used.
# the following values are possible:
# $TITLE		title of the document
# $HEAD			html header
# $FOOT			html footer
# $AUTOR		the author
# $CRLF "yes"| "no"	translate CR/LF and TABS
# in the config file you can use perl code too
#
# additionally, you can use the following syntax in
# your source file to override the above options: 
# .title.the title of the page
# .head.the header of the page
# .foot.the footer of the page
# .autor.the author of the page
# .crlf.no|yes
# 
# TOC: doc can create anchors for every <H4> line (see below),
# if you make use of the following syntax at the start of the document:
# .0.top
# .1.introduction
# .2.description
# and so forth
# at the end of every section it will then insert
# a link to the top of the document with caption of ".0.xxx", for
# example: ".0.nach oben" it will create:
# <a href="#top">nach oben</a>
#
# Special translating:
# if a string is between < and > and the lines 
# before and after it are empty, then <H4> will be used
#
# if an empty line is followed by "---" doc will create a "<hr>" (horizontal line)
#
# if a "	o " is found, a list will be created, 
# the list will end, if an empty line occurs.
# 
# if the string <date> is found it will be replaced
# with the current date
#
# if a string like this is found:
# <anything.jpg|gif>, then an img-tag will be created:
# <img src=anything.jpg border=0>
#
# doc converts German umlauts to their html representations:
# i.e. "ü" to "&uuml;".
#
# you can insert normal html-links in your sources, doc leaves
# them untouched!
#
# If it finds a URL, it will create a hyperlink to that URL
#
# You can create bold text with the use of "*" before and after
# a word: "*bold*"
#
# Use "_" to create underlined text: "_underlined_" 
#
# lines beginning with a "#" will be ignored.
#
# if you find it useful, or find a bug, feel free to drop
# me a line: tom@daemon.de
#
############################################################





# A few default values...
use POSIX qw(strftime);

# Version string shown by "-v" and embedded in the generated page.
$version = "0.2";

# Time stamp used for "<date>" and the default footer, in the form
# "day.month.year hh:mm:ss" (the day is blank-padded, as with date's %e).
# Formatted in-process instead of spawning an external "date" command.
$date = strftime("%e.%m.%Y %T", localtime);

# Login name of the invoking user; falls back to the passwd entry of the
# real uid when getlogin yields nothing.
$USER = getlogin || getpwuid($<);

sub error;	# forward declaration; the body is at the end of the file


# Evaluate the command line.  Accepted forms:
#   doc -v
#   doc -h | --help
#   doc inputfile [-o outputfile]
# Sets $input and $output; every invalid form ends in error().
my ($first_arg, $second_arg, $third_arg) = @ARGV;

if ($first_arg eq "-v")
{
        print "doc ".$version."\n";
        exit 0;
}

error() if $first_arg eq "-h" || $first_arg eq "--help";

if ($first_arg eq "")
{
	print "no input file specified.\n";
	error();
}

# anything else in first position is taken as the input file name
$input = $first_arg;

if ($second_arg eq "-o")
{
	if ($third_arg eq "")
	{
		print "no output file specified, but -o used.\n";
		error();
	}
	$output = $third_arg;
}
elsif ($second_arg eq "")
{
	# no explicit output file: append ".html" to the input name
	$output = $first_arg . ".html";
}
else
{
	print "inoperable option: $second_arg.\n";
	error();
}

# the command-line parameters have been parsed successfully :-0





# Now look for a config file.  /etc/doc.conf takes precedence; the
# per-user file ~/.doc is only read when the system-wide one does not
# exist.  If neither exists, $haveconfig is set to "no".
{
	# Resolve the home directory once, so that the file that is tested
	# and the file that is read are guaranteed to be the same one.
	my $home = $ENV{HOME};
	$home = (getpwuid($<))[7] unless defined $home && $home ne "";

	my $conffile;
	if (-e "/etc/doc.conf")
	{
		$conffile = "/etc/doc.conf";
	}
	elsif (defined $home && -e "$home/.doc")
	{
		$conffile = "$home/.doc";
	}

	if (!defined $conffile)
	{
		$haveconfig = "no";
	}
	elsif (open(my $cfg, '<', $conffile))
	{
		my $code = do { local $/; <$cfg> };
		close($cfg);
		$code = "" unless defined $code;

		# NOTE(review): the config file is executed as Perl code (this is
		# the documented config format) - only use files you trust.
		eval $code;
		warn "doc: error in config file $conffile: $@" if $@;
	}
	else
	{
		warn "doc: could not read config file $conffile: $!\n";
	}
}






# Read the source document line by line:
#   - lines starting with "#" are comments and are dropped,
#   - ".keyword.value" directives override the configuration
#     (title, autor, head, foot, crlf) or add a table-of-contents entry
#     (".<number>.caption"),
#   - every other line is collected unchanged in @data.
open(my $source, '<', $input) or die "Could not open file $input: $!\n";
while (my $line = <$source>)
{
	$line =~ s/\r?\n?\z//;		# strip the line ending ("\n" or DOS "\r\n")
	next if $line =~ /^\#/;		# comment line

	if ($line !~ /^\./)
	{
		push(@data, $line);
		next;
	}

	# ".keyword.value": the limit of 3 keeps dots inside the value intact
	# (e.g. ".title.Release 1.2" or a footer containing "index.html").
	my (undef, $key, $value) = split(/\./, $line, 3);
	$key   = "" unless defined $key;
	$value = "" unless defined $value;

	if ($key eq "title")
	{
		$TITLE = $value;
	}
	elsif ($key eq "autor")
	{
		$AUTOR = $value;
	}
	elsif ($key eq "head")
	{
		$HEAD = $value;
	}
	elsif ($key eq "foot")
	{
		$FOOT = $value;
	}
	elsif ($key eq "crlf")
	{
		# both documented values are honoured; anything else is ignored
		$CRLF = $value if $value eq "no" || $value eq "yes";
	}
	elsif ($key =~ /^\d+$/)
	{
		if ($key == 0)
		{
			# ".0.caption": target of the "back to top" links
			$top = $value;
			push(@data, "<a name=\"top\"></a>");
		}
		else
		{
			# ".N.caption": link to the N-th heading; "+ 0" normalizes
			# a number written with leading zeros
			push(@data, "<a href=\"\#anchor".($key + 0)."\">".$value."</a>");
		}
	}
	else
	{
		# not a known directive (e.g. a sentence starting with "..."):
		# keep it as ordinary text instead of silently swallowing it
		push(@data, $line);
	}
}
close($source);





	# Fill in every setting that neither the config file nor the source
	# document has provided.
	if($haveconfig eq "no")
	{
		print "no configfile found, using defaults...\n";
	}

	# The document name used for <title> and <h1> is the input file name
	# up to its first dot.  For names that start with a dot (e.g.
	# "./notes.txt") that part is empty, so the full name is used instead.
	@links = split(/\./,$input);
	$links[0] = $input if !defined $links[0] || $links[0] eq "";

	if($AUTOR eq "")
	{
		$AUTOR = $USER;
	}
	if($TITLE eq "")
	{
		$TITLE = "<h1>$links[0]</h1>\n";
	}
	if($HEAD eq "")
	{
		$HEAD = "<html>\n<head>\n";
		$HEAD = $HEAD."<META NAME=\"GENERATOR\" CONTENT=\"doc ".$version." by Thomas Linden\">\n";
		# NAME="Author": the "=" was missing, which made the tag malformed
		$HEAD = $HEAD."<META NAME=\"Author\" CONTENT=\"".$AUTOR."\">\n";
		$HEAD = $HEAD."<title>".$links[0]."</title>\n";
		$HEAD = $HEAD."</HEAD>\n";
		$HEAD = $HEAD."<body bgcolor=\"\#ffffff\">\n";
	}
	if($FOOT eq "")
	{
		$FOOT = "<p>&nbsp;</p><font size=1>last update: ".$date." by ".$AUTOR;
		$FOOT = $FOOT."<br>\npage created with <b>doc ".$version."</b> &copy; 1999 ";
		$FOOT = $FOOT."<a href=\"mailto:tom\@daemon.de\">Thomas Linden</a></font>\n";
		$FOOT = $FOOT."</body></html>\n";
	}
	if($CRLF eq "")
	{
		# line-break/tab translation is on unless explicitly disabled
		$CRLF = "yes";
	}




############## beginning of parsing... ##########################

# Open the HTML file and make it the default handle for print, so that
# everything printed until STDOUT is selected again goes into the page.
# 3-argument open: the file name is taken literally (no mode characters,
# no stripping of surrounding blanks).
open(OUTPUT, '>', $output) or die "Could not open file $output: $!\n";
print "processing $input...";
select (OUTPUT);

$i = 0;		# index into @data of the line currently being converted
$anchor = 1;	# number of the next heading anchor ("anchor1", "anchor2", ...)

print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n";
print $HEAD.$TITLE."\n";
print "<!-- created with doc ".$version." by Thomas Linden-->\n";

# Convert the collected lines to HTML, one output line per input line.
# $_ aliases the element of @data, so every line is rewritten in place and
# later iterations see their already converted predecessor (e.g. "<br>").
foreach $_ (@data)
{
	# Neighbouring lines; positions outside the document count as empty.
	# ($data[$i-1] must not be used for $i == 0: index -1 is the LAST line.)
	my $prev = $i > 0      ? $data[$i-1] : "";
	my $next = $i < $#data ? $data[$i+1] : "";

	$_ =~ s/\<date\>/$date/g;

	# Heading: a line of the form "<text>" whose neighbours are both empty.
	if($next eq "" && $_ =~ /^\</ && $_ =~ /\>$/ && ($prev eq "" || $prev eq "<br>"))
	{
		# closing html tags and images are not headings
		if($_ !~ /\<\/\w*/ && $_ !~ /\<\w*\.(?:jpg|gif)\>/)
		{
			$_ =~ s/\</\<H4\>/;
			chop $_;	# drop the trailing ">"
			$_ = $_."</H4>";
			$_ = $_."<a name=\"anchor".$anchor."\"></a>";
			if($anchor > 1)
			{
				# every section but the first is preceded by a link to the top
				$_ = "<a href=\"\#top\">$top</a><p>&nbsp\;</p>".$_;
			}
			$anchor++;
		}
	}

	# Images: <name.jpg> or <name.gif> becomes an img tag.
	$_ =~ s/\<(\w*\.(?:jpg|gif))\>/\<img src=$1 border=0\>/g;

	# LIST ---------------------------
	if($_ eq "" && $next =~ /^\to\s/)
	{
		# an empty line followed by "	o " opens the list
		$_ = "<ul>";
	}
	if($_ =~ /^\to\s/)
	{
		$_ =~ s/^\to\s/\<li\>/;
		$_ = $_."</li>";
		# the list ends as soon as the next line is not an item
		$_ = $_."\n</ul>" unless $next =~ /^\to\s/;
	}
	# LIST ----------------------------

	if($CRLF eq "yes")
	{
		if($_ eq "")
		{
			$_ = "<br>";
		}
		# tabs and runs of whitespace become non-breaking spaces
		1 while $_ =~ s/\t+/'&nbsp;' x (length($&) * 8 - length($`) % 8)/e;
		$_ =~ s/(\s\s+)/'&nbsp;' x length($1)/ge;
	}

	$_ = inline_markup($_);

	# Horizontal rule: a line consisting only of dashes after an empty line.
	if($_ =~ /^-{2,}\s*$/ && ($prev eq "<br>" || $prev eq ""))
	{
		$_ = "<hr>";
	}

	print $_;

	if($CRLF eq "yes" && $_ !~ /^\<li\>/ && $_ !~ /^\<br\>$/)
	{
		print "<br>";
	}

	print "\n";

	$i++;
}

# Apply the inline conversions to one line and return the result:
# e-mail addresses and URLs become links, *word* becomes bold, _word_
# becomes underlined and German umlauts become html entities.
sub inline_markup
{
	my ($line) = @_;

	# E-mail addresses; lines that already carry a mailto link are left alone.
	if($line !~ /href="mailto:/)
	{
		$line =~ s/([\w.+\-]+\@[\w.\-]*\w)/<a href="mailto:$1">$1<\/a>/g;
	}

	# <B>
	$line =~ s#(^|\W)\*(\w+)\*(\W|$)#$1<B>$2</B>$3#g;

	# <U>
	$line =~ s#(^|\W)_(\w+)_(\W|$)#$1<U>$2</U>$3#g;

	# URLs, including the path; lines with a ready-made link are left
	# alone.  "&" is deliberately not part of a URL here because the
	# whitespace translation may already have inserted "&nbsp;".
	if($line !~ /href="http|href="ftp/)
	{
		$line =~ s{((?:https?|ftp)://)([\w.\-/~%?=+\#:]*[\w/])}{<a href="$1$2">$2</a>}g;
	}

	# Umlauts.  The patterns are written as byte escapes so that they
	# cannot get lost when this file is re-encoded (an empty pattern
	# would silently reuse the last successful one).  Both the Latin-1
	# byte and the UTF-8 byte sequence of each character are known;
	# complete UTF-8 sequences are matched as a unit so that other
	# multi-byte characters pass through untouched.
	my %entity_for = (
		"\xFC" => '&uuml;',  "\xC3\xBC" => '&uuml;',
		"\xDC" => '&Uuml;',  "\xC3\x9C" => '&Uuml;',
		"\xF6" => '&ouml;',  "\xC3\xB6" => '&ouml;',
		"\xD6" => '&Ouml;',  "\xC3\x96" => '&Ouml;',
		"\xE4" => '&auml;',  "\xC3\xA4" => '&auml;',
		"\xC4" => '&Auml;',  "\xC3\x84" => '&Auml;',
		"\xDF" => '&szlig;', "\xC3\x9F" => '&szlig;',
	);
	$line =~ s/([\xC2-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF4][\x80-\xBF]{3}|[\x80-\xFF])/exists $entity_for{$1} ? $entity_for{$1} : $1/ge;

	return $line;
}

# After the last section: one more link back to the top, if the document
# contained at least one heading.
if($anchor > 1)
{
	print "<a href=\"\#top\">$top</a><br>\n";
}

print $FOOT;

# Switch back to STDOUT, then close the HTML file.  The close is checked
# because buffered write errors (e.g. a full disk) only surface here.
select (STDOUT);
close(OUTPUT) or die "Could not write file $output: $!\n";
print "$output created.\n";
exit 0;
################ ende ###########################################
# Print the usage summary to the currently selected output handle and
# terminate the program with exit status 1.  Does not return.
sub error
{
	my $usage = "usage: doc inputfile [-o outputfile]\n";
	print $usage;
	exit 1;
}
