#!/usr/bin/perl -w
#
# This little handy script grabs the german/english translation for a
# given term from http://dict.leo.org. Thanks the LEO folks for their
# good job!
#
# Usage is quite simple, the script requires just one parameter,
# the term to be translated. It will then return the results in
# an unformatted form.
#
# $Id: leo,v 1.14 2003/05/12 13:00:04 scip Exp $
#
# Copyleft (l) 2000-2003 by Thomas Linden <tom@daemon.de>. leo may be used and
# distributed under the terms of the GNU General Public License. All other
# brand and product names are trademarks, registered trademarks or
# service marks of their respective holders.

use strict;
use IO::Socket;
use Getopt::Long;
use Data::Dumper;
use DB_File;

my $linecount = 0;
my $maxsize   = 0;
my @match     = ();


#
# internal settings
#
my $highlight = 1;
my $default_c = "\033[0m";    # reset default terminal color
my $bold_c    = "\033[0;34m"; # blue color
my $copy_c    = "\033[0;35m"; # copyright message color (green)

my $ip        = "dict.leo.org";
my $port      = "80";
my $url       = "";
my $ghost     = "$ip:$port";

my $version   = "1.14";
my $config    = $ENV{HOME} . "/.leo";
my $cache     = $ENV{HOME} . "/.leo-CACHE.db";

#defaults for config
my %conf      = (
		 use_cache => "yes",
		 use_color => "yes"
		 );

my %validopts = qw(use_cache 0 use_color 0);
my %line      = %validopts;
my %CACHE     = ();
my $site      = "";

my($o_s, $o_m, $o_c, $o_l, $o_v, $o_h, $o_n, $o_f);

#
# commandline options
#
Getopt::Long::Configure( qw(no_ignore_case));
if (! GetOptions (
		  "spelltolerance|s=s" => \$o_s,
		  "morphology|m=s"     => \$o_m,
		  "chartolerance|c=s"  => \$o_c,
		  "language|l=s"       => \$o_l,
		  "force|f"            => \$o_f,
		  "version|v"          => \$o_v,
		  "help|h"             => \$o_h,
		  "noescapechars|n"    => \$o_n
                 )    ) {
  &usage;
}

if ($o_h) {
  &usage;
}
if ($o_v) {
  print STDERR "leo version $version\n";
  exit;
}

#
# search term
#
my $string = shift;
if (!$string) {
  &usage;
}

#
# open the config, if any
#
if (-e $config) {
  open C, "<$config" or die "Could not open config $config: $!\n";
  local $_;
  while (<C>) {
    chomp;
    next if(/^\s*#/); # ignore comments
    next if(/^\s*$/); # ignore empty lines
    s/^\s*//;         # remove leading whitespace
    s/\s*$//;         # remove trailing whitespace
    s/\s*#.*$//;      # remove trailing comment
    my($opt, $val) = split /\s*=\s*/;
    $conf{$opt} = $val;
    $line{$opt} = $.;
  }
  close C;
}


#
# validate the config
#
foreach my $opt (keys %conf) {
  if (!exists $validopts{$opt}) {
    print "<$opt>\n";
    print STDERR "Error in config $config line: " . $line{$opt} . ". Unsupported option \"$opt\"!\n";
    exit;
  }
}

#
# feed config values into program
#
if ($conf{use_color} eq "no") {
  $highlight = 0;
}
elsif ($conf{use_color} eq "yes") {
  $highlight = 1;
}

#
# open the cache, if wanted
#
if ($conf{use_cache} eq "yes") {
  dbmopen(%CACHE, $cache, 0600) or $conf{use_cache} = "no";
}


#
# form var transitions for searchLoc
#
my %lang = (
	    de2en => 1,
	    en2de => -1,
	   );
if ($o_l) {
  if (exists $lang{$o_l}) {
    $o_l = $lang{$o_l};
  }
  else {
    &usage;
  }
}

if($o_n) {
  $highlight = 0;
}

#
# cut invalid values for parameters or set defaults if unspecified
#
my %form = (
	    spellToler => { mask => [ qw(standard on off) ],         val => $o_s || "standard" },
	    deStem     => { mask => [ qw(standard none forcedAll) ], val => $o_m || "standard" },
	    cmpType    => { mask => [ qw(fuzzy exact relaxed) ],     val => $o_c || "relaxed"  },
	    searchLoc  => { mask => [ qw(-1 0 1) ],                  val => $o_l || "0"        },
	    );
my @form;
foreach my $var (keys %form) {
  if (grep { $form{$var}->{val} eq $_ } @{$form{$var}->{mask}}) {
    push @form, $var . "=" . $form{$var}->{val};
  }
}

#
# process whitespaces
#
my $query = $string;
$query =~ s/\s\s*/ /g;
$query =~ s/\s/\+/g;
push @form, "search=$query";

#
# make the query cgi'ish
#
my $form = join "&", @form;


#
# check if we run in forced mode
#
if ($o_f) {
  delete $CACHE{$form};
  $conf{use_cache} = "no";
}

if (exists $CACHE{$form} && $conf{use_cache} eq "yes") {
  $site = $CACHE{$form};
  $conf{cached} = 1;
}
else {
  #
  # check for proxy settings and use it if exists
  # otherwise use direct connection
  #
  if (exists $ENV{http_proxy}) {
    my $proxy = $ENV{http_proxy};
    $proxy =~  s/^http:\/\///i;
    my($host, $pport) = split /:/, $proxy;
    if ($pport) {
      $url = "http://$ip:$port";
      $port = $pport;
    }
    else {
      $port = 80;
    }
    $ip = $host;
  }


  my $conn = new IO::Socket::INET(
				  Proto    => "tcp",
				  PeerAddr => $ip,
				  PeerPort => $port,
				 ) or die "Unable to connect to $ip:$port: $!\n";
  $conn->autoflush(1);

  print $conn "GET $url/?$form HTTP/1.0\r\n";

  # be nice, simulate Konqueror.
  print $conn
    "User-Agent: Mozilla/5.0 (compatible; Konqueror/3.0; X11)
Host: $ghost
Accept: text/*;q=1.0, image/png;q=1.0, image/jpeg;q=1.0, image/gif;q=1.0, image/*;q=0.8, */*;q=0.5
Accept-Encoding: x-gzip; q=1.0, gzip; q=1.0, identity
Accept-Charset: iso-8859-1;q=1.0, *;q=0.9, utf-8;q=0.8
Accept-Language: en_US, en\r\n\r\n";


  #
  # parse dict.leo.org output
  #
  my @line = <$conn>;
  close $conn;
  $site = join "", @line;

  if ($site =~ /produced\s+no\s+results\s+for\s+the\s+selected/) {
    print STDERR "Search for \"$string\" returned no results.\n";
    exit;
  }

  #
  # remove the boring html stuff
  #
  $site =~ s/^.*(\d+?\s*search\s*results)//s;
  $site =~ s/<\!\-\- # Spalte 7.*$//s;
  my @titles = ("Direct Matches", "Composed Entries", "Verbs and Verb Phrases", "Phrases and Collocations", "Examples");
  foreach my $title (@titles) {
    $site =~ s/<TR><TD .+?$title<\/B><\/TD>//s;
  }

  # convert their tr/td stuff to my column style
  $site =~ s/(<TD>)(.+?)(<\/TD>)/$1     $2     $3/gs;
  $site =~ s/(<\/TR>)\s*(<TR)/$1\n$2/gs;
  $site =~ s/<.+?>//gs;
  $site =~ s/\&nbsp;//gs;
  $site =~ s/\s*$//gs;
  $site =~ s/                 English:German://;
}

#
# store the result in the cache if wanted
#
if ($conf{use_cache} eq "yes") {
  $CACHE{$form} = $site;
  dbmclose(%CACHE);
}


#
# prepare formating
#
my ($undef, @lines) = split /\n/, $site;

#
# normalize the results
#
foreach (@lines) {
  $linecount++;
  s/^\s*/ /;
  s/\s*$//;
  my($left, $right) = split /\s\s\s*/;
  my $leftsize = length($left);
  $maxsize = $leftsize if($leftsize > $maxsize);
  push @match, { left => $left, right => $right };
}

$maxsize += 5;
print "Found $linecount matches for '$string' on dict.leo.org";
if ($conf{cached}) {
  print " (cached)";
}
print ":\n";


#
# print it out in a formated manner
#
foreach my $entry (sort {$a->{left} cmp $b->{left}} @match) {
  $entry->{left} =~ s/^(.*)$/$1 . " " x ($maxsize - length($1))/e;
  if ($highlight) {
    $entry->{left}  =~ s/(\Q$string\E)/$bold_c . $1 . $default_c/ei;
    $entry->{right} =~ s/(\Q$string\E)/$bold_c . $1 . $default_c/ei;
  }
  print $entry->{left} . $entry->{right} . "\n";
}

print "$copy_c" if $highlight;
print "\n     Generated by leo via http://dict.leo.org/";
print "\n     Copyright  LEO Dictionary Team 1995-2003";
print "\n     [leo]  Copyleft  Thomas Linden 2000-2003\n\n";
print "$default_c" if $highlight;



sub usage {
  my $me        = $0;
  $me           =~ s(^.*/)();

  print qq(Usage:   $me [-slmcf] <term>
Translate a term from german to english or vice versa.

  -s, --spelltolerance=on|off       allow spelling errors
  -m, --morphology=none|forcedAll   provide morphology informations
  -c, --chartolerance=fuzzy|exact   allow umlaut alternatives
  -l, --language=de2en|en2de        translation direction
  -n, --noescapechars               dont use escapes for highlighting
  -f, --force                       don't use the query cache
  -h, --help                        display this help and exit
  -v, --version     output version information and exit

Report bugs to <tom\@daemon.de>.);

  exit 1;
}

1;


