#! /usr/bin/perl
# ============================= Simple Multiple Dictionary Search =======================
# You will need to set configurations in a file and give a path here
#   THIS IS NEW
$configurationfile = "/var/www/html/mark/quickdict/database.cfg";
# ==================================== Main Function ====================================
print "Content-type: text/html; charset=UTF-8\n\n";        # Print httpd args
&getconfiguration();                                       # Do the configurations file
if ($RequireAccesControl) {                                # If specified, run access control
        &checkaccesscontrol;
        }
use DBI;                                                   # Call your perl modules. Doing it
use Text::Unaccent::PurePerl;                              # now since we have conf and access
$assumecharset = "utf8";                                   # set the assumed charset for Unaccent
use Encode qw(from_to);
&printheader();                                            # Print the page header
&evaluatequery();                                          # Get the search data from QUERY_STRING
&connect2database();                                       # Use DBI to open a connection
$indocyear = $queryhash{"docyear"};

$strippedhw = $queryhash{'strippedhw'};
$strippedhw =~ s/^\s*//g;
$strippedhw =~ s/\s*$//g;
$strippedhw =~ s/[\.\?\!;:,-]*$//g;    #should we strip internal punctuation? unsure.
$strippedhw =~ s/^[\.\?\!;:,-]*//g; 
$strippedhw =~ s/^[slndm]?'//g;
$strippedhw =~ s/^qu'//g;
$strippedhw =~ tr/A-Z/a-z/;
$queryhash{'strippedhw'} = $strippedhw;
$instrippedhw = $queryhash{"strippedhw"};

if ($queryhash{"strippedhw"} =~ /[\177-\377]/) {
	$queryhash{"strippedhw"} = unac_string($assumecharset, $queryhash{"strippedhw"});
	}
&getquerystring;                                           # Generate SQL Query String
$query1 = &editquerystring($query1);                       # Modify query string (external subroutine)

# print ">>>>" . $query1 . "<br>";

print "Searching for <b>" . $instrippedhw . "</b>... ";

                                                           # SEARCH ONE : word and years (if there)
$someresults = &runquery($query1);                         # Run the query and format results
if ($someresults) {
	print $someresults;
	&printfooter;
	exit(0);
	}
print "not found.<br>\n";

if ($indocyear) {                                          # SEARCH TWO, try word in all dicos
	print "Trying in all dictionaries... ";
	$query2 = $query1;
	$query2 =~ s/AND \(docyear[^\)]*\)//;
	$someresults = &runquery($query2);
	if ($someresults) {
		print "<p>\n";
        	print $someresults;
        	&printfooter;
        	exit(0);
        }
	print "not found.<br>\n";
}

$consonants = "[bcdfghjklmnpqrstvwxz]";
$vowels = "[aeiouy]";
$stemmedhw = $queryhash{"strippedhw"};
$stemmedhw =~ s/ans$/ant/;                            # modernized
$stemmedhw =~ s/ens$/ent/;                            # modernized
$stemmedhw =~ s/([bcdfghjklmnpqrstvwxz])s$/$1/;
$stemmedhw =~ s/([bcdfghjklmnpqrstvwxz])es$/$1e/;
$stemmedhw =~ s/euse$/eux/;
$stemmedhw =~ s/($vowels)($vowels)s$/$1$2/;
$stemmedhw =~ s/ee$/e/;
$stemmedhw =~ s/($vowels)($consonants)($vowels)s$/$1$2$3/;
$stemmedhw =~ s/ales$/al/;
$stemmedhw =~ s/nnes$/n/;
$stemmedhw =~ s/nne$/n/;
$stemmedhw =~ s/nte$/nt/;
$stemmedhw =~ s/ouse$/oux/;
$stemmedhw =~ s/elle$/el/;
$stemmedhw =~ s/aux$/al/;
$stemmedhw =~ s/trice$/teur/;
$stemmedhw =~ s/onne$/on/;

if ($stemmedhw ne $queryhash{"strippedhw"}) {
	print "Searching for stemmed headword <b> " . $stemmedhw . "</b>... ";
	$query1a = $query1;
	$query1a =~ s/strippedhw = "[^"]*"/strippedhw = "$stemmedhw"/;
        $someresults = &runquery($query1a);
        if ($someresults) {
                print "<p>\n";
                print $someresults;
                &printfooter;
                exit(0);
                }
	else {
		print "not found.<br>\n";
		}
	if ($indocyear) {
                print "Trying in all dictionaries... ";
                $query2a = $query2;
                $query2a =~ s/strippedhw = "[^"]*"/strippedhw = "$stemmedhw"/;
                $someresults = &runquery($query2a);
                if ($someresults) {
                        print "<p>\n";
                        print $someresults;
                        &printfooter;
                        exit(0);
                        }
                else {
                        print "not found.<br>";
                        }
		}
	}

$modstrippedhw = &modernize($instrippedhw);
if ($instrippedhw ne $modstrippedhw) {
	print "Modernization: changed " . $instrippedhw . " to " . $modstrippedhw . "<br>\n";
	}
print "Trying to find lemma for " . $modstrippedhw . " ... ";
&flushbuffer;

# See if we can get a lemma from the modstrippedhw

# $treeoutfile = "/var/www/html/tmp/TreeTagger.out.$$";
$treeoutfile = "/tmp/TreeTagger.out.$$";
# $treetaggercmd = " /opt/treetagger/cmd/tree-tagger-french ";
$treetaggercmd = " /var/www/html/mark/quickdict/tt/cmd/tree-tagger-french ";

# system("/bin/echo \"$modstrippedhw\"  |  $treetaggercmd >  $treeoutfile 2>/dev/null");
# print "/bin/echo \"$modstrippedhw\"  |  $treetaggercmd >  $treeoutfile 2>/dev/null <br>";

system("/bin/echo \"$modstrippedhw\"  |  $treetaggercmd >  $treeoutfile ");
# print "/bin/echo \"$modstrippedhw\"  |  $treetaggercmd >  $treeoutfile <br>";

open(TT, "$treeoutfile");
$lin = <TT>;
chop($lin);
close(TT);
system("rm -f $treeoutfile");

($mvosurf, $mvopart, $mvolemma) = split("\t", $lin);

if ($mvolemma =~ /<unknown>/) {
	$hwlem = "";
	}
else {
	$hwlem = $mvolemma;
}


if ($hwlem) {
	print "found possible lemma <b>" . $hwlem . "</b><br>";
	print "Searching for <b>" . $hwlem . "</b>... ";
        if ($hwlem =~ /[177-\377]/) {
                $hwlem = unac_string($assumecharset, $hwlem);
                }
	$query3 = $query1;
        $query3 =~ s/strippedhw = "[^"]*"/strippedhw = "$hwlem"/;
	# print "<p>" . $query3 . "<p>";
	$someresults = &runquery($query3);
	if ($someresults) {
                print "<p>\n";
                print $someresults;
                &printfooter;
                exit(0);
		}
	else {
		print "not found.<br>";
		}
	if ($indocyear) {
		print "Trying in all dictionaries... ";
		$query4 = $query2;
		$query4 =~ s/strippedhw = "[^"]*"/strippedhw = "$hwlem"/;
		# print "<p>". $query4 . "<p>";
		$someresults = &runquery($query4);
        	if ($someresults) {
                	print "<p>\n";
                	print $someresults;
                	&printfooter;
                	exit(0);
                	}
        	else {
                	print "not found.<br>";
                	}
		}	
	}
else {
	print "not found.";	
	}


&dicosimlook;
&printfooter;

# ================================== That's All Folks ==================================

# ==================================== Subroutines =====================================

sub modernize {
# select * from frmod where old = 'abastardies';
	my $dbfile1 = "/var/www/html/mark/quickdict/frmodernize.db3";
        my $dbh1 = DBI->connect("dbi:SQLite:dbname=$dbfile1","","");
	my $w = $_[0];
	my $x = length($w);
	if ($x < 6) {
		return($w);
		}
	# print $w;
	$mvoqq = "select mod from frmod where old = \'" . $w . "\';";
	# print $mvoqq;
        if ($ssth = $dbh1->prepare($mvoqq)) {
         	$ssth->execute();
		}
	# print "TESTOUT";  
	@r = $ssth->fetchrow; 
	if ($r[0]) {
		# print "newmod: $w --> $r[0] ";
		$w = $r[0];
		return ($w);
		}	
	else {
		$w =~ s/oit$/ait/;
		$w =~ s/ois$/ais/;
		$w =~ s/oient$/aient/;
		$w =~ s/mens$/ments/;
		$w =~ s/cy$/ci/;
		$w =~ s/ievr$/ieur/;
		$w =~ s/ict$/it/;
		}
return ($w);
}

sub flushbuffer {
	for (my $i = 0 ; $i < 400 ; $i++) {
             print "                                                                 \n";
       	}
}

# Get query simply builds the SQL query.  Each and every search
# function will require crafting.  So, let's say we want to search
# date as numbers and then built = >= <=, you should be able to do this.
# Note: the variables like author, title, and date are so named
# in the SQL database.  This is a handy convention.
# Also note that searching and sorting for some fields are performed
# on merged fields, such as ssauthor.  
# Question to ask.  I am passing the null queries along to the engine.
# This seems to work OK.  Maybe I should not bother.  

sub getquerystring {
	$selector = "*";
    	$query1 = "select $selector from $TABLE where ";

     foreach $arg10 (@Biblio_Fields) {
        if (length(@queryhash{$arg10}) gt 0) {
           $qnam = $arg10;
	   $qval = @queryhash{$arg10};
           $qop = @BIBOPS{$arg10};

# Regular Expression operator

	   if ($qop eq "regexp") {	
                if ($turnonand eq 1) {
	           $query1 .= " AND ";
	           } 
                if ($qval =~ / AND / || $qval =~ / OR / || $qval =~ / NOT /) {
                   $qval = &expand_query($qval, "$qnam", "regexp");
                   $query1 .= " " . $qval . "  ";
                   }
		elsif ($qval =~ /^NOT/) {
		     $qval =~ s/^NOT //g;
		     $qval =~ s/[ACEINOUY]/$ACCENTS{$&}/ge;
		     $query1 .= " $qnam NOT regexp \"$qval\" ";
		     }
                else {
	           $qval =~ s/[ACEINOUY]/$ACCENTS{$&}/ge;
                   $query1 .= " $qnam regexp \"$qval\" ";
                   }
               $turnonand = 1;
	    }

# Numeric Operator 
            if ($qop eq "numeric") {
                if ($turnonand eq 1) {
                   $query1 .= " AND ";     
                   }
                if ($qval =~ / OR /) {
                   $qval = &expand_query($qval, "$qnam", "=");
                   $query1 .=  $qval . " ";
                   }
                elsif ($qval =~ /\-/) {
                       if ($qval =~ /-$/) {
			   $qval .= "9998";
		           }
                       if ($qval =~ /^-/) {
                           $qval = "0" . $qval;
                           }

                   ($d1, $d2) = split(/-/,$qval);
                   $qval = "($qnam >= \"".$d1."\" AND $qnam <= \"".$d2."\") ";
                   $query1 .= $qval;
                   }
                else {
                   $query1 .= "$qnam = \"$qval\" ";
                   }
             $turnonand = 1;
             }
# Exact match
            if ($qop eq "exact") {
                if ($turnonand eq 1) {
                   $query1 .= " AND ";
                   }
                   $query1 .= "$qnam = \"$qval\" ";
                   $turnonand = 1;
             }
	}
    }

# Sort Order
    if (!$sortorder) {
    	$sortorder = $DEFAULTSORTORDER;
	}
    if ($BROWSEFIELD) {
       $query1 .= "order by $selector";
        }
    else {
        $query1 .= "order by $sortorder;";
        }
     }

sub dicosimlook {
	my $exactword = $queryhash{"strippedhw"};
 	if (length($exactword) > 4) {
		$flexvalue = 1;
		if (length($exactword) > 7) {
			$flexvalue = 2;
			}
        	if (length($exactword) > 12) {
                	$flexvalue = 3;
                	}
		print "<p>Attempting similarity search on <b> $exactword </b> ";
		print "on headwords from all dictionaries (constraint = $flexvalue)";
		$THEARG = "$AGREP -x -" . $flexvalue . " \"" . $exactword . "\" ";
		# $THEARG = "$AGREP -" . $flexvalue . " \"" . $exactword . "\" ";
		$THEARG .= $THEWORDS . " > " . $SIMWORDS;
		# print $THEARG;
		system ($THEARG);
		$havewords = -s("$SIMWORDS");
		if ($havewords > 3) {
			print "<br>Did you mean any of these words?: <blockquote>";
			open (SIMILARWORDS, $SIMWORDS);	
			while ($asimilarword = <SIMILARWORDS>) {
				if ($asimilarword =~ / /) {
					}
				else {
		    			$asimilarword =~ s/\n//;
		    			$asimilarword1 = $asimilarword;
		    			$asimilarword1 =~ s/ /\+/g;
		    			print "<a href=\"" . $THESERVER;
		    			print $asimilarword1 . "\">" . $asimilarword ."</a><br>";
					}
				}
			print "</blockquote>";
			}
		else {
			print "<p>No Similarity Matches<p>";
			}
        	system ("rm -f $SIMWORDS");
		} 
  	else {
		print "Sorry, " . $exactword . " is too short for similarity search.";		
	}
}

# Expand query is not finished.  It takes the individual query argument,
# the field to query, and the operator as arguments from the general
# query processor.  It expands AND, NOT, OR operators as required.
# I have not yet implemented this to handle any operator other than
# SQL "like", the default string search.  Note that I am adding the
# substring operator "%" to every term.  In a full implementation,
# this will need to be made conditional.

sub expand_query {
         local ($q, @qq, $qt, $outq, $qq, $q2, $op, $nextnot);
	 $nextnot = 0;
         $q = $_[0];
         $qt = $_[1];
         $op = $_[2];
	 $q =~ s/  */\_/g;
	 $q =~ s/\_OR\_/ OR /g;
	 $q =~ s/\_AND\_/ AND /g;
	 $q =~ s/\_NOT\_/ NOT /g;
         @qq = split(/ /,$q);
         $outq = "(";
         foreach $q2 (@qq) {
		if ($q2 ne "OR" && $q2 ne "AND" && $q2 ne "NOT") {
			$q2 =~ s/[ACEINOUY]/$ACCENTS{$&}/ge;
			}
                if ($q2 eq "OR" || $q2 eq "AND") {
                            $outq .= " $q2 ";
                     }
		elsif ($q2 eq "NOT") { 
			$nextnot = 1;
			}
                else {
			$q2 =~ s/\_/ /g;
			if ($nextnot eq 1){ 
                            $outq .= " AND (".$qt." NOT ".$op." \"". $q2."\")";
			    $nextnot = 0;
			    }
			else {
			    $outq .= "(".$qt." ".$op." \"". $q2."\")";	
			    }
                     }
         }
         $outq .= ")";
	 if ($outq =~ / AND / && $outq =~ / OR /) {
		$outq =~ s/ AND /) AND (/g;
		$outq = "(" . $outq . ")";
		}
         return $outq;
}

# Print the header file.  I should check to see if it is there.....
sub printheader {
	open (HEADERFILE, $HEADERS . "search.header.html");
	while ($linein = <HEADERFILE>) {
        	print $linein;
        	}
	close HEADERFILE;
}

# Print the footer.  A subroutine because we want to print it from various locations.

sub printfooter {
open (FOOTERFILE, $HEADERS . "search.footer.html");
while ($linein = <FOOTERFILE>) {
        print $linein;
        }
close FOOTERFILE;
}

sub checkaccesscontrol {
        if (! -e $AccessControlFile) {
                print "<p>\n<p>\n";
                print "Cannot find required access control module for this database.<br/>";
                print "Contact $ERRORCONTACT.";
                exit (0);
                }

        require "$AccessControlFile";
        if (!&security_check) {
                print "<p>\n<p>\n";
                print $REJECT_MESSAGE;
                print "<p>Requesting Computer Address: ";
                if ($host) {
                        print "$host ";
                        }
                if ($ip) {
                        print " $ip";
                        }
                print "\n";
                exit (0);
                }
        }

sub evaluatequery {
	$wehavebib = 0;
	$QS = $ENV{'QUERY_STRING'};
	@argbuffer = split ("&", $QS);
	@argbuffer = grep (/^[a-zA-Z_1]*=.+$/, @argbuffer);
	@argbuffer = grep (!/^[a-zA-Z_1]*=ALL$/, @argbuffer);
	foreach $arg (@argbuffer) {
       		$arg = '$' . $arg;
  		$arg =~ s/\%3A/:/g;
  		$arg =~ s/\%2C/,/g;
  		$arg =~ s:\%2F:/:g;
  		$arg =~ s/\%26/\&/g;
  		$arg =~ s/\+/ /g;
  		$arg =~ s/%../pack("H2", substr($&,1))/ge;
  		$mvoarg = $arg;
  		$arg =~ s/^(.[^=]*=)(.*$)/$1'$2'/;
  		$mvoarg =~ s/\$//;
  		($mvoarg1,$mvoarg2) = split(/=/,$mvoarg);
  		$mvoarg2 =~ s/^  *//;    
  		$mvoarg2 =~ s/  *$//;    
  		$mvoarg2 =~ s/([^\.])\*/$1\.\*/g;
  		@queryhash{$mvoarg1} = $mvoarg2;
  		if (@BIBOPS{$mvoarg1}) {
           		$wehavebib = 1;
     			}
  		elsif ($mvoarg1 =~ /xxbrowse/) {
           		$fieldtobrowse = $mvoarg1;
           		$fieldtobrowse =~ s/xxbrowse//;
     			}
  		else {
       			eval $arg;
   		}
   	}

        my $t1 = $queryhash{"strippedhw"};
	if ($t1 =~ /[a-zA-Z\177-\377]/) {
		$donothing = "";
		}
	else {
   		print "<h2><center>Please enter a headword to search.</h2></center>";
   		&printfooter;
   		exit;
		}

}

# The perl DBI method uses a connection in the format:
# "DBI:servertype:database:hostname:port" (the hostname and port are optional)
# the ;mysql_socket=/usr/lib/mysql/mysql.sock was used because by
# default it looks in /var/lib/mysql/mysql.sock
# If I get something in the error string, I'm going to assume that
# I did not connect, leave a message, and exit.  We should have
# a administrator contact here as well.  This works properly, as
# I have tested it.

sub connect2database {
	$dbfile = "/var/www/html/mark/quickdict/dico1look.db3";
	$dbh = DBI->connect("dbi:SQLite:dbname=$dbfile","","");
	if ($MVOERROR) {
        	print "<b>Internal Error.  Database handle not defined. <p>";
        	print "Error Message: $MVOERROR ";
        	print "<p>";
        	print "Please contact ARTFL and include the Error Message";
        	&printfooter;
        	exit;
	}
}

sub getconfiguration {
	if (! -e $configurationfile) {                             
                print "<p>\n<p>\n";  
                print "Cannot find required configuration file for this database.<br/>";
                print "Contact $ERRORCONTACT.";
                exit (0);
                }
	do "$configurationfile";

	if (! -e $DATABASESUBS) {
		print "<p>\n<p>\n";
                print "Cannot find subroutine library this database.<br/>";
                print "Contact $ERRORCONTACT.";
                exit (0);
		}
	require "$DATABASESUBS";
}


# Russ's Escape Function
sub uri_escape {
my $text = $_[0];
        return undef unless defined $text;
my %escapes = ();
for (0..255) {
	$escapes{chr($_)} = sprintf("%%%02X", $_);
	}
# Default unsafe characters.  RFC 2732 ^(uric - reserved)
$text =~ s/([^A-Za-z0-9\-_.!~*'()])/$escapes{$1}/g;

$text;
}

