#!/opt/local/bin/perl -wT ############################################################################### # # search document log # # Greg Fruth, Flight Mechanics Department # # v0.9, 12 Dec 1995 # v0.9.1, 5 Jan 1996 : added E-mail hardcopy request # v0.9.2, 22 Jan 1996 : removed limit on number of matches to show; # added short input names # v0.9.3, 3 Feb 2000 : Y2K fixes # v0.9.4, 11 Apr 2000 : wrap Auth field # v1.0.0, 9 Jan 2001 : rewrite; "use CGI" instead of cgi_handlers.pl; # v1.0.1, 13 Feb 2001 : new look and feel; removed E-mail hardcopy request, # removed short input names # v1.1, 5 Apr 2001 : keyword field added # v1.2, 21 Jun 2001 : support multiple orgs; CCC field added; age field # added; "use strict" # v1.3, 31 Aug 2001 : taint mode, adapt for web.aero.org # v1.4, 5 Apr 2002 : re-adapt for web.aero.org # ############################################################################### use CGI; use CGI::Carp qw(fatalsToBrowser); use Time::Local; use strict; # global variables use vars qw($Query $Debug $Now $MyCCC %UserDir); use vars qw($ConfFile $Style $LibrarianDesc $LibrarianAddr $UseSection); use vars qw($LogDir); use vars qw($NumRecords $NumMatches $ElapsedTime); use vars qw($SearchCCC @SearchFields %SearchStrings %Matches); use vars qw($SortField $SortDir $SearchMode); use vars qw(@Fields %Label); require('./doc_util.pl'); $Query = new CGI; $Debug = 0; $Now = 0; $MyCCC = '????'; $ConfFile = 'config.txt'; $Style = ''; $LibrarianDesc = 'the Archive Librarian'; $LibrarianAddr = 'root'; $UseSection = 0; $LogDir = 'logs'; $NumRecords = 0; $NumMatches = 0; $ElapsedTime = 0; $SearchCCC = '\d+'; @SearchFields = (); %SearchStrings = (); %Matches = (); $SortField = 'Date'; $SortDir = 'decreasing'; $SearchMode = 'normal'; # database fields @Fields = ( 'DocId', 'Fmt', # computed at run time, not stored in the log file 'Url', 'Date', 'Age', # computed at run time, not stored in the log file 'Auth', 'Clas', 'Dist', 'Subj', 'Keyw', 'Log' # computed at run time, not stored in the log file ); # descriptions for database fields (for output only) %Label = ( 'DocId' => 'Document ID', 'Fmt' => 'Online Format', 'Url' => 'URL', 'Date' => 'Date', 'Age' => 'Age', 'Auth' => 'Author', 'Clas' => 'Cls.', 'Dist' => 'Distribution', 'Subj' => 'Subject', 'Keyw' => 'Keywords', 'Log' => 'Log File', ); ############################################################################### # main body ############################################################################### main(); exit(0); ############################################################################### # main body ############################################################################### sub main { my ($key, $value); my ($Errors); my ($Day, $Mon, $Year, $junk); my ($AgeVal, $AgeUnits, $AgeDir, $AgeMult, $AgeSign, $Age); $Errors = ''; # get inputs foreach $key ($Query->param) { $value = $Query->param($key); next unless(defined($value) && $value); # skip empty params # controls $SortField = $value, next if ($key eq 'SortField'); $SortDir = $value, next if ($key eq 'SortDir'); $SearchMode = $value, next if ($key eq 'SearchMode'); $SearchCCC = $value, next if ($key eq 'SearchCCC'); $MyCCC = $value, next if ($key eq 'MyCCC'); $Debug = $value, next if ($key eq 'Debug'); # if present, these override Date $Mon = $value, next if ($key eq 'Mon'); $Day = $value, next if ($key eq 'Day'); $Year = $value, next if ($key eq 'Year'); # if present, these override Age $AgeVal = $value, next if ($key eq 'AgeVal'); $AgeUnits = $value, next if ($key eq 'AgeUnits'); $AgeDir = $value, next if ($key eq 'AgeDir'); # search strings $SearchStrings{$key} = $value if (grep {$_ eq $key} @Fields); } # if any of Mon, Day or Year were input, convert to Date if ($Mon || $Day || $Year) { $Mon = '.*' unless ($Mon); $Day = '.*' unless ($Day); $Year = '.*' unless ($Year); $SearchStrings{Date} = "$Year/$Mon/$Day"; } # if AgeVal, AgeUnits and AgeDir were all input, convert to Age if ($AgeVal && $AgeUnits && $AgeDir) { $Age = 0; $Age = $AgeVal if ($AgeVal =~ /^\d+$/); $AgeMult = 0; $AgeMult = 1 if ($AgeUnits eq 'days'); $AgeMult = 7 if ($AgeUnits eq 'weeks'); $AgeMult = 31 if ($AgeUnits eq 'months'); $AgeMult = 365 if ($AgeUnits eq 'years'); $AgeSign = 0; $AgeSign = -1 if ($AgeDir eq 'or newer'); $AgeSign = 1 if ($AgeDir eq 'or older'); $Age *= $AgeSign*$AgeMult; $SearchStrings{Age} = $Age if ($Age != 0); } # validate inputs $SortField = 'Date' unless (grep {$_ eq $SortField} @Fields); $SortDir = 'decreasing' unless ($SortDir eq 'increasing'); $SearchMode = 'normal' unless ($SearchMode eq 'power' || $SearchMode eq 'browse'); delete($SearchStrings{Age}) if (defined($SearchStrings{Age}) && ($SearchStrings{Age} !~ /^-?\d+$/ || $SearchStrings{Age} == 0)); # determine which fields are being searched @SearchFields = sort(keys(%SearchStrings)); ($junk, $junk, $junk, $Day, $Mon, $Year, $junk) = localtime(); $Now = timelocal(0, 0, 0, $Day, $Mon, $Year); $Errors = ReadConf($ConfFile); Header('Power Search Results', $Style) if ($SearchMode eq 'power'); Header('Search Results' , $Style) if ($SearchMode eq 'normal'); Header('File Index' , $Style) if ($SearchMode eq 'browse'); if ($Errors) { htmlError('cannot read config file', $Errors); } else { if (SearchRecords()) { PrintRecords(); } } Tailer(); } ############################################################################### # Search the logs for records which match the regexps in SearchStrings. # Save the matching records to array Matches. # Set NumRecords to the number of records. # Set NumMatches to the number of matching records. # Set ElapsedTime to the time elapsed in the search. # Return 1 on success, 0 on error. ############################################################################### sub SearchRecords { my ($org, @logs, $log); my ($field, %rec); $NumRecords = 0; $NumMatches = 0; $ElapsedTime = ftime(); # find logs for matching CCCs @logs = (); foreach $org (sort(keys(%UserDir))) { next unless ($org =~ /^$SearchCCC$/); next unless (defined($UserDir{$org})); next unless (-d "$UserDir{$org}/$LogDir"); if (!opendir(DIR, "$UserDir{$org}/$LogDir")) { htmlError("Cannot open directory \"$UserDir{$org}/$LogDir\"!", "opendir() failed: $!"); next; } push(@logs, map("$UserDir{$org}/$LogDir/$_", sort(readdir(DIR)))); closedir(DIR); } # search each log foreach $log (@logs) { next unless (-f "$log"); if (!open(LOG, "<$log")) { htmlError("Cannot open file \"$log\"!", "open() failed: $!"); next; } while () { chomp; # skip empty records next unless ($_ =~ /\S/); # clear current record undef(%rec); # get TAB-delimited fields # (field Extra accomodates extra fields) ($rec{DocId}, $rec{Url}, $rec{Date}, $rec{Auth}, $rec{Clas}, $rec{Dist}, $rec{Subj}, $rec{Keyw}, $rec{Extra}) = split(/\t/, $_, 9); # filter out HTML escape characters and quotes added by Excel foreach $field (@Fields, 'Extra') { if (defined($rec{$field})) { $rec{$field} =~ s/\"\"/\"/g; $rec{$field} =~ s/^\s*\"//g; $rec{$field} =~ s/\"\s*$//g; $rec{$field} =~ s/&/&/g; $rec{$field} =~ s/>/>/g; $rec{$field} =~ s/'); beginPanel('header'); print('Search Results'); midPanel('data'); # echo search fields print('Search for records where: '); print($Query->start_ul); if (@SearchFields) { $count = 0; foreach $field (@SearchFields) { $temp = sprintf('%s matches "%s"', $Label{$field}, $SearchStrings{$field}); $temp .= ' and' if (@SearchFields > 1 && $count < $#SearchFields); print($Query->li($temp)); $count++; } } else { print($Query->li("(no search criteria given; all records shown)")); } print($Query->end_ul); printf('
', $Query->url); # sort controls print('Sort records by '); printf(''); print('in '); printf(''); print('order'); print('
'); # keep state persistent foreach $field (@SearchFields) { printf('', $field, $SearchStrings{$field}); } printf('', 'SearchMode', $SearchMode); printf('', 'MyCCC', $MyCCC); printf('', 'SearchCCC', $SearchCCC); printf('', 'Debug', $Debug); print('
'); endPanel(); print('
'); beginPanel('data'); $ElapsedTime = 0.01 if ($ElapsedTime <= 0.0); printf("%d matching records found " . "(%d records searched in %.2lf sec, " . "%.0lf records per sec)
", $NumMatches, $NumRecords, $ElapsedTime, $NumRecords / $ElapsedTime); endPanel(); return unless ($NumMatches > 0); } # sort the record indices (not the records themselves) @sortedIndices = sort(compareRecords 0..$NumMatches-1); @sortedIndices = reverse(@sortedIndices) if ($SortDir eq 'decreasing'); print(''); # print the table header print(''); print(''); print(''); foreach $field (@Fields) { # omit Url field unless in power search next if ($field eq 'Url' && $SearchMode ne 'power'); # omit Age field unless in power search next if ($field eq 'Age' && $SearchMode ne 'power'); # omit Log field unless in power search next if ($field eq 'Log' && $SearchMode ne 'power'); # omit Clas field if in browse mode next if ($field eq 'Clas' && $SearchMode eq 'browse'); # omit Dist field if in browse mode next if ($field eq 'Dist' && $SearchMode eq 'browse'); # omit Keyw field if in browse mode next if ($field eq 'Keyw' && $SearchMode eq 'browse'); printf('', $Label{$field}); } print(''); print(''); # print the saved lines $count = 1; print(''); foreach $index (@sortedIndices) { # OPTIMIZATION # if Age is a search string, SearchRecords() already calculated # the age. otherwise, calculate it here, but only if in power search. # if not in power search, Age isn't displayed so there's no need to # calculate it. $Matches{$index}{Age} = dateToAge($Matches{$index}{Date}) if (!defined($SearchStrings{Age}) && $SearchMode eq 'power'); print(''); printf('', $count); printf('', $Matches{$index}{DocId}); if ($Matches{$index}{Fmt}) { printf('', $Matches{$index}{Url}, $Matches{$index}{Fmt}); } else { print(''); } # omit Url field unless in power search if ($SearchMode eq 'power') { if ($Matches{$index}{Url}) { printf('', $Matches{$index}{Url}); } else { printf(''); } } printf('', $Matches{$index}{Date}); # omit Age field unless in power search printf('', $Matches{$index}{Age}) if ($SearchMode eq 'power'); printf('', wrapAuth($Matches{$index}{Auth})); # omit Clas field if in browse mode printf('', $Matches{$index}{Clas}) unless ($SearchMode eq 'browse'); # omit Dist field if in browse mode printf('', $Matches{$index}{Dist}) unless ($SearchMode eq 'browse'); printf('', $Matches{$index}{Subj}); # omit Keyw field if in browse mode printf('', $Matches{$index}{Keyw}) unless ($SearchMode eq 'browse'); # omit Log field unless in power search printf('', $Matches{$index}{Log}) if ($SearchMode eq 'power'); print(''); $count++; } print(''); print('
#%s
%s%s%s(none)%s(none)%s%s days%s%s%s%s%s%s
'); if ($SearchMode eq 'browse') { beginPanel('data'); print("Original Files"); endPanel(); } } ############################################################################### sub wrapAuth { # Some Auth strings are really long, with "/" separating the names # (e.g. "NAHASAPEEMAPETILON/DEBEAUMARCHAIS"). Because there are no # spaces around the "/", the string is displayed all on one line, which # can make the column too wide to fit on the screen. Therefore, we # add spaces around the "/" so that the string can wrap around to # multiple lines. my $string = shift; $string =~ s|/| / |g; return($string); } ############################################################################### # return true if the current line matches (case insensitive) all the # SearchFields, false otherwise ############################################################################### sub matchRecord { my $rec = shift; my ($field, $keyword, @recKeywords, @srchKeywords, $found); foreach $field (@SearchFields) { # treat Age field specially if ($field eq 'Age' && $$rec{Age} != -1) { if ($SearchStrings{Age} > 0) { return(0) if ($$rec{Age} < $SearchStrings{Age}); } elsif ($SearchStrings{Age} < 0) { return(0) if ($$rec{Age} > abs($SearchStrings{Age})); } } # treat Keyw field specially elsif ($field eq 'Keyw') { @recKeywords = split(/,/, $$rec{Keyw}); @srchKeywords = split(/,/, $SearchStrings{Keyw}); $found = 0; foreach $keyword (@srchKeywords) { $found = 1 if (grep(/$keyword/i, @recKeywords)); } return(0) unless ($found); } else { return(0) if ($$rec{$field} !~ /$SearchStrings{$field}/i); } } return(1); } ############################################################################### # return string comparison (case insensitive) between sort fields of # records a and b. use Date as secondary sort field if a anb b compare # the same on the primary sort field. ############################################################################### sub compareRecords { my $status; $a = $a; $b = $b; # suppress warnings produced by -w switch $status = (lc($Matches{$a}{$SortField}) cmp lc($Matches{$b}{$SortField})); return($status) if ($status != 0); return($Matches{$a}{Date} cmp $Matches{$b}{Date}); }