All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

websearch.in 3.45 KB
Newer Older
1 2 3
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2008 University of Utah and the Flux Group.
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
# All rights reserved.
#
use English;
use Getopt::Std;
use File::Basename;

#
# Search our web index. Spits out HTML. Sorry, easier that way!
# 
sub usage {
    print STDERR "Usage: websearch ...\n";
    exit(-1);
}
my $optlist  = "";

#
# Configure variables
#
my $TB		= "@prefix@";
my $WWW         = "$TB/www";
my $index	= "$TB/www/site.index";
my $swish       = "swish-e";
my $searchstring;
my $searchwords;
my $numhits;
my %hits	= ();	# indexed by document.
my %lines       = ();   # indexed by document.
my @order	= ();

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
$| = 1;

#
# Very simple; one argument is the string to search for.
#
usage()
    if (! @ARGV);

$searchstring = $ARGV[0];

#
# Do this from the webdir.
#
chdir($WWW) or
    die("*** $0:\n".
	"    Could not chdir to $WWW!\n");

#
# Run swish. The next version of swish will include a perl API module, so
# we will be able to avoid this extra shell call. For now, we are stuck
# calling swish-e.
#
61
open(SWISH, "swish-e -f $index -x '%r %p %l \"%D\"\n' ".
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
     "-w '$searchstring' |") or
    die("*** $0:\n".
	"    Could not invoke swish-e on '$searchstring'!\n");

#
# Read back results. 
#
while (<SWISH>) {
    #
    # First few lines are special.
    #
    if ($_ =~ /^\#/) {
	if ($_ =~ /^\# Search words: (.*)$/) {
	    $searchwords = $1;
	}
	elsif ($_ =~ /^\# Number of hits: (\d*)$/) {
	    $numhits = $1;
	}
    }
    elsif ($_ =~ /^\.$/) {
	# So we suck everything up to the dot.
	last;
    }
    else {
	#
	# By this point we should have seen the above stuff go by.
	#
	if (!defined($searchwords) || !defined($numhits)) {
	    next;
	}
92
	if ($_ =~ /^(\d*) (.*) (\d*) \"(.*)\"$/) {
93 94
	    my $rank = $1;
	    my $path = $2;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
95 96
	    my $size = $3;
	    my $mod  = $4;
97

98
	    #print "$path $mod\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
99

100
	    $hits{$path}  = [ $rank, $path, $size, $mod ];
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
	    $lines{$path} = [];
	    push(@order, $path);
	}
    }
}
close(SWISH);

#
# Build a query to pass to agrep; this is pathetic, but the next version of
# swish will handle this, so not going to worry; it will do the job.
#
my $query = $searchwords;
$query =~ s/\s+AND\s+/\;/ig;
$query =~ s/\s+OR\s+/\,/ig;
$query =~ s/(?<!\\)\"//ig;

#print "$query\n";

#
# Run agrep on the list of files. 
# 
122
open(AGREP, "agrep -w -i '$query' " . join(" ", keys(%hits)) .
123 124 125 126 127
     " /dev/null 2> /dev/null |") or
    die("*** $0:\n".
	"    Could not invoke agrep with '$query'!\n");

while (<AGREP>) {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
128
#    print $_;
129 130
    if ($_ =~ /^(.*): (.*)$/) {
	push(@{ $lines{$1} }, $2)
131
	    if (exists($lines{$1}) && scalar(@{ $lines{$1} }) < 10);
132 133 134 135
    }
}
close(AGREP);

Leigh B. Stoller's avatar
Leigh B. Stoller committed
136 137 138 139
if (@order) {
    print "Rank, filename, modification date, matching lines<br><br>\n";
    print "<ul>\n";
}
140
foreach my $hit (@order) {
141
    my ($rank, $path, $size, $mod) = @{ $hits{$hit} };
142
    my $url = $path;
143 144

    print "<hr>\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
145
    print "<li> $rank - <b><a href=$url>$path</a></b>, $mod<br>\n";
146 147
    print "<ul>\n";
    foreach my $line (@{ $lines{$hit} }) {
148 149
	$line =~ s/<([^>]*)>//ig;	# Matched <...>
	$line =~ s/<[^>]*//ig;		# Unmatched <...
150 151 152 153 154 155
	#$line =~ s/<.*>//ig; 	
	#$line =~ s/<li>//ig;
	#$line =~ s/<\/li>//ig;

	next
	    if ($line eq "");
156 157 158 159 160
	
	print "<li> $line\n";
    }
    print "</ul>\n";
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
161 162 163 164
if (@order) {
    </ul>
}

165 166 167 168 169
print "<hr><br><br>".
      "Web Search powered by <a href=\"http://swish-e.org/\"><b>Swish-e</b></a>".
      "\n";

exit(0);