Commit ac1c62df authored by Leigh Stoller's avatar Leigh Stoller

Replace old and tired webglimpse with new and modern index/search

software called swish-e since webglimpse was broken, did not install
out of the box, no longer being maintained by the authors, and
generally a pain in the ass.

swish-e is probably overkill, but it works and can be configured and
run from a single configuration file (swish.conf.in).

I've removed the advanced search form and replaced with simple form.
I can bring back advanced stuff if needed, although better to wait
until the swish-e port is upgraded and there is better backend support
for generating the html output.

There is a backend script called websearch that is called from the php
search page. It invokes swish and spits out html that is amenable to
being embedded in our existing page layout (we were not able to do
that with webglimpse).
parent 930a8ef4
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# All rights reserved.
#
SRCDIR = @srcdir@
......@@ -20,7 +20,7 @@ include $(OBJDIR)/Makeconf
# Force dependencies to make sure configure regenerates if the .in file
# is changed.
#
all: defs.php3 dbdefs.php3
all: defs.php3 dbdefs.php3 swish.conf websearch
include $(TESTBED_SRCDIR)/GNUmakerules
......@@ -114,16 +114,12 @@ install: $(addprefix $(INSTALL_WWWDIR)/, $(ALLFILES)) \
$(addprefix $(INSTALL_WWWDIR)/downloads/, $(ALLDOWNLOADS)) \
$(addprefix $(INSTALL_WWWDIR)/buildui/, $(ALLBUI)) \
$(addprefix $(INSTALL_WWWDIR)/autostatus-icons/, $(ALLICONS)) \
$(addprefix $(INSTALL_LIBEXECDIR)/, websearch) \
$(addprefix $(INSTALL_WWWDIR)/cvsweb/, $(ALLCVSWEB))
$(INSTALL_DATA) $(TESTBED_SRCDIR)/doc/ChangeLog \
$(INSTALL_WWWDIR)/doc/ChangeLog.txt
(swish-e -c ./swish.conf)
cd $(INSTALL_WWWDIR) && \
if test -d ../webglimpse; \
then \
rm -f webglimpse; \
ln -s ../webglimpse webglimpse; \
(cd webglimpse && wgreindex -q); \
fi; \
rm -f tbdb.html; \
ln -s index.html tbdb.html; \
rm -f icons; \
......
......@@ -201,7 +201,7 @@ function WRITEPLABBOTTOMBAR() {
<center>
<br>
<font size=-1>
<form method=get action='/cgi-bin/webglimpse/usr/testbed/webglimpse'>
<form method=get action=search.php3>
[ <a href='$TBDOCBASE/doc.php3'>
Documentation</a> : <input name=query size = 15/>
<input type=submit style='font-size:10px;' value='Search' /> ]
......@@ -259,7 +259,7 @@ function WRITESIDEBAR() {
}
}
echo "<FORM method=get ACTION=\"/cgi-bin/webglimpse/usr/testbed/webglimpse\">\n";
echo "<FORM method=get ACTION=search.php3>\n";
?>
<table class="menu" width=210 cellpadding="0" cellspacing="0">
<tr><td class="menuheader"><b>Information</b></td></tr>
......@@ -317,7 +317,6 @@ function WRITESIDEBAR() {
<tr><td class="menuoptst"><b>Search Documentation:</b></td></tr>
<tr><td class="menuoptsb"><input name=query size = 15/>
<input type=submit style="font-size:10px;" value="Go" /><br>
[ <a href="<?php echo "$TBDOCBASE/search.php3"; ?>">Advanced Search</a> ]
</td></tr>
<?php # BACK TO PHP
echo "</table>\n";
......
<?php
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2002 University of Utah and the Flux Group.
# Copyright (c) 2000-2002, 2004 University of Utah and the Flux Group.
# All rights reserved.
#
require("defs.php3");
......@@ -10,78 +10,89 @@ require("defs.php3");
# Standard Testbed Header
#
PAGEHEADER("Search Emulab Documentation");
?>
<center>
<table border=0 cellpadding=4 cellspacing=0 class="nogrid">
<FORM method=get ACTION="/cgi-bin/webglimpse/usr/testbed/webglimpse">
#
# We no longer support an advanced search option. We might bring it back
# someday.
#
function SPITFORM($query)
{
echo "<table align=center border=1>
<form action=search.php3 method=get>\n";
<tr>
<th>
<font size=+3>
<a href="http://glimpse.cs.arizona.edu/webglimpse">WebGlimpse</a>
Search</font>
</th>
</tr>
$query = htmlspecialchars($query);
<tr><td class="paddedcell">
String to search for: <INPUT NAME=query size=30>
<INPUT TYPE=submit VALUE=Submit>
</td></tr>
<tr><td class="paddedcell">
<INPUT NAME=case TYPE=checkbox>Case&nbsp;sensitive
<!-- SPACES -->&nbsp;&nbsp;&nbsp;
<INPUT NAME=whole TYPE=checkbox>Partial&nbsp;match
<!-- SPACES -->&nbsp;&nbsp;&nbsp;
<INPUT NAME=lines TYPE=checkbox>Jump&nbsp;to&nbsp;line
</td></tr>
<tr><td class="paddedcell">
<SELECT NAME=errors align=right>
<OPTION>0
<OPTION>1
<OPTION>2
</SELECT>
misspellings&nbsp;allowed
</td></tr>
<tr><td class="paddedcell">
Return only files modified within the last <INPUT NAME=age size=5>
days.
</td></tr>
<tr><td class="paddedcell">
Maximum number of files returned:
<SELECT NAME=maxfiles>
<OPTION>10
<OPTION selected>50
<OPTION>100
<OPTION>1000
</SELECT>
</td></tr>
<tr><td class="paddedcell">
Maximum number of matches per file returned:
<SELECT NAME=maxlines>
<OPTION>10
<OPTION selected>30
<OPTION>50
<OPTION>500
</SELECT>
<br>
</td></tr>
<tr><td class="paddedcell">
Maximum number of characters output per file:
<INPUT NAME="maxchars" VALUE=10000>
</td></tr>
<tr><td class="paddedcell">
<font size=-1>
<a href="http://glimpse.cs.arizona.edu">Glimpse</a> and
<a href="http://glimpse.cs.arizona.edu/webglimpse">WebGlimpse</a>,
Copyright &copy; 1996, University of Arizona
</font>
</td>
</tr>
</form>
</table>
#
# Just the query please.
#
echo "<tr>
<td class=left>
<input type=text name=query value=\"$query\"
size=25 maxlength=100>
</td>
</tr>\n";
echo "<tr>
<td align=center>
<b><input type=submit name=submit value='Submit Query'></b>
</td>
</tr>\n";
echo "</form>
</table><br>\n";
}
#
# No query yet (first visit, or an empty submit); just show the form.
#
if (!isset($query) || $query == "") {
    SPITFORM("");
    PAGEFOOTER();
    return;
}

#
# Sanitize for the shell. Be fancy later.
#
# Only word characters, dashes, spaces and double quotes are accepted. The
# D modifier makes $ match only at the very end of the string; without it
# a query ending in a newline would slip through the check.
#
if (!preg_match('/^[-\w "]+$/D', $query)) {
    SPITFORM("");
    PAGEFOOTER();
    return;
}
#
# Run the query. We get back html we can just spit out.
#
#
# A cleanup function to keep the child from becoming a zombie, since
# the script is terminated, but the children are left to roam.
#
$fp = 0;
function CLEANUP()
{
    global $fp;

    #
    # Shutdown handler. The pipe only needs closing here when it is still
    # open and the client connection was aborted; in every other case
    # there is nothing left to do but exit.
    #
    if ($fp && connection_aborted()) {
        pclose($fp);
    }
    exit();
}
#
# Keep running if the user goes away, so that the shutdown handler gets a
# chance to close the pipe to the child.
#
ignore_user_abort(1);
register_shutdown_function("CLEANUP");

SPITFORM($query);
flush();

#
# Let escapeshellarg() do the quoting rather than wrapping the query in
# single quotes by hand, so the command line stays safe even if the
# validation above is ever relaxed.
#
$command = "$TBSUEXEC_PATH nobody nobody websearch " . escapeshellarg($query);

if ($fp = popen($command, "r")) {
    # Relay the HTML from the backend as it arrives.
    while (!feof($fp)) {
        $string = fgets($fp, 1024);
        echo "$string";
        flush();
    }
    pclose($fp);
    # Tell CLEANUP() that the pipe is already closed.
    $fp = 0;
}
else {
    TBERROR("Query failed: $query", 0);
}
<?php
#
# Standard Testbed Footer
#
......
IndexFile @prefix@/www/site.index
IndexDir @prefix@/www
ReplaceRules remove @prefix@/www/
DefaultContents TXT
IndexContents HTML .htm .html
IndexContents TXT .txt .text
# Don't index anything under these directories
FileRules dirname contains /webglimpse
FileRules dirname contains /images
FileRules dirname contains /thumbs
FileRules dirname contains /dev/
FileRules dirname contains /downloads
FileRules dirname contains /distributions
FileRules dirname contains /bugtrack
# But only index the .html files
IndexOnly .html
# Show basic info while indexing
IndexReport 1
MetaNames swishtitle swishdocpath swishdescription
PropertyNames swishdescription
StoreDescription HTML <body>
StoreDescription TXT 10000
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
use Getopt::Std;
use File::Basename;
#
# Search our web index. Spits out HTML. Sorry, easier that way!
#
#
# Print the usage summary on STDERR and exit with a failure status.
#
sub usage {
    my $message = "Usage: websearch ...\n";

    print STDERR $message;
    exit(-1);
}
my $optlist = "";

#
# Configure variables
#
my $TB    = "@prefix@";
my $WWW   = "$TB/www";
my $index = "$TB/www/site.index";
my $swish = "swish-e";

#
# State collected while the search runs.
#
my ($searchstring, $searchwords, $numhits);
my %hits  = ();    # indexed by document.
my %lines = ();    # indexed by document.
my @order = ();

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

# Flush output as soon as it is written
$| = 1;

#
# Very simple; one argument is the string to search for.
#
unless (@ARGV) {
    usage();
}
$searchstring = $ARGV[0];

#
# Do this from the webdir.
#
if (! chdir($WWW)) {
    die("*** $0:\n".
        " Could not chdir to $WWW!\n");
}
#
# Run swish. The next version of swish will include a perl API module, so
# we will be able to avoid this extra call. For now, we are stuck calling
# swish-e.
#
# The command is given to open() as a list, so no shell is involved and
# the search string reaches swish-e as one argument exactly as we got it,
# whatever characters it contains.
#
open(my $swishfh, "-|", $swish, "-f", $index,
     "-p", "swishlastmodified", "swishdescription",
     "-w", $searchstring) or
    die("*** $0:\n".
        " Could not invoke swish-e on '$searchstring'!\n");

#
# Read back results.
#
while (my $result = <$swishfh>) {
    #
    # First few lines are special.
    #
    if ($result =~ /^\#/) {
        if ($result =~ /^\# Search words: (.*)$/) {
            $searchwords = $1;
        }
        elsif ($result =~ /^\# Number of hits: (\d*)$/) {
            $numhits = $1;
        }
    }
    elsif ($result =~ /^\.$/) {
        # So we suck everything up to the dot.
        last;
    }
    else {
        #
        # By this point we should have seen the above stuff go by.
        #
        if (!defined($searchwords) || !defined($numhits)) {
            next;
        }

        #
        # One hit per line: rank, path, "file", size, then the two
        # properties requested with -p above, each in double quotes.
        #
        if ($result =~ /^(\d*) (.*) \"(.*)\" (\d*) \"(.*)\" \"(.*)\"$/) {
            my $rank = $1;
            my $path = $2;
            my $file = $3;
            my $size = $4;
            my $mod  = $5;
            my $text = $6;

            $hits{$path}  = [ $rank, $path, $file, $size, $mod,
                              substr($text, 0, 300) ];
            $lines{$path} = [];
            push(@order, $path);
        }
    }
}
close($swishfh);
#
# Build a query to pass to agrep; this is pathetic, but the next version of
# swish will handle this, so not going to worry; it will do the job.
#
# AND becomes ";", OR becomes ",", and unescaped double quotes are dropped.
# $searchwords is undefined when swish reported nothing, so fall back to
# an empty query in that case.
#
my $query = (defined($searchwords) ? $searchwords : "");
$query =~ s/\s+AND\s+/\;/ig;
$query =~ s/\s+OR\s+/\,/ig;
$query =~ s/(?<!\\)\"//ig;
#print "$query\n";

#
# Run agrep on the list of files. Nothing to do when there were no hits.
#
my @hitfiles = keys(%hits);

if (@hitfiles) {
    #
    # Fork with a pipe from the child and exec agrep directly from an
    # argument list, so that neither the query nor the file names are
    # ever interpreted by a shell.
    #
    my $pid = open(my $agrepfh, "-|");

    die("*** $0:\n".
        " Could not invoke agrep with '$query'!\n")
        if (!defined($pid));

    if ($pid == 0) {
        # Child. Discard agrep's complaints, then become agrep. The extra
        # /dev/null argument is kept from the original command line.
        open(STDERR, ">", "/dev/null");
        exec("agrep", "-i", $query, @hitfiles, "/dev/null");
        exit(127);
    }

    while (my $match = <$agrepfh>) {
        #
        # Output is "path: text". Split at the first ": " so that text
        # which itself contains ": " stays attached to the right file,
        # and ignore anything that is not one of our hits.
        #
        if ($match =~ /^(.*?): (.*)$/) {
            my ($path, $text) = ($1, $2);

            next
                if (!exists($lines{$path}));

            # Keep at most ten matching lines per document.
            push(@{ $lines{$path} }, $text)
                if (scalar(@{ $lines{$path} }) < 10);
        }
    }
    close($agrepfh);
}
#
# Spit out the results as HTML, one section per hit, in the order the
# hits were recorded. Each section is a link to the document followed by
# a list of the matching lines collected for it.
#
print "Rank, filename, modification date, first part of file<br>\n";

for my $doc (@order) {
    # Size and description are stored with the hit but not displayed.
    my ($rank, $docpath, $file, undef, $mod) = @{ $hits{$doc} };
    my $dir = dirname($docpath);

    print "<hr>\n",
          "$rank - <b><a href=$dir/docwrapper.php3?docname=$file>",
          "$dir/$file</a></b>, $mod<br>\n",
          "<ul>\n";

    for my $match (@{ $lines{$doc} }) {
        # The matched text may carry its own list markup; drop it so it
        # does not fight with the <li> we add.
        $match =~ s/<li>//ig;
        $match =~ s/<\/li>//ig;
        print "<li> $match\n";
    }

    print "</ul>\n";
}

print "<hr><br><br>",
      "Web Search powered by <a href=\"http://swish-e.org/\"><b>Swish-e</b></a>",
      "\n";
exit(0);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment