Commit 55d1bb6e authored by Leigh Stoller's avatar Leigh Stoller

Mostly this commit is the switch from SVN archives to ZIP archives.

Other stuff leaked in too ...

I did separate out a lot of tbsetup/libArchive into db/Archive, and
whats left in libArchive.pm will eventually move over into the
Template library.

Note that I have dropped archiving of plain experiments; this is not
really owrth it outside the workbench context, and it just wastes
space and makes a lot if stuff painful in the web interface.
parent 529ed6ca
#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2005-2007 University of Utah and the Flux Group.
# All rights reserved.
#
package Archive;
use strict;
use Exporter;
use vars qw(@ISA @EXPORT);
@ISA = "Exporter";
@EXPORT = qw ( );
# Must come after package declaration!
use lib '@prefix@/lib';
use libdb;
use libtestbed;
use Project;
use Experiment;
use Template;
use English;
use File::stat;
use File::Basename;
use Data::Dumper;
use POSIX qw(strftime getgid);
use Time::HiRes qw(gettimeofday);
use overload ('""' => 'Stringify');
# Configure variables
my $TB = "@prefix@";
my $MAINSITE = @TBMAINSITE@;
my $ARCHSUPPORT = @ARCHIVESUPPORT@;
my $USEARCHIVE = ($MAINSITE || $ARCHSUPPORT);
my %ALLOWEDPID = ("testbed" => 1);
# XXX
my $ARCHIVEDIR = "/usr/testbed/exparchive";
my $TESTMODE = @TESTMODE@;
my $TBOPS = "@TBOPSEMAIL@";
my $ELABINELAB = @ELABINELAB@;
my $MD5 = "/sbin/md5";
my $MKDIR = "/bin/mkdir";
my $CHMOD = "/bin/chmod";
my $SUCHOWN = "$TB/sbin/suchown";
my $CHGRP = "/usr/bin/chgrp";
my $TAR = "/usr/bin/tar";
my $RSYNC = "/usr/local/bin/rsync";
my $RM = "/bin/rm";
my $REALPATH = "/bin/realpath";
my $ZIP = "/usr/local/bin/zip";
my $UNZIP = "/usr/local/bin/unzip";
my $ZIPINFO = "/usr/local/bin/zipinfo";
my $DU = "/usr/bin/du";
my $DIFF = "/usr/bin/diff";
my $inittag = 'root';
my $defaultview = 'head';
my $REPO = "ziprepo";
my $TAGVERSION = 1; # 0 is from SVN.
my $debug = 1;
my $zipopt = ($debug ? "-v" : "-q");
my $SHAREROOT = SHAREROOT();
my $SCRATCHROOT = SCRATCHROOT();
my %ROOTS = (PROJROOT() => "proj",
USERROOT() => "users",
$SHAREROOT => "share",
GROUPROOT() => "groups");
if ($SCRATCHROOT) {
$ROOTS{$SCRATCHROOT} = "scratch";
}
# Cache of instances to avoid regenerating them.
my %archives = ();
my $TAGTYPE_USER = "user";
my $TAGTYPE_COMMIT = "commit";
my $TAGTYPE_SAVEPOINT = "savepoint";
my $TAGTYPE_INTERNAL = "internal";
#
# Set umask so that other people in the project can mess with the archive.
#
umask(0002);
# On or off
sub doarchiving($$)
{
my ($pid, $eid) = @_;
return 0
if (! $USEARCHIVE);
my $project = Project->Lookup($pid);
return 0
if (!defined($project));
my $experiment = Experiment->Lookup($pid, $eid);
return 0
if (!defined($experiment));
# The experiment might be the one underlying a template.
my $template = Template->LookupByPidEid($pid, $eid);
return 1
if (($experiment->IsInstance() || defined($template)) &&
(exists($ALLOWEDPID{$pid}) || $project->allow_workbench()));
return 0;
}
# Little helper and debug function.
sub mysystem($)
{
my ($command) = @_;
TBDebugTimeStampsOn();
print STDERR "Running '$command'\n"
if ($debug);
TBDebugTimeStamp($command);
my $retval = system($command);
TBDebugTimeStamp("Done");
return $retval;
}
# Another little helper for scripts that include this library.
sub setdebug($)
{
my ($toggle) = @_;
if ($toggle) {
$debug = $toggle;
$zipopt = "-v";
}
else {
$debug = 0;
$zipopt = "-q";
}
}
#
# Lookup and create a class instance to return.
#
sub Lookup($$)
{
my ($class, $archive_idx) = @_;
# Look in cache first
return $archives{$archive_idx}
if (exists($archives{$archive_idx}));
return undef
if (! ($archive_idx =~ /^\d+$/));
my $query_result =
DBQueryWarn("select * from archives where idx='$archive_idx'");
return undef
if (!$query_result || !$query_result->numrows);
my $self = {};
$self->{"DBROW"} = $query_result->fetchrow_hashref();
$self->{"VIEWS"} = {};
bless($self, $class);
# Add to cache.
$archives{$archive_idx} = $self;
return $self;
}
# accessors
sub field($$) { return ((! ref($_[0])) ? -1 : $_[0]->{'DBROW'}->{$_[1]}); }
sub idx($) { return field($_[0], 'idx'); }
sub unix_gid($) { return field($_[0], 'unix_gid'); }
sub directory($) { return field($_[0], 'directory'); }
sub date_created($) { return field($_[0], 'date_created'); }
sub archived($) { return field($_[0], 'archived'); }
sub date_archived($) { return field($_[0], 'date_archived'); }
# For views.
sub View($$)
{
my ($self, $view) = @_;
return undef
if (! ref($self));
my $idx = $self->idx();
my $query_result =
DBQueryWarn("select * from archive_views ".
"where archive_idx='$idx' and view='$view'");
return undef
if (!$query_result || !$query_result->numrows);
return $query_result->fetchrow_hashref();
}
# For revisions
sub Revision($$$)
{
my ($self, $view, $tag) = @_;
return undef
if (! ref($self));
my $idx = $self->idx();
my $query_result =
DBQueryWarn("select * from archive_revisions ".
"where archive_idx='$idx' and ".
" view='$view' and tag='$tag'");
return undef
if (!$query_result || !$query_result->numrows);
return $query_result->fetchrow_hashref();
}
#
# Refresh a class instance by reloading from the DB.
#
sub Refresh($)
{
my ($self) = @_;
return -1
if (! ref($self));
my $idx = $self->idx();
my $query_result =
DBQueryWarn("select * from archives where idx='$idx'");
return -1
if (!$query_result || !$query_result->numrows);
$self->{"DBROW"} = $query_result->fetchrow_hashref();
$self->{"VIEWS"} = {};
return 0;
}
#
# Stringify for output.
#
sub Stringify($)
{
my ($self) = @_;
my $idx = $self->idx();
return "[Archive: $idx]";
}
#
# Create a new archive. Returns -1 if any error. Otherwise return
# the new record index.
#
sub Create($;$$)
{
my ($class, $view, $unix_gid) = @_;
my $idx;
my $dir;
$view = $defaultview
if (!defined($view));
$unix_gid = POSIX::getgid()
if (!defined($unix_gid));
#
# Need to create the directory for it, once we have the index.
#
my $query_result =
DBQueryWarn("insert into archives set ".
" idx=NULL, date_created=UNIX_TIMESTAMP(now())");
return undef
if (!$query_result);
$idx = $query_result->insertid;
$dir = "$ARCHIVEDIR/$idx";
#
# Create the directory and store the absolute path into the new record.
# This should probably be a privledged operation at some point.
#
mysystem("$MKDIR $dir") == 0 or goto bad;
mysystem("$CHMOD 775 $dir") == 0 or goto bad;
if (defined($unix_gid)) {
mysystem("$CHGRP $unix_gid $dir") == 0 or goto bad;
}
DBQueryWarn("update archives set directory='$dir' where idx='$idx'")
or goto bad;
#
# Make subdirs. One to hold the control tree, and the other
# to hold currently checked out versions of the tree. Lastly, we
# need a place to copyin files before they are added to the repo.
#
my $repodir = "$dir/$REPO";
my $checkouts = "$dir/checkouts";
my $checkins = "$dir/checkins";
if (! mkdir("$repodir", 0777)) {
print STDERR "ArchiveCreate: Could not mkdir $repodir: $!\n";
goto bad;
}
if (! chmod(0777, "$repodir")) {
print STDERR "ArchiveCreate: Could not chmod $repodir: $!\n";
goto bad;
}
if (! mkdir("$checkouts", 0777)) {
print STDERR "ArchiveCreate: Could not mkdir $checkouts: $!\n";
goto bad;
}
if (! chmod(0777, "$checkouts")) {
print STDERR "ArchiveCreate: Could not chmod $checkouts: $!\n";
goto bad;
}
if (! mkdir("$checkins", 0777)) {
print STDERR "ArchiveCreate: Could not mkdir $checkins: $!\n";
goto bad;
}
if (! chmod(0777, "$checkins")) {
print STDERR "ArchiveCreate: Could not chmod $checkins: $!\n";
goto bad;
}
if (! mkdir("$checkins/$view", 0777)) {
print STDERR "ArchiveCreate: Could not mkdir $checkins/$view: $!\n";
goto bad;
}
if (! chmod(0777, "$checkins/$view")) {
print STDERR "ArchiveCreate: Could not chmod $checkins/$view: $!\n";
goto bad;
}
if (! mkdir("$repodir/$view", 0777)) {
print STDERR "ArchiveCreate: Could not mkdir $checkins/$view: $!\n";
goto bad;
}
if (! chmod(0777, "$repodir/$view")) {
print STDERR "ArchiveCreate: Could not chmod $checkins/$view: $!\n";
goto bad;
}
# Now enter the default view (okay, branch) of this archive.
DBQueryWarn("insert into archive_views set ".
" current_tag=NULL, archive_idx='$idx', view='$view', ".
" date_created=UNIX_TIMESTAMP(now())") or goto bad;
return Archive->Lookup($idx);
bad:
# mysystem("$RM -rf $dir")
# if (defined($dir));
if (defined($idx)) {
DBQueryFatal("delete from archive_views ".
"where view='$view' and archive_idx='$idx'");
DBQueryFatal("delete from archives where idx='$idx'");
}
return undef;
}
#
# Helper function for below; Checks that a path is safe and legal.
#
sub ValidatePath($)
{
my ($ppath) = @_;
# We get a pointer so we can return the new path.
my $pathname = $$ppath;
my $rootdir;
# Taint check path before handing off to shell below.
if ($pathname =~ /^([-\w\/\.\+\@,~]+)$/) {
$pathname = $1;
}
else {
print STDERR "*** ValidatePath: illegal characters in '$pathname'\n";
return 1;
}
if (! -e $pathname || ! -r $pathname) {
print STDERR "*** ValidatePath: $pathname cannot be read!\n";
return 1;
}
#
# Use realpath to check that the path does not contain links to
# files outside the directory space the user is allowed to access.
# We must taint check the result to keep everyone happy.
#
my $realpath = `$REALPATH $pathname`;
if ($realpath =~ /^([-\w\/\.\+\@,~]+)$/) {
$realpath = $1;
}
else {
print STDERR "*** ValidatePath: ".
"Bad data returned by realpath: $realpath\n";
return -1;
}
#
# Strip leading /dir from the pathname. We want a relative path to
# the rootdir so we can copy it in.
#
if ($realpath =~ /^[\/]+(\w+)\/(.+)$/) {
$rootdir = "/$1";
$pathname = $2;
}
else {
print STDERR "*** ValidatePath: ".
"Illegal characters in pathname: $realpath\n";
return -1;
}
#
# The file must reside in one of the Emulab "root" filesystems.
#
if (! exists($ROOTS{$rootdir})) {
print STDERR "*** ValidatePath: ".
"$realpath does not resolve to an allowed directory!\n";
return -1;
}
$$ppath = $realpath;
return 0;
}
#
# Add a file to an archive. Returns -1 if any error. Otherwise return 0.
# All this does is copy the file (and its directory structure) into the
# temporary store. Later, after all the files are in the tree, must
# commit it to the repo.
#
sub Add($$;$$$)
{
my ($self, $pathname, $view, $exact, $special) = @_;
return -1
if (! ref($self));
$view = $defaultview
if (!defined($view));
$exact = 0
if (!defined($exact));
$special = 0
if (!defined($special));
# This returns a taint checked value in $pathname.
if (ValidatePath(\$pathname) != 0) {
print STDERR "ArchiveAdd: Could not validate pathname $pathname\n";
return -1;
}
#
# Strip leading /dir from the pathname, we need it below.
#
my ($rootdir, $sourcedir, $sourcefile);
my $rsyncopt = "";
if ($special) {
#
# What does this do?
# Basically, we copy the last part (directory) to / of the checkin.
# eg: cp /proj/pid/exp/eid... /exp of the checkins.
# This avoids pid/eid tokens in the archive.
#
# Last part of path must be a directory.
#
if (! -d $pathname) {
print STDERR "ArchiveAdd: Must be a directory: $pathname\n";
return -1;
}
$rootdir = "exp";
$sourcedir = $pathname;
$sourcefile = "./";
}
elsif ($pathname =~ /^[\/]+(\w+)\/([-\w\/\.\+\@,~]+)$/) {
$rootdir = $1;
$sourcedir = $1;
$sourcefile = $2;
$rsyncopt = "-R";
}
else {
print STDERR "ArchiveAdd: Illegal characters in pathname $pathname\n";
return -1;
}
#
# See if the archive exists and if it does, get the pathname to it.
#
my $directory = $self->directory();
if (! -d $directory || ! -w $directory) {
print STDERR "ArchiveAdd: $directory cannot be written!\n";
return -1;
}
my $checkin = "$directory/checkins/$view";
#
# If the target rootdir exists and is not writable by the current
# user, then run a chown over the whole subdir. This will avoid
# avoid permission problems later during the rsync/tar ops below.
#
if (-e "$checkin/$rootdir" && ! -o "$checkin/$rootdir") {
mysystem("$SUCHOWN $checkin/$rootdir") == 0 or return -1
}
#
# Copy the file in. We use tar on individual files (to retain the
# directory structure and mode bits, etc). On a directory, use either
# tar or rsync, depending on whether we want an exact copy (removing
# files in the target that are not present in the source).
#
if (! -e "$checkin/$rootdir") {
mysystem("$MKDIR $checkin/$rootdir") == 0 or return -1
}
if (-f "/${sourcedir}/${sourcefile}" || !$exact) {
mysystem("$TAR cf - -C /$sourcedir $sourcefile | ".
"$TAR xf - -U -C $checkin/$rootdir");
mysystem("$CHMOD 775 $checkin/$rootdir/$sourcefile");
}
else {
mysystem("cd /$sourcedir; ".
"$RSYNC $rsyncopt -rtgoDlz ".
" --delete ${sourcefile} $checkin/$rootdir");
}
if ($?) {
print STDERR "ArchiveAdd: Could not copy in $pathname\n";
return -1;
}
return 0;
}
#
# Do a diff of the checkin directory against provided directory to
# determine if anything has changed.
#
sub Diff($$;$)
{
my ($self, $pathname, $view) = @_;
return -1
if (! ref($self));
$view = $defaultview
if (!defined($view));
# This returns a taint checked value in $pathname.
if (ValidatePath(\$pathname) != 0) {
print STDERR "ArchiveDiff: Could not validate pathname $pathname\n";
return -1;
}
#
# See if the archive exists and if it does, get the pathname to it.
#
my $directory = $self->directory();
if (! -d $directory || ! -w $directory) {
print STDERR "ArchiveAdd: $directory cannot be written!\n";
return -1;
}
my $checkin = "$directory/checkins/$view";
#
# If the target rootdir exists and is not writable by the current
# user, then run a chown over the whole subdir. This will avoid
# avoid permission problems later during the rsync/tar ops below.
#
if (-e "$checkin" && ! -o "$checkin") {
mysystem("$SUCHOWN $checkin") == 0 or return -1
}
my $retval = mysystem("$DIFF -q -r $checkin $pathname");
return 0
if ($retval == 0);
return 1
if ($retval >> 8 == 1);
# A real problem.
return -1;
}
#
# Commit the current contents of the temporary store to the archive.
# Returns -1 if any error. Otherwise return 0.
#
sub Commit($;$$$$)
{
my ($self, $newtag, $mfile, $view, $altdir) = @_;
my $noactivity = 0;
my $cwd;
return -1
if (! ref($self));
my $archive_idx = $self->idx();
$view = $defaultview
if (!defined($view));
my $View = $self->View($view);
return -1
if (!defined($View));
return -1
if (TBScriptLock("archive_${archive_idx}_${view}", 0, 600) !=
TBSCRIPTLOCK_OKAY());
#
# See if the archive exists and if it does, get the pathname to it.
#
my $directory = $self->directory();
if (! -d $directory || ! -w $directory) {
print STDERR "ArchiveCommit: $directory cannot be written!\n";
goto bad;
}
my $repodir = "$directory/$REPO/$view";
my $checkin = (defined($altdir) ? $altdir : "$directory/checkins/$view");
# Need to remember where we came from!
chomp($cwd = `pwd`);
# Must do the taint check too.
if ($cwd =~ /^([-\w\.\/]+)$/) {
$cwd = $1;
}
else {
print STDERR "ArchiveCommit: Bad data in $cwd!\n";
goto bad;
}
if (! chdir("$checkin")) {
print STDERR "ArchiveCommit: Cannot chdir to $checkin!\n";
goto bad;
}
# Get the revision and user tag for the archive/view.
my $current_tag = $View->{'current_tag'};
my $oldprev_tag = $View->{'previous_tag'};
my $revision = $View->{'revision'};
# args for new records.
my $parent_revision = (defined($revision) ? "'$revision'" : "NULL");
my $previous_tag = (defined($current_tag) ? "'$current_tag'" : "NULL");
#
# Message can come from a file.
#
my $description = "'Commit Archive'";
if (defined($mfile)) {
if (! -r $mfile) {
print STDERR "*** ArchiveCommit: $mfile cannot be read!\n";
goto bad;
}
$description = DBQuoteSpecial(`cat $mfile`);
}
#
# Create a new revision record so I can get the revision number.
# Need to update the tag later.
#
my $query_result =
DBQueryWarn("insert into archive_revisions set revision=NULL, ".
" archive_idx='$archive_idx', view='$view', ".
" parent_revision=$parent_revision, ".
" description=$description, ".
" date_created=UNIX_TIMESTAMP(now())") or goto bad;
my $newrevision = $query_result->insertid;
my $zipfile = "$repodir/$newrevision";
$newtag = (defined($newtag) ? $newtag : "R${newrevision}");
#
# Okay, zip up the checkin directory. This becomes a new "revision"
#
mysystem("$ZIP $zipopt -y -r $zipfile .") == 0 or goto bad;
#
# Enter new tag and revision now that we got it all.
#
DBQueryWarn("update archive_views set ".
" revision='$newrevision', ".
" current_tag='$newtag', ".
" previous_tag=$previous_tag ".
"where archive_idx='$archive_idx' and view='$view'")
or goto bad;
DBQueryWarn("update archive_revisions set ".
" tag='$newtag' ".
"where archive_idx='$archive_idx' and revision='$newrevision'")
or goto bad;
# Backwards compat
DBQueryWarn("insert into archive_tags set idx=NULL, version=$TAGVERSION, ".
" tag='$newtag', view='$view', ".
" archive_idx='$archive_idx', ".
" tagtype='$TAGTYPE_COMMIT', ".
" date_created=UNIX_TIMESTAMP(now())")
or goto bad;
okay:
TBScriptUnlock();
chdir($cwd)
if (defined($cwd));
return 0;
bad:
if (defined($newrevision)) {
# args for new records.
$revision = (defined($revision) ? "'$revision'" : "NULL");
$current_tag = (defined($current_tag) ? "'$current_tag'" : "NULL");
$previous_tag = (defined($oldprev_tag) ? "'$oldprev_tag'" : "NULL");
DBQueryWarn("update archive_views set ".
" revision=$revision, ".
" current_tag=$current_tag, ".
" previous_tag=$previous_tag ".
"where archive_idx='$archive_idx' and view='$view'");
DBQueryFatal("update archive_revisions set ".
" tag=$current_tag ".
"where archive_idx='$archive_idx' ".
" and revision='$newrevision'");
DBQueryWarn("delete from archive_revisions ".
"where revision='$newrevision' and ".
" archive_idx='$archive_idx'");
}
TBScriptUnlock();
chdir($cwd)