#!/usr/bin/perl -w # # Copyright (c) 2008-2016 University of Utah and the Flux Group. # # {{{GENIPUBLIC-LICENSE # # GENI Public License # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and/or hardware specification (the "Work") to # deal in the Work without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Work, and to permit persons to whom the Work # is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Work. # # THE WORK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE WORK OR THE USE OR OTHER DEALINGS # IN THE WORK. # # }}} # use strict; use English; use Getopt::Long; use XML::Simple; use File::Temp qw(tempfile :mktemp tmpnam :POSIX); use Data::Dumper; use Cwd qw(realpath); # # Create a quick VM. # sub usage() { print "Usage: quickvm [-u uuid] [--site site:1=aggregate ...] \n"; exit(1); } my @optlist = ('d', 'v', 'u=s', 'a=s', 'S', 'k=s', 'i'); my $debug = 0; my $verbose = 1; my $ignorefailures = 0; my $xmlfile; my $webtask; my $webtask_id; my $localuser = 0; my $usestitcher= 0; my $quickuuid; my $this_user; my $xmlparse; my $instance; my $privkeyfile; my $slice; my $sitemap; my $usetracker = 0; my @aggregate_urns = (); # Protos sub fatal($); sub UserError($); sub SnapShot($$$); sub CreateDatasetCreds($$$$$); sub CreateSlivers(); sub RunStitcher(); # # Configure variables # my $TB = "@prefix@"; my $TBOPS = "@TBOPSEMAIL@"; my $TBLOGS = "@TBLOGSEMAIL@"; my $OURDOMAIN = "@OURDOMAIN@"; my $MAINSITE = @TBMAINSITE@; my $PGENIDOMAIN = "@PROTOGENI_DOMAIN@"; my $PROTOGENI_URL = "@PROTOGENI_URL@"; my $SACERT = "$TB/etc/genisa.pem"; my $CMCERT = "$TB/etc/genicm.pem"; my $SSHKEYGEN = "/usr/bin/ssh-keygen"; my $SSHSETUP = "$TB/sbin/aptssh-setup"; my $ADDPUBKEY = "$TB/sbin/addpubkey"; my $UPDATEGENIUSER= "$TB/sbin/protogeni/updategeniuser"; my $STITCHER = "$TB/gcf/src/stitcher.py"; my $OPENSSL = "/usr/bin/openssl"; my $MANAGEINSTANCE= "$TB/bin/manage_instance"; my $DEFAULT_URN = "urn:publicid:IDN+${OURDOMAIN}+authority+cm"; my $GUEST_URN = "urn:publicid:IDN+apt.emulab.net+authority+cm"; my $default_aggregate_urn = $DEFAULT_URN; # un-taint path $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin'; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; # # Turn off line buffering on output # $| = 1; # Load the Testbed support stuff. use lib "@prefix@/lib"; use EmulabConstants; use libtestbed; use libaudit; use APT_Profile; use APT_Instance; use APT_Geni; use APT_Dataset; use APT_Aggregate; use User; use Project; use Group; use OSinfo; use emutil; use libEmulab; use GeniDB; use GeniUser; use GeniCertificate; use GeniCredential; use GeniSlice; use GeniAuthority; use GeniHRN; use Genixmlrpc; use GeniResponse; use GeniXML; use WebTask; use Logfile; use EmulabFeatures; # # Parse command arguments. Once we return from getopts, all that should be # left are the required arguments. # Getopt::Long::Configure("no_ignore_case"); my %options = (); if (! GetOptions(\%options, @optlist, "site=s%" => \$sitemap)) { usage(); } if (defined($options{"a"})) { $default_aggregate_urn = $options{"a"}; } if (defined($options{"k"})) { $privkeyfile = $options{"k"}; } if (defined($options{"d"})) { $debug = 1; } if (defined($options{"i"})) { $ignorefailures = 1; } if (defined($options{"v"})) { $verbose = 1; } if (defined($options{"S"})) { $usestitcher = 1; } if (defined($options{"u"})) { $quickuuid = $options{"u"}; } if (@ARGV < 1) { usage(); } $xmlfile = shift(@ARGV); # # Check the filename when invoked from the web interface; must be a # file in /tmp. # if (getpwuid($UID) ne "nobody") { $this_user = User->ThisUser(); if (! defined($this_user)) { fatal("You ($UID) do not exist!"); } $localuser = 1; } if (!defined($this_user) || !$this_user->IsAdmin()) { if ($xmlfile =~ /^([-\w\.\/]+)$/) { $xmlfile = $1; } else { fatal("Bad data in pathname: $xmlfile"); } # Use realpath to resolve any symlinks. my $translated = realpath($xmlfile); if ($translated =~ /^(\/tmp\/[-\w\.\/]+)$/) { $xmlfile = $1; } else { fatal("Bad data in translated pathname: $xmlfile"); } } # Email record. if (! $debug) { my $opts = LIBAUDIT_LOGTBLOGS()|LIBAUDIT_LOGONLY(); AuditStart(0, undef, $opts); # # Once we determine the project, we can add the appropriate log CC # } # Connect to the SA DB. DBConnect(GENISA_DBNAME()); # # Load the SA cert to act as caller context. # my $sa_certificate = GeniCertificate->LoadFromFile($SACERT); if (!defined($sa_certificate)) { fatal("Could not load certificate from $SACERT\n"); } my $sa_authority = GeniAuthority->Lookup($sa_certificate->urn()); if (!defined($sa_authority)) { fatal("Could not load SA authority object"); } # Guest users default to the APT cluster on the Mothership. if ($MAINSITE && !$localuser) { $default_aggregate_urn = $GUEST_URN; } # # We use the normal XMLRPC route, so need a context. # my $context = Genixmlrpc->Context($sa_certificate); if (!defined($context)) { fatal("Could not create context to talk to CM"); } Genixmlrpc->SetContext($context); # # Must wrap the parser in eval since it exits on error. # $xmlparse = eval { XMLin($xmlfile, VarAttr => 'name', ContentKey => '-content', SuppressEmpty => undef); }; fatal($@) if ($@); # # Make sure all the required arguments were provided. # foreach my $key ("username", "email", "profile", "portal") { fatal("Missing required attribute '$key'") if (! (exists($xmlparse->{'attribute'}->{"$key"}) && defined($xmlparse->{'attribute'}->{"$key"}) && $xmlparse->{'attribute'}->{"$key"} ne "")); } # # Gather up args and sanity check. # my ($value, $user_urn, $user_uid, $user_hrn, $user_email, $project, $pid, $sshkey, $profile, $profileid, $version, $rspecstr, $errmsg, $userslice_id, $portal); # This is used internally to determine which portal was used. $portal = $xmlparse->{'attribute'}->{"portal"}->{'value'}; # # Username and email has to be acceptable to Emulab user system. # $value = $xmlparse->{'attribute'}->{"username"}->{'value'}; if (! TBcheck_dbslot($value, "users", "uid", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { fatal("Illegal username: $value - " . TBFieldErrorString()); } $user_uid = $value; $user_urn = GeniHRN::Generate("$OURDOMAIN", "user", $user_uid); $user_hrn = "${PGENIDOMAIN}.${user_uid}"; $value = $xmlparse->{'attribute'}->{"email"}->{'value'}; if (! TBcheck_dbslot($value, "users", "usr_email", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { fatal("Illegal email address: $value"); } $user_email = $value; # # The instance name is optional, we will make one up if not supplied. # if (exists($xmlparse->{'attribute'}->{"instance_name"}) && $xmlparse->{'attribute'}->{"instance_name"}->{'value'} ne "") { $value = $xmlparse->{'attribute'}->{"instance_name"}->{'value'}; if (! TBcheck_dbslot($value, "experiments", "eid", TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) { fatal("Illegal instance name: $value"); } $userslice_id = $value; } # # Profile. # # This is a safe lookup. $value = $xmlparse->{'attribute'}->{"profile"}->{'value'}; $profile = APT_Profile->Lookup($value); if (!defined($profile)) { fatal("No such profile: $value"); } $profileid = $profile->profileid(); $version = $profile->version(); # # Optional rspec, as for a Parameterized Profile. # if (exists($xmlparse->{'attribute'}->{"rspec"})) { $rspecstr = $xmlparse->{'attribute'}->{"rspec"}->{'value'}; } else { $rspecstr = $profile->CheckFirewall(!$localuser); } # # Update rspec with site aggregate urns. # # SetSites will tell us if we must use stitcher. # my $needstitcher = 0; my $tmp = APT_Profile::SetSites(\$rspecstr, $sitemap, $default_aggregate_urn, \@aggregate_urns, \$needstitcher, \$errmsg); if ($tmp) { ($tmp < 0 ? fatal($errmsg) : UserError($errmsg)); } # # Yep, this can happen when users do not put any nodes in their rspec. # if (!@aggregate_urns) { UserError("There are no nodes in your experiment, syntax error?"); } # but do not override command line force. $usestitcher = 1 if ($needstitcher); # # Look for datasets; need to verify that the datasets being referenced # still exist and are still permissible to use, and we have to generate # credentials for those datasets (if not a global dataset). The tricky # aspect is that while a dataset and a profile have project permissions, # the experiment has no project association, so if the profile/dataset # perms are okay, then we send over a credential that tells the CM to # allow this experiment to use that dataset in that project. # $errmsg = "Bad dataset"; if (APT_Profile::CheckDatasets($rspecstr, $profile->pid(), \$errmsg)) { UserError($errmsg); } # # Use ssh-keygen to see if the key is valid and convertable. We first # try to get the fingerprint, which will tells us if its already in # openssh format. If not, try to convert it. # if (exists($xmlparse->{'attribute'}->{"sshkey"}) && defined($xmlparse->{'attribute'}->{"sshkey"}) && $xmlparse->{'attribute'}->{"sshkey"} ne "") { $sshkey = $xmlparse->{'attribute'}->{"sshkey"}->{'value'}; my ($fh, $keyfile) = tempfile(UNLINK => 0); print $fh $sshkey; if (system("$SSHKEYGEN -l -f $keyfile >/dev/null 2>/dev/null")) { if (! open(KEYGEN, "$SSHKEYGEN -i -f $keyfile 2>/dev/null |")) { fatal("Could not start ssh-keygen"); } $sshkey = ; if (!close(KEYGEN)) { UserError("Could not parse ssh key!"); } } close($fh); unlink($keyfile); } chomp($sshkey) if (defined($sshkey)); # # See if the GeniUser exists. Create if not, but that means we # have to create an ssl certificate (which the user will never see) # so that we can operate on behalf of the user (via speaksfor). # # Note that we want to check for the user local account ahead of # SA account, to bypass their guest account that might still be # in the table. # my $geniuser; if ($localuser) { my $emulab_user = User->Lookup($user_uid); # # Hmm, users with real accounts who never used Geni, but now want # to use APT/Cloud, have no encrypted SSL certificate. Rather then # force them through the web ui (and have to explain it), create one # for them using a random passphrase. The user will not know the # passphrase, but for most users it will not matter. # # This is also going to catch expired certificates, we will regenerate # them using the existing passphrase. # if ($emulab_user->HasValidEncryptedCert() == 0 && $emulab_user->GenEncryptedCert()) { fatal("Could not (re)generate encrypted certificate"); } # Now this will work; without a certificate, above line would fail. if (defined($emulab_user)) { $geniuser = GeniUser::LocalUser->Create($emulab_user); } } else { $geniuser = GeniUser->Lookup($user_urn); # # In Utah, check for alternate SA # if (!defined($geniuser) && $MAINSITE) { foreach my $urn (@aggregate_urns) { if ($urn ne $GUEST_URN) { UserError("Guests are not allowed to use cluster: $urn"); } } $user_urn = GeniHRN::Generate("aptlab.net", "user", $user_uid); $user_hrn = "aptlab.${user_uid}"; $geniuser = GeniUser->LookupGuestOnly($user_urn); } } if (!defined($geniuser)) { if ($localuser) { fatal("Could not lookup local user $user_urn"); } # # Do not allow overlap with local users. # if (User->Lookup($user_uid)) { fatal("User $user_uid exists in the local user table"); } print "Geni user does not exist; creating one ...\n" if ($debug); # # Want to remember the auth token we emailed for later. # my $auth_token = $xmlparse->{'attribute'}->{"auth_token"}->{'value'}; if ($auth_token !~ /^[\w]+$/) { fatal("Bad auth token: $auth_token"); } my $blob = {"urn" => $user_urn, "hrn" => $user_hrn, "email" => $user_email, "showuuid" => 1}; if ($MAINSITE) { $blob->{'useaptca'} = 1; } my $certificate = GeniCertificate->Create($blob); fatal("Could not create certificate") if (!defined($certificate)); $geniuser = GeniUser->Create($certificate, $sa_authority); fatal("Could not create new geni user") if (!defined($geniuser)); $geniuser->SetAuthToken($auth_token); # # Setup browser ssh. # system("$SSHSETUP " . $geniuser->uuid()); fatal("Could not create ssh key pair") if ($?); } my $user_uuid = $geniuser->uuid(); # So we know this user has dome something lately. $geniuser->BumpActivity(); if ($localuser) { my $emulab_user = $geniuser->emulab_user(); if ($emulab_user->IsNonLocal()) { # # A user created from a Geni certificate via geni-login. We # asked for the current ssh keys from the MA when they logged # in, but we ask again to make sure have the latest keys. # system("$UPDATEGENIUSER -s " . $emulab_user->uid()); if (0) { fatal("Could not update ssh keys for nonlocal user"); } # # Check project membership, must be a member of at least one # valid project at the GPO portal. # system("$UPDATEGENIUSER -p " . $emulab_user->uid()); if ($?) { UserError("Could not get your project membership from your ". "member authority. It is probably offline, please try ". "again later."); } # Nonlocal users get the holding project can now join/create # real projects, so we get the pid passed in. } elsif (defined($sshkey) && !$emulab_user->LookupSSHKey($sshkey)) { # # XXX This is deprecated; we no longer show the ssh box to # real users. Remove when we are satisfied with new # ssh key management page. # # # A local user. We mark keys that come through this path # with the isaptkey flag (-a to addpubkey) so that we know # which key in the DB it is. The reason for this is that the # user might be a classic emulab user, but is now using the # APT/Cloud UI. The key provided in the web interface will # replace this key. # # XXX This is in flux, we now allow APT users to manage all # their keys via the web interface. # my ($fh, $keyfile) = tempfile(UNLINK => 0); print $fh $sshkey; if (system("$ADDPUBKEY -a -u $user_uid -f $keyfile")) { fatal("Could not add new ssh pubkey"); } close($fh); unlink($keyfile); } # # Hmm, users with real accounts who never used Geni, but now want # to use APT/Cloud, have no encrypted SSL certificate. Rather then # force them through the web ui (and have to explain it), create one # for them using a random passphrase. The user will not know the # passphrase, but for most users it will not matter. # # This is also going to catch expired certificates, we will regenerate # them using the existing passphrase. # if ($emulab_user->HasValidEncryptedCert() == 0 && $emulab_user->GenEncryptedCert()) { fatal("Could not (re)generate encrypted certificate"); } # Local users are required to select a project. if (! exists($xmlparse->{'attribute'}->{"pid"})) { fatal("No project provided for new instance"); } $project = Project->Lookup($xmlparse->{'attribute'}->{"pid"}->{"value"}); if (!defined($project)) { fatal("Project provided does not exist"); } if (!$project->AccessCheck($emulab_user, TB_PROJECT_CREATEEXPT)) { fatal("No permission to create experiments in project ". $project->pid()); } $pid = $project->pid(); # Use of the Image Tracker is a Portal directive at the moment. $usetracker = 1 if (GetSiteVar("protogeni/use_imagetracker") && EmulabFeatures->FeatureEnabled("APT_UseImageTracker", $emulab_user, $project)); } elsif (!$localuser) { if (defined($sshkey)) { # # Guest user; remember key. For now we accept only one key. We store # it simply so we can display it again for the user in the web # interface. We allow key reuse for existing users, see above. # $geniuser->DeleteKeys(); $geniuser->AddKey($sshkey); } # Guest users get a holding project. $pid = $APT_HOLDINGPROJECT; $project = Project->Lookup($pid); if (!defined($project)) { fatal("Project $pid does not exist"); } } # # Now we know where to send to logs. # if (!$debug) { AddAuditInfo("cc", $project->LogsEmailAddress()); if ($MAINSITE && $project->isEmulab()) { # Mostly people use the Cloudlab UI. AddAuditInfo("cc", "cloudlab-logs\@cloudlab.us"); } } # Generate the extra credentials that tells the backend this experiment # can access the datasets. my @dataset_credentials = (); if (defined($profile)) { my $retval = CreateDatasetCreds($rspecstr, $profile->pid(), $geniuser, \$errmsg, \@dataset_credentials); if ($retval) { ($retval < 0 ? fatal($errmsg) : UserError($errmsg)); } } # # # Now generate a slice registration and credential # my $safe_uid = $user_uid; $safe_uid =~ s/_/-/; my $slice_id = (defined($userslice_id) ? $userslice_id : $safe_uid . "-QV" . TBGetUniqueIndex('next_quickvm', 1)); my $slice_urn = GeniHRN::Generate("${OURDOMAIN}:${pid}", "slice", $slice_id); my $slice_hrn = "${PGENIDOMAIN}.${pid}.${slice_id}"; my $SERVER_NAME = (exists($ENV{"SERVER_NAME"}) ? $ENV{"SERVER_NAME"} : ""); # # Make sure slice is unique. Probably retry here at some point. # if (GeniSlice->Lookup($slice_hrn) || GeniSlice->Lookup($slice_urn)) { if (defined($userslice_id)) { UserError("Slice name already in use, please use another. If you ". "just terminated an experiment with this name, it takes a ". "minute or two for the name to become available again."); } else { fatal("Could not form a unique slice name"); } } # # Generate a certificate for this new slice. # my $slice_certificate = GeniCertificate->Create({'urn' => $slice_urn, 'hrn' => $slice_hrn, 'showuuid' => 1, 'email'=> $user_email}); if (!defined($slice_certificate)) { fatal("Could not generate certificate for $slice_urn"); } # Slice is created as locked. $slice = GeniSlice->Create($slice_certificate, $geniuser, $sa_authority, undef, 1); if (!defined($slice)) { $slice_certificate->Delete(); fatal("Could not create new slice object for $slice_urn"); } # These get quick expirations, unless it is a real user. if ($slice->SetExpiration(time() + (($localuser ? 16 : 3) * 3600)) != 0) { fatal("Could not set the slice expiration for $slice_urn"); } my $slice_uuid = $slice->uuid(); # # Generate a new ssl key/cert to be used to derive an ssh key pair # or whatever else is needed. This is sent along as an option when the # sliver is created (or provisioned, when stitching). # # This is going to be a real geni certificate, albeit a slice # certificate in the alternate CA domain, that can be used at the # "portal" XMLRPC interface. The key is unencrypted and put on the # nodes, hence the alternate CA, and the XMLRPC server will not allow # this certificate to do anything, except at the portal RPC server. # my $alt_urn = GeniHRN::Generate("aptlab.net:${pid}", "slice", $slice_id); my $alt_hrn = "aptlab.${pid}.${slice_id}"; my $alt_url = "$PROTOGENI_URL/portal"; my $altblob = {"urn" => $alt_urn, "hrn" => $alt_hrn, "url" => $alt_url, "uuid" => $slice_uuid, "email" => $user_email, "nostore" => 1, "keyfile" => $privkeyfile, "useaptca" => 1, "showuuid" => 1}; my $alt_certificate = GeniCertificate->Create($altblob); fatal("Could not create alt certificate") if (!defined($alt_certificate)); # # Encrypt blocks. # $tmp = APT_Profile::EncryptBlocks(\$rspecstr, $alt_certificate, \$errmsg); if ($tmp) { ($tmp < 0 ? fatal($errmsg) : UserError($errmsg)); } # # Tell the CM to do normal NFS mounts if this is the "Emulab" portal # making the request. The CM is of course free to ignore this. # # XXX Need to handle this differently if we use the stitcher. # if ($portal ne "emulab") { if (APT_Profile::ClearPortalTag(\$rspecstr, $errmsg)) { fatal($errmsg); } } elsif (APT_Profile::AddPortalTag(\$rspecstr, $portal, $errmsg)) { fatal($errmsg); } # # Generate credentials we need. # my ($slice_credential, $speaksfor_credential) = APT_Geni::GenCredentials($slice, $geniuser, undef, 0); if (! (defined($speaksfor_credential) && defined($slice_credential))) { fatal("Could not generate credentials"); } # # Got this far, lets create a quickvm record. # my $quickvm_uuid = (defined($quickuuid) ? $quickuuid : NewUUID()); if (!defined($quickvm_uuid)) { fatal("Could not generate a new uuid"); } my $blob = {'uuid' => $quickvm_uuid, 'name' => $slice_id, 'profile_id' => $profileid, 'profile_version' => $version, 'slice_uuid' => $slice_uuid, 'creator' => $geniuser->uid(), 'creator_idx' => $geniuser->idx(), 'creator_uuid' => $geniuser->uuid(), 'status' => "created", 'servername' => $SERVER_NAME, 'rspec' => $rspecstr, 'cert' => $alt_certificate->cert(), 'privkey' => $alt_certificate->PrivKeyDelimited(), }; if (defined($project)) { $blob->{"pid"} = $project->pid(); $blob->{"pid_idx"} = $project->pid_idx(); } $errmsg = undef; $instance = APT_Instance->Create($blob, \$errmsg); if (!defined($instance)) { fatal(defined($errmsg) ? $errmsg : "Could not create instance record for $quickvm_uuid"); } # # Get the set of keys (accounts) that need to be sent along. We build # them in CM format, but convert to AM format later if needed. # my $sshkeys; if ($instance->GetSSHKeys(\$sshkeys) < 0 || !@{$sshkeys}) { $slice->Delete(); $instance->Delete(); fatal("Error constructing ssh key list"); } # We use this list of references for ParRun below. my @aggregate_list = (); foreach my $aggregate_urn (@aggregate_urns) { my $aptaggregate = APT_Aggregate->Lookup($aggregate_urn); if (!defined($aptaggregate)) { UserError("$aggregate_urn is not a valid (known) aggregate"); } # Check for disabled/adminonly aggregates. if ($aptaggregate->disabled()) { UserError("$aggregate_urn is currently offline, try again later"); } if ($aptaggregate->adminonly() && !(defined($this_user) && $this_user->IsAdmin())) { UserError("Only administrators may use $aggregate_urn"); } my $authority = GeniAuthority->Lookup($aggregate_urn); if (!defined($authority)) { $authority = GeniAuthority->CreateFromRegistry("cm", $aggregate_urn); if (!defined($authority)) { fatal("Could not load CM authority object for $aggregate_urn"); } } my $aggobj = $instance->AddAggregate($aggregate_urn); if (!defined($aggobj)) { fatal("Could not create aggregate object for $aggregate_urn"); } $aggobj->_authority($authority); push(@aggregate_list, $aggobj); } # To keep stuff happy until multisite support finished. $instance->Update({'aggregate_urn' => $aggregate_urns[0]}); # # Create a webtask so that we can store additional information about # the sliver while we wait. # $webtask = WebTask->Create($instance->uuid()); if (!defined($webtask)) { fatal("Could not create a webtask!"); } $webtask_id = $webtask->task_id(); $webtask->AutoStore(1); print STDERR "\n"; print STDERR "User: $user_urn\n"; print STDERR "Email: $user_email" . (!$localuser ? " (guest)" : "") . "\n"; if (defined($profile)) { print STDERR "Profile: " . $profile->name() . ":${version}\n"; } print STDERR "Slice: $slice_urn\n"; print STDERR "Server: $SERVER_NAME\n"; print STDERR "Cluster: "; print STDERR join(",", map($_->aggregate_urn(), @aggregate_list)) . "\n"; print STDERR "\n"; print STDERR "$rspecstr\n"; # # Exit and let caller poll for status. # if (!$debug) { libaudit::AuditPrefork(); my $child = fork(); if ($child) { # Parent exits but avoid libaudit email. exit(0); } # All of the logging magic happens in here. libaudit::AuditFork(); } # Bind the process id. $webtask->SetProcessID($PID); if ($usestitcher) { my $rval = RunStitcher(); if ($rval) { $slice->UnLock(); $instance->SetStatus("failed"); $instance->RecordError($rval, (defined($webtask->output()) ? $webtask->output() : "")); $webtask->Exited($rval); exit($rval); } } else { my $rval = CreateSlivers(); # # We do not want email for most mapping errors, so look at the # return code to see if we want to kill the log (user will see the # error in the web ui). # if ($rval) { $instance->RecordError($rval, (defined($webtask->output()) ? $webtask->output() : "")); if ($rval == GENIRESPONSE_BADARGS || $rval == GENIRESPONSE_INSUFFICIENT_NODES || $rval == GENIRESPONSE_INSUFFICIENT_MEMORY || $rval == GENIRESPONSE_INSUFFICIENT_BANDWIDTH || $rval == GENIRESPONSE_NO_MAPPING) { AuditAbort() if (!$debug); } $webtask->Exited($rval); exit($rval); } } $instance->SetStatus("provisioned"); $instance->ComputeNodeCounts(); # # Now wait for the sliver to be ready, which means polling. # # Shorten default timeout. # Genixmlrpc->SetTimeout(60); # # Okay, fire off the waits for each aggregate # my @return_codes = (); if (ParRun({"maxwaittime" => 99999, "maxchildren" => scalar(@aggregate_list)}, \@return_codes, \&APT_Instance::Aggregate::WaitForSliver, @aggregate_list)) { # # The parent caught a signal. Leave things intact so that we can # kill things cleanly later. # $slice->UnLock(); print STDERR "Internal error in WaitForSlivers\n"; $webtask->output("Internal error in WaitForSlivers"); $instance->SetStatus("failed"); $webtask->Exited(1); exit(-1); } print "$slice_urn\n"; # # If we were canceled, then none of the stuff below matters, we # are going to do a terminate. # if ($instance->IsCanceled()) { $instance->SetStatus("canceled"); $slice->UnLock(); system("$MANAGEINSTANCE -t $webtask_id terminate $quickvm_uuid"); exit(0); } # Count up nodes running a startup service. my $startuprunning = 0; # # Check the exit codes; any failure is a total failure (for now). # my $failed = 0; foreach my $aggobj (@aggregate_list) { my $code = shift(@return_codes); # Updated in a forked child, must refresh. $aggobj->Refresh(); print $aggobj->aggregate_urn() . "\n"; if ($code) { $failed++; print "WaitforSliver Failure!\n"; if (defined($aggobj->webtask()->output())) { $webtask->output($aggobj->webtask()->output()); $webtask->Exited($aggobj->webtask()->exitcode()); print $aggobj->webtask()->output() . "\n"; } else { $webtask->output("WaitforSliver Failure at " . $aggobj->aggregate_urn()); $webtask->Exited(1); } # Promote the log up to the instance so that so its easy to find. $instance->SetPublicURL($aggobj->public_url()) if (defined($aggobj->public_url())); } else { my $statusblob = $aggobj->webtask()->sliverstatus(); print Dumper($statusblob); foreach my $details (values(%{ $statusblob })) { # Startup command is still running. $startuprunning++ if (exists($details->{'execute_state'}) && $details->{'execute_state'} ne "exited"); } } if (defined($aggobj->public_url())) { print $aggobj->public_url() . "\n"; } print "\n" . $aggobj->manifest() . "\n\n"; print "------------------------------------------------------------\n\n"; } $slice->UnLock(); if ($failed) { if ($ignorefailures) { $instance->SetStatus("ready"); } else { $instance->SetStatus("failed"); } # Webtask exit status set above. $instance->RecordError($webtask->exitcode(), (defined($webtask->output()) ? $webtask->output() : "")); } else { $instance->SetStatus("ready"); # # If there are still execute services running, lets keep polling # using the monitor. # if ($startuprunning) { print "$MANAGEINSTANCE -t $webtask_id monitor $quickvm_uuid -w\n"; system("$MANAGEINSTANCE -t $webtask_id monitor $quickvm_uuid -w") } else { $webtask->Exited(0); } } exit(0); # # Create credentials to access datasets. # sub CreateDatasetCreds($$$$$) { my ($xml, $pid, $user, $pmsg, $pref) = @_; my @credentials = (); my $rspec = GeniXML::Parse($xml); if (! defined($rspec)) { print STDERR "CreateDatasetCreds: Could not parse rspec\n"; return -1; } foreach my $ref (GeniXML::FindNodes("n:node", $rspec)->get_nodelist()) { foreach my $blockref (GeniXML::FindNodesNS("n:blockstore", $ref, $GeniXML::EMULAB_NS)->get_nodelist()) { my $dataset_id = GeniXML::GetText("persistent", $blockref); if (!defined($dataset_id)) { # persistent is deprecated. $dataset_id = GeniXML::GetText("dataset", $blockref); } # # We only care about datasets here, we let the backend # do the error checking on ephemeral blockstores. # next if (!defined($dataset_id)); my $class = GeniXML::GetText("class", $blockref); if (!defined($class)) { $class = "remote"; } # Image backed. No checking since the image has to be global # anyway. Needs more thought. next if ($class eq "local"); my ($authority, $type, $id) = GeniHRN::Parse($dataset_id); my ($dataset_domain) = split(":", $authority); # # Separate project from name; this is how the rspec specifies # the dataset they want, since it might be in another project # if ($id =~ /^([-\w]+)\/\/(.+)$/) { $pid = $1; $id = $2; } my $dataset = APT_Dataset->Lookup("$pid/$id"); if (!defined($dataset)) { $dataset = APT_Dataset->LookupByRemoteURN($dataset_id); if (!defined($dataset)) { # If it is for a local dataset, see if it plain lease # created via the classic interface or command line. # The backend can find those. if ($dataset_domain eq $OURDOMAIN) { $dataset = Lease->Lookup($pid, $id); if (defined($dataset)) { # No need for a credential. next; } } } if (!defined($dataset)) { $$pmsg = "Persistent dataset '$pid/$id' does not exist"; return 1; } } my $certificate = $dataset->GetCertificate(); if (!defined($certificate)) { $$pmsg = "No certificate for dataset '$pid/$id'"; return -1; } my $credential = APT_Geni::GenCredentials($certificate, $geniuser, ["blockstores"]); if (!defined($credential)) { $$pmsg = "Could not create credential for dataset '$pid/$id'"; return -1; } push(@credentials, $credential->asString()); } } @$pref = @credentials; return 0; } # # Create a sliver at a single aggregate. This is called from parrun # so it needs to return success or failure, we lookup the results # in the DB. # sub CreateSliver($) { my ($ref) = @_; my $aggobj = $ref; $aggobj->Refresh(); my $webtask = $aggobj->webtask(); my $authority = $aggobj->_authority(); my $cmurl = $authority->url(); my $urn = $authority->urn(); $webtask->Refresh(); # Debugging $cmurl = APT_Instance::devurl($cmurl); Genixmlrpc->SetTimeout(900); # # This creates the sliver and starts it. We have to watch for the # server being too busy. # my $tries = 15; my $response; while (1) { $response = Genixmlrpc::CallMethod($cmurl, undef, "CreateSliver", { "slice_urn" => $slice_urn, "rspec" => $rspecstr, "keys" => $sshkeys, "credentials" => [$slice_credential->asString(), $speaksfor_credential->asString(), @dataset_credentials ], "certificate" => $alt_certificate->cert(), "key" => $alt_certificate->PrivKeyDelimited(), "usetracker" => $usetracker, }); if (!defined($response) || $response->code() != GENIRESPONSE_SUCCESS) { if (defined($response) && $response->code() == GENIRESPONSE_SERVER_UNAVAILABLE && $tries >= 0) { print STDERR "Server for $urn reports too busy, ". "waiting a while ...\n"; sleep(int(rand(20)) + 10); $tries--; next; } if (defined($response)) { $webtask->output($response->output()); $webtask->Exited($response->code()); } else { $webtask->Exited(-1); } $aggobj->SetStatus("failed"); if (defined($response) && defined($response->logurl())) { $aggobj->SetPublicURL($response->logurl()); } print STDERR "CreateSliver failed on $urn: ". (defined($response) ? $response->output() : "") . "\n"; return -1; } last; } # This will get overwritten later. if (defined($response) && defined($response->logurl())) { $aggobj->SetPublicURL($response->logurl()); } my $manifest = $response->value()->[1]; if (!defined($manifest)) { $webtask->Exited(-1); $aggobj->SetStatus("failed"); print STDERR "CreateSliver $urn: No manifest returned\n"; return -1; } $aggobj->SetStatus("provisioned"); $aggobj->SetManifest($manifest); return 0; } sub CreateSlivers() { my ($perrmsg) = @_; $instance->SetStatus("provisioning"); my @return_codes = (); if (ParRun({"maxwaittime" => 99999, "maxchildren" => scalar(@aggregate_list)}, \@return_codes, \&CreateSliver, @aggregate_list)) { # # The parent caught a signal. Leave things intact so that we can # kill things cleanly later. # $slice->UnLock(); $instance->SetStatus("failed"); return -1; } # # Check the exit codes; any failure is a total failure (for now). # foreach my $aggobj (@aggregate_list) { # # Have to refresh the sliver objects since they were updated in a fork. # Need the manifests for the call to ComputeNodeCounts below. # $aggobj->Refresh(); my $code = shift(@return_codes); if ($code) { $slice->UnLock(); # Promote the log up to the instance so that so its easy to find. $instance->SetPublicURL($aggobj->public_url()) if (defined($aggobj->public_url())); # Ditto the error output. $webtask->output($aggobj->webtask()->output()) if (defined($aggobj->webtask()->output())); # This will be the createsliver exit code if we got one, or -1. $code = $aggobj->webtask()->exitcode() if (defined($aggobj->webtask()->exitcode())); # Do this last so that the web interface does not see failed # before the reason is in the webtask. $instance->SetStatus("failed"); return $code; } } return 0; } # # Run the stitcher to allocate resources. We use this whenever we have # links that cross aggregates # sub RunStitcher() { my $tmpdir = mktemp("/tmp/stitcher.XXXXXX"); my $slicecredfile = "$tmpdir/slicecred.xml"; my $speaksforfile = "$tmpdir/speaksforcred.xml"; my $al2scredfile = "$tmpdir/al2scred.xml"; my $rspecfile = "$tmpdir/rspec.xml"; my $stdoutfile = "$tmpdir/stitcher.stdout"; my $stderrfile = "$tmpdir/stitcher.stderr"; # Who we are speaking for. my $speaksfor_urn = $slice_credential->owner_urn(); my $failed = 0; my $errcode = GENIRESPONSE_STITCHER_ERROR(); # # The AM API uses a different ssh key structure. # my $users = []; foreach my $user (@{$sshkeys}) { my @tmp = map { $_->{'key'} } @{$user->{'keys'}}; push(@{$users}, {"urn" => $user->{'urn'}, "keys" => [ @tmp ] }); } # # Hey, I think stitcher/omni has as many options as snmpit. Wow! # my $command = "$STITCHER --fileDir $tmpdir --cred $speaksforfile ". "--slicecredfile $slicecredfile --usercredfile $slicecredfile ". ($speaksfor_credential->type() eq "speaksfor" ? "--al2scredfile $al2scredfile " : "") . "--debug ". # We do not want these two files defaulting to user home dir. "--GetVersionCacheName=$tmpdir/get_version_cache.json ". "--AggNickCacheName=$tmpdir/agg_nick_cache ". "--scsURL https://nutshell.maxgigapop.net:8443/geni/xmlrpc ". "--speaksfor $speaksfor_urn -V3 allocate $slice_urn $rspecfile"; return -1 if (! mkdir("$tmpdir", 0755)); print "Using $tmpdir for stitcher\n" if ($debug || $verbose); print "Stitcher command: $command\n" if ($debug || $verbose); goto bad if ($instance->WriteCredentials($tmpdir)); goto bad if (system("/bin/cp -fp /usr/testbed/etc/stitcher/omni_config $tmpdir")); goto bad if (system("/bin/cp -fp /usr/testbed/etc/stitcher/agg_nick_cache ". "$tmpdir/agg_nick_cache")); if (!open(XML, ">$rspecfile")) { print STDERR "Could not open $rspecfile: $!\n"; goto bad; } print XML $rspecstr; close(XML); # # Use a log file for the stitcher output, so we can spew it. # The file in the tmp dir has to exist. # system("/bin/cp /dev/null $stdoutfile"); my $logfile = Logfile->Create($project->GetProjectGroup(), $stdoutfile); if (!defined($logfile)) { print STDERR "Could not create logfile\n"; } else { $logfile->Open(); $logfile->SetPublic(1); $instance->SetLogFile($logfile); } # # Okay, run the stitcher. Only to allocate, we will do the provisions # so that we can pass the ssh keys more easily. # $instance->SetStatus("stitching"); system("cd $tmpdir; $command > $stdoutfile 2> $stderrfile"); if ($?) { $errcode = $? >> 8; $errcode = GENIRESPONSE_STITCHER_ERROR() if ($errcode == 1); if (-s $stderrfile) { my $stuff = `cat $stderrfile`; # # Try to find some useful output. # my $morestuff; if (open(SLOG, $stdoutfile)) { while () { if ($_ =~ /^Allocation of slivers in .* failed:/) { $morestuff = $_; if ($_ =~ /protogeni AM code: (\d*):/) { $errcode = $1; } } elsif (defined($morestuff)) { if ($_ =~ /^Allocate slivers in slice .* failed/) { last; } $morestuff .= $_; } } close(SLOG); $stuff .= "\n\n" . $morestuff if (defined($morestuff)); } $webtask->output($stuff); print $morestuff . "\n\n" if (defined($morestuff)); system("/bin/cat $stderrfile"); } else { $webtask->output("Stitcher failed!"); } # # Even if we fail, want to pick up whatever aggregates the stitcher # decided to use, so that we can ensure all slivers get cleaned up # at termination. # $failed = 1; } if (defined($logfile)) { $logfile->Close(); $logfile->Store(); } # # The stitcher puts the list of aggregates into a file, read that so # we can add any new ones to the instance. Just read the directory and # find it. # my @agglist = (); my $agglistfile; my $al2smanifest; opendir(DIR, $tmpdir); my @files = readdir(DIR); closedir(DIR); foreach my $file (@files) { if ($file =~ /amlist.txt$/) { $agglistfile = "$tmpdir/$file"; } elsif ($file =~ /manifest-rspec-al2s/) { $al2smanifest = "$tmpdir/$file"; } } # # The stitcher will not create this file if it passes straight through # to omni, as it will do if no cross aggregate links. # if (defined($agglistfile)) { if (!open(TXT, $agglistfile)) { print STDERR "Could not open $agglistfile: $!\n"; goto bad; } while () { next if ($_ =~ /^#/); if ($_ =~ /^([^,]*),(.*)$/) { push(@agglist, $2); } } if (!@agglist) { print STDERR "Stitcher did not contact any aggregates!\n"; goto bad; } close(TXT); } foreach my $urn (@agglist) { next if (exists($instance->AggregateHash()->{$urn})); my $authority = GeniAuthority->Lookup($urn); if (!defined($authority)) { $authority = GeniAuthority->CreateFromRegistry("CM", $urn); if (!defined($authority)) { print STDERR "Could not lookup authority: $urn\n"; goto bad; } } my $aggobj = $instance->AddAggregate($urn); $aggobj->_authority($authority); push(@aggregate_list, $aggobj); } goto bad if ($failed); # # Pass this function to ParRun; Provision slivers and request manifests. # my $coderef = sub { my ($ref) = @_; my $aggobj = $ref; $aggobj->Refresh(); my $webtask = $aggobj->webtask(); my $authority = $aggobj->_authority(); my $urn = $authority->urn(); my $errmsg = "Provision failed on $urn"; $webtask->Refresh(); # # AL2S was done with a createsliver, so we know its ready. # The thing we lack is a manifest, and I cannot seem to get # get ListResources to work there, so just read the file. # if ($aggobj->isAL2S()) { if (!defined($al2smanifest) || ! -e $al2smanifest) { print STDERR "No manifest for AL2S\n"; $aggobj->SetStatus("failed"); $webtask->output("No manifest for AL2S"); $webtask->Exited(-1); return -1; } my $manifest_string = ""; if (! open(MAN, $al2smanifest)) { print STDERR "Could not open $al2smanifest\n"; $aggobj->SetStatus("failed"); $webtask->output("Could not open manifest file"); $webtask->Exited(-1); return -1; } while () { $manifest_string .= $_; } close(MAN); $aggobj->SetManifest($manifest_string); $aggobj->SetStatus("provisioned"); print STDERR $manifest_string . "\n"; return 0; } print "Provisioning at $urn\n"; if ($aggobj->Provision(\$errmsg, $users, $alt_certificate->cert(), $alt_certificate->PrivKeyDelimited())) { $aggobj->SetStatus("failed"); $webtask->output($errmsg); $webtask->Exited(-1); print STDERR "Provision failed on $urn: $errmsg\n"; return -1; } $aggobj->SetStatus("provisioned"); print "Requesting manifest from $urn\n"; my $manifest = $aggobj->GetManifest(); if (!defined($manifest)) { $aggobj->SetStatus("failed"); $webtask->output("Could not get manifest from $urn"); $webtask->Exited(-1); return -1; } # Web interface wants this as soon as possible. $aggobj->SetManifest($manifest); print "Forcing correct slice expiration\n"; my $response = $aggobj->Extend($slice->ExpirationGMT(), $this_user); if (!defined($response) || $response->code() != GENIRESPONSE_SUCCESS) { $aggobj->SetStatus("failed"); $webtask->output("Renew failed on $urn"); $webtask->Exited(-1); print STDERR "Renew failed on $urn\n"; return -1; } print "Calling SliverStart at $urn\n"; $response = $aggobj->SliverAction(\$errmsg, "start"); if (! defined($response)) { $aggobj->SetStatus("failed"); $webtask->output($errmsg); $webtask->Exited(-1); print STDERR "SliverStart failed on $urn: $errmsg\n"; return -1; } # This will get overwritten later during the wait. $aggobj->SetPublicURL($response->logurl()) if (defined($response->logurl())); return 0; }; my @return_codes = (); if (ParRun({"maxwaittime" => 99999, "maxchildren" => scalar(@aggregate_list)}, \@return_codes, $coderef, @aggregate_list)) { # # The parent caught a signal. Leave things intact so that we can # kill things cleanly later. # goto bad; } # # Check the exit codes; any failure is a total failure (for now). # foreach my $aggobj (@aggregate_list) { # # Have to refresh the sliver objects since they were updated in a fork. # $aggobj->Refresh(); my $code = shift(@return_codes); if ($code) { $webtask->output($aggobj->webtask()->output()) if (defined($aggobj->webtask()->output())); goto bad; } } # system("/bin/rm -rf $tmpdir") # if (!$debug && defined($tmpdir) && -e $tmpdir); return 0; bad: # # Dump the stitcher output. Ick. # if (defined($tmpdir) && -e "$tmpdir/stitcher.log") { print "------------- Stitcher Log ---------------\n"; system("/bin/cat $tmpdir/stitcher.log"); print "-----------------------------------------\n"; } # system("/bin/rm -rf $tmpdir") # if (!$debug && defined($tmpdir) && -e $tmpdir); return $errcode; } sub fatal($) { my ($mesg) = $_[0]; $slice->Delete() if (defined($slice)); $instance->SetStatus("failed") if (defined($instance)); print STDERR Dumper($xmlparse) if (defined($xmlparse)); print STDERR "*** $0:\n". " $mesg\n"; exit(-1); } sub UserError($) { my($mesg) = $_[0]; AuditAbort() if (!$debug); print $mesg; exit(1); }