#!/usr/bin/perl -wT

#
# Copyright (c) 2000-2003, 2007 University of Utah and the Flux Group.
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#

use Fcntl ':flock';
use English;
use Getopt::Std;
use Socket;
use IO::Handle;     # thousands of lines just for autoflush :-(
    
#
# In an experiment with simulated nodes, if some instance of the
# simulator (nse) on a physical node is unable to track real time, the
# node requests that the experiment be re-mapped and run with a more
# conservative co-location factor. This script implements the above
# functionality.
#
#
# Print the command-line synopsis on STDOUT and terminate the script
# with exit status -1. Never returns.
#
sub usage()
{
    print STDOUT
	"Usage: nseswap [-v] pid eid <eventargs>\n";

    exit(-1);
}
# Option letters accepted by getopts(); -v asks for verbose output.
my  $optlist = "v";

#
# Configure variables
#
# The @NAME@ tokens are placeholders filled in when this .in template
# is configured.
#
my $TB       = "@prefix@";
my $TBOPS    = "@TBOPSEMAIL@";
my $CONTROL  = "@USERNODE@";
my $TESTMODE = @TESTMODE@;
my $TBLOGS = "@TBLOGSEMAIL@";

# Locals

# Forward declaration; the definition is at the end of the file.
sub swapout_on_max_retries();

# Project and experiment IDs, taken from the command line.
my $pid;
my $eid;
# Raw event argument string; the SIMHOST=<vname> pair is extracted from it.
my $eventargs;
my $simhost = "";
# Number of reswaps after which the experiment is forcibly swapped out.
my $max_retries = 100;
# NOTE(review): verbose output is on by default, so -v has no visible effect.
my $verbose = 1;

#
# Debug print: emit the given string on the currently selected output
# handle, but only while $verbose is set.
#
sub printdb ($)
{
    my ($text) = @_;

    print $text
	if ($verbose);
}
#
# Flush output after every write instead of buffering it.
#
$| = 1;

#
# Untaint the path: under -T, external commands may only be run with a
# PATH we set ourselves, and the shell-related variables must be gone.
#
$ENV{'PATH'} = "$TB/bin:$TB/sbin:/bin:/usr/bin:/sbin:/usr/sbin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use User;

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments: pid, eid and the event argument string.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"v"})) {
    $verbose = 1;
}

if (@ARGV != 3) {
    usage();
}

$pid        = $ARGV[0];
$eid        = $ARGV[1];
$eventargs  = $ARGV[2];

#
# Untaint the arguments. Only the captured text is kept, so pid and eid
# are safe to interpolate into SQL and shell commands afterwards.
# $eventargs stays tainted; it is only ever matched against a pattern.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}

#
# Verify user and get his DB uid and other info for later.
# The user object is needed to address the notification mail that is
# sent when the retry limit is reached.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}

#
# Look up the experiment index; it is needed as a column value when
# rows are written to virt_simnode_attributes.
#
my $exptidx;
if (!TBExptIDX($pid, $eid, \$exptidx)) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid!");
}

#
# Pull the reporting simulation host out of the event arguments. The
# capture doubles as the untaint step for $simhost.
#
if ($eventargs =~ /SIMHOST=([-\w]+)/) {
    $simhost = $1;
}
else {
    # Without a SIMHOST there is nothing to look up; fail with a clear
    # message rather than querying for an empty vname.
    print STDERR "*** $0: No SIMHOST=<vname> in event arguments\n";
    exit(1);
}
my $lockfile    = "/var/tmp/$pid-$eid-nseswap-lockfile";

TBDebugTimeStampsOn();

# Map the simhost's virtual name to the physical node it is reserved on.
my $query_result =
    DBQueryFatal("select node_id from reserved where pid='$pid' and ".
		 "eid='$eid' and vname='$simhost'");

if (! $query_result->numrows) {
    # print warning in some log
    print STDERR "*** $0: \"$simhost\" is not in the reserved table\n";
    exit(1);
}
my ($node_id) = $query_result->fetchrow_array();

# Update the DB with info from the NSESWAP event
# and be done with it
DBQueryFatal("lock tables reserved write");
DBQueryFatal("update reserved set simhost_violation='1' ".
	     "where node_id='$node_id' ".
	     "and pid='$pid' and eid='$eid'");
printdb("node:$node_id simhost=$simhost simhost_violation set\n");

DBQueryFatal("unlock tables");

#
# Double the nodeweight of every virtual node mapped onto the
# violating physical node. A vnode with no recorded weight gets 2.
#
my $v2p_result =
    DBQueryFatal("select vname from v2pmap where ".
		 "pid='$pid' and eid='$eid' and node_id='$node_id'");

while (my ($vname) = $v2p_result->fetchrow_array()) {

    my $weight_result = DBQueryFatal("select nodeweight from ".
				     "virt_simnode_attributes where ".
				     "pid='$pid' and eid='$eid' and ".
				     "vname='$vname'");
    my $nodeweight = 2;
    if ($weight_result->numrows) {
	my ($current) = $weight_result->fetchrow_array();

	# A NULL weight is treated like a missing row instead of being
	# doubled into 0.
	$nodeweight = $current * 2
	    if (defined($current));
    }
    DBQueryFatal("replace into virt_simnode_attributes ".
		 "(exptidx,pid,eid,vname,nodeweight) values ".
		 "('$exptidx','$pid','$eid','$vname','$nodeweight')");
}

#
# We need to serialize this script since multiple pnodes
# could be reporting an error where nse can't keep up.
# The first pnode that caused this will run and wait for
# a little while to see if there are other pnodes reporting
# errors. Eventually, the first nseswap will cause re-swapin
# of the experiment. The subsequent nseswap scripts will just
# update the DB and be done with it.
#
# The lock file doubles as a log: in verbose mode the current
# simhost-to-vnode mapping is appended to it before each reswap.
#
umask(002);
open(my $lock_fh, ">>", $lockfile)
    or tbdie("Couldn't open $lockfile: $!");
if (flock($lock_fh, LOCK_EX|LOCK_NB)) {

    # Exits the script (after a forced swapout) once the retry limit
    # has been reached.
    swapout_on_max_retries();

    my $qr = DBQueryFatal("select sim_reswap_count from experiments ".
			  "where eid='$eid' and pid='$pid'");
    my ($sim_reswap_count) = $qr->fetchrow_array();

    # A missing or NULL count is treated as "no reswaps yet".
    $sim_reswap_count = 0
	if (!defined($sim_reswap_count));

    # Wait a while so that any other pnodes that cannot track real time
    # get a chance to report before we re-map: one minute before the
    # first reswap, five minutes before later ones.
    if ($sim_reswap_count == 0) {
	sleep(60);
    } else {
	sleep(300);
    }
    if ($verbose) {
	$lock_fh->autoflush(1);
	print {$lock_fh} "################ Reswap Count:$sim_reswap_count " .
	                 "################\n";
	$qr = DBQueryFatal("select vname,node_id,simhost_violation from reserved ".
	                   "where pid='$pid' and eid='$eid' " .
	                   "and erole='simhost' order by vname");
	while (my ($host_vname, $host_node, $violation) =
	       $qr->fetchrow_array()) {
	    my $qr2 = DBQueryFatal("select vname from v2pmap " .
		                   "where pid='$pid' and eid='$eid' " .
	                           "and node_id='$host_node' order by vname");
	    print {$lock_fh} "vname:$host_vname node_id:$host_node numvnodes:" .
	                     $qr2->numrows() .
	                     " " . ($violation ? "violation; " : " ; ");
	    while (my ($vnode) = $qr2->fetchrow_array()) {
		print {$lock_fh} "$vnode ";
	    }
	    print {$lock_fh} "\n";
	}
	print {$lock_fh} "################################################ \n";
    }

    DBQueryFatal("update experiments set sim_reswap_count=sim_reswap_count+1 ".
	         "where eid='$eid' and pid='$pid'");

    # do a swap modify
    system("swapexp -w -e -r -s modify $pid $eid");
    if ($?) {
	# Report the failure; the exit status of this script is unchanged.
	print STDERR "*** $0: swapexp modify of $pid/$eid failed (status $?)\n";
    }
}

#
# Close the lock file. Exiting releases it, but might as well.
#
close($lock_fh);
exit(0);

#
# Give up on re-mapping once the experiment has already been reswapped
# $max_retries times: mail the invoking user (Bcc to the logs list),
# force a swapout of the experiment and exit the script with status 2.
# Returns normally when the limit has not been reached yet.
#
sub swapout_on_max_retries() {

    my $query_result =
	DBQueryFatal("select sim_reswap_count from experiments where eid='$eid' ".
		     "and pid='$pid'");

    my ($sim_reswap_count) = $query_result->fetchrow_array();

    # A missing or NULL count means no reswaps have happened yet.
    return
	if (!defined($sim_reswap_count) || $sim_reswap_count < $max_retries);

    my $user_name  = $this_user->name();
    my $user_email = $this_user->email();

    my $message =
	"Experiment $pid/$eid reached max retries:$max_retries ".
	"trying to re-map\n".
	"simulated nodes. Forcibly swapping out the experiment\n";

    SENDMAIL("$user_name <$user_email>",
	     "Experiment $pid/$eid Swapping out",
	     $message,
	     $TBOPS,
	     "Bcc: $TBLOGS");

    system("swapexp -f -s out $pid $eid");
    if ($?) {
	# Report the failure; the exit status below is unchanged.
	print STDERR "*** $0: swapexp out of $pid/$eid failed (status $?)\n";
    }
    sleep(10);
    exit(2);
}