libplab.py.in 39.5 KB
Newer Older
1
# -*- python -*-
Kirk Webb's avatar
Kirk Webb committed
2
3
4
5
6
7
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#

8
"""
Kirk Webb's avatar
Kirk Webb committed
9
10
11
12
13
14
15
16
17
Library for interfacing with Plab.  This abstracts out the concepts of
Plab central, slices, and nodes.  All data (except static things like
certificates) is kept in the Emulab DB.  Unlike the regular dslice
svm, this one supports dynamically changing which nodes are in a
slice.

This requires an already obtained dslice certificate and key.  By
default it expects to find these in the @prefix@/etc/plab/
subdirectory.
18
19
20
"""

import sys
21
22
23
sys.path.append("@prefix@/lib")

import os, time
24
import string
Kirk Webb's avatar
   
Kirk Webb committed
25
import traceback
Kirk Webb's avatar
   
Kirk Webb committed
26
import socket
Kirk Webb's avatar
Kirk Webb committed
27
28
29
import httplib
import xml.parsers.expat
import re
Kirk Webb's avatar
   
Kirk Webb committed
30

Kirk Webb's avatar
   
Kirk Webb committed
31
from popen2 import Popen4
Kirk Webb's avatar
   
Kirk Webb committed
32
from warnings import warn
33

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#
# Testbed and DB access libs
#
from libtestbed import *
from libdb import *

#
# Plab modules to import
#
from mod_PLC import mod_PLC
from mod_dslice import mod_dslice

# Maps an agent name to the module class that implements it.  Plab.__init__
# looks up DEF_AGENT here and instantiates the class.
agents = {'PLC'    : mod_PLC,
          'dslice' : mod_dslice}

#
# output control vars
#
# Both are tested for truthiness throughout this file to gate extra output.
verbose = 0
debug = 0

#
# Constants
#
# Key into `agents` used when no agent is given to Plab().
DEF_AGENT = "PLC";

RENEW_TIME = 2*24*60*60  # Renew two days before lease expires

RENEW_TIMEOUT = 1*60     # give the node manager a minute to respond to renew
FREE_TIMEOUT  = 1*60     # give the node manager a minute to respond to free
NODEPROBEINT  = 10

# Values substituted at configure time; TBOPS is the address that
# SENDMAIL() calls in this file use as both recipient and sender.
TBOPS = "@TBOPSEMAIL@".replace("\\","")
MAILTAG = "@THISHOMEBASE@"

# Experiment that newly added pnodes are reserved into.
RESERVED_PID = "emulab-ops"
RESERVED_EID = "hwdown"       # start life in hwdown
# Experiment that the last vnode of each new pnode is reserved into.
MONITOR_PID  = "emulab-ops"
MONITOR_EID  = "plab-monitor"

# Gateway addresses searched for in traceroute output when classifying a
# node's link type (see Plab.__findLinkType).
MAGIC_INET2_GATEWAYS = ("205.124.237.10", )
MAGIC_INET_GATEWAYS = ("205.124.249.123", "205.124.249.113",
                       "205.124.249.121", "205.124.249.115")
# Hostnames ending in this domain get LOCAL_PLAB_LINKTYPE.
LOCAL_PLAB_DOMAIN = ".flux.utah.edu"
LOCAL_PLAB_LINKTYPE = "pcplabinet2"
# right now these are the only 2.0 machines running the new slice interface:
#ALLOWED_NODES = ('198.78.49.59', '18.31.0.213', '169.229.50.85',
#                 '169.229.50.89', '128.112.152.124', '12.46.129.23',
#                 '64.41.221.196', '132.239.17.226', '128.223.6.113',
#                 '128.208.4.199', '128.2.198.199', '155.98.35.2',
#                 '155.98.35.3')
# An empty tuple disables the allowed-nodes filter in updateNodeEntries.
ALLOWED_NODES = ()
# Number of vnodes created for each newly added pnode.
NUMVNODES = 20

PLABNODE = "@prefix@/sbin/plabnode"
SSH = "@prefix@/bin/sshtb"
# Program run after nodes are added/updated to refresh the named maps.
NAMED_SETUP = "@prefix@/sbin/named_setup"

ROOTBALL_URL = "http://localhost:1492/" # ensure this ends in a slash

# Host and path the PlanetLab sites XML file is fetched from (over HTTPS).
DEF_PLAB_URL = "www.planet-lab.org"
DEF_SITE_XML = "/xml/sites.xml"

DEF_ROOTBALL_NAME = "@PLAB_ROOTBALL@"
SLICEPREFIX = "@PLAB_SLICEPREFIX@"
# Emulab node ids for Plab pnodes are NODEPREFIX + the PlanetLab node id.
NODEPREFIX  = "plab"

# Runs of non-word characters in a site name are replaced by a single "-".
BADSITECHARS = re.compile(r"\W+")
# Pnode priority is PLABBASEPRIO + the numeric PlanetLab node id.
PLABBASEPRIO = 20000

#
# var to track failed renewals
#
# Module-level list that Plab.renew() appends to and removes from.
failedrenew = []

#
# Make stdout unbuffered (bufsize 0) so output appears immediately
#
sys.stdout = os.fdopen(sys.stdout.fileno(), sys.stdout.mode, 0)

#
# Ensure SIGPIPE doesn't bite us:
#
# NOTE(review): `signal` is not imported by name in this file; presumably it
# arrives via one of the `from ... import *` lines above -- verify.
signal.signal(signal.SIGPIPE, signal.SIG_IGN)

119

120
121
122
#
# Plab abstraction
#
Kirk Webb's avatar
Kirk Webb committed
123

Kirk Webb's avatar
Kirk Webb committed
124
125
126
#
# Class responsible for parsing planetlab sites file
#
Kirk Webb's avatar
Kirk Webb committed
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
class siteParser:
    """
    Fetches the PlanetLab sites XML file and extracts its HOST entries.

    Each HOST element becomes a dict with the keys 'HNAME', 'IP', 'NODEID'
    and 'SITE'; 'SITE' is the SHORT_SITE_NAME of the enclosing SITE
    element, or "Unknown" when the HOST is not inside one.  Parsed hosts
    accumulate on the instance for its whole lifetime.
    """

    def __init__(self):
        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler = self.__site_start_elt
        self.parser.EndElementHandler = self.__site_end_elt
        self.__hosts = []
        # Use the same placeholder that __site_end_elt resets to, so a HOST
        # seen before the first SITE is labelled like one seen after a SITE.
        self.__sitename = "Unknown"

    def getPlabNodeInfo(self):
        """
        Download DEF_SITE_XML from DEF_PLAB_URL over HTTPS and parse it.

        Returns the list of host dicts collected so far.  Raises
        RuntimeError if the HTTP status is not 200, and re-raises
        xml.parsers.expat.ExpatError (after printing the error location)
        if the document does not parse.
        """
        conn = httplib.HTTPSConnection(DEF_PLAB_URL)
        try:
            conn.request("GET", DEF_SITE_XML)
            res = conn.getresponse()
            if res.status != 200:
                raise RuntimeError("HTTP Error getting site list:\n"
                                   "Code: %d Reason: %s" %
                                   (res.status, res.reason))
            try:
                self.parser.ParseFile(res)
            except xml.parsers.expat.ExpatError:
                e = sys.exc_info()[1]
                print("Error parsing XML file, lineno: %d, offset: %d:\n%s" %
                      (e.lineno, e.offset,
                       xml.parsers.expat.ErrorString(e.code)))
                raise
        finally:
            # Always release the connection, on success or failure.
            conn.close()

        return self.__hosts

    def __site_start_elt(self, name, attrs):
        """Expat start-element handler: track the site, record hosts."""
        if name == "PLANETLAB_SITES":
            # Document root; nothing to record.
            pass
        elif name == "SITE":
            self.__sitename = attrs['SHORT_SITE_NAME']
        elif name == "HOST":
            self.__hosts.append({'HNAME' : attrs['NAME'],
                                 'IP'    : attrs['IP'],
                                 'NODEID': attrs['NODE_ID'],
                                 'SITE'  : self.__sitename})
        else:
            print("Unknown element in site file: %s: %s" % (name, attrs))
        return

    def __site_end_elt(self, name):
        """Expat end-element handler: forget the site once it closes."""
        if name == "SITE":
            self.__sitename = "Unknown"
        return

        
185
class Plab:
Kirk Webb's avatar
Kirk Webb committed
186
    def __init__(self, agent=None):
        """
        Set up the agent used to talk to Plab.

        agent -- optional.  When omitted (or otherwise false), the module
                 registered under DEF_AGENT in `agents` is instantiated.
                 A name registered in `agents` selects that module.
                 Any other value is stored as-is.
                 NOTE(review): assumes such a value is an already
                 constructed agent object -- confirm against callers.
        """
        if not agent:
            agent = DEF_AGENT
            pass

        # Previously self.agent was only assigned when no agent was given,
        # so passing one left the attribute undefined.
        try:
            agentclass = agents[agent]
            pass
        except (KeyError, TypeError):
            # Not a registered name (or not hashable): not a module key.
            agentclass = None
            pass

        if agentclass is None:
            self.agent = agent
            pass
        else:
            self.agent = agentclass()
            pass

        if debug:
            print("Using module: %s" % self.agent.modname)
            pass
        pass
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

    def createSlice(self, pid, eid):
        """
        Factory: build a Slice object for experiment pid/eid, run its
        _create() step, and hand the object back.
        """
        newslice = Slice(self, pid, eid)
        newslice._create()
        return newslice

    def loadSlice(self, pid, eid):
        """
        Factory: build a Slice object for experiment pid/eid, run its
        _load() step, and hand the object back.
        """
        loaded = Slice(self, pid, eid)
        loaded._load()
        return loaded

Kirk Webb's avatar
Kirk Webb committed
211
    def updateNodeEntries(self, ignorenew = False):
        """
        Finds out which Plab nodes are available, and
        update the DB accordingly.  If ignorenew is True, this will only
        make sure that the data in the DB is correct, and not complete.
        If ignorenew is False (the default), this will do a complete
        update of the DB.  However, this can take some time, as
        information about new nodes (such as link type) must be
        discovered.

        Note that this seemingly innocent function actually does a lot of
        magic.  This is the main/only way that Plab nodes get into the
        nodes DB, and this list is updated dynamically.  It also gathers
        static data about new nodes.

        Deleting nodes that are no longer available may not be the
        best approach due to the overhead of adding new nodes (ie, if
        the node disappears for a while, then comes back). Therefore,
        this is not currently done.

        Returns None.  A failure to fetch or parse the site list is
        reported on stdout and otherwise ignored; errors raised while
        adding a node propagate to the caller.
        """

        print("Getting available Plab nodes ...")

        avail = []
        try:
            parser = siteParser()
            avail = parser.getPlabNodeInfo()
            pass
        # XXX: rewrite to use more elegant exception info gathering.
        except:
            extype, exval, extrace = sys.exc_info()
            print("Error talking to agent: %s: %s" % (extype, exval))
            if debug:
                # Show the formatted stack rather than the traceback
                # object's repr.
                print("".join(traceback.format_tb(extrace)))
                pass
            print("Going back to sleep until next scheduled poll")
            return

        if debug:
            print("Got advertisement list:")
            print(avail)
            pass

        # Enforce allowed nodes limitation, if any.
        if len(ALLOWED_NODES):
            avail = [nodeent for nodeent in avail
                     if nodeent['IP'] in ALLOWED_NODES]
            print("Advertisements in allowed nodes list:\n%s" % avail)
            pass

        # Get node info we already have.
        known = self.__getKnownPnodes()
        if debug:
            print("Got known pnodes:")
            print(known)
            pass

        # Create list of (nodeent, update) pairs; update is False for a
        # node we have never seen and True for a known node that changed.
        toadd = []
        for nodeent in avail:
            # Replace sequences of bad chars in the site entity with
            # a single "-".
            nodeent['SITE'] = BADSITECHARS.sub("-", nodeent['SITE'])
            kent = known.get(nodeent['NODEID'])
            if kent is None:
                toadd.append((nodeent, False))
                pass
            elif kent['HNAME'] != nodeent['HNAME'] or \
                 kent['IP']    != nodeent['IP'] or \
                 kent['SITE']  != nodeent['SITE']:
                toadd.append((nodeent, True))
                pass
            pass

        if not toadd:
            return

        # Are we ignoring new entries?
        if ignorenew:
            if verbose:
                print("%d new Plab nodes, but ignored for now" % len(toadd))
                pass
            return

        # Process the add/update list - add to, or update in the DB.
        addlines = []
        updlines = []
        print("There are %d new/changed Plab nodes." % len(toadd))
        for nodeent, update in toadd:
            # Get the linktype here so we can report it in email.
            self.__findLinkType(nodeent)
            if debug:
                print("Found linktype %s for node %s" %
                      (nodeent['LINKTYPE'], nodeent['IP']))
                pass
            # Add/update the node in the DB.
            self.__addNode(nodeent, update)
            # Column order must match the headers in the email below.
            nodestr = "%s\t\t%s\t\t%s\t\t%s\t\t%s\n" % \
                      (nodeent['NODEID'],
                       nodeent['HNAME'],
                       nodeent['IP'],
                       nodeent['SITE'],
                       nodeent['LINKTYPE'])
            if update:
                updlines.append(nodestr)
                pass
            else:
                addlines.append(nodestr)
                pass
            pass

        # We need to update DNS since we've added hosts..
        print("Forcing a named map update ...")
        os.spawnl(os.P_WAIT, NAMED_SETUP, NAMED_SETUP)
        # Now announce that we've added/updated nodes.
        SENDMAIL(TBOPS,
                 "Plab nodes have been added/updated in the DB.",
                 "The following plab nodes have been added to the DB:\n"
                 "NodeID\t\tHostname\t\tIP\t\tSite\t\tLinktype\n\n"
                 "%s\n\n"
                 "The following plab nodes have been updated in the DB:\n"
                 "NodeID\t\tHostname\t\tIP\t\tSite\t\tLinktype\n\n"
                 "%s\n\n" %
                 ("".join(addlines), "".join(updlines)),
                 TBOPS)
        print("Done adding new Plab nodes.")
        return
352

353
354
355
356
357
    def __getKnownPnodes(self):
        """
        updateNodeEntries helper.  Queries the DB for every interface row
        whose node is of type 'pcplabphys' and returns a dict keyed by the
        node id with NODEPREFIX removed.  Each value is a dict holding that
        node's 'HNAME', 'IP' and 'SITE' as stored in the DB.
        """
        rows = DBQueryFatal("select i.node_id, i.IP, w.hostname, w.site"
                            " from interfaces as i"
                            " left join nodes as np on"
                            "  np.node_id = i.node_id"
                            " left join widearea_nodeinfo as w on"
                            "  i.node_id = w.node_id"
                            " where np.type = 'pcplabphys'")
        pnodes = {}
        for (dbnodeid, ipaddr, hostname, sitename) in rows:
            # Key on the bare id, i.e. without the NODEPREFIX text.
            barenid = dbnodeid.replace(NODEPREFIX, "")
            pnodes[barenid] = {'HNAME' : hostname,
                               'IP'    : ipaddr,
                               'SITE'  : sitename}
        return pnodes
Kirk Webb's avatar
   
Kirk Webb committed
373
        
Kirk Webb's avatar
Kirk Webb committed
374
    def __findLinkType(self, nodeent):
375
376
377
378
379
380
381
        """
        getFree helper function.  Figures out the link type of the given
        host.  This first performs a traceroute and checks for the U of
        U's I2 gateway to classify Internet2 hosts.  If this test fails,
        it checks if the hostname is international.  If this test fails,
        this simply specifies an inet link type.

Kirk Webb's avatar
Kirk Webb committed
382
        This can't detect DSL links..
383
        """
384
        # Is host international (or flux/emulab local)?
385
        from socket import gethostbyaddr, getfqdn, herror
Kirk Webb's avatar
Kirk Webb committed
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
        
        if not nodeent.has_key('HNAME'):
            try:
                (hname, ) = gethostbyaddr(ip)
                nodeent['HNAME'] = getfqdn(hname)
                pass
            except herror:
                nodeent['HNAME'] = nodeent['IP']
                print "WARNING: Failed to get hostname for %s" % nodeent['IP']
                pass
            pass
        
        tld = nodeent['HNAME'].split(".")[-1].lower()
        if not tld in ("edu", "org", "net", "com", "gov", "us", "ca"):
            nodeent['LINKTYPE'] = "pcplabintl"
            return
        
        # Is it us?
        if nodeent['HNAME'].endswith(LOCAL_PLAB_DOMAIN):
            nodeent['LINKTYPE'] = LOCAL_PLAB_LINKTYPE
            return
        
408
        # Is host on I2?
Kirk Webb's avatar
Kirk Webb committed
409
        traceroute = os.popen("traceroute -nm 10 -q 1 %s" % nodeent['IP'])
410
411
412
413
414
        trace = traceroute.read()
        traceroute.close()

        for gw in MAGIC_INET2_GATEWAYS:
            if trace.find(gw) != -1:
Kirk Webb's avatar
Kirk Webb committed
415
416
                nodeent['LINKTYPE'] = "pcplabinet2"
                return
417

418
419
420
421
        for gw in MAGIC_INET_GATEWAYS:
            if trace.find(gw) != -1:
                break
        else:
Kirk Webb's avatar
Kirk Webb committed
422
            print "WARNING: Unknown gateway for host %s" % nodeent['IP']
423

424
        # Must be plain 'ole Internet
Kirk Webb's avatar
Kirk Webb committed
425
426
        nodeent['LINKTYPE'] = "pcplabinet"
        return
427

Kirk Webb's avatar
Kirk Webb committed
428
    def __addNode(self, nodeent, update = False):
        """
        updateNodeEntries helper.  Adds a new Plab pnode and associated
        vnodes to the DB, or (when update is True) rewrites only the
        pnode's own rows.

        nodeent -- dict with 'NODEID', 'IP', 'HNAME', 'SITE' and
                   'LINKTYPE' entries.
        update  -- if True, skip vnode creation; the pnode rows are still
                   replaced.

        The pnode is written with eventstate 'ISUP', node_status 'down',
        and is reserved into RESERVED_PID/RESERVED_EID.  For a new node,
        NUMVNODES vnodes are created and the last one is reserved into
        MONITOR_PID/MONITOR_EID.

        XXX This duplicates a lot of the functionality of newwanode.
        This also adds some additional fields that newwanode doesn't, and
        takes advantage of the fact that the Plab nodes may be added in
        bulk.

        Common termination signals are blocked for the duration of the
        call and are always restored on the way out.  If a DB operation
        fails, a notification is mailed to TBOPS and the exception is
        re-raised.
        """
        # block out common termination signals while adding a node
        osigs = disable_sigs(TERMSIGS)
        try:
            defosid, controliface = self.__getNodetypeInfo()
            nodeid = NODEPREFIX + nodeent['NODEID']
            priority = PLABBASEPRIO + int(nodeent['NODEID'])
            hostonly = nodeent['HNAME'].replace(".", "-")

            print("Creating pnode %s as %s, priority %d." %
                  (nodeent['IP'], nodeid, priority))

            try:
                DBQueryFatal("replace into nodes"
                             " (node_id, type, phys_nodeid, role, priority,"
                             "  op_mode, def_boot_osid,"
                             "  allocstate, allocstate_timestamp,"
                             "  eventstate, state_timestamp)"
                             " values (%s, %s, %s, %s, %s,"
                             "  %s, %s, %s, now(), %s, now())",
                             (nodeid, 'pcplabphys', nodeid,
                              'testnode', priority*100,
                              'ALWAYSUP', defosid,
                              'FREE_CLEAN',
                              'ISUP'))

                DBQueryFatal("replace into widearea_nodeinfo"
                             " (node_id, contact_uid, hostname, site)"
                             " values (%s, %s, %s, %s)",
                             (nodeid, 'bnc', nodeent['HNAME'],
                              nodeent['SITE']))

                DBQueryFatal("replace into interfaces"
                             " (node_id, card, port, IP, interface_type,"
                             " iface, role)"
                             " values (%s, %s, %s, %s, %s, %s, %s)",
                             (nodeid, 0, 1, nodeent['IP'], 'fxp',
                              controliface, 'ctrl'))

                DBQueryFatal("replace into reserved"
                             " (node_id, pid, eid, rsrv_time, vname)"
                             " values (%s, %s, %s, now(), %s)",
                             (nodeid, RESERVED_PID, RESERVED_EID, hostonly))

                DBQueryFatal("replace into node_auxtypes"
                             " (node_id, type, count)"
                             " values (%s, %s, %s)",
                             (nodeid, nodeent['LINKTYPE'], 1))

                DBQueryFatal("replace into node_auxtypes"
                             " (node_id, type, count)"
                             " values (%s, %s, %s)",
                             (nodeid, 'pcplab', 1))

                DBQueryFatal("replace into node_status"
                             " (node_id, status, status_timestamp)"
                             " values (%s, %s, now())",
                             (nodeid, 'down'))

                # Don't do any vnode additions if we are just updating.
                # (The enclosing finally re-enables signals.)
                if update:
                    return

                vnodetype = "pcplab"
                vnodeid = ""
                for n in range(NUMVNODES):
                    vprio = (priority * 100) + (n+1)
                    sshdport = 38000+(n+1)
                    vnodeid = "%s-%d" % (nodeid, n+1)
                    if verbose:
                        print("Creating vnode %s, priority %d" %
                              (vnodeid, vprio))
                        pass

                    DBQueryFatal("insert into nodes"
                                 " (node_id, type, phys_nodeid, role, priority,"
                                 "  op_mode, def_boot_osid, update_accounts,"
                                 "  allocstate, allocstate_timestamp,"
                                 "  eventstate, state_timestamp, sshdport)"
                                 " values (%s, %s, %s, %s, %s,"
                                 "  %s, %s, %s, %s, now(), %s, now(), %s)",
                                 (vnodeid, vnodetype, nodeid, 'virtnode',
                                  vprio,
                                  'PCVM', defosid, 1,
                                  'FREE_CLEAN',
                                  'SHUTDOWN', sshdport))

                    DBQueryFatal("insert into node_status"
                                 " (node_id, status, status_timestamp)"
                                 " values (%s, %s, now())",
                                 (vnodeid, 'up'))
                    pass

                # Put the last vnode created into the special monitoring
                # expt.
                DBQueryFatal("insert into reserved"
                             " (node_id, pid, eid, rsrv_time, vname)"
                             " values (%s, %s, %s, now(), %s)",
                             (vnodeid, MONITOR_PID, MONITOR_EID, vnodeid))
                pass

            except:
                print("Error adding PLAB node to DB: someone needs to clean up!")
                tbmsg = "".join(traceback.format_exception(*sys.exc_info()))
                SENDMAIL(TBOPS, "Error adding new plab node to DB: %s\n" %
                         nodeid, "Some operation failed while trying to add a"
                         " newly discovered plab node to the DB:\n %s"
                         "\n Please clean up!\n" % tbmsg, TBOPS)
                raise

        finally:
            # last but not least, unblock signals -- on every exit path,
            # including failures before or inside the error handler.
            enable_sigs(osigs)
            pass
        return
550

551
552
    def __getNodetypeInfo(self):
        """
553
554
        addNode helper function.  Returns a (defosid, controliface) 
        tuple for the Plab pnode type.  Caches the result since
555
        it doesn't change.
556
557
        """
        if not hasattr(self, "__getNodetypeInfoCache"):
Kirk Webb's avatar
Kirk Webb committed
558
            if debug:
559
                print "Getting node type info"
Kirk Webb's avatar
Kirk Webb committed
560
                pass
561
            res = DBQueryFatal("select osid, control_iface"
562
                               " from node_types"
563
564
565
                               " where type = 'pcplabphys'")
            assert (len(res) == 1), "Failed to get node type info"
            (self.__getNodetypeInfoCache, ) = res
Kirk Webb's avatar
Kirk Webb committed
566
567
            pass
        
568
569
        return self.__getNodetypeInfoCache

Kirk Webb's avatar
Kirk Webb committed
570
    # XXX: deprecated - should probably just be removed
571
572
573
574
575
    def __nextFreeNodeid(self):
        """
        addNode helper function.  Reads the (nextid, nextpri) pair stored
        for node type 'pcplab' in the nextfreenode table, bumps both
        columns by one, and returns the pair that was read.  The table is
        write-locked for the duration and always unlocked afterwards.
        """
        if debug:
            print("Getting next free nodeid")

        DBQueryFatal("lock tables nextfreenode write")
        try:
            rows = DBQueryFatal("select nextid, nextpri from nextfreenode"
                                " where nodetype = 'pcplab'")
            assert (len(rows) == 1), "Unable to find next free nodeid"
            # Advance the counters before handing out the values just read.
            DBQueryFatal("update nextfreenode"
                         " set nextid = nextid + 1, nextpri = nextpri + 1"
                         " where nodetype = 'pcplab'")
            ((nodeid, priority), ) = rows
        finally:
            DBQueryFatal("unlock tables")

        return nodeid, priority

Kirk Webb's avatar
Kirk Webb committed
594
595
    # XXX: might want to just call into slice.renew and let module specific
    # code deal with individual node renew if it tracks at that level.
596
    def renew(self):
        """
        Renews all of the Plab leases that are going to expire soon.

        Selects every slice (plab_slices) and slice node
        (plab_slice_nodes) whose lease ends within RENEW_TIME seconds and
        tries to renew it.  Leases that have already expired are only
        warned about.  Failures are remembered in the module-level
        failedrenew list; TBOPS is mailed about newly failing leases and
        about failing leases that expire within two hours.

        Returns None.
        """
        global failedrenew # XXX

        print("Renewing Plab leases ...")

        # Ugh, MySQL doesn't know UTC until v4.1.1, and unix_timestamp()
        # returns the local time
        now = int(time.mktime(time.gmtime()))
        endtime = now + RENEW_TIME

        # NULL must be tested with "is not NULL"; "!= NULL" is never true
        # in SQL, so the old queries could not return any rows.
        leases = DBQueryFatal("select NULL, pid, eid,"
                              " unix_timestamp(leaseend) from plab_slices"
                              " where leaseend is not NULL and"
                              "  %s > unix_timestamp(leaseend)",
                              (endtime, ))
        leases += DBQueryFatal("select node_id, pid, eid,"
                               " unix_timestamp(leaseend) from plab_slice_nodes"
                               " where leaseend is not NULL and"
                               "  %s > unix_timestamp(leaseend)",
                               (endtime, ))

        loadedSlices = {}
        newfail = []
        failsoon = []

        for entry in leases:
            # nodeid is NULL/None for whole-slice leases.
            nodeid, pid, eid, tstamp = entry

            if tstamp <= now:
                if nodeid:
                    print("WARNING: Node lease for %s (%s/%s) has expired!" %
                          (nodeid, pid, eid))
                    pass
                else:
                    print("WARNING: Slice lease for %s/%s has expired!" %
                          (pid, eid))
                    pass
                continue

            # Load each slice at most once per pass.
            try:
                slice = loadedSlices[(pid, eid)]
                pass
            except KeyError:
                slice = self.loadSlice(pid, eid)
                loadedSlices[(pid, eid)] = slice
                pass

            if nodeid:
                node = slice.loadNode(nodeid)
                ret = node.renew()
                pass
            else:
                try:
                    # NOTE(review): the returned values are not stored
                    # here; presumably the agent records the new lease --
                    # confirm.
                    agentres, slicemeta, leaselen = \
                              self.agent.renewSlice(slice.slicename)
                    ret = 1
                    pass
                except:
                    ret = 0
                    pass
                pass

            if not ret:
                if nodeid:
                    print("Failed to renew lease for %s (%s/%s)" %
                          (nodeid, pid, eid))
                    pass
                else:
                    print("Failed to renew lease for %s/%s" %
                          (pid, eid))
                    pass
                if entry not in failedrenew:
                    newfail.append(entry)
                    pass
                if (tstamp - now) < (2*3600):
                    failsoon.append(entry)
                    pass
                pass
            else:
                if entry in failedrenew:
                    failedrenew.remove(entry)
                    pass
                pass
            pass

        if newfail:
            failedrenew += newfail
            faillines = []
            for n in newfail:
                if n[0]:
                    faillines.append("%s (%s/%s)\n" % n[:3])
                    pass
                else:
                    faillines.append("%s/%s\n" % n[1:3])
                    pass
                pass
            failstr = "".join(faillines)
            SENDMAIL(TBOPS, "Lease renewal(s) failed",
                     "Failed to renew lease on the following nodes:\n%s" %
                     failstr + "\n\nPlease check the plabrenew log", TBOPS)
            pass

        if failsoon:
            faillines = []
            for n in failsoon:
                if n[0]:
                    faillines.append("%s (%s/%s): expires: %s\n" %
                                     (n[:3] + (time.ctime(n[3]),)))
                    pass
                else:
                    faillines.append("%s/%s: expires: %s\n" %
                                     (n[1:3] + (time.ctime(n[3]),)))
                    pass
                pass
            failstr = "".join(faillines)
            SENDMAIL(TBOPS, "WARNING: PLAB leases are about to expire",
                     "The following plab leases are about to expire:\n%s" %
                     failstr + "\n\nPlease look into it!", TBOPS)
            pass
        return
    
    pass # end class Plab
712

713
714
715
716
717
718

#
# Slice abstraction
#

class Slice:
    """
    A Plab slice tied to one Emulab experiment (pid/eid).

    Slice state is kept in the plab_slices table; the nodes that belong
    to the slice are tracked in plab_slice_nodes.  All talking to the
    Plab side goes through self.plab.agent.
    """

    def __init__(self, plab, pid, eid, slicename = None):
        """
        plab      - owning Plab object; its 'agent' attribute is used for
                    every backend operation
        pid, eid  - Emulab project/experiment the slice belongs to
        slicename - optional explicit slice name; when omitted, _create()
                    derives one and _load() reads it from the DB
        """
        self.plab = plab
        self.pid, self.eid = pid, eid
        self.slicemeta = None
        self.slicename = slicename
        return

    def _create(self):
        """
        Creates a new slice that initially contains no nodes.  Don't call
        this directly, use Plab.createSlice instead.

        Returns the first element of the triple handed back by the
        agent's createSlice().  If either the agent call or the DB insert
        fails, the slice is rolled back (best effort) and the original
        exception propagates to the caller.
        """
        if not self.slicename:
            # No name given: build one from the experiment's index.
            qres = DBQueryFatal("select idx from experiments "
                                "where pid=%s "
                                "and eid=%s",
                                (self.pid, self.eid))
            if not len(qres):
                raise RuntimeError("Didn't get any results while "
                                   "looking for idx")
            eindex = qres[0][0]
            self.slicename = "%s_%s" % (SLICEPREFIX, eindex)
            pass

        print("Creating Plab slice %s." % self.slicename)

        created = False
        try:
            res, self.slicemeta, self.leaseend = \
                 self.plab.agent.createSlice(self)

            DBQueryFatal("insert into plab_slices"
                         " (pid, eid, slicename, slicemeta, leaseend)"
                         " values (%s, %s, %s, %s, FROM_UNIXTIME(%s))",
                         (self.pid, self.eid, self.slicename,
                          self.slicemeta, self.leaseend))
            created = True
            pass
        finally:
            # Runs for any failure above.  The rollback traps its own
            # errors, so the exception that got us here is the one the
            # caller sees once this finally clause completes.
            if not created:
                self._undoCreate()
                pass
            pass

        return res

    def _undoCreate(self):
        """
        Best-effort rollback of a failed _create(): asks the agent to
        delete the slice and removes the plab_slices row.  Each step is
        attempted even if the other fails; failures are only reported.
        """
        # Bare excepts are deliberate: a failure while cleaning up must
        # never replace the error that caused the rollback.
        try:
            self.plab.agent.deleteSlice(self)
            pass
        except:
            print("Failed to delete slice while undoing slice creation!")
            traceback.print_exc()
            pass

        try:
            DBQueryFatal("delete from plab_slices where slicename=%s",
                         (self.slicename,))
            pass
        except:
            print("Failed to remove slice DB entry while undoing "
                  "slice creation!")
            traceback.print_exc()
            pass
        return

    def _load(self):
        """
        Loads an already allocated slice from the DB.  Don't call this
        directly, use Plab.loadSlice instead.

        Sets self.slicename and self.slicemeta from the plab_slices row
        for this pid/eid.  Asserts that exactly one such row exists.

        XXX This should probably be made lazy, since not all operations
        really need it
        """
        if verbose:
            print("Loading slice for pid/eid %s/%s" % (self.pid, self.eid))
        res = DBQueryFatal("select slicename, slicemeta from plab_slices"
                           " where pid = %s and eid = %s",
                           (self.pid, self.eid))
        assert (len(res) > 0), \
               "No slice found for %s-%s" % (self.pid, self.eid)
        assert (len(res) == 1), \
               "Multiple slices found for %s-%s" % (self.pid, self.eid)
        ((self.slicename, self.slicemeta), ) = res
        pass

    def renew(self):
        """
        Renews slice lease, if applicable to selected backend agent.

        On success the new slicemeta/leaseend are written back to
        plab_slices and 1 is returned.  Any failure is printed along
        with a traceback and 0 is returned; nothing is raised.
        """
        print("Renewing lease for slice %s" % self.slicename)

        try:
            unused, self.slicemeta, self.leaseend = \
                    self.plab.agent.renewSlice(self)

            DBQueryFatal("replace into plab_slices"
                         " (pid, eid, slicename, slicemeta, leaseend)"
                         " values (%s, %s, %s, %s, FROM_UNIXTIME(%s))",
                         (self.pid, self.eid, self.slicename,
                          self.slicemeta, self.leaseend))
            pass
        except:
            print("Slice renewal failed!")
            traceback.print_exc()
            return 0

        return 1

    def destroy(self):
        """
        Frees all nodes in this slice and destroys the slice.  Note
        that this will really pound the DB if there are many nodes left
        in the slice, but those should be removed by Emulab before the
        slice is destroyed.

        A failure of the agent's deleteSlice() is only reported; a
        failure to remove the plab_slices row is mailed to TBOPS and
        re-raised.
        """
        print("Destroying Plab slice %s." % self.slicename)
        # Query arguments must be a sequence, hence the 1-tuple.
        res = DBQueryFatal("select node_id from plab_slice_nodes"
                           " where slicename = %s",
                           (self.slicename,))
        print("\tRemoving any remaining nodes in slice..")
        for (nodeid,) in res:
            node = self.loadNode(nodeid)
            node.free()
            del node  # Encourage the GC'er

        osigs = disable_sigs(TERMSIGS)

        # Nested try/finally so the saved signal state is restored on
        # every exit path, including an error inside the handler below.
        try:
            try:
                self.plab.agent.deleteSlice(self)
                pass
            except:
                print("Failed to delete slice!")
                traceback.print_exc()
                pass

            try:
                print("\tRemoving slice DB entry.")
                DBQueryFatal("delete from plab_slices where slicename = %s",
                             (self.slicename,))
            except:
                print("Error deleting slice from DB!")
                tbstr = "".join(traceback.format_exception(*sys.exc_info()))
                SENDMAIL(TBOPS, "Error deleting slice from DB",
                         "Slice deletion error:\n\n%s" % tbstr, TBOPS)
                raise
            pass
        finally:
            enable_sigs(osigs)
            pass

    def createNode(self, nodeid, force=False):
        """
        Node factory function

        Builds a Node for nodeid in this slice, runs its _create(force)
        and returns it.
        """
        # XXX: KRW - The following is a hack to help me with testing.
        # A nodeid that does not start with "plab" is resolved as a
        # hostname, mapped to a node_id through the interfaces table,
        # and "-20" is appended to that node_id.
        if not nodeid.startswith("plab"):
            IP = socket.gethostbyname(nodeid)
            qres = DBQueryFatal("select n.node_id from nodes as n left join "
                                "interfaces as i on n.node_id = i.node_id "
                                "where i.IP = %s", (IP,))
            assert (len(qres) > 0), "Node does not exist in DB: %s" % nodeid
            nodeid = qres[0][0] + "-20"
        node = Node(self, nodeid)
        node._create(force)
        return node

    def loadNode(self, nodeid):
        """
        Node factory function

        Builds a Node for nodeid in this slice, loads its state from the
        DB via _load() and returns it.
        """
        node = Node(self, nodeid)
        node._load()
        return node

#
# Node abstraction
#

class Node:
Kirk Webb's avatar
Kirk Webb committed
879
    def __init__(self, slice, nodeid, pollNode = False):
        """
        slice    - Slice object this node belongs to
        nodeid   - node identifier, used as the node_id key in the DB
        pollNode - when true, _create() waits until the node answers a
                   trivial command before returning
        """
        self.slice, self.nodeid = slice, nodeid
        # The IP lookup runs only after slice and nodeid are in place.
        self.IP = self.__findIP()
        self.nodemeta, self.pollNode = None, pollNode
        return
886

Kirk Webb's avatar
   
Kirk Webb committed
887
    # XXX: may want to rethink signal handling here.
888
    def _create(self, force=False):
        """
        Creates a new node.  This physically allocates the node into the
        slice through the dslice agent and node manager.  Note that no
        node setup is performed.  Don't call this directly, use
        Slice.createNode instead.

        force - when true, go ahead even though a DB entry for this node
                already exists; otherwise that case raises RuntimeError.
        """

        # First, make sure there isn't already an entry in the DB.  Any
        # failure of _load() whatsoever is taken to mean "no entry".
        alreadyThere = True
        try:
            self._load()
        except:
            alreadyThere = False
            pass

        if alreadyThere:
            if not force:
                raise RuntimeError("Entry for plab node %s already exists "
                                   "in the DB" % self.nodeid)
            print("Node entry exists in DB, but creation forced anyway.")
            pass

        print("Creating Plab node %s on %s." % (self.nodeid, self.IP))
        agent = self.slice.plab.agent
        res, self.nodemeta, self.leaselen = agent.createNode(self)

        # NOTE(review): leaselen is written to the leaseend column as-is,
        # while the plab_slices queries wrap their value in
        # FROM_UNIXTIME() - confirm what the agent actually returns.
        owner = self.slice
        DBQueryFatal("replace into plab_slice_nodes"
                     " (pid, eid, slicename, node_id,"
                     " nodemeta, leaseend)"
                     " values (%s, %s, %s, %s, %s, %s)",
                     (owner.pid, owner.eid,
                      owner.slicename, self.nodeid,
                      self.nodemeta, self.leaselen))

        if self.pollNode:
            TIMESTAMP("Waiting for %s to respond" % self.nodeid)
            # Keep probing every NODEPROBEINT seconds until a trivial
            # command goes through.  NOTE(review): there is no upper
            # bound on how long this can take.
            responded = False
            while not responded:
                try:
                    self.__perform("/bin/true")
                    responded = True
                    pass
                except:
                    time.sleep(NODEPROBEINT)
                    pass
                pass
            TIMESTAMP("Node %s ready." % self.nodeid)
            pass

        TIMESTAMP("createnode finished on %s." % self.nodeid)
        return
937
938
939
940
941
942

    def _load(self):
        """
        Loads an already allocated node from the DB.  Don't call this
        directly, use Slice.loadNode instead.

        Sets self.nodemeta from the plab_slice_nodes row for this
        node_id.  Asserts that exactly one row exists and that the row's
        slicename matches the slice this Node was created with.
        """
        if verbose:
            print("Loading node %s" % self.nodeid)
        # Query arguments must be a sequence, hence the 1-tuple (the
        # parenthesised bare string used before was not one).
        res = DBQueryFatal("select slicename, nodemeta"
                           " from plab_slice_nodes where node_id = %s",
                           (self.nodeid,))
        assert (len(res) > 0), \
               "Node %s (slice %s) not found" % \
               (self.nodeid, self.slice.slicename)
        assert (len(res) == 1), \
               "Multiple nodes found for nodeid %s" % self.nodeid
        ((slicename, self.nodemeta), ) = res
        assert (slicename == self.slice.slicename), \
               "Node %s loaded by slice %s, but claims to be in slice %s" % \
               (self.nodeid, self.slice.slicename, slicename)
Kirk Webb's avatar
   
Kirk Webb committed
957

958
959
    def free(self):
        """
        Frees the node and kills the VM.  Note that this does not
        shutdown anything inside the vserver.  Warning: forks a process
        to carry out the actual work!

        Runs _free() through ForkCmd with a FREE_TIMEOUT timeout and
        TERMSIGS disabled in both parent and child.  Returns the bitwise
        OR of the first two elements of ForkCmd's result (presumably
        nonzero on failure - confirm against ForkCmd).
        """
        outcome = ForkCmd(self._free,
                          timeout=FREE_TIMEOUT,
                          disable_sigs_parent=TERMSIGS,
                          disable_sigs_child=TERMSIGS)
        first = outcome[0]
        second = outcome[1]
        return first | second
Kirk Webb's avatar
   
Kirk Webb committed
968
969
        
    def _free(self):
        """
        Frees the node and kills the VM.  Note that this does not
        shutdown anything inside the vserver.  Don't call this directly;
        instead, use Node.free()

        Returns the logical negation of whatever the agent's freeNode()
        handed back.
        """
        TIMESTAMP("freenode %s started." % self.nodeid)
        print("Freeing Plab node %s." % self.nodeid)

        # Remove the DB entry first.  A failure here is reported by mail
        # but does not stop the node from being released below.
        try:
            DBQueryFatal("delete from plab_slice_nodes where node_id = %s",
                         (self.nodeid,))
        except:
            print("Uh oh, couldn't remove plab sliver record from the DB!")
            trace = "".join(traceback.format_exception(*sys.exc_info()))
            body = ("Unable to delete entry for sliver %s from the DB:"
                    "\n\n%s" % (self.nodeid, trace))
            SENDMAIL(TBOPS, "Error: Couldn't remove plab vnode from DB",
                     body, TBOPS)
            pass

        released = self.slice.plab.agent.freeNode(self)

        TIMESTAMP("freenode %s finished." % self.nodeid)
        return not released
995

996
    def renew(self):
Kirk Webb's avatar
   
Kirk Webb committed
997
998
999
1000
        """
        Renew the lease for this node.  Note that this method
        forks and runs another private method to actually do the
        work!
For faster browsing, not all history is shown. View entire blame