libplab.py.in 38.8 KB
Newer Older
1
# -*- python -*-
Kirk Webb's avatar
Kirk Webb committed
2
3
4
5
6
7
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#

8
"""
Kirk Webb's avatar
Kirk Webb committed
9
10
11
12
13
14
15
16
17
Library for interfacing with Plab.  This abstracts out the concepts of
Plab central, slices, and nodes.  All data (except static things like
certificates) is kept in the Emulab DB.  Unlike the regular dslice
svm, this one supports dynamically changing which nodes are in a
slice.

This requires an already obtained dslice certificate and key.  By
default it expects to find these in the @prefix@/etc/plab/
subdirectory.
18
19
20
"""

import sys
21
22
23
sys.path.append("@prefix@/lib")

import os, time
24
import string
Kirk Webb's avatar
   
Kirk Webb committed
25
import traceback
Kirk Webb's avatar
   
Kirk Webb committed
26
import socket
Kirk Webb's avatar
Kirk Webb committed
27
28
29
import httplib
import xml.parsers.expat
import re
Kirk Webb's avatar
   
Kirk Webb committed
30
31

from warnings import warn
32

33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#
# Testbed and DB access libs
#
from libtestbed import *
from libdb import *

#
# Plab modules to import
#
from mod_PLC import mod_PLC
from mod_dslice import mod_dslice

# Table of available backend agent modules, keyed by agent name.
# Plab.__init__ instantiates the DEF_AGENT entry by default.
agents = {'PLC'    : mod_PLC,
          'dslice' : mod_dslice}

#
# output control vars
#
verbose = 0
debug = 0

#
# Constants
#
DEF_AGENT = "PLC"        # key into the agents table above

RENEW_TIME = 2*24*60*60  # Renew two days before lease expires

RENEW_TIMEOUT = 1*60     # give the node manager a minute to respond to renew
FREE_TIMEOUT  = 1*60     # give the node manager a minute to respond to free
NODEPROBEINT  = 10

TBOPS = "@TBOPSEMAIL@".replace("\\","")
MAILTAG = "@THISHOMEBASE@"

RESERVED_PID = "emulab-ops"
RESERVED_EID = "hwdown"       # start life in hwdown
MONITOR_PID  = "emulab-ops"
MONITOR_EID  = "plab-monitor"

# Gateway addresses searched for in traceroute output by
# Plab.__findLinkType() to tell Internet2 paths from commodity ones.
MAGIC_INET2_GATEWAYS = ("205.124.237.10", )
MAGIC_INET_GATEWAYS = ("205.124.249.123", "205.124.249.113",
                       "205.124.249.121", "205.124.249.115")
LOCAL_PLAB_DOMAIN = ".flux.utah.edu"
LOCAL_PLAB_LINKTYPE = "pcplabinet2"
# right now these are the only 2.0 machines running the new slice interface:
#ALLOWED_NODES = ('198.78.49.59', '18.31.0.213', '169.229.50.85',
#                 '169.229.50.89', '128.112.152.124', '12.46.129.23',
#                 '64.41.221.196', '132.239.17.226', '128.223.6.113',
#                 '128.208.4.199', '128.2.198.199', '155.98.35.2',
#                 '155.98.35.3')
# An empty tuple means "no restriction" (see Plab.updateNodeEntries).
ALLOWED_NODES = ()
NUMVNODES = 20           # vnodes created per pnode by Plab.__addNode()

PLABNODE = "@prefix@/sbin/plabnode"
SSH = "@prefix@/bin/sshtb"
NAMED_SETUP = "@prefix@/sbin/named_setup"

ROOTBALL_URL = "http://localhost:1492/" # ensure this ends in a slash

# Host and path of the sites file fetched by siteParser.
DEF_PLAB_URL = "www.planet-lab.org"
DEF_SITE_XML = "/xml/sites.xml"

DEF_ROOTBALL_NAME = "@PLAB_ROOTBALL@"
SLICEPREFIX = "@PLAB_SLICEPREFIX@"
NODEPREFIX  = "plab"     # prepended to the Plab node id to form node_id

# Runs of non-word characters in a site name; replaced with "-" before
# the name is compared with / stored in the DB.
BADSITECHARS = re.compile(r"\W+")
PLABBASEPRIO = 20000     # pnode priority = PLABBASEPRIO + Plab node id

#
# var to track failed renewals (maintained by Plab.renew())
#
failedrenew = []

#
# Make stdout unbuffered (a buffer size of 0 disables buffering
# altogether, not just line buffering).
#
sys.stdout = os.fdopen(sys.stdout.fileno(), sys.stdout.mode, 0)

#
# Ensure SIGPIPE doesn't bite us:
#
# Imported here explicitly; no "import signal" is visible at the top of
# this file, so the name would otherwise have to leak in through one of
# the "from ... import *" lines.
import signal
signal.signal(signal.SIGPIPE, signal.SIG_IGN)

118

119
120
121
#
# Plab abstraction
#
Kirk Webb's avatar
Kirk Webb committed
122

Kirk Webb's avatar
Kirk Webb committed
123
124
125
#
# Class responsible for parsing planetlab sites file
#
Kirk Webb's avatar
Kirk Webb committed
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class siteParser:
    """
    Fetches the PlanetLab sites XML file and turns it into a list of
    host entries.

    Each entry is a dict with the keys 'HNAME', 'IP' and 'NODEID' (taken
    from the NAME, IP and NODE_ID attributes of a HOST element) and
    'SITE' (the SHORT_SITE_NAME attribute of the enclosing SITE element).
    """

    def __init__(self):
        self.__hosts = []
        self.__sitename = ""
        self.parser = self.__newParser()

    def __newParser(self):
        """
        Returns a fresh expat parser wired to this object's handlers.
        """
        parser = xml.parsers.expat.ParserCreate()
        parser.StartElementHandler = self.__site_start_elt
        parser.EndElementHandler = self.__site_end_elt
        return parser

    def getPlabNodeInfo(self):
        """
        Downloads DEF_SITE_XML from DEF_PLAB_URL over HTTPS, parses it,
        and returns the list of host entries found.

        Raises RuntimeError if the server does not answer with status
        200, and re-raises xml.parsers.expat.ExpatError (after printing
        a diagnostic) if the document is malformed.
        """
        conn = httplib.HTTPSConnection(DEF_PLAB_URL)
        try:
            conn.request("GET", DEF_SITE_XML)
            res = conn.getresponse()
            if res.status != 200:
                raise RuntimeError("HTTP Error getting site list:\n"
                                   "Code: %d Reason: %s" %
                                   (res.status, res.reason))
            return self._parseSiteFile(res)
        finally:
            # Always give the socket back, whether or not parsing worked.
            conn.close()

    def _parseSiteFile(self, fileobj):
        """
        Parses a sites document from fileobj (anything with a
        read(nbytes) method) and returns the host entries it contains.

        An expat parser object can only be used for a single document,
        so a new parser and a new result list are set up on every call;
        this keeps repeated calls on one siteParser working and stops
        results from earlier calls leaking into later ones.
        """
        self.parser = self.__newParser()
        self.__hosts = []
        self.__sitename = ""
        try:
            self.parser.ParseFile(fileobj)
        except xml.parsers.expat.ExpatError:
            e = sys.exc_info()[1]
            print("Error parsing XML file, lineno: %d, offset: %d:\n%s" %
                  (e.lineno, e.offset, xml.parsers.expat.ErrorString(e.code)))
            raise

        return self.__hosts

    def __site_start_elt(self, name, attrs):
        """
        expat start-element handler: remembers the current site name and
        records one entry per HOST element.
        """
        if name == "PLANETLAB_SITES":
            # Document root; nothing to record.
            pass

        elif name == "SITE":
            self.__sitename = attrs['SHORT_SITE_NAME']

        elif name == "HOST":
            self.__hosts.append({'HNAME' : attrs['NAME'],
                                 'IP'    : attrs['IP'],
                                 'NODEID': attrs['NODE_ID'],
                                 'SITE'  : self.__sitename})

        else:
            print("Unknown element in site file: %s: %s" % (name, attrs))

        return

    def __site_end_elt(self, name):
        """
        expat end-element handler: once a SITE element closes, any HOST
        seen outside a SITE is attributed to the site "Unknown".
        """
        if name == "SITE":
            self.__sitename = "Unknown"
        return
        
184
class Plab:
Kirk Webb's avatar
Kirk Webb committed
185
    def __init__(self, agent=None):
        """
        Sets up the backend agent used to talk to Plab.

        agent -- optional.  When omitted (or otherwise false) the
                 DEF_AGENT entry of the module-level agents table is
                 instantiated.  A value that is a key of that table is
                 instantiated the same way; any other value is used
                 as-is as the agent object.
                 NOTE(review): the original code never stored a
                 caller-supplied agent at all, so which of the two
                 forms callers pass cannot be seen from here; confirm.
        """
        if not agent:
            agent = DEF_AGENT

        try:
            isname = agent in agents
        except TypeError:
            # Unhashable, so it cannot be a key of the agents table.
            isname = False

        if isname:
            self.agent = agents[agent]()
        else:
            self.agent = agent

        if debug:
            print("Using module: %s" % self.agent.modname)
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

    def createSlice(self, pid, eid):
        """
        Slice factory function: builds a Slice for pid/eid bound to this
        Plab object, runs its _create() step and returns it.
        """
        newslice = Slice(self, pid, eid)
        newslice._create()
        return newslice

    def loadSlice(self, pid, eid):
        """
        Slice factory function: builds a Slice for pid/eid bound to this
        Plab object, runs its _load() step and returns it.
        """
        oldslice = Slice(self, pid, eid)
        oldslice._load()
        return oldslice

Kirk Webb's avatar
Kirk Webb committed
210
    def updateNodeEntries(self, ignorenew = False):
        """
        Finds out which Plab nodes are available, and updates the DB
        accordingly.

        The advertised node list is fetched with siteParser, optionally
        restricted to ALLOWED_NODES, and compared with the pnodes
        already in the DB.  Nodes that are unknown, or whose hostname,
        IP or site differ from the DB, are collected.  If ignorenew is
        True, that collection is only counted (and reported when
        verbose); nothing is written.  If ignorenew is False (the
        default), each collected node has its link type determined, is
        added to / updated in the DB, the named maps are regenerated,
        and a summary is mailed to TBOPS.  This can take some time, as
        information about new nodes (such as link type) must be
        discovered.

        Note that this seemingly innocent function actually does a lot
        of magic.  This is the main/only way that Plab nodes get into
        the nodes DB, and this list is updated dynamically.  It also
        gathers static data about new nodes.

        Deleting nodes that are no longer available may not be the
        best approach due to the overhead of adding new nodes (ie, if
        the node disappears for a while, then comes back). Therefore,
        this is not currently done.

        If the node list cannot be fetched, the error is printed and
        the function returns without touching the DB.
        """
        print("Getting available Plab nodes ...")

        avail = []
        try:
            parser = siteParser()
            avail = parser.getPlabNodeInfo()
        except (KeyboardInterrupt, SystemExit):
            # Never swallow a request to stop the program.
            raise
        except:
            extype, exval, extrace = sys.exc_info()
            print("Error talking to agent: %s: %s" % (extype, exval))
            if debug:
                # Print the formatted stack, not the traceback object.
                print("".join(traceback.format_tb(extrace)))
            print("Going back to sleep until next scheduled poll")
            return

        if debug:
            print("Got advertisement list:")
            print(avail)

        # Enforce allowed nodes limitation, if any.
        if ALLOWED_NODES:
            avail = [ent for ent in avail if ent['IP'] in ALLOWED_NODES]
            print("Advertisements in allowed nodes list:\n%s" % avail)

        # Get node info we already have.
        known = self.__getKnownPnodes()
        if debug:
            print("Got known pnodes:")
            print(known)

        # Create list of (node entry, is-update) pairs to push to the DB.
        toadd = []
        for nodeent in avail:
            # Replace sequences of bad chars in the site entity with
            # a single "-".
            nodeent['SITE'] = BADSITECHARS.sub("-", nodeent['SITE'])
            nid = nodeent['NODEID']

            if nid not in known:
                # A node we have never seen: plain addition.
                toadd.append((nodeent, False))
                continue

            # A node we already know about: only touch it if one of
            # its attributes has changed, and mark it as an update.
            kent = known[nid]
            if kent['HNAME'] != nodeent['HNAME'] or \
               kent['IP']    != nodeent['IP'] or \
               kent['SITE']  != nodeent['SITE']:
                toadd.append((nodeent, True))

        if not toadd:
            return

        # Are we ignoring new entries?
        if ignorenew:
            if verbose:
                print("%d new Plab nodes, but ignored for now" % len(toadd))
            return

        # Not ignoring: do the additions/updates.
        addlines = []
        updlines = []
        print("There are %d new/changed Plab nodes." % len(toadd))
        for nodeent, update in toadd:
            # Get the linktype here so we can report it in email.
            self.__findLinkType(nodeent)
            if debug:
                print("Found linktype %s for node %s" %
                      (nodeent['LINKTYPE'], nodeent['IP']))

            # Add/update the node in the DB.
            self.__addNode(nodeent, update)

            # One report line per node; the column order matches the
            # header lines used in the mail below.
            nodestr = "%s\t\t%s\t\t%s\t\t%s\t\t%s\n" % \
                      (nodeent['NODEID'],
                       nodeent['HNAME'],
                       nodeent['IP'],
                       nodeent['SITE'],
                       nodeent['LINKTYPE'])
            if update:
                updlines.append(nodestr)
            else:
                addlines.append(nodestr)

        addstr = "".join(addlines)
        updstr = "".join(updlines)

        # We need to update DNS since we've added hosts..
        print("Forcing a named map update ...")
        os.spawnl(os.P_WAIT, NAMED_SETUP, NAMED_SETUP)

        # Now announce that we've added/updated nodes.
        SENDMAIL(TBOPS,
                 "Plab nodes have been added/updated in the DB.",
                 "The following plab nodes have been added to the DB:\n"
                 "NodeID\t\tHostname\t\tIP\t\tSite\t\tLinktype\n\n"
                 "%s\n\n"
                 "The following plab nodes have been updated in the DB:\n"
                 "NodeID\t\tHostname\t\tIP\t\tSite\t\tLinktype\n\n"
                 "%s\n\n" % \
                 (addstr, updstr),
                 TBOPS)
        print("Done adding new Plab nodes.")
        return
351

352
353
354
355
356
    def __getKnownPnodes(self):
        """
        updateNodeEntries helper function.  Returns a dict describing
        the Plab pnodes (type 'pcplabphys') that currently exist in the
        DB.  The key is the node_id with the leading NODEPREFIX
        removed; the value is a dict with the keys 'HNAME', 'IP' and
        'SITE', i.e. the same fields updateNodeEntries compares against
        the advertised node list.

        HNAME and SITE come from a left join on widearea_nodeinfo, so
        they may be NULL/None for a node without such a row.
        """
        res = DBQueryFatal("select i.node_id, i.IP, w.hostname, w.site"
                           " from interfaces as i"
                           " left join nodes as np on"
                           "  np.node_id = i.node_id"
                           " left join widearea_nodeinfo as w on"
                           "  i.node_id = w.node_id"
                           " where np.type = 'pcplabphys'")
        ret = {}
        for nodeid, ip, hname, site in res:
            # Strip the prefix only where it is a prefix, so an id that
            # happens to contain NODEPREFIX further in is left intact.
            nid = nodeid
            if nid.startswith(NODEPREFIX):
                nid = nid[len(NODEPREFIX):]
            ret[nid] = {'HNAME' : hname,
                        'IP'    : ip,
                        'SITE'  : site}
        return ret
Kirk Webb's avatar
   
Kirk Webb committed
372
        
Kirk Webb's avatar
Kirk Webb committed
373
    def __findLinkType(self, nodeent):
        """
        updateNodeEntries helper function.  Figures out the link type
        of the given host and stores it in nodeent['LINKTYPE'].  If
        nodeent has no 'HNAME' yet, it is filled in by a reverse lookup
        of nodeent['IP'] (falling back to the IP itself).

        The checks are made in this order:
          1. If the last component of the hostname is not one of a
             fixed list of TLDs, the host is "pcplabintl".
          2. If the hostname ends in LOCAL_PLAB_DOMAIN, the host gets
             LOCAL_PLAB_LINKTYPE.
          3. A traceroute to the host is run; if one of
             MAGIC_INET2_GATEWAYS shows up, the host is "pcplabinet2".
          4. Otherwise the host is "pcplabinet"; a warning is printed
             if none of MAGIC_INET_GATEWAYS showed up either.

        This can't detect DSL links..
        """
        from socket import gethostbyaddr, getfqdn, herror, gaierror

        if 'HNAME' not in nodeent:
            try:
                # gethostbyaddr returns (hostname, aliaslist, ipaddrlist)
                hname = gethostbyaddr(nodeent['IP'])[0]
                nodeent['HNAME'] = getfqdn(hname)
            except (herror, gaierror):
                nodeent['HNAME'] = nodeent['IP']
                print("WARNING: Failed to get hostname for %s" %
                      nodeent['IP'])

        # Is host international (or flux/emulab local)?
        tld = nodeent['HNAME'].split(".")[-1].lower()
        if tld not in ("edu", "org", "net", "com", "gov", "us", "ca"):
            nodeent['LINKTYPE'] = "pcplabintl"
            return

        # Is it us?
        if nodeent['HNAME'].endswith(LOCAL_PLAB_DOMAIN):
            nodeent['LINKTYPE'] = LOCAL_PLAB_LINKTYPE
            return

        # Is host on I2?
        # The address comes from a remotely fetched file and is handed
        # to a shell, so only run traceroute on a plain dotted quad.
        if re.match(r"[0-9]{1,3}(\.[0-9]{1,3}){3}\Z", nodeent['IP']):
            traceroute = os.popen("traceroute -nm 10 -q 1 %s" %
                                  nodeent['IP'])
            trace = traceroute.read()
            traceroute.close()
        else:
            print("WARNING: Not tracing odd-looking address %r" %
                  (nodeent['IP'],))
            trace = ""

        for gw in MAGIC_INET2_GATEWAYS:
            if trace.find(gw) != -1:
                nodeent['LINKTYPE'] = "pcplabinet2"
                return

        # The else clause runs only if no commodity gateway matched.
        for gw in MAGIC_INET_GATEWAYS:
            if trace.find(gw) != -1:
                break
        else:
            print("WARNING: Unknown gateway for host %s" % nodeent['IP'])

        # Must be plain 'ole Internet
        nodeent['LINKTYPE'] = "pcplabinet"
        return
426

Kirk Webb's avatar
Kirk Webb committed
427
    def __addNode(self, nodeent, update = False):
        """
        updateNodeEntries helper function.  Adds a new Plab pnode and
        its associated vnodes to the DB.

        nodeent -- node entry dict; the keys 'NODEID', 'IP', 'HNAME',
                   'SITE' and 'LINKTYPE' are read here.
        update  -- if True, only the pnode's own rows are (re)written
                   and no vnodes are created.

        The pnode rows are written with "replace into", the vnode rows
        with "insert into".  NUMVNODES vnodes are created per pnode,
        and the last one is reserved to MONITOR_PID/MONITOR_EID.

        Common termination signals are blocked while the DB is being
        modified and are always restored on the way out.  If any DB
        operation fails, mail is sent to TBOPS and the exception is
        re-raised; nothing is rolled back.

        XXX This duplicates a lot of the functionality of newwanode.
        Note that, very unlike newwanode, the node is initially up,
        since it had to be up to be added in the first place.  This also
        adds some additional fields that newwanode doesn't, and takes
        advantage of the fact that the Plab nodes may be added in bulk.

        NOTE(review): with update=True the "replace into reserved" and
        "replace into node_status" statements below still run, which
        rewrites an existing node's reservation and status -- confirm
        that this is intended.
        """
        # block out common termination signals while adding a node
        osigs = disable_sigs(TERMSIGS)
        try:
            defosid, controliface = self.__getNodetypeInfo()
            nodeid = NODEPREFIX + nodeent['NODEID']
            priority = PLABBASEPRIO + int(nodeent['NODEID'])
            hostonly = nodeent['HNAME'].replace(".", "-")

            print("Creating pnode %s as %s, priority %d." %
                  (nodeent['IP'], nodeid, priority))

            try:
                DBQueryFatal("replace into nodes"
                             " (node_id, type, phys_nodeid, role, priority,"
                             "  op_mode, def_boot_osid,"
                             "  allocstate, allocstate_timestamp,"
                             "  eventstate, state_timestamp)"
                             " values (%s, %s, %s, %s, %s,"
                             "  %s, %s, %s, now(), %s, now())",
                             (nodeid, 'pcplabphys', nodeid,
                              'testnode', priority*100,
                              'ALWAYSUP', defosid,
                              'FREE_CLEAN',
                              'ISUP'))

                DBQueryFatal("replace into widearea_nodeinfo"
                             " (node_id, contact_uid, hostname, site)"
                             " values (%s, %s, %s, %s)",
                             (nodeid, 'bnc', nodeent['HNAME'],
                              nodeent['SITE']))

                DBQueryFatal("replace into interfaces"
                             " (node_id, card, port, IP, interface_type,"
                             " iface, role)"
                             " values (%s, %s, %s, %s, %s, %s, %s)",
                             (nodeid, 0, 1, nodeent['IP'], 'fxp',
                              controliface, 'ctrl'))

                DBQueryFatal("replace into reserved"
                             " (node_id, pid, eid, rsrv_time, vname)"
                             " values (%s, %s, %s, now(), %s)",
                             (nodeid, RESERVED_PID, RESERVED_EID, hostonly))

                DBQueryFatal("replace into node_auxtypes"
                             " (node_id, type, count)"
                             " values (%s, %s, %s)",
                             (nodeid, nodeent['LINKTYPE'], 1))

                DBQueryFatal("replace into node_auxtypes"
                             " (node_id, type, count)"
                             " values (%s, %s, %s)",
                             (nodeid, 'pcplab', 1))

                DBQueryFatal("replace into node_status"
                             " (node_id, status, status_timestamp)"
                             " values (%s, %s, now())",
                             (nodeid, 'down'))

                # Don't do any vnode additions if we are just updating.
                if update:
                    return

                vnodetype = "pcplab"
                vnodeid = ""
                for n in range(NUMVNODES):
                    vprio = (priority * 100) + (n+1)
                    sshdport = 38000+(n+1)
                    vnodeid = "%s-%d" % (nodeid, n+1)
                    if verbose:
                        print("Creating vnode %s, priority %d" %
                              (vnodeid, vprio))

                    DBQueryFatal("insert into nodes"
                                 " (node_id, type, phys_nodeid, role, priority,"
                                 "  op_mode, def_boot_osid, update_accounts,"
                                 "  allocstate, allocstate_timestamp,"
                                 "  eventstate, state_timestamp, sshdport)"
                                 " values (%s, %s, %s, %s, %s,"
                                 "  %s, %s, %s, %s, now(), %s, now(), %s)",
                                 (vnodeid, vnodetype, nodeid, 'virtnode',
                                  vprio,
                                  'PCVM', defosid, 1,
                                  'FREE_CLEAN',
                                  'SHUTDOWN', sshdport))

                    DBQueryFatal("insert into node_status"
                                 " (node_id, status, status_timestamp)"
                                 " values (%s, %s, now())",
                                 (vnodeid, 'up'))

                # Put the last vnode created into the special monitoring
                # expt.  Skipped when no vnode was created at all, so an
                # empty node id is never reserved.
                if vnodeid:
                    DBQueryFatal("insert into reserved"
                                 " (node_id, pid, eid, rsrv_time, vname)"
                                 " values (%s, %s, %s, now(), %s)",
                                 (vnodeid, MONITOR_PID, MONITOR_EID,
                                  vnodeid))

            except:
                print("Error adding PLAB node to DB: "
                      "someone needs to clean up!")
                tbmsg = "".join(traceback.format_exception(*sys.exc_info()))
                SENDMAIL(TBOPS, "Error adding new plab node to DB: %s\n" %
                         nodeid, "Some operation failed while trying to add a"
                         " newly discovered plab node to the DB:\n %s"
                         "\n Please clean up!\n" % tbmsg, TBOPS)
                raise

        finally:
            # last but not least, unblock signals -- on every path out
            enable_sigs(osigs)

        return
549

550
551
    def __getNodetypeInfo(self):
        """
        addNode helper function.  Returns the (osid, control_iface) row
        of the node_types table for the Plab pnode type 'pcplabphys'.
        Caches the result on the instance since it doesn't change.

        Raises AssertionError if the query does not return exactly one
        row.
        """
        # The cache attribute has a double-underscore name, so Python
        # mangles it inside the class body.  A string passed to
        # hasattr() is NOT mangled, which is why the old
        # hasattr(self, "__getNodetypeInfoCache") test never found it.
        # Plain attribute access is mangled consistently.
        try:
            return self.__getNodetypeInfoCache
        except AttributeError:
            pass

        if debug:
            print("Getting node type info")
        res = DBQueryFatal("select osid, control_iface"
                           " from node_types"
                           " where type = 'pcplabphys'")
        assert (len(res) == 1), "Failed to get node type info"
        (self.__getNodetypeInfoCache, ) = res

        return self.__getNodetypeInfoCache

Kirk Webb's avatar
Kirk Webb committed
569
    # XXX: deprecated - should probably just be removed
570
571
572
573
574
    def __nextFreeNodeid(self):
        """
        addNode helper function.  Reads the (nextid, nextpri) pair for
        node type 'pcplab' from the nextfreenode table, bumps both
        counters by one in the DB, and returns the pair that was read
        as a (nodeid, priority) tuple.

        The table is write-locked for the duration and always unlocked
        again.  Raises AssertionError if there is not exactly one
        matching row.
        """
        if debug:
            print("Getting next free nodeid")

        DBQueryFatal("lock tables nextfreenode write")
        try:
            rows = DBQueryFatal("select nextid, nextpri from nextfreenode"
                                " where nodetype = 'pcplab'")
            assert (len(rows) == 1), "Unable to find next free nodeid"
            DBQueryFatal("update nextfreenode"
                         " set nextid = nextid + 1, nextpri = nextpri + 1"
                         " where nodetype = 'pcplab'")
            (row, ) = rows
            (nodeid, priority) = row
        finally:
            DBQueryFatal("unlock tables")

        return (nodeid, priority)

Kirk Webb's avatar
Kirk Webb committed
593
594
    # XXX: might want to just call into slice.renew and let module specific
    # code deal with individual node renew if it tracks at that level.
595
    def renew(self):
        """
        Renews all of the Plab leases that are going to expire soon.

        Slice leases (plab_slices) and per-node leases
        (plab_slice_nodes) whose end time falls within RENEW_TIME of
        now are selected.  Leases that have already expired are only
        warned about.  For the rest, a node lease is renewed through
        the node object and a slice lease through the agent.

        Failures are remembered in the module-level failedrenew list so
        that each one is mailed to TBOPS only once; a second mail is
        sent for failed leases that expire in less than two hours.
        """
        global failedrenew # XXX

        print("Renewing Plab leases ...")

        # Ugh, MySQL doesn't know UTC until v4.1.1, and unix_timestamp()
        # returns the local time
        now = int(time.mktime(time.gmtime()))
        endtime = now + RENEW_TIME

        # NULL has to be tested with "is not NULL"; "!= NULL" is never
        # true in SQL, so the old queries could not return any row.
        res = DBQueryFatal("select NULL, pid, eid,"
                            " unix_timestamp(leaseend) from plab_slices"
                            " where leaseend is not NULL and"
                            "  %s > unix_timestamp(leaseend)",
                            (endtime, ))
        res += DBQueryFatal("select node_id, pid, eid,"
                            " unix_timestamp(leaseend) from plab_slice_nodes"
                            " where leaseend is not NULL and"
                            "  %s > unix_timestamp(leaseend)",
                            (endtime, ))

        loadedSlices = {}   # (pid, eid) -> Slice, so each loads once
        newfail = []        # failures not reported before
        failsoon = []       # failures expiring in under two hours
        ret = 0

        for entry in res:
            nodeid, pid, eid, tstamp = entry

            if tstamp <= now:
                if nodeid:
                    print("WARNING: Node lease for %s (%s/%s) has expired!" %
                          (nodeid, pid, eid))
                else:
                    print("WARNING: Slice lease for %s/%s has expired!" %
                          (pid, eid))
                continue

            try:
                plabslice = loadedSlices[(pid, eid)]
            except KeyError:
                plabslice = self.loadSlice(pid, eid)
                loadedSlices[(pid, eid)] = plabslice

            if nodeid:
                node = plabslice.loadNode(nodeid)
                ret = node.renew()
            else:
                try:
                    self.agent.renewSlice(plabslice.slicename)
                    ret = 1
                except (KeyboardInterrupt, SystemExit):
                    raise
                except:
                    # Still best-effort, but say why it failed.
                    print("Error renewing slice for %s/%s: %s" %
                          (pid, eid, sys.exc_info()[1]))
                    ret = 0

            if ret:
                # Renewed fine; forget any earlier failure.
                if entry in failedrenew:
                    failedrenew.remove(entry)
                continue

            if nodeid:
                print("Failed to renew lease for %s (%s/%s)" %
                      (nodeid, pid, eid))
            else:
                print("Failed to renew lease for %s/%s" %
                      (pid, eid))
            if entry not in failedrenew:
                newfail.append(entry)
            if (tstamp - now) < (2*3600):
                failsoon.append(entry)

        if newfail:
            failedrenew += newfail
            lines = []
            for n in newfail:
                if n[0]:
                    lines.append("%s (%s/%s)\n" % tuple(n[:3]))
                else:
                    lines.append("%s/%s\n" % tuple(n[1:3]))
            failstr = "".join(lines)
            SENDMAIL(TBOPS, "Lease renewal(s) failed",
                     "Failed to renew lease on the following nodes:\n%s" %
                     failstr + "\n\nPlease check the plabrenew log", TBOPS)

        if failsoon:
            lines = []
            for n in failsoon:
                expires = (time.ctime(n[3]),)
                if n[0]:
                    lines.append("%s (%s/%s): expires: %s\n" %
                                 (tuple(n[:3]) + expires))
                else:
                    lines.append("%s/%s: expires: %s\n" %
                                 (tuple(n[1:3]) + expires))
            failstr = "".join(lines)
            SENDMAIL(TBOPS, "WARNING: PLAB leases are about to expire",
                     "The following plab leases are about to expire:\n%s" %
                     failstr + "\n\nPlease look into it!", TBOPS)
        return
    
    pass # end class Plab
711

712
713
714
715
716
717

#
# Slice abstraction
#

class Slice:
    """
    A Plab slice belonging to one Emulab experiment (pid/eid).

    Slice state is kept in the plab_slices DB table; backend operations
    are delegated to self.plab.agent.  Instances are normally obtained
    through Plab.createSlice / Plab.loadSlice (see the _create and _load
    docstrings) rather than constructed directly.
    """

    # XXX: maybe add slicename arg
    def __init__(self, plab, pid, eid):
        """
        Remember the owning Plab object and the experiment identifiers.

        slicename and leaseend are not set here; they are filled in by
        _create(), _load() or renew().
        """
        self.plab = plab
        self.pid, self.eid = pid, eid
        self.slicemeta = None

    def _create(self):
        """
        Creates a new slice that initially contains no nodes.  Don't call
        this directly, use Plab.createSlice instead.

        The slice name is derived from SLICEPREFIX and the experiment's
        idx column.  Returns the first element of the agent's createSlice
        result.  Raises RuntimeError if the experiment has no idx row;
        any failure from the agent or the DB insert is re-raised after a
        best-effort rollback.
        """
        rows = DBQueryFatal("select idx from experiments "
                            "where pid=%s "
                            "and eid=%s",
                            (self.pid, self.eid))
        if not rows:
            raise RuntimeError("Didn't get any results while looking for idx")
        self.slicename = "%s_%s" % (SLICEPREFIX, rows[0][0])

        print("Creating Plab slice %s." % self.slicename)

        try:
            res, self.slicemeta, self.leaseend = \
                 self.plab.agent.createSlice(self)

            DBQueryFatal("insert into plab_slices"
                         " (pid, eid, slicename, slicemeta, leaseend)"
                         " values (%s, %s, %s, %s, FROM_UNIXTIME(%s))",
                         (self.pid, self.eid, self.slicename,
                          self.slicemeta, self.leaseend))
        except:
            # The rollback lives in its own method so that exceptions it
            # catches internally do not replace the one being handled
            # here; the bare raise below still re-raises the original
            # creation failure.
            self._undoCreate()
            raise

        return res

    def _undoCreate(self):
        """
        Best-effort rollback for a failed _create().

        Both cleanup steps are always attempted; a failure in either is
        reported on stdout/stderr and otherwise ignored so that the
        caller can re-raise the error that triggered the rollback.
        """
        try:
            self.plab.agent.deleteSlice(self)
        except:
            print("Failed to delete partially created slice %s" %
                  self.slicename)
            traceback.print_exc()

        try:
            DBQueryFatal("delete from plab_slices where slicename=%s",
                         (self.slicename,))
        except:
            print("Failed to remove DB entry for slice %s" % self.slicename)
            traceback.print_exc()

    def _load(self):
        """
        Loads an already allocated slice from the DB.  Don't call this
        directly, use Plab.loadSlice instead.

        Sets self.slicename and self.slicemeta from the plab_slices row
        matching this pid/eid.  Asserts that exactly one such row exists.

        XXX This should probably be made lazy, since not all operations
        really need it
        """
        if verbose:
            print("Loading slice for pid/eid %s/%s" % (self.pid, self.eid))
        res = DBQueryFatal("select slicename, slicemeta from plab_slices"
                           " where pid = %s and eid = %s",
                           (self.pid, self.eid))
        assert (len(res) > 0), \
               "No slice found for %s-%s" % (self.pid, self.eid)
        assert (len(res) == 1), \
               "Multiple slices found for %s-%s" % (self.pid, self.eid)
        ((self.slicename, self.slicemeta), ) = res

    def renew(self):
        """
        Renews slice lease, if applicable to selected backend agent.

        On success the refreshed slicemeta/leaseend are written back to
        plab_slices and 1 is returned.  Any exception is reported with a
        traceback and turned into a return value of 0.
        """
        print("Renewing lease for slice %s" % self.slicename)

        try:
            res, self.slicemeta, self.leaseend = \
                 self.plab.agent.renewSlice(self)

            DBQueryFatal("replace into plab_slices"
                         " (pid, eid, slicename, slicemeta, leaseend)"
                         " values (%s, %s, %s, %s, FROM_UNIXTIME(%s))",
                         (self.pid, self.eid, self.slicename,
                          self.slicemeta, self.leaseend))
        except:
            print("Slice renewal failed!")
            traceback.print_exc()
            return 0

        return 1

    def destroy(self):
        """
        Frees all nodes in this slice and destroys the slice.  Note
        that this will really pound the DB if there are many nodes left
        in the slice, but those should be removed by Emulab before the
        slice is destroyed.

        A failure to delete the slice through the agent is only logged.
        A failure to delete the plab_slices row is mailed to TBOPS and
        re-raised.
        """
        print("Destroying Plab slice %s." % self.slicename)
        # Query parameters are passed as a 1-tuple, like every other
        # single-parameter query in this class.
        res = DBQueryFatal("select node_id from plab_slice_nodes"
                           " where slicename = %s",
                           (self.slicename,))
        print("\tRemoving any remaining nodes in slice..")
        for (nodeid,) in res:
            node = self.loadNode(nodeid)
            node.free()
            del node  # Encourage the GC'er

        osigs = disable_sigs(TERMSIGS)

        # try/finally guarantees the signals disabled above are restored
        # on every exit path, including a failure inside the error
        # reporting itself.  (Nested rather than try/except/finally so it
        # also parses on older Python 2 interpreters.)
        try:
            try:
                self.plab.agent.deleteSlice(self)
            except:
                print("Failed to delete slice!")
                traceback.print_exc()

            try:
                print("\tRemoving slice DB entry.")
                DBQueryFatal("delete from plab_slices where slicename = %s",
                             (self.slicename,))
            except:
                print("Error deleting slice from DB!")
                tbstr = "".join(traceback.format_exception(*sys.exc_info()))
                SENDMAIL(TBOPS, "Error deleting slice from DB",
                         "Slice deletion error:\n\n%s" % tbstr, TBOPS)
                raise
        finally:
            enable_sigs(osigs)

    def createNode(self, nodeid, force=False):
        """
        Node factory function

        Builds a Node for nodeid in this slice and allocates it via
        Node._create(force).  Returns the new Node.
        """
        # XXX: KRW - The following is a hack to help me with testing.
        # Ids that do not start with "plab" are treated as hostnames:
        # they are resolved to an IP, mapped to a node_id through the
        # interfaces table, and given a "-20" suffix.
        if not nodeid.startswith("plab"):
            IP = socket.gethostbyname(nodeid)
            qres = DBQueryFatal("select n.node_id from nodes as n left join "
                                "interfaces as i on n.node_id = i.node_id "
                                "where i.IP = %s", (IP,))
            assert (len(qres) > 0), "Node does not exist in DB: %s" % nodeid
            nodeid = qres[0][0] + "-20"
        node = Node(self, nodeid)
        node._create(force)
        return node

    def loadNode(self, nodeid):
        """
        Node factory function

        Builds a Node for nodeid in this slice and populates it from the
        DB via Node._load().  Returns the loaded Node.
        """
        node = Node(self, nodeid)
        node._load()
        return node

#
# Node abstraction
#

class Node:
Kirk Webb's avatar
Kirk Webb committed
874
    def __init__(self, slice, nodeid, pollNode = False):
        """
        Bind a node id to the slice that owns it.

        slice    -- owning Slice object
        nodeid   -- node identifier
        pollNode -- stored as self.pollNode; _create() uses it to decide
                    whether to wait for the node to respond
        """
        self.slice, self.nodeid = slice, nodeid
        # The IP lookup is done only once slice and nodeid are in place
        # (presumably __findIP reads self.nodeid; helper not shown here).
        self.IP = self.__findIP()
        self.nodemeta = None
        self.pollNode = pollNode
881

Kirk Webb's avatar
   
Kirk Webb committed
882
    # XXX: may want to rethink signal handling here.
883
    def _create(self, force=False):
        """
        Allocate this node into its slice and record it in the DB.

        The backend agent is asked to create the node; the metadata and
        lease value it returns are stored in plab_slice_nodes.  No node
        setup is performed.  Don't call this directly, use
        Slice.createNode instead.

        force -- if true, carry on even when a DB entry for this node can
                 already be loaded; otherwise that case raises
                 RuntimeError.
        """
        # Probe for an existing DB record first.  Any failure from
        # _load() is taken to mean there is no usable entry.
        try:
            self._load()
            alreadyThere = True
        except:
            alreadyThere = False

        if alreadyThere:
            if not force:
                raise RuntimeError("Entry for plab node %s already exists "
                                   "in the DB" % self.nodeid)
            print("Node entry exists in DB, but creation forced anyway.")

        print("Creating Plab node %s on %s." % (self.nodeid, self.IP))
        agent = self.slice.plab.agent
        _status, self.nodemeta, self.leaselen = agent.createNode(self)

        owner = self.slice
        DBQueryFatal("replace into plab_slice_nodes"
                     " (pid, eid, slicename, node_id,"
                     " nodemeta, leaseend)"
                     " values (%s, %s, %s, %s, %s, %s)",
                     (owner.pid, owner.eid,
                      owner.slicename, self.nodeid,
                      self.nodemeta, self.leaselen))

        if self.pollNode:
            # Keep probing with a trivial command, pausing NODEPROBEINT
            # seconds after each failure, until one attempt succeeds.
            TIMESTAMP("Waiting for %s to respond" % self.nodeid)
            responded = False
            while not responded:
                try:
                    self.__perform("/bin/true")
                    responded = True
                except:
                    time.sleep(NODEPROBEINT)
            TIMESTAMP("Node %s ready." % self.nodeid)

        TIMESTAMP("createnode finished on %s." % self.nodeid)
932
933
934
935
936
937

    def _load(self):
        """
        Loads an already allocated node from the DB.  Don't call this
        directly, use Slice.loadNode instead.

        Sets self.nodemeta from the plab_slice_nodes row for this node id.
        Asserts that exactly one row exists and that the row's slicename
        matches the slice this Node was created with.
        """
        if verbose:
            print("Loading node %s" % self.nodeid)
        # Query parameters are passed as a 1-tuple, matching the other
        # single-parameter queries in this class (see _free).
        res = DBQueryFatal("select slicename, nodemeta"
                           " from plab_slice_nodes where node_id = %s",
                           (self.nodeid,))
        assert (len(res) > 0), \
               "Node %s (slice %s) not found" % \
               (self.nodeid, self.slice.slicename)
        assert (len(res) == 1), \
               "Multiple nodes found for nodeid %s" % self.nodeid
        ((slicename, self.nodemeta), ) = res
        assert (slicename == self.slice.slicename), \
               "Node %s loaded by slice %s, but claims to be in slice %s" % \
               (self.nodeid, self.slice.slicename, slicename)
Kirk Webb's avatar
   
Kirk Webb committed
952

953
954
    def free(self):
        """
        Release this node and kill its VM; nothing running inside the
        vserver is shut down first.  Warning: the real work (self._free)
        is run through ForkCmd, i.e. in a forked process, bounded by
        FREE_TIMEOUT and with TERMSIGS disabled in both parent and child.

        Returns the bitwise OR of the first two elements of ForkCmd's
        result (presumably zero when everything went well -- confirm
        against ForkCmd).
        """
        outcome = ForkCmd(self._free,
                          timeout=FREE_TIMEOUT,
                          disable_sigs_parent=TERMSIGS,
                          disable_sigs_child=TERMSIGS)
        first, second = outcome[0], outcome[1]
        return first | second
Kirk Webb's avatar
   
Kirk Webb committed
963
964
        
    def _free(self):
        """
        Worker behind Node.free(): drops the node's plab_slice_nodes row,
        then asks the backend agent to free the node.  Nothing inside the
        vserver is shut down.  Don't call this directly; instead, use
        Node.free()

        Returns the logical negation of the agent's freeNode() result.
        """
        TIMESTAMP("freenode %s started." % self.nodeid)
        print("Freeing Plab node %s." % self.nodeid)

        # The DB record goes first.  A failure here is reported to TBOPS
        # but does not stop the node from being freed below.
        try:
            DBQueryFatal("delete from plab_slice_nodes where node_id = %s",
                         (self.nodeid,))
        except:
            print("Uh oh, couldn't remove plab sliver record from the DB!")
            trace = "".join(traceback.format_exception(*sys.exc_info()))
            body = ("Unable to delete entry for sliver %s from the DB:"
                    "\n\n%s" % (self.nodeid, trace))
            SENDMAIL(TBOPS, "Error: Couldn't remove plab vnode from DB",
                     body, TBOPS)

        freed = self.slice.plab.agent.freeNode(self)

        TIMESTAMP("freenode %s finished." % self.nodeid)
        return not freed
990

991
    def renew(self):
        """
        Renew the lease for this node.  The actual work is done by the
        private method self._renew, which is run through ForkCmd (i.e. in
        a forked process) with a RENEW_TIMEOUT limit and TERMSIGS
        disabled in the parent.

        Returns True when the bitwise OR of the first two elements of
        ForkCmd's result is zero, False otherwise.
        """
        outcome = ForkCmd(self._renew,
                          timeout = RENEW_TIMEOUT,
                          disable_sigs_parent = TERMSIGS)
        combined = outcome[0] | outcome[1]
        return not combined
Kirk Webb's avatar
   
Kirk Webb committed
1000

For faster browsing, not all history is shown. View entire blame