Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
emulab
emulab-devel
Commits
4f59bce7
Commit
4f59bce7
authored
Jan 27, 2016
by
Leigh B Stoller
Browse files
Rework how we update the webtasks for status and imaging, since we now
update from two places (here, and from the apt event daemon).
parent
bbeabb89
Changes
5
Hide whitespace changes
Inline
Side-by-side
apt/APT_Instance.pm.in
View file @
4f59bce7
...
...
@@ -163,6 +163,16 @@ sub DESTROY {
$
self
->{
'HASH'
}
=
undef
;
}
#
# Drop this instance from our little in-memory cache (the %instances
# hash, keyed by uuid), as is done for the expire daemon.
#
# Returns whatever delete() returns for the removed cache slot.
#
sub Flush($)
{
    my ($self) = @_;
    my $uuid   = $self->uuid();

    return delete($instances{$uuid});
}
#
#
Refresh
a
class
instance
by
reloading
from
the
DB
.
#
...
...
@@ -879,6 +889,54 @@ sub GetSSHKeys($$)
return
0
;
}
#
# Update the image status in the webtask for the instance. This is not
# clean at all, need a better way to do this.
#
# $details is a hash reference; the keys read here are 'size', 'status'
# and 'utc' (the timestamp of the update).
#
# Returns 0 once the tables have been unlocked again, whether or not the
# webtask was actually changed. Returns -1 if the table lock could not
# be taken, in which case nothing is touched.
#
sub UpdateImageStatus($$)
{
    my ($self, $details) = @_;

    # Success is reported as 0, so a lock failure must be a plain
    # failure code and not an empty hash reference.
    DBQueryWarn("lock tables web_tasks write, apt_instances write, ".
                " apt_instance_aggregates write")
        or return -1;

    #
    # Everything between lock and unlock lives in one labeled block, so
    # that every early exit still falls through to the unlock below.
    #
  UPDATE: {
        $self->Refresh();

        # Nothing to do unless the refreshed instance is still imaging.
        last UPDATE
            if ($self->status() ne "imaging");

        my $webtask = WebTask->LookupByObject($self->uuid());
        last UPDATE
            if (!defined($webtask));

        #
        # This will need to change; we can get updates from polling or
        # from the event stream. The events are processed out of band
        # from the polling, so we have a consistency problem. In addition,
        # the event stream is sending status for just a single node since
        # events are bounded in size.
        #
        # In other words, the new image status has to be merged into the
        # existing status. We have to use some kind of lock to avoid
        # scrambling the json data, and for now it is a table lock.
        #
        # Also, the blob has a timestamp in it, so we can sorta tell which
        # is most recent (not perfect, but does not really need to be).
        #
        last UPDATE
            if (defined($webtask->image_stamp()) &&
                defined($details->{'utc'}) &&
                $details->{'utc'} < $webtask->image_stamp());

        #
        # Only overwrite the fields this update actually carries, so a
        # partial update does not wipe out a previously stored value.
        #
        $webtask->image_size($details->{'size'})
            if (exists($details->{'size'}));
        $webtask->image_status($details->{'status'})
            if (exists($details->{'status'}));

        # The stamp is recorded as given (possibly undef); the check
        # above only rejects an update when both stamps are defined.
        $webtask->image_stamp($details->{'utc'});
        $webtask->Store();
    }
    DBQueryWarn("unlock tables");
    return 0;
}
###################################################################
package
APT_Instance
::
Aggregate
;
use
emdb
;
...
...
@@ -1201,6 +1259,52 @@ sub GetGeniAuthority($)
return
APT_Geni
::
GetAuthority
($
self
->
aggregate_urn
());
}
#
# Update the sliverstatus in the webtask.
#
# $hash is a hash reference keyed by urn; each value is a per-node
# details hash from which 'client_id' and (when present) 'utc' are read.
#
# Returns the merged status (a hash reference keyed by client_id), or an
# empty hash reference if the table lock could not be taken.
#
sub UpdateWebStatus($$)
{
    my ($self, $hash) = @_;

    if (!DBQueryWarn("lock tables web_tasks write")) {
        return {};
    }
    $self->webtask()->Refresh();

    # Start from whatever is stored now, or from nothing at all.
    my $merged = $self->webtask()->sliverstatus();
    $merged = {}
        if (!defined($merged));

    #
    # Updates arrive both from polling and from the event stream, and the
    # events are processed out of band from the polling, so there is a
    # consistency problem. The event stream also reports just a single
    # node at a time since events are bounded in size.
    #
    # So the new node state is merged into the existing status, under a
    # table lock (for now) to avoid scrambling the json data.
    #
    # Each blob carries a timestamp, which lets us sorta tell which one
    # is most recent (not perfect, but does not really need to be).
    #
    foreach my $urn (keys(%{ $hash })) {
        my $incoming = $hash->{$urn};
        my $client   = $incoming->{'client_id'};

        # Timestamps can only be compared when both sides have one.
        my $comparable = (exists($merged->{$client}) &&
                          exists($merged->{$client}->{"utc"}) &&
                          exists($incoming->{"utc"}));

        if (!$comparable ||
            $incoming->{"utc"} >= $merged->{$client}->{"utc"}) {
            $merged->{$client} = $incoming;
        }
    }
    $self->webtask()->sliverstatus($merged);

    DBQueryWarn("unlock tables");
    return $merged;
}
#
#
Ask
aggregate
to
terminate
a
sliver
.
#
...
...
apt/create_instance.in
View file @
4f59bce7
...
...
@@ -908,12 +908,13 @@ sub WaitForSliver($)
# cares about. We get this on each loop, update so the web
# interface can show changes.
#
my
$statusblob
=
$aggobj
->
UpdateWebStatus
(
$repblob
->
{'
details
'});
my
$changed
=
0
;
my
$statusblob
=
{};
foreach
my
$urn
(
keys
(
%
{
$repblob
->
{'
details
'}}))
{
my
$details
=
$repblob
->
{'
details
'}
->
{
$urn
};
my
$node_id
=
$details
->
{'
client_id
'};
$statusblob
->
{
$node_id
}
=
$details
;
#
# Look at the last blob. If we changed, view that as progress.
#
...
...
@@ -944,7 +945,6 @@ sub WaitForSliver($)
}
}
}
$webtask
->
sliverstatus
(
$statusblob
);
$laststatus
=
$statusblob
;
if
(
exists
(
$repblob
->
{'
public_url
'}))
{
...
...
@@ -1006,9 +1006,6 @@ if (ParRun({"maxwaittime" => 99999, "maxchildren" => scalar(@aggregate_list)},
}
print
"
$slice_urn
\n
";
# Count up nodes running a startup service.
my
$startuprunning
=
0
;
#
# If we were canceled, then none of the stuff below matters, we
# are going to do a terminate.
...
...
@@ -1021,6 +1018,9 @@ if ($instance->IsCanceled()) {
exit
(
0
);
}
# Count up nodes running a startup service.
my
$startuprunning
=
0
;
#
# Check the exit codes; any failure is a total failure (for now).
#
...
...
apt/manage_instance.in
View file @
4f59bce7
...
...
@@ -459,8 +459,14 @@ sub DoSnapshot()
$webtask
=
WebTask
->
LookupOrCreate
(
$instance
->
uuid
(),
$webtask_id
);
# Convenient.
$webtask
->
AutoStore
(
1
);
# This is convenience for the web server.
if
(
defined
(
$webtask
))
{
$webtask
->
aggregate_urn
(
$aggregate
->
aggregate_urn
());
$webtask
->
client_id
(
$node_id
);
}
}
$instance
->
SetStatus
("
imaging
");
$aggregate
->
SetStatus
("
imaging
");
#
# This returns pretty fast, and then the imaging takes place in
...
...
@@ -472,6 +478,7 @@ sub DoSnapshot()
if
(
!
defined
(
$response
))
{
$errmsg
=
"
Internal error creating image
";
$instance
->
SetStatus
(
$old_status
);
$aggregate
->
SetStatus
(
$old_status
);
goto
uerror
;
}
if
(
$response
->
code
()
!=
GENIRESPONSE_SUCCESS
)
{
...
...
@@ -481,6 +488,7 @@ sub DoSnapshot()
$response
->
code
()
==
GENIRESPONSE_SERVER_UNAVAILABLE
||
$response
->
code
()
==
GENIRESPONSE_FORBIDDEN
);
$instance
->
SetStatus
(
$old_status
);
$aggregate
->
SetStatus
(
$old_status
);
goto
uerror
;
}
my
(
$image_urn
,
$image_url
,
...
...
@@ -541,7 +549,7 @@ sub DoSnapshot()
# Poll for a reasonable amount of time.
#
my
$seconds
=
1500
;
my
$interval
=
1
0
;
my
$interval
=
1
5
;
my
$ready
=
0
;
my
$sliver_ready
=
0
;
my
$failed
=
0
;
...
...
@@ -565,25 +573,8 @@ sub DoSnapshot()
$response
->
code
()
==
GENIRESPONSE_RPCERROR
);
my
$blob
=
$response
->
value
();
if
(
defined
(
$webtask
))
{
# Special for imaging status display
foreach
my
$urn
(
keys
(
%
{
$blob
->
{'
details
'}}))
{
my
$details
=
$blob
->
{'
details
'}
->
{
$urn
};
if
(
$urn
eq
$sliver_urn
)
{
$webtask
->
state
(
$details
->
{'
state
'});
$webtask
->
rawstate
(
$details
->
{'
rawstate
'});
}
}
}
# This is the per-aggregate status, we always set this for web UI.
my
$statusblob
=
{};
foreach
my
$urn
(
keys
(
%
{
$blob
->
{'
details
'}}))
{
my
$details
=
$blob
->
{'
details
'}
->
{
$urn
};
my
$node_id
=
$details
->
{'
client_id
'};
$statusblob
->
{
$node_id
}
=
$details
;
}
$aggregate
->
webtask
()
->
sliverstatus
(
$statusblob
);
$aggregate
->
UpdateWebStatus
(
$blob
->
{'
details
'});
if
(
$blob
->
{'
status
'}
eq
"
failed
")
{
$failed
=
1
;
...
...
@@ -610,34 +601,34 @@ sub DoSnapshot()
$response
->
code
()
==
GENIRESPONSE_SERVER_UNAVAILABLE
||
$response
->
code
()
==
GENIRESPONSE_RPCERROR
);
$
blob
=
$response
->
value
();
my
$image
blob
=
$response
->
value
();
if
(
defined
(
$webtask
))
{
$webtask
->
image_size
(
$blob
->
{'
size
'})
if
(
exists
(
$blob
->
{'
size
'}));
if
(
exists
(
$blob
->
{'
status
'}))
{
#
# If the image is ready, but needs to be copied back to
# its origin, hold off on ready till later. We will wait for
# the copyback to finish, see below.
#
if
(
defined
(
$copyback_uuid
))
{
$webtask
->
image_status
("
copying
");
}
else
{
$webtask
->
image_status
(
$blob
->
{'
status
'});
}
my
%blobcopy
=
%
{
$imageblob
};
#
# If the image is ready, but needs to be copied back to
# its origin, hold off on ready till later. We will wait for
# the copyback to finish, see below.
#
if
(
$imageblob
->
{'
status
'}
eq
"
ready
"
&&
defined
(
$copyback_uuid
))
{
$blobcopy
{'
status
'}
=
"
copying
";
}
# This is also being updated by the event system.
$instance
->
UpdateImageStatus
(
\
%blobcopy
);
}
if
(
$blob
->
{'
status
'}
eq
"
ready
")
{
if
(
$
image
blob
->
{'
status
'}
eq
"
ready
")
{
$ready
=
1
;
last
;
}
elsif
(
$blob
->
{'
status
'}
eq
"
failed
")
{
elsif
(
$
image
blob
->
{'
status
'}
eq
"
failed
")
{
$failed
=
1
;
last
;
}
}
# Cause of image status events.
$webtask
->
Refresh
()
if
(
defined
(
$webtask
));
if
(
$failed
)
{
$errmsg
=
"
Imaging failed
"
if
(
!
defined
(
$errmsg
));
...
...
@@ -675,6 +666,7 @@ sub DoSnapshot()
(
$update_profile
eq
"
all
"
?
1
:
0
));
}
$instance
->
SetStatus
("
ready
");
$aggregate
->
SetStatus
("
ready
");
#
# If there is a copyback_uuid, we want to wait for that to finish.
...
...
@@ -743,6 +735,7 @@ sub DoSnapshot()
StartMonitorInternal();
}
$instance
->SetStatus(
"
ready
"
);
$aggregate
->SetStatus(
"
ready
"
);
if (defined(
$logfile
)) {
SENDMAIL(
$TBOPS
,
"
Snapshot
failed
"
,
...
...
@@ -1345,17 +1338,8 @@ sub DoRefresh()
elsif (
$blob
->{'status'} eq
"
failed
"
) {
$sliver
->SetStatus(
"
failed
"
);
}
#
# Convert to something smaller, with info the web interface
# cares about.
#
my
$statusblob
= {};
foreach my
$urn
(keys(%{
$blob
->{'details'}})) {
my
$details
=
$blob
->{'details'}->{
$urn
};
my
$node_id
=
$details
->{'client_id'};
$statusblob
->{
$node_id
} =
$details
;
}
$webtask
->sliverstatus(
$statusblob
);
# This is the per-aggregate status, we always set this for web UI.
my
$statusblob
=
$sliver
->UpdateWebStatus(
$blob
->{'details'});
if (
$debug
) {
print STDERR Dumper(
$statusblob
);
}
...
...
@@ -1678,7 +1662,7 @@ sub StartMonitorInternal(;$)
# another node right away. For reboot/reload, nothing interesting
# is going to be reported for a while.
#
sleep(
15
);
sleep(
30
);
my $seconds = ($waitforstartup ? 7200 : 900);
my $interval = 15;
...
...
@@ -1718,27 +1702,22 @@ sub StartMonitorInternal(;$)
}
my $blob = $response->value();
#
# Convert to something smaller, with info the web interface
# cares about.
#
my $statusblob = {};
my $executing = 0;
foreach my $urn (keys(%{$blob->{
'
details
'
}})) {
my $details = $blob->{
'
details
'
}->{$urn};
my $node_id = $details->{
'
client_id
'
};
$statusblob->{$node_id} = $details;
# Startup command is still running.
$executing++
if (exists($details->{
'
execute_state
'
}) &&
$details->{
'
execute_state
'
} ne "exited");
}
# This is the per-aggregate status, we always set this for web UI.
my $statusblob = $sliver->UpdateWebStatus($blob->{
'
details
'
});
if ($debug) {
print STDERR Dumper($statusblob);
}
$webtask->sliverstatus($statusblob);
# Look for nodes still executing
my $executing = 0;
if ($waitforstartup) {
foreach my $node_id (keys(${$statusblob})) {
my $details = $statusblob->{
'
node_id
'
};
$executing++
if (exists($details->{
'
execute_state
'
}) &&
$details->{
'
execute_state
'
} ne "exited");
}
}
#
# We poll until the status goes ready. Might not be a good idea.
#
...
...
www/aptui/manage_profile.ajax
View file @
4f59bce7
<?php
#
# Copyright (c) 2000-201
5
University of Utah and the Flux Group.
# Copyright (c) 2000-201
6
University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
...
...
@@ -59,14 +59,6 @@ function Do_CloneStatus()
$taskdata
=
$webtask
->
TaskData
();
$blob
=
array
();
if
(
$webtask
->
exited
())
{
# Success, but not sure what to report. Come back to this later.
$blob
[
"exited"
]
=
$webtask
->
exited
();
$blob
[
"exitcode"
]
=
$webtask
->
exitcode
();
if
(
isset
(
$taskdata
[
"image_name"
]))
{
$blob
[
"image_name"
]
=
$taskdata
[
"image_name"
];
}
}
#
# Size is in KB to avoid bigint problems. But kill the KB.
#
...
...
@@ -79,9 +71,35 @@ function Do_CloneStatus()
else
{
$blob
[
"image_size"
]
=
0
;
}
$blob
[
"node_status"
]
=
$taskdata
[
"rawstate"
];
$blob
[
"image_status"
]
=
$taskdata
[
"image_status"
];
#
# Lets put the node status in too. The backend has helpfully told us
# the aggregate and node to track down the status.
#
if
(
isset
(
$taskdata
[
"aggregate_urn"
])
&&
isset
(
$taskdata
[
"client_id"
]))
{
$sliver
=
InstanceSliver
::
Lookup
(
$instance
,
$taskdata
[
"aggregate_urn"
]);
if
(
$sliver
)
{
$slwebtask
=
WebTask
::
Lookup
(
$sliver
->
webtask_id
());
$sliverstatus
=
$slwebtask
->
TaskValue
(
"sliverstatus"
);
if
(
$sliverstatus
)
{
foreach
(
$sliverstatus
as
$node_id
=>
$node_status
)
{
if
(
$node_id
==
$taskdata
[
"client_id"
])
{
$blob
[
"node_status"
]
=
$node_status
[
"rawstate"
];
break
;
}
}
}
}
}
if
(
$webtask
->
exited
())
{
# Success, but not sure what to report. Come back to this later.
$blob
[
"exited"
]
=
$webtask
->
exited
();
$blob
[
"exitcode"
]
=
$webtask
->
exitcode
();
if
(
isset
(
$taskdata
[
"image_name"
]))
{
$blob
[
"image_name"
]
=
$taskdata
[
"image_name"
];
}
}
SPITAJAX_RESPONSE
(
$blob
);
}
...
...
www/aptui/status.ajax
View file @
4f59bce7
...
...
@@ -886,11 +886,30 @@ function Do_SnapshotStatus()
else
{
$blob
[
"image_size"
]
=
0
;
}
$blob
[
"node_status"
]
=
$taskdata
[
"rawstate"
];
$blob
[
"image_status"
]
=
$taskdata
[
"image_status"
];
if
(
isset
(
$taskdata
[
"copyback_uuid"
]))
{
$blob
[
"copyback_uuid"
]
=
$taskdata
[
"copyback_uuid"
];
}
$blob
[
"image_status"
]
=
$taskdata
[
"image_status"
];
#
# Lets put the node status in too. The backend has helpfully told us
# the aggregate and node to track down the status.
#
if
(
isset
(
$taskdata
[
"aggregate_urn"
])
&&
isset
(
$taskdata
[
"client_id"
]))
{
$sliver
=
InstanceSliver
::
Lookup
(
$instance
,
$taskdata
[
"aggregate_urn"
]);
if
(
$sliver
)
{
$slwebtask
=
WebTask
::
Lookup
(
$sliver
->
webtask_id
());
$sliverstatus
=
$slwebtask
->
TaskValue
(
"sliverstatus"
);
if
(
$sliverstatus
)
{
foreach
(
$sliverstatus
as
$node_id
=>
$node_status
)
{
if
(
$node_id
==
$taskdata
[
"client_id"
])
{
$blob
[
"node_status"
]
=
$node_status
[
"rawstate"
];
break
;
}
}
}
}
}
if
(
$webtask
->
exited
())
{
# Success, but not sure what to report. Come back to this later.
$blob
[
"exited"
]
=
$webtask
->
exited
();
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment