From 1709fd35e764c82bcf18777bc47155c5feb1604c Mon Sep 17 00:00:00 2001 From: "David M. Johnson" Date: Thu, 21 Apr 2016 12:35:54 -0600 Subject: [PATCH] Fix up host-evacuate for delete-nodes; add dysfunctional migrate option. (I'm not sure why migrate isn't working... the nodes try to migrate but don't complete the journey :).) --- setup-controller-delete-nodes.sh | 95 ++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 16 deletions(-) diff --git a/setup-controller-delete-nodes.sh b/setup-controller-delete-nodes.sh index 79049c8..853f833 100755 --- a/setup-controller-delete-nodes.sh +++ b/setup-controller-delete-nodes.sh @@ -17,6 +17,12 @@ if [ "$HOSTNAME" != "$CONTROLLER" ]; then exit 0; fi +DOMIGRATE=0 +if [ "$1" = '-m' ]; then + DOMIGRATE=1 + shift +fi + OLDNODES="$@" if [ -z "$OLDNODES" ] ; then @@ -24,6 +30,9 @@ if [ -z "$OLDNODES" ] ; then exit 1 fi +. $SETTINGS +. $OURDIR/admin-openrc.sh + # # For now, we just do the stupid thing and "evacuate" the VMs from the # old hypervisor to a new hypervisor. Let Openstack pick for now... @@ -31,18 +40,41 @@ fi # To do this, we disable the compute service, force it down, and # evacuate! # +VALIDNODES="" for node in $OLDNODES ; do echo "*** Forcing compute service on $node down and disabling it ..." fqdn=`getfqdn $node` - id=`nova service-list | awk "/ $fqdn / { print \\$2 }"` - nova service-disable $fqdn nova-compute - # Hm, this only supported in some versions, so... - nova service-force-down $fqdn nova-compute - # ... do this too, to make sure the service doesn't come up - $SSH $fqdn service nova-compute stop - echo "update services set updated_at=NULL where id=$id" \ - | mysql -u nova --password=${NOVA_DBPASS} nova + if [ -z "$fqdn" ]; then + echo "ERROR: could not get FQDN for node $node; skipping!" 
+ continue + fi + #id=`nova service-list | awk -v IGNORECASE=1 "/ $fqdn / { print \\$2 }"` + id=`nova service-list | grep -i $fqdn | awk '// { print $2 }'` + if [ -z "$id" ]; then + echo "ERROR: could not get service id for nova-compute on node $node; skipping! ($id)" + + nova service-list + continue + fi + + VALIDNODES="$VALIDNODES $node" + + if [ $DOMIGRATE -eq 0 ]; then + nova service-disable $fqdn nova-compute + # Hm, this only supported in some versions, so... + nova service-force-down $fqdn nova-compute + # REALLY force it down :) + echo "update services set forced_down=1 where id=$id" \ + | mysql -u nova --password=${NOVA_DBPASS} nova + + # ... do this too, to make sure the service doesn't come up + # no, don't do it, we can't host-evacuate without it + #$SSH $fqdn service nova-compute stop + + echo "update services set updated_at=NULL where id=$id" \ + | mysql -u nova --password=${NOVA_DBPASS} nova + fi done # @@ -51,11 +83,33 @@ done # evacuated VMs). # fqdnlist="" -for node in $OLDNODES ; do +for node in $VALIDNODES ; do echo "*** Evacuating all instances from $node ..." fqdn=`getfqdn $node` + if [ -z "$fqdn" ]; then + echo "ERROR: could not get FQDN for node $node; skipping!" + continue + fi - nova host-evacuate $fqdn + if [ $DOMIGRATE -eq 1 ]; then + servers=`nova hypervisor-servers $fqdn | grep -i $fqdn | awk '// { print $2 }' | xargs` + for server in $servers ; do + nova migrate --poll $server + VM_OUTPUT=`nova show $server` + VM_STATUS=`echo "$VM_OUTPUT" | grep status | awk '{print $4}'` + while [ "$VM_STATUS" != "VERIFY_RESIZE" ]; do + echo -n "." + sleep 2 + VM_OUTPUT=`nova show $server` + VM_STATUS=`echo "$VM_OUTPUT" | grep status | awk '{print $4}'` + done + nova resize-confirm $server + echo "$server instance migrated and resized." 
+ echo; + done + else + nova host-evacuate $fqdn + fi # Create a list for the next step so we don't have to keep resolving # FQDNs @@ -66,12 +120,12 @@ done # Ok, now we want to wait until all those nodes no longer have instances # on them. # -sucess=0 -while [ $success -ne 1 ]; do +success=0 +while [ ! $success -eq 1 ]; do success=1 for fqdn in $fqdnlist ; do sleep 8 - count=`nova hypervisor-servers $fqdn | awk '/ instance-.* / { print $2 }' | wc -l` + count=`nova hypervisor-servers $fqdn | awk -v IGNORECASE=1 '/ instance-.* / { print $2 }' | wc -l` if [ $count -gt 0 ]; then success=0 echo "*** $fqdn still has $count instances" @@ -79,12 +133,21 @@ while [ $success -ne 1 ]; do done done -for node in $OLDNODES ; do +for node in $VALIDNODES ; do echo "*** Deleting compute service on $node ..." fqdn=`getfqdn $node` - id=`nova service-list | awk "/ $fqdn / { print \\$2 }"` + if [ -z "$fqdn" ]; then + echo "ERROR: could not get FQDN for node $node; skipping!" + continue + fi + #id=`nova service-list | awk -v IGNORECASE=1 "/ $fqdn / { print \\$2 }"` + id=`nova service-list | grep -i $fqdn | awk '// { print $2 }'` + if [ -z "$id" ]; then + echo "ERROR: could not get service id for nova-compute on node $node; skipping!" + continue + fi nova service-delete $id done -echo "*** Evacuated and deleted nodes $OLDNODES !" +echo "*** Successfully evacuated and deleted nodes $OLDNODES !" exit 0 -- GitLab