Merge branch 'master' of ssh://git.fedorahosted.org/git/opus

ehelms · ehelms · commit 97b4c89d449f · 2010-05-24T10:46:39.000-04:00
diff --git a/admin.py b/admin.py
@@ -3,5 +3,11 @@
 
 import models
 
+class ApplicationAdmin(admin.ModelAdmin):
+    fieldsets = [
+        (None, {'fields': ['name', 'image_id', 'path', 'icon_url', 'ssh_key']}),
+        ('Advanced', {'fields': ['max_concurrent_instances', 'users_per_small', 'cluster_headroom', 'scale_interarrival'], 'classes': ['collapse']}),
+    ]
+
 admin.site.register(models.Instance)
-admin.site.register(models.Application)
+admin.site.register(models.Application, ApplicationAdmin)
diff --git a/models.py b/models.py
@@ -1,3 +1,5 @@
+from datetime import datetime
+
 from django.db import models
 from django.db.models import signals
 from django.contrib.auth.models import Permission, User
@@ -10,11 +12,18 @@ class Application(models.Model):
     name = models.CharField(max_length=64) # Pretty name of the application
     image_id = models.CharField(max_length=32, unique=True) # Image id of the image that the actual application lies on
     path = models.CharField(max_length=256,blank=True) # Path of the application to be run on the host
-    max_concurrent_instances = models.IntegerField()
-    users_per_small = models.IntegerField()
-    cluster_headroom = models.IntegerField()
+    max_concurrent_instances = models.IntegerField(default=0)
+    users_per_small = models.IntegerField(default=10)
+    cluster_headroom = models.IntegerField(default=0)
     icon_url = models.URLField()
     ssh_key = models.FileField("SSH Key", upload_to='vdi/sshkeys')
+    scale_interarrival = models.IntegerField(default=180)  # The interarrival time of the scale function running
+    to_be_run_at = models.DateTimeField(auto_now_add=True)
+
+    def is_time_to_run(self, last_run_at):
+        now = datetime.now()
+        return False
+        #if to_be_run_at > now
 
     def __str__(self):
         return self.name
diff --git a/tasks.py b/tasks.py
@@ -17,98 +17,106 @@
 log = core.log.getLogger()
 from django.http import HttpResponse, HttpResponseRedirect
 
-class Scale(PeriodicTask):
-    # By default this task runs every three minutes
-    run_every = timedelta(seconds=30)
-
-    def run(self):
-        for app in Application.objects.all():
-            # Create the cluster object to help us manage the cluster
-            cluster = AppCluster(app.pk)
-
-            # Clean up all idle users on all nodes for this application cluster
-            log.debug('APP NAME %s'%app.name)
-            cluster.logout_idle_users()
-
-            log.debug("Checking for active clusters")
-            for host in cluster.active:
-                log.debug("Found active host")
-                an = AppNode(host)
-                user_experience_tools.process_user_connections(an)
-
-            # Handle vms we were waiting on to boot up
-            booting = deltacloud_tools.get_instances(cluster.booting)
-            for vm in booting:
-                dns_name = vm.public_addresses[0]
-                log.debug('ASDF = %s' % dns_name)
-                if dns_name.find("amazonaws.com") > -1:
-                    # Resolve the domain name into an IP address
-                    # This adds a dependancy on the 'host' command
-                    output = Popen(["host", dns_name], stdout=PIPE).communicate()[0]
-                    ip = '.'.join(re.findall('(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', output)[0])
-                    try:
-                        # TODO: remove the hard coded '3389' & '22' below.  '3389' is for RDP and '22' is for SSH
-                        # TODO: remove the arbitrary '3' second timeout below
-                        socket.create_connection((ip,3389),3)
-                        socket.create_connection((ip,22),3)
-                    except Exception as e:
-                        log.debug("Server %s is not yet available" % ip)
-                        pass
-                    else:
-                        instance = Instance.objects.filter(instanceId=vm.id)[0]
-                        booting.remove(vm)
-                        instance.ip = ip
-                        instance.state = 2
-                        instance.save()
-                        log.debug("Moving instance %s into enabled state with ip %s" % (instance.instanceId,ip))
-            num_booting = len(booting)
-            if num_booting > 0:
-                log.debug("Application cluster '%s' is still waiting for %s cluster nodes to boot" % (cluster.name,len(booting)))
-
-
-            # Consider if the cluster needs to be scaled
-            log.debug('Considering %s app cluster for scaling ...' % cluster.name)
-            # Should I scale up?
-            log.debug('%s is avail (%s) < req (%s)?' % (cluster.app.name, cluster.avail_headroom, cluster.req_headroom))
-            if cluster.avail_headroom < cluster.req_headroom:
-                # Yes I should scale up
-                space_needed = cluster.req_headroom - cluster.avail_headroom
-                servers_needed = int(math.ceil(space_needed / float(cluster.app.users_per_small)))
-                log.debug('Available headroom (%s) is less than the cluster headroom goal (%s).  Starting %s additional cluster nodes now' % (cluster.avail_headroom,cluster.req_headroom,servers_needed))
-                for i in range(servers_needed):
-                    cluster.start_node()
-
-
-            # Handle instances we are supposed to shut down
-            toTerminate = []
-            for host in cluster.shutting_down:
-                log.debug('ASDASDASD    %s' % host.instanceId)
+class ScaleScheduler(PeriodicTask):
+
+    run_every = timedelta(seconds=5)  # Used for tick frequency
+
+    def is_due(self, last_run_at):
+        for app in Application.objects.exclude(to_be_run_at__gt=datetime.now()):
+            app.to_be_run_at = datetime.now() + timedelta(seconds=app.scale_interarrival)
+            app.save()
+            Scale.delay(app)
+        return (False, self.timedelta_seconds(self.run_every))
+
+tasks.register(ScaleScheduler)
+
+class Scale(Task):
+
+    def run(self, app):
+        # Create the cluster object to help us manage the cluster
+        cluster = AppCluster(app.pk)
+
+        # Clean up all idle users on all nodes for this application cluster
+        log.debug('APP NAME %s'%app.name)
+        cluster.logout_idle_users()
+
+        log.debug("Checking for active clusters")
+        for host in cluster.active:
+            log.debug("Found active host")
+            an = AppNode(host)
+            user_experience_tools.process_user_connections(an)
+
+        # Handle vms we were waiting on to boot up
+        booting = deltacloud_tools.get_instances(cluster.booting)
+        for vm in booting:
+            dns_name = vm.public_addresses[0]
+            log.debug('ASDF = %s' % dns_name)
+            if dns_name.find("amazonaws.com") > -1:
+                # Resolve the domain name into an IP address
+                # This adds a dependancy on the 'host' command
+                output = Popen(["host", dns_name], stdout=PIPE).communicate()[0]
+                ip = '.'.join(re.findall('(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', output)[0])
                 try:
-                    n = AppNode(host)
-                    log.debug('AppNode %s is waiting to be shut down and has %s connections' % (host.ip,n.sessions))
-                    if n.sessions == []:
-                        toTerminate.append(host)
-                        host.shutdownDateTime = datetime.now()
-                        host.save()
-                except HostNotConnectableError:
-                    # Ignore this host that doesn't seem to be ssh'able, but log it as an error
-                    log.warning('AppNode %s is NOT sshable and should be looked into.  It is currently waiting to shutdown')
-            deltacloud_tools.terminate_instances(toTerminate)
-
-
-            # Should I scale down?
-            overprov_num = cluster.avail_headroom - cluster.req_headroom
-            log.debug('overprov (%s) avail (%s) required(%s)' % (overprov_num,cluster.avail_headroom,cluster.req_headroom))
-            # Reverse the list to try to remove the servers at the end of the waterfall
-            inuse_reverse = cluster.inuse_map
-            inuse_reverse.reverse()
-            for (host,inuse) in inuse_reverse:
-                # The node must have 0 sessions and the cluster must be able to be smaller while still leaving enough headroom
-                if int(inuse) == 0 and overprov_num >= cluster.app.users_per_small:
-                    overprov_num = overprov_num - cluster.app.users_per_small
-                    host.state = 4
+                    # TODO: remove the hard coded '3389' & '22' below.  '3389' is for RDP and '22' is for SSH
+                    # TODO: remove the arbitrary '3' second timeout below
+                    socket.create_connection((ip,3389),3)
+                    socket.create_connection((ip,22),3)
+                except Exception as e:
+                    log.debug("Server %s is not yet available" % ip)
+                    pass
+                else:
+                    instance = Instance.objects.filter(instanceId=vm.id)[0]
+                    booting.remove(vm)
+                    instance.ip = ip
+                    instance.state = 2
+                    instance.save()
+                    log.debug("Moving instance %s into enabled state with ip %s" % (instance.instanceId,ip))
+        num_booting = len(booting)
+        if num_booting > 0:
+            log.debug("Application cluster '%s' is still waiting for %s cluster nodes to boot" % (cluster.name,len(booting)))
+
+
+        # Consider if the cluster needs to be scaled
+        log.debug('Considering %s app cluster for scaling ...' % cluster.name)
+        # Should I scale up?
+        log.debug('%s is avail (%s) < req (%s)?' % (cluster.app.name, cluster.avail_headroom, cluster.req_headroom))
+        if cluster.avail_headroom < cluster.req_headroom:
+            # Yes I should scale up
+            space_needed = cluster.req_headroom - cluster.avail_headroom
+            servers_needed = int(math.ceil(space_needed / float(cluster.app.users_per_small)))
+            log.debug('Available headroom (%s) is less than the cluster headroom goal (%s).  Starting %s additional cluster nodes now' % (cluster.avail_headroom,cluster.req_headroom,servers_needed))
+            for i in range(servers_needed):
+                cluster.start_node()
+
+
+        # Handle instances we are supposed to shut down
+        toTerminate = []
+        for host in cluster.shutting_down:
+            log.debug('ASDASDASD    %s' % host.instanceId)
+            try:
+                n = AppNode(host)
+                log.debug('AppNode %s is waiting to be shut down and has %s connections' % (host.ip,n.sessions))
+                if n.sessions == []:
+                    toTerminate.append(host)
+                    host.shutdownDateTime = datetime.now()
                     host.save()
-                    log.debug('Application Server %s has no sessions.  Removing that node from the cluster!' % host.ip)
-        return 'scaling complete @TODO put scaling event summary in this output'
+            except HostNotConnectableError:
+                # Ignore this host that doesn't seem to be ssh'able, but log it as an error
+                log.warning('AppNode %s is NOT sshable and should be looked into.  It is currently waiting to shutdown')
+        deltacloud_tools.terminate_instances(toTerminate)
+
 
-tasks.register(Scale)
+        # Should I scale down?
+        overprov_num = cluster.avail_headroom - cluster.req_headroom
+        log.debug('overprov (%s) avail (%s) required(%s)' % (overprov_num,cluster.avail_headroom,cluster.req_headroom))
+        # Reverse the list to try to remove the servers at the end of the waterfall
+        inuse_reverse = cluster.inuse_map
+        inuse_reverse.reverse()
+        for (host,inuse) in inuse_reverse:
+            # The node must have 0 sessions and the cluster must be able to be smaller while still leaving enough headroom
+            if int(inuse) == 0 and overprov_num >= cluster.app.users_per_small:
+                overprov_num = overprov_num - cluster.app.users_per_small
+                host.state = 4
+                host.save()
+                log.debug('Application Server %s has no sessions.  Removing that node from the cluster!' % host.ip)
+        return 'scaling complete @TODO put scaling event summary in this output'