Skip to content

Commit 97b4c89

Browse files
committed
Merge branch 'master' of ssh://git.fedorahosted.org/git/opus
2 parents e46c42c + 30a2c1e commit 97b4c89

File tree

3 files changed

+119
-96
lines changed

3 files changed

+119
-96
lines changed

admin.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,11 @@
33

44
import models
55

6+
class ApplicationAdmin(admin.ModelAdmin):
7+
fieldsets = [
8+
(None, {'fields': ['name', 'image_id', 'path', 'icon_url', 'ssh_key']}),
9+
('Advanced', {'fields': ['max_concurrent_instances', 'users_per_small', 'cluster_headroom', 'scale_interarrival'], 'classes': ['collapse']}),
10+
]
11+
612
admin.site.register(models.Instance)
7-
admin.site.register(models.Application)
13+
admin.site.register(models.Application, ApplicationAdmin)

models.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from datetime import datetime
2+
13
from django.db import models
24
from django.db.models import signals
35
from django.contrib.auth.models import Permission, User
@@ -10,11 +12,18 @@ class Application(models.Model):
1012
name = models.CharField(max_length=64) # Pretty name of the application
1113
image_id = models.CharField(max_length=32, unique=True) # Image id of the image that the actual application lies on
1214
path = models.CharField(max_length=256,blank=True) # Path of the application to be run on the host
13-
max_concurrent_instances = models.IntegerField()
14-
users_per_small = models.IntegerField()
15-
cluster_headroom = models.IntegerField()
15+
max_concurrent_instances = models.IntegerField(default=0)
16+
users_per_small = models.IntegerField(default=10)
17+
cluster_headroom = models.IntegerField(default=0)
1618
icon_url = models.URLField()
1719
ssh_key = models.FileField("SSH Key", upload_to='vdi/sshkeys')
20+
scale_interarrival = models.IntegerField(default=180) # The interarrival time of the scale function running
21+
to_be_run_at = models.DateTimeField(auto_now_add=True)
22+
23+
def is_time_to_run(self, last_run_at):
24+
now = datetime.now()
25+
return False
26+
#if to_be_run_at > now
1827

1928
def __str__(self):
2029
return self.name

tasks.py

Lines changed: 100 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -17,98 +17,106 @@
1717
log = core.log.getLogger()
1818
from django.http import HttpResponse, HttpResponseRedirect
1919

20-
class Scale(PeriodicTask):
21-
# By default this task runs every three minutes
22-
run_every = timedelta(seconds=30)
23-
24-
def run(self):
25-
for app in Application.objects.all():
26-
# Create the cluster object to help us manage the cluster
27-
cluster = AppCluster(app.pk)
28-
29-
# Clean up all idle users on all nodes for this application cluster
30-
log.debug('APP NAME %s'%app.name)
31-
cluster.logout_idle_users()
32-
33-
log.debug("Checking for active clusters")
34-
for host in cluster.active:
35-
log.debug("Found active host")
36-
an = AppNode(host)
37-
user_experience_tools.process_user_connections(an)
38-
39-
# Handle vms we were waiting on to boot up
40-
booting = deltacloud_tools.get_instances(cluster.booting)
41-
for vm in booting:
42-
dns_name = vm.public_addresses[0]
43-
log.debug('ASDF = %s' % dns_name)
44-
if dns_name.find("amazonaws.com") > -1:
45-
# Resolve the domain name into an IP address
46-
# This adds a dependancy on the 'host' command
47-
output = Popen(["host", dns_name], stdout=PIPE).communicate()[0]
48-
ip = '.'.join(re.findall('(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', output)[0])
49-
try:
50-
# TODO: remove the hard coded '3389' & '22' below. '3389' is for RDP and '22' is for SSH
51-
# TODO: remove the arbitrary '3' second timeout below
52-
socket.create_connection((ip,3389),3)
53-
socket.create_connection((ip,22),3)
54-
except Exception as e:
55-
log.debug("Server %s is not yet available" % ip)
56-
pass
57-
else:
58-
instance = Instance.objects.filter(instanceId=vm.id)[0]
59-
booting.remove(vm)
60-
instance.ip = ip
61-
instance.state = 2
62-
instance.save()
63-
log.debug("Moving instance %s into enabled state with ip %s" % (instance.instanceId,ip))
64-
num_booting = len(booting)
65-
if num_booting > 0:
66-
log.debug("Application cluster '%s' is still waiting for %s cluster nodes to boot" % (cluster.name,len(booting)))
67-
68-
69-
# Consider if the cluster needs to be scaled
70-
log.debug('Considering %s app cluster for scaling ...' % cluster.name)
71-
# Should I scale up?
72-
log.debug('%s is avail (%s) < req (%s)?' % (cluster.app.name, cluster.avail_headroom, cluster.req_headroom))
73-
if cluster.avail_headroom < cluster.req_headroom:
74-
# Yes I should scale up
75-
space_needed = cluster.req_headroom - cluster.avail_headroom
76-
servers_needed = int(math.ceil(space_needed / float(cluster.app.users_per_small)))
77-
log.debug('Available headroom (%s) is less than the cluster headroom goal (%s). Starting %s additional cluster nodes now' % (cluster.avail_headroom,cluster.req_headroom,servers_needed))
78-
for i in range(servers_needed):
79-
cluster.start_node()
80-
81-
82-
# Handle instances we are supposed to shut down
83-
toTerminate = []
84-
for host in cluster.shutting_down:
85-
log.debug('ASDASDASD %s' % host.instanceId)
20+
class ScaleScheduler(PeriodicTask):
21+
22+
run_every = timedelta(seconds=5) # Used for tick frequency
23+
24+
def is_due(self, last_run_at):
25+
for app in Application.objects.exclude(to_be_run_at__gt=datetime.now()):
26+
app.to_be_run_at = datetime.now() + timedelta(seconds=app.scale_interarrival)
27+
app.save()
28+
Scale.delay(app)
29+
return (False, self.timedelta_seconds(self.run_every))
30+
31+
tasks.register(ScaleScheduler)
32+
33+
class Scale(Task):
34+
35+
def run(self, app):
36+
# Create the cluster object to help us manage the cluster
37+
cluster = AppCluster(app.pk)
38+
39+
# Clean up all idle users on all nodes for this application cluster
40+
log.debug('APP NAME %s'%app.name)
41+
cluster.logout_idle_users()
42+
43+
log.debug("Checking for active clusters")
44+
for host in cluster.active:
45+
log.debug("Found active host")
46+
an = AppNode(host)
47+
user_experience_tools.process_user_connections(an)
48+
49+
# Handle vms we were waiting on to boot up
50+
booting = deltacloud_tools.get_instances(cluster.booting)
51+
for vm in booting:
52+
dns_name = vm.public_addresses[0]
53+
log.debug('ASDF = %s' % dns_name)
54+
if dns_name.find("amazonaws.com") > -1:
55+
# Resolve the domain name into an IP address
56+
# This adds a dependancy on the 'host' command
57+
output = Popen(["host", dns_name], stdout=PIPE).communicate()[0]
58+
ip = '.'.join(re.findall('(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', output)[0])
8659
try:
87-
n = AppNode(host)
88-
log.debug('AppNode %s is waiting to be shut down and has %s connections' % (host.ip,n.sessions))
89-
if n.sessions == []:
90-
toTerminate.append(host)
91-
host.shutdownDateTime = datetime.now()
92-
host.save()
93-
except HostNotConnectableError:
94-
# Ignore this host that doesn't seem to be ssh'able, but log it as an error
95-
log.warning('AppNode %s is NOT sshable and should be looked into. It is currently waiting to shutdown')
96-
deltacloud_tools.terminate_instances(toTerminate)
97-
98-
99-
# Should I scale down?
100-
overprov_num = cluster.avail_headroom - cluster.req_headroom
101-
log.debug('overprov (%s) avail (%s) required(%s)' % (overprov_num,cluster.avail_headroom,cluster.req_headroom))
102-
# Reverse the list to try to remove the servers at the end of the waterfall
103-
inuse_reverse = cluster.inuse_map
104-
inuse_reverse.reverse()
105-
for (host,inuse) in inuse_reverse:
106-
# The node must have 0 sessions and the cluster must be able to be smaller while still leaving enough headroom
107-
if int(inuse) == 0 and overprov_num >= cluster.app.users_per_small:
108-
overprov_num = overprov_num - cluster.app.users_per_small
109-
host.state = 4
60+
# TODO: remove the hard coded '3389' & '22' below. '3389' is for RDP and '22' is for SSH
61+
# TODO: remove the arbitrary '3' second timeout below
62+
socket.create_connection((ip,3389),3)
63+
socket.create_connection((ip,22),3)
64+
except Exception as e:
65+
log.debug("Server %s is not yet available" % ip)
66+
pass
67+
else:
68+
instance = Instance.objects.filter(instanceId=vm.id)[0]
69+
booting.remove(vm)
70+
instance.ip = ip
71+
instance.state = 2
72+
instance.save()
73+
log.debug("Moving instance %s into enabled state with ip %s" % (instance.instanceId,ip))
74+
num_booting = len(booting)
75+
if num_booting > 0:
76+
log.debug("Application cluster '%s' is still waiting for %s cluster nodes to boot" % (cluster.name,len(booting)))
77+
78+
79+
# Consider if the cluster needs to be scaled
80+
log.debug('Considering %s app cluster for scaling ...' % cluster.name)
81+
# Should I scale up?
82+
log.debug('%s is avail (%s) < req (%s)?' % (cluster.app.name, cluster.avail_headroom, cluster.req_headroom))
83+
if cluster.avail_headroom < cluster.req_headroom:
84+
# Yes I should scale up
85+
space_needed = cluster.req_headroom - cluster.avail_headroom
86+
servers_needed = int(math.ceil(space_needed / float(cluster.app.users_per_small)))
87+
log.debug('Available headroom (%s) is less than the cluster headroom goal (%s). Starting %s additional cluster nodes now' % (cluster.avail_headroom,cluster.req_headroom,servers_needed))
88+
for i in range(servers_needed):
89+
cluster.start_node()
90+
91+
92+
# Handle instances we are supposed to shut down
93+
toTerminate = []
94+
for host in cluster.shutting_down:
95+
log.debug('ASDASDASD %s' % host.instanceId)
96+
try:
97+
n = AppNode(host)
98+
log.debug('AppNode %s is waiting to be shut down and has %s connections' % (host.ip,n.sessions))
99+
if n.sessions == []:
100+
toTerminate.append(host)
101+
host.shutdownDateTime = datetime.now()
110102
host.save()
111-
log.debug('Application Server %s has no sessions. Removing that node from the cluster!' % host.ip)
112-
return 'scaling complete @TODO put scaling event summary in this output'
103+
except HostNotConnectableError:
104+
# Ignore this host that doesn't seem to be ssh'able, but log it as an error
105+
log.warning('AppNode %s is NOT sshable and should be looked into. It is currently waiting to shutdown')
106+
deltacloud_tools.terminate_instances(toTerminate)
107+
113108

114-
tasks.register(Scale)
109+
# Should I scale down?
110+
overprov_num = cluster.avail_headroom - cluster.req_headroom
111+
log.debug('overprov (%s) avail (%s) required(%s)' % (overprov_num,cluster.avail_headroom,cluster.req_headroom))
112+
# Reverse the list to try to remove the servers at the end of the waterfall
113+
inuse_reverse = cluster.inuse_map
114+
inuse_reverse.reverse()
115+
for (host,inuse) in inuse_reverse:
116+
# The node must have 0 sessions and the cluster must be able to be smaller while still leaving enough headroom
117+
if int(inuse) == 0 and overprov_num >= cluster.app.users_per_small:
118+
overprov_num = overprov_num - cluster.app.users_per_small
119+
host.state = 4
120+
host.save()
121+
log.debug('Application Server %s has no sessions. Removing that node from the cluster!' % host.ip)
122+
return 'scaling complete @TODO put scaling event summary in this output'

0 commit comments

Comments
 (0)