diff --git a/.travis.yml b/.travis.yml
index bf541dcc1..df242a1f1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,20 +9,19 @@ matrix:
     - python: "3.4"
   fast_finish: true
 
-# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
-install:
- - pip install MySQL-python
- - pip install python-ldap
- - pip install iso8601
- - pip install dirq
- - pip install unittest2
- - pip install coveralls
+# Route build to container-based infrastructure
+sudo: false
 
+# Cache the dependencies installed by pip
+cache: pip
 
-# command to run tests, e.g. python setup.py test
-script:
+# Install defaults to "pip install -r requirements.txt"
+
+# Commands that prepare things for the test
+before_script:
  - export PYTHONPATH=$PYTHONPATH:`pwd -P`
  - cd test
- - coverage run --source=apel,bin -m unittest2 discover
 
-after_success:
- - coveralls
+# Command to run tests
+script: coverage run --source=apel,bin -m unittest2 discover --buffer
+
+after_success: coveralls
diff --git a/CHANGELOG b/CHANGELOG
index c4e80fcbe..0a142d2ca 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,12 @@
 Changelog for apel
 ==================
+ * Tue Jul 14 2015 Adrian Coveney - 1.5.0-1
+ - Added sorting of accounting logs before parsing, which makes reading through
+   the parser log easier, especially if files use the YYYYMMDD date format.
+ - Added the first version of a basic HTCondor parser.
+ - Fixed the server schema to correctly aggregate super summaries when
+   viewed as normalised super summaries.
+
  * Thu Mar 12 2015 Adrian Coveney - 1.4.1-1
  - Changed defaults so that parallel jobs are reported for new installations.
  - Corrected year calculation in migrate_apel.py script.
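The install step now comes from the requirements.txt added below, and the test commands are split between before_script and script. A rough local equivalent of the new CI steps, as a sketch (assumes pip, coverage and unittest2 are available, and is run from the repository root):

```python
# Sketch: mirror the Travis steps locally.
import os
import subprocess

subprocess.check_call(['pip', 'install', '-r', 'requirements.txt'])

# before_script: put the repo root on PYTHONPATH, then run from test/
env = dict(os.environ, PYTHONPATH=os.getcwd())
subprocess.check_call(['coverage', 'run', '--source=apel,bin',
                       '-m', 'unittest2', 'discover', '--buffer'],
                      cwd='test', env=env)
```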
diff --git a/apel.spec b/apel.spec
index 225d3199a..516a96cab 100644
--- a/apel.spec
+++ b/apel.spec
@@ -4,7 +4,7 @@
 %endif
 
 Name:           apel
-Version:        1.4.1
+Version:        1.5.0
 %define releasenumber 1
 Release:        %{releasenumber}%{?dist}
 Summary:        APEL packages
@@ -34,7 +34,7 @@ apel-lib provides required libraries for the rest of APEL system.
 %package parsers
 Summary:        Parsers for APEL system
 Group:          Development/Languages
-Requires:       apel-lib >= 1.3.1
+Requires:       apel-lib >= 1.5.0
 Requires(pre):  shadow-utils
 
 %description parsers
 The apel-parsers package contains parsers for all the batch systems
@@ -44,7 +44,7 @@ supported by the APEL system: Torque, SGE and LSF.
 %package client
 Summary:        APEL client package
 Group:          Development/Languages
-Requires:       apel-lib >= 1.3.1, apel-ssm
+Requires:       apel-lib >= 1.5.0, apel-ssm
 Requires(pre):  shadow-utils
 
 %description client
@@ -55,7 +55,7 @@ SSM.
 %package server
 Summary:        APEL server package
 Group:          Development/Languages
-Requires:       apel-lib >= 1.3.1, apel-ssm
+Requires:       apel-lib >= 1.5.0, apel-ssm
 Requires(pre):  shadow-utils
 
 %description server
@@ -109,8 +109,9 @@ cp schemas/server-extra.sql %{buildroot}%_datadir/apel/
 cp schemas/cloud.sql %{buildroot}%_datadir/apel/
 cp schemas/storage.sql %{buildroot}%_datadir/apel/
 
-# slurm accounting script
+# accounting scripts
 cp scripts/slurm_acc.sh %{buildroot}%_datadir/apel/
+cp scripts/htcondor_acc.sh %{buildroot}%_datadir/apel/
 
 # message status script
 cp scripts/msg_status.py %{buildroot}%_datadir/apel/
@@ -140,6 +141,7 @@ exit 0
 %attr(755,root,root) %_bindir/apelparser
 %config(noreplace) %attr(600,-,-) %{apelconf}/parser.cfg
 %attr(755,root,root) %_datadir/apel/slurm_acc.sh
+%attr(755,root,root) %_datadir/apel/htcondor_acc.sh
 
 # ------------------------------------------------------------------------------
@@ -187,6 +189,13 @@
 # ==============================================================================
 %changelog
+ * Tue Jul 14 2015 Adrian Coveney - 1.5.0-1
+ - Added sorting of accounting logs before parsing, which makes reading through
+   the parser log easier, especially if files use the YYYYMMDD date format.
+ - Added the first version of a basic HTCondor parser.
+ - Fixed the server schema to correctly aggregate super summaries when
+   viewed as normalised super summaries.
+
  * Thu Mar 12 2015 Adrian Coveney - 1.4.1-1
  - Changed defaults so that parallel jobs are reported for new installations.
  - Corrected year calculation in migrate_apel.py script.
diff --git a/apel/__init__.py b/apel/__init__.py
index 7c6e6ba65..1e96bd9dd 100644
--- a/apel/__init__.py
+++ b/apel/__init__.py
@@ -15,4 +15,4 @@
 @author Konrad Jopek, Will Rogers
 '''
 
-__version__ = (1, 4, 1)
+__version__ = (1, 5, 0)
diff --git a/apel/parsers/htcondor.py b/apel/parsers/htcondor.py
new file mode 100644
index 000000000..03e03e409
--- /dev/null
+++ b/apel/parsers/htcondor.py
@@ -0,0 +1,70 @@
+'''
+   Copyright 2014 The Science and Technology Facilities Council
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   @author: Pavel Demin
+'''
+
+
+import logging
+from datetime import datetime
+
+from apel.db.records.event import EventRecord
+from apel.parsers import Parser
+
+
+log = logging.getLogger(__name__)
+
+
+class HTCondorParser(Parser):
+    '''
+    First implementation of the APEL parser for HTCondor
+    '''
+    def __init__(self, site, machine_name, mpi):
+        Parser.__init__(self, site, machine_name, mpi)
+        log.info('Site: %s; batch system: %s' % (self.site_name, self.machine_name))
+
+    def parse(self, line):
+        '''
+        Parses a single line from an accounting log file.
+        '''
+        # condor_history -constraint "JobStartDate > 0" -format "%s|" GlobalJobId -format "%s|" Owner -format "%d|" RemoteWallClockTime -format "%d|" RemoteUserCpu -format "%d|" RemoteSysCpu -format "%d|" JobStartDate -format "%d|" EnteredCurrentStatus -format "%d|" ResidentSetSize_RAW -format "%d|" ImageSize_RAW -format "%d|" RequestCpus
+        # arcce.rl.ac.uk#2376.0#71589|tatls011|287|107|11|1435671643|1435671930|26636|26832|1|1
+
+        values = line.strip().split('|')
+
+        mapping = {'Site'           : lambda x: self.site_name,
+                   'MachineName'    : lambda x: self.machine_name,
+                   'Infrastructure' : lambda x: "APEL-CREAM-HTCONDOR",
+                   'JobName'        : lambda x: x[0],
+                   'LocalUserID'    : lambda x: x[1],
+                   'LocalUserGroup' : lambda x: "",
+                   'WallDuration'   : lambda x: int(x[2]),
+                   'CpuDuration'    : lambda x: int(x[3]) + int(x[4]),
+                   'StartTime'      : lambda x: datetime.utcfromtimestamp(int(x[5])),
+                   'StopTime'       : lambda x: datetime.utcfromtimestamp(int(x[6])),
+                   'MemoryReal'     : lambda x: int(x[7]),
+                   'MemoryVirtual'  : lambda x: int(x[8]),
+                   'Processors'     : lambda x: int(x[9]),
+                   'NodeCount'      : lambda x: 0
+                   }
+
+        rc = {}
+
+        for key in mapping:
+            rc[key] = mapping[key](values)
+
+        record = EventRecord()
+        record.set_all(rc)
+        return record
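A minimal usage sketch for the new parser (illustration, not part of the patch), using the sample line from the code comment above; note how the two CPU fields are summed and the epoch fields come out as UTC datetimes:

```python
# Sketch: parse one condor_history line and inspect the record.
from apel.parsers.htcondor import HTCondorParser

parser = HTCondorParser('testSite', 'testHost', True)  # site, host, MPI flag
line = ('arcce.rl.ac.uk#2376.0#71589|tatls011|287|107|11|'
        '1435671643|1435671930|26636|26832|1|1')
record = parser.parse(line)
content = record._record_content

print(content['JobName'])      # arcce.rl.ac.uk#2376.0#71589
print(content['CpuDuration'])  # 118 (user CPU 107 + system CPU 11)
print(content['StartTime'])    # 2015-06-30 13:40:43 (epoch 1435671643 as UTC)
```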
diff --git a/bin/parser.py b/bin/parser.py
index 4770d2080..2c328ef3d 100644
--- a/bin/parser.py
+++ b/bin/parser.py
@@ -43,6 +43,7 @@
 from apel.parsers.sge import SGEParser
 from apel.parsers.pbs import PBSParser
 from apel.parsers.slurm import SlurmParser
+from apel.parsers.htcondor import HTCondorParser
 
 LOGGER_ID = 'parser'
@@ -55,9 +56,11 @@
           'LSF': LSFParser,
           'SGE': SGEParser,
           'SLURM': SlurmParser,
-          'blah' : BlahParser
+          'blah' : BlahParser,
+          'HTCondor': HTCondorParser,
           }
 
+
 class ParserConfigException(Exception):
     '''
     Exception raised when parser is misconfigured.
@@ -154,7 +157,7 @@ def scan_dir(parser, dirpath, reparse, expr, apel_db, processed):
     try:
         log.info('Scanning directory: %s', dirpath)
 
-        for item in os.listdir(dirpath):
+        for item in sorted(os.listdir(dirpath)):
             abs_file = os.path.join(dirpath, item)
             if os.path.isfile(abs_file) and expr.match(item):
                 # first, calculate the hash of the file:
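The sorted() call above is the change described in the changelog entry; a small illustration with hypothetical file names:

```python
# os.listdir() returns entries in arbitrary order, so date-stamped logs
# could be parsed (and logged) out of sequence; sorting restores
# chronological order for YYYYMMDD-style names.
names = ['accounting.20150703', 'accounting.20150701', 'accounting.20150702']
for name in sorted(names):
    print(name)
# accounting.20150701
# accounting.20150702
# accounting.20150703
```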
diff --git a/conf/parser.cfg b/conf/parser.cfg
index 41329d8d6..a5643aec9 100644
--- a/conf/parser.cfg
+++ b/conf/parser.cfg
@@ -29,7 +29,7 @@ enabled = true
 reparse = false
 
 # Batch system specific options.
-# Valid types are LSF, PBS, SGE, SLURM
+# Valid types are LSF, PBS, SGE, SLURM, HTCondor
 type =
 # Whether to try to parse multi-core details
 parallel = true
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..4ed91cae9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+MySQL-python
+python-ldap
+iso8601
+dirq
+# Dependencies for testing
+unittest2
+coveralls
\ No newline at end of file
diff --git a/schemas/server.sql b/schemas/server.sql
index 907ccad75..dd8d961f0 100644
--- a/schemas/server.sql
+++ b/schemas/server.sql
@@ -762,13 +762,13 @@ CREATE VIEW VNormalisedSuperSummaries AS
            Infrastructure,
            NodeCount,
            Processors,
-           EarliestEndTime,
-           LatestEndTime,
+           MIN(EarliestEndTime) AS EarliestEndTime,
+           MAX(LatestEndTime) AS LatestEndTime,
            SUM(WallDuration) AS WallDuration,
            SUM(CpuDuration) AS CpuDuration,
            SUM(NormalisedWallDuration) AS NormalisedWallDuration,
            SUM(NormalisedCpuDuration) AS NormalisedCpuDuration,
-           NumberOfJobs
+           SUM(NumberOfJobs) AS NumberOfJobs
     FROM HybridSuperSummaries,
          Sites AS site,
         DNs AS userdn,
diff --git a/scripts/htcondor_acc.sh b/scripts/htcondor_acc.sh
new file mode 100644
index 000000000..c31f0398d
--- /dev/null
+++ b/scripts/htcondor_acc.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+CONDOR_LOCATION=/usr
+OUTPUT_LOCATION=/var/log/accounting
+
+# Find all the history files modified in the last two months
+# (there can be more than one, if the CE submits to several schedds)
+HISTORY_FILES=$(find /var/lib/condor/spool/ -name history\* -mtime -62)
+
+# Create a temporary accounting file name
+NOW=$(date +"%Y%m%dT%H%M%S")
+OUTPUT_FILE=$OUTPUT_LOCATION/accounting.$NOW
+
+# Build the filter for the history command
+CONSTR="(JobStartDate>0)&&(CompletionDate>`date +%s -d "01 Jan 2014"`)"
+
+# Populate the temporary file
+for HF in $HISTORY_FILES
+do
+    $CONDOR_LOCATION/bin/condor_history -file $HF -constraint $CONSTR \
+        -format "%s|" GlobalJobId \
+        -format "%s|" Owner \
+        -format "%d|" RemoteWallClockTime \
+        -format "%d|" RemoteUserCpu \
+        -format "%d|" RemoteSysCpu \
+        -format "%d|" JobStartDate \
+        -format "%d|" EnteredCurrentStatus \
+        -format "%d|" ResidentSetSize_RAW \
+        -format "%d|" ImageSize_RAW \
+        -format "%d|" RequestCpus \
+        -format "\n" EMPTY >> $OUTPUT_FILE
+done
+
+# Invoke the parser
+/usr/bin/apelparser --config /etc/apel/parser.cfg
+
+# Cleanup
+/bin/find $OUTPUT_LOCATION -name accounting.\* -mtime +30 -exec /bin/rm {} \;
diff --git a/scripts/migrate_apel.py b/scripts/migrate_apel.py
index b7ae22734..5a5e2f599 100755
--- a/scripts/migrate_apel.py
+++ b/scripts/migrate_apel.py
@@ -57,6 +57,21 @@
 CALLPROC_STMT = """CALL InsertJobRecord(%s, %s, %s, 'None', %s, %s, %s, %s, %s, %s, %s, %s, %s, NULL, NULL, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
 
+DUPLICATES_JOIN = """ FROM JobRecords AS t
+                  LEFT JOIN MachineNames as m
+                      on (t.MachineNameID = m.id)
+                  INNER JOIN (SELECT LocalJobId,
+                                     EndTime
+                              FROM JobRecords
+                              LEFT JOIN MachineNames
+                                  on (JobRecords.MachineNameID = MachineNames.id)
+                              WHERE MachineNames.name != 'MachineName' )
+                  AS u
+                  ON (m.name = 'MachineName' AND t.LocalJobId = u.LocalJobId AND t.EndTime = u.EndTime); """
+
+COUNT_DUPLICATES_STMT = "SELECT count(*) " + DUPLICATES_JOIN
+
+DELETE_DUPLICATES_STMT = "DELETE t " + DUPLICATES_JOIN
 
 def parse_timestamp(string, fmt="%Y-%m-%dT%H:%M:%SZ"):
     '''
@@ -188,6 +203,23 @@ def delete_old_records(db, cutoff):
         c.execute(REMOVE_PROC)
     db.commit()
 
+def delete_duplicates(db):
+    '''
+    Delete all records that have been migrated but are duplicates of records
+    that are in the database already
+    '''
+    c = db.cursor()
+
+    c.execute(COUNT_DUPLICATES_STMT)
+    num_duplicates = c.fetchone()[0]
+    sys.stdout.write(' Found %d duplicate entries\n' % num_duplicates)
+
+    sys.stdout.write(' Deleting duplicates\n')
+    c.execute(DELETE_DUPLICATES_STMT)
+    num_deleted = db.affected_rows()
+    sys.stdout.write(' Deleted %d duplicate entries\n\n\n' % num_deleted)
+
+    db.commit()
 
 def main():
     '''
@@ -235,6 +267,10 @@
     sys.stdout.write("\n Deleting all records older than %s\n" % cutoff)
     sys.stdout.write(" from %s:%s ...\n" % (host2, dbname2))
     delete_old_records(db2, cutoff)
+    sys.stdout.write(" Done.\n\n\n")
+
+    # delete duplicates
+    delete_duplicates(db2)
 
     sys.stdout.write(" Complete.\n\n\n")
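In words, the join treats a migrated row as a duplicate when it carries the placeholder machine name 'MachineName' while an already-present row (with a real machine name) matches it on LocalJobId and EndTime. A rough Python paraphrase of that rule (illustrative names only, not the script's code):

```python
# Illustrative paraphrase of the DUPLICATES_JOIN matching rule.
def is_duplicate(row, existing_rows):
    """True if 'row' is a migrated record that an existing row shadows."""
    return (row['MachineName'] == 'MachineName' and
            any(e['MachineName'] != 'MachineName' and
                e['LocalJobId'] == row['LocalJobId'] and
                e['EndTime'] == row['EndTime']
                for e in existing_rows))
```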
diff --git a/test/test_htcondor_parser.py b/test/test_htcondor_parser.py
new file mode 100644
index 000000000..cd3738aa7
--- /dev/null
+++ b/test/test_htcondor_parser.py
@@ -0,0 +1,75 @@
+import datetime
+import unittest
+
+from apel.parsers.htcondor import HTCondorParser
+
+
+class HTCondorParserTest(unittest.TestCase):
+    """
+    Test case for HTCondor parser
+    """
+
+    def setUp(self):
+        self.parser = HTCondorParser('testSite', 'testHost', True)
+
+    def test_parse_line(self):
+
+        keys = ('JobName', 'LocalUserID', 'LocalUserGroup', 'WallDuration',
+                'CpuDuration', 'StartTime', 'StopTime', 'MemoryReal',
+                'MemoryVirtual', 'NodeCount', 'Processors')
+
+        # condor_history output (line split):
+        # GlobalJobId|Owner|RemoteWallClockTime|RemoteUserCpu|RemoteSysCpu|
+        # JobStartDate|EnteredCurrentStatus|ResidentSetSize_RAW|ImageSize_RAW|
+        # RequestCpus
+
+        # Examples of correct lines
+        lines = (
+            '1234567|opssgm|19|18|8|1412688273|1412708199|712944|2075028|1',
+            'arcce.rl.ac.uk#2376.0#1589|tatls011|287|107|11|1435671643|1435671930|26636|26832|1|1',
+            'arcce.rl.ac.uk#2486.0#1888|t2k016|4|0|0|1435671919|1435671922|0|11|1|1',
+            'arcce.rl.ac.uk#2478.0#1874|snoplus014|2|0|0|1435671918|1435671920|0|11|1|1',
+        )
+
+        values = (
+            ('1234567', 'opssgm', None, 19, 26,
+             datetime.datetime(2014, 10, 7, 13, 24, 33),
+             datetime.datetime(2014, 10, 7, 18, 56, 39),
+             712944, 2075028, 0, 1),
+            ('arcce.rl.ac.uk#2376.0#1589', 'tatls011', None, 287, 118,
+             datetime.datetime(2015, 6, 30, 13, 40, 43),
+             datetime.datetime(2015, 6, 30, 13, 45, 30),
+             26636, 26832, 0, 1),
+            ('arcce.rl.ac.uk#2486.0#1888', 't2k016', None, 4, 0,
+             datetime.datetime(2015, 6, 30, 13, 45, 19),
+             datetime.datetime(2015, 6, 30, 13, 45, 22),
+             0, 11, 0, 1),
+            ('arcce.rl.ac.uk#2478.0#1874', 'snoplus014', None, 2, 0,
+             datetime.datetime(2015, 6, 30, 13, 45, 18),
+             datetime.datetime(2015, 6, 30, 13, 45, 20),
+             0, 11, 0, 1),
+        )
+
+        cases = {}
+        for line, value in zip(lines, values):
+            cases[line] = dict(zip(keys, value))
+
+        for line in cases.keys():
+            record = self.parser.parse(line)
+            cont = record._record_content
+
+            self.assertEqual(cont['Site'], 'testSite')
+            self.assertEqual(cont['MachineName'], 'testHost')
+            self.assertEqual(cont['Infrastructure'], 'APEL-CREAM-HTCONDOR')
+
+            for key in cases[line].keys():
+                self.assertTrue(key in cont, "Key '%s' not in record." % key)
+
+            for key in cases[line].keys():
+                self.assertEqual(cont[key], cases[line][key],
+                                 "%s != %s for key %s." %
+                                 (cont[key], cases[line][key], key))
+
+
+if __name__ == '__main__':
+    unittest.main()
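The expected datetimes in the test are just each line's epoch fields rendered as UTC, matching the parser's conversion; a quick cross-check for the first case:

```python
# Cross-check the first test case's StartTime/StopTime expectations.
import datetime

print(datetime.datetime.utcfromtimestamp(1412688273))  # 2014-10-07 13:24:33
print(datetime.datetime.utcfromtimestamp(1412708199))  # 2014-10-07 18:56:39
```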
diff --git a/test/test_mysql.py b/test/test_mysql.py
new file mode 100644
index 000000000..15cfa42ef
--- /dev/null
+++ b/test/test_mysql.py
@@ -0,0 +1,95 @@
+import datetime
+import os
+import subprocess
+import unittest
+
+import apel.db.apeldb
+import apel.db.records.job
+
+
+if os.name == 'nt':
+    os.environ['PATH'] += ';C:/Program Files/MySQL/MySQL Server 5.1/bin/'
+
+
+class MysqlTest(unittest.TestCase):
+    # These test cases require a local MySQL db server with no password on root
+    def setUp(self):
+        query = ('DROP DATABASE IF EXISTS apel_unittest;'
+                 'CREATE DATABASE apel_unittest;')
+        subprocess.call(['mysql', '-u', 'root', '-e', query])
+
+        schema_path = os.path.abspath(os.path.join('..', 'schemas',
+                                                   'server.sql'))
+        schema_handle = open(schema_path)
+        subprocess.call(['mysql', '-u', 'root', 'apel_unittest'], stdin=schema_handle)
+        schema_handle.close()
+
+        self.db = apel.db.apeldb.ApelDb('mysql', 'localhost', 3306, 'root', '',
+                                        'apel_unittest')
+
+    # This method seems to run really slowly on Travis CI
+    #def tearDown(self):
+    #    query = "DROP DATABASE apel_unittest;"
+    #    subprocess.call(['mysql', '-u', 'root', '-e', query])
+
+    def test_test_connection(self):
+        """Basic check that test_connection works without error."""
+        self.db.test_connection()
+
+    def test_bad_connection(self):
+        """Check that initialising ApelDb fails if a bad password is used."""
+        self.assertRaises(apel.db.apeldb.ApelDbException, apel.db.apeldb.ApelDb,
+                          'mysql', 'localhost', 3306, 'root', 'badpassword',
+                          'apel_badtest')
+
+    def test_lost_connection(self):
+        """
+        Check that a lost connection to the db raises an exception.
+
+        Simulate the lost connection by changing the host.
+        """
+        self.db._db_host = 'badhost'
+        self.assertRaises(apel.db.apeldb.ApelDbException,
+                          self.db.test_connection)
+
+    def test_bad_loads(self):
+        """Check that empty loads return None and bad types raise exception."""
+        self.assertTrue(self.db.load_records([], source='testDN') is None)
+
+        self.assertRaises(apel.db.apeldb.ApelDbException,
+                          self.db.load_records, [1234], source='testDN')
+
+    def test_load_and_get(self):
+        job = apel.db.records.job.JobRecord()
+        job._record_content = {'Site': 'testSite', 'LocalJobId': 'testJob',
+                               'SubmitHost': 'testHost',
+                               'WallDuration': 10, 'CpuDuration': 10,
+                               'StartTime': datetime.datetime.fromtimestamp(123456),
+                               'EndTime': datetime.datetime.fromtimestamp(654321),
+                               'ServiceLevelType': 'HEPSPEC',
+                               'ServiceLevel': 3}
+        items_in = job._record_content.items()
+        record_list = [job]
+        # load_records changes the 'job' job record as it calls _check_fields
+        # which adds placeholders to empty fields
+        self.db.load_records(record_list, source='testDN')
+
+        records_out = self.db.get_records(apel.db.records.job.JobRecord)
+        items_out = list(records_out)[0][0]._record_content.items()
+        # Check that items_in is a subset of items_out. The lengths are
+        # compared rather than using all() as Python 2.4 lacks all().
+        self.assertEqual([item in items_out for item in items_in].count(True), len(items_in))
+
+    def test_last_update(self):
+        """
+        Check that the LastUpdated table can be set and queried.
+
+        It should not be set initially, so should return None, then should
+        return a time after being set.
+        """
+        self.assertTrue(self.db.get_last_updated() is None)
+
+        self.assertTrue(self.db.set_updated())
+
+        self.assertTrue(type(self.db.get_last_updated()) is datetime.datetime)
+
+
+if __name__ == '__main__':
+    unittest.main()
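As the comment in the test class says, these tests shell out to the mysql client and need a passwordless local root account; a preflight check in the same style (sketch, not part of the suite):

```python
# Preflight: verify the passwordless MySQL root access the tests rely on.
import subprocess

ret = subprocess.call(['mysql', '-u', 'root', '-e', 'SELECT 1;'])
if ret != 0:
    print('No passwordless MySQL root access; MysqlTest and SchemaTest will fail.')
```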
diff --git a/test/test_schemas.py b/test/test_schemas.py
new file mode 100644
index 000000000..17ace05df
--- /dev/null
+++ b/test/test_schemas.py
@@ -0,0 +1,48 @@
+import os
+from subprocess import call, Popen, PIPE
+import unittest
+
+
+if os.name == 'nt':
+    os.environ['PATH'] += ';C:/Program Files/MySQL/MySQL Server 5.1/bin/'
+
+
+class SchemaTest(unittest.TestCase):
+    # These test cases require a local MySQL db server with no password on root
+    def setUp(self):
+        query = ('DROP DATABASE IF EXISTS apel_unittest;'
+                 'CREATE DATABASE apel_unittest;')
+        call(['mysql', '-u', 'root', '-e', query])
+
+    def tearDown(self):
+        call(['mysql', '-u', 'root', '-e', 'DROP DATABASE apel_unittest;'])
+
+
+def make_schema_test(schema):
+    """Make a test case that will check the given schema."""
+    def schema_test(self):
+        if schema == 'server-extra.sql':
+            # server-extra.sql needs server.sql loaded first
+            parent_schema_path = os.path.abspath(os.path.join('..', 'schemas',
+                                                              'server.sql'))
+            parent_schema_handle = open(parent_schema_path)
+            call(['mysql', '-u', 'root', 'apel_unittest'], stdin=parent_schema_handle)
+            parent_schema_handle.close()
+        schema_path = os.path.abspath(os.path.join('..', 'schemas', schema))
+        schema_handle = open(schema_path)
+        p = Popen(['mysql', '-u', 'root', 'apel_unittest'], stdin=schema_handle, stderr=PIPE)
+        schema_handle.close()
+        p.wait()
+        self.assertFalse(p.returncode, p.communicate()[1])
+    return schema_test
+
+
+# Make a test case for each schema found in the schemas directory
+for schema in os.listdir(os.path.abspath(os.path.join('..', 'schemas'))):
+    test_method = make_schema_test(schema)
+    test_method.__name__ = 'test_%s_schema' % schema[:-4]
+    setattr(SchemaTest, test_method.__name__, test_method)
+
+
+if __name__ == '__main__':
+    unittest.main()
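One subtlety in the generated test names (sketch; the file list here is assumed from the spec file's cp commands, the actual set depends on what is in schemas/):

```python
# What the loop above generates for an assumed schemas/ listing.
for schema in ['client.sql', 'server.sql', 'server-extra.sql',
               'cloud.sql', 'storage.sql']:
    print('test_%s_schema' % schema[:-4])
# 'test_server-extra_schema' is not a valid Python identifier, but
# setattr() accepts any string, and unittest still discovers the method
# because discovery only checks the 'test' name prefix via dir().
```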