This repository was archived by the owner on Jul 16, 2024. It is now read-only.

Commit b136aa7

Author: praspaliauskas
Commit message: X-Ray support added
Parent: 38e4dd5

File tree (4 files changed: 71 additions, 9 deletions)

  README.md
  policy.json
  src/python/driver.py
  src/python/lambdautils.py


README.md (file mode changed: 100644 → 100755)
Lines changed: 5 additions & 1 deletion

@@ -20,12 +20,14 @@ By leveraging this framework, you can build a cost-effective pipeline to run ad
 * Lambda execution role with
   * [S3 read/write access](http://docs.aws.amazon.com/lambda/latest/dg/with-s3-example-create-iam-role.html)
   * Cloudwatch log access (logs:CreateLogGroup, logs:CreateLogStream, logs:PutLogEvents)
+  * X-Ray write access (xray:PutTraceSegments, xray:PutTelemetryRecords)
 
 Check policy.json for a sample that you can use or extend.
 
 * To execute the driver locally, make sure that you configure your AWS profile with access to:
   * [S3](http://docs.aws.amazon.com/AmazonS3/latest/dev/example-policies-s3.html)
   * [Lambda](http://docs.aws.amazon.com/lambda/latest/dg/lambda-api-permissions-ref.html)
+  * [X-Ray](https://docs.aws.amazon.com/xray/latest/devguide/xray-permissions.html)
 
 ### Quickstart::Step by Step ###
 

@@ -52,7 +54,9 @@ To run the example, you must have the AWS CLI set up. Your credentials must have
 
     $ cat driverconfig.json
 
-6. Run the driver
+6. [Run AWS X-Ray Daemon locally](https://docs.aws.amazon.com/xray/latest/devguide/xray-daemon-local.html); otherwise you will not be able to see traces from the local driver in the AWS X-Ray console. Traces from the Reducer Coordinator Lambda functions will still be present.
+
+7. Run the driver
 
     $ python driver.py
 
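On the SDK side, the Python X-Ray recorder used by driver.py emits segments over UDP to a daemon at 127.0.0.1:2000 by default, which is why step 6 asks for a locally running daemon. If the daemon is started on a non-default address, the recorder can be pointed at it explicitly; a minimal sketch (the port shown is an assumption for illustration, not part of this commit):

    from aws_xray_sdk.core import xray_recorder

    # The SDK default is 127.0.0.1:2000; override only if the local
    # X-Ray daemon was started on a different host or port.
    xray_recorder.configure(daemon_address='127.0.0.1:3000')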

policy.json (file mode changed: 100644 → 100755)
Lines changed: 13 additions & 0 deletions

@@ -64,6 +64,19 @@
                 "arn:aws:s3:::MY-S3-BUCKET/*"
             ]
         },
+
+        {
+            "Sid": "StmXrayAllow",
+            "Effect": "Allow",
+            "Action": [
+                "xray:PutTraceSegments",
+                "xray:PutTelemetryRecords"
+            ],
+            "Resource": [
+                "*"
+            ]
+        },
+
         {
             "Effect": "Allow",
             "Action": [

src/python/driver.py (file mode changed: 100644 → 100755)
Lines changed: 49 additions & 7 deletions

@@ -32,22 +32,40 @@
 from multiprocessing.dummy import Pool as ThreadPool
 from functools import partial
 
+from botocore.client import Config
+import logging
+from aws_xray_sdk.core import xray_recorder
+from aws_xray_sdk.core import patch_all
+patch_all()
+logging.basicConfig(level='WARNING')
+logging.getLogger('aws_xray_sdk').setLevel(logging.ERROR)
+# collect all tracing samples
+rules = {"version": 1, "default": {"fixed_target": 1, "rate": 1}}
+xray_recorder.configure(sampling_rules=rules)
+
+xray_recorder.begin_segment('Map Reduce Driver')
 # create an S3 session
 s3 = boto3.resource('s3')
 s3_client = boto3.client('s3')
-lambda_client = boto3.client('lambda')
+
+# Setting longer timeout for reading lambda results and larger connections pool
+lambda_config = Config(read_timeout=120, max_pool_connections=50)
+lambda_client = boto3.client('lambda', config=lambda_config)
 
 JOB_INFO = 'jobinfo.json'
 
 ### UTILS ####
+@xray_recorder.capture('zipLambda')
 def zipLambda(fname, zipname):
     # faster to zip with shell exec
     subprocess.call(['zip', zipname] + glob.glob(fname) + glob.glob(JOB_INFO) +
                     glob.glob("lambdautils.py"))
 
+@xray_recorder.capture('write_to_s3')
 def write_to_s3(bucket, key, data, metadata):
     s3.Bucket(bucket).put_object(Key=key, Body=data, Metadata=metadata)
 
+@xray_recorder.capture('write_job_config')
 def write_job_config(job_id, job_bucket, n_mappers, r_func, r_handler):
     fname = "jobinfo.json";
     with open(fname, 'w') as f:
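A quick note on the setup in the hunk above: patch_all() makes the X-Ray SDK instrument boto3/botocore (among other supported libraries), so every AWS API call made by the driver is recorded automatically as a subsegment, while the explicit begin_segment/begin_subsegment calls give the trace its named structure. A minimal standalone sketch of that pattern; the bucket, prefix, and segment names here are illustrative, not from this commit:

    import boto3
    from aws_xray_sdk.core import xray_recorder, patch_all

    patch_all()  # patched boto3 calls show up as subsegments automatically

    xray_recorder.begin_segment('ExampleDriver')  # top-level trace segment

    subsegment = xray_recorder.begin_subsegment('List input keys')
    keys = [obj.key for obj in
            boto3.resource('s3').Bucket('my-example-bucket').objects.filter(Prefix='input/')]
    subsegment.put_metadata('key_count', len(keys), 'example')
    xray_recorder.end_subsegment()

    xray_recorder.end_segment()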
@@ -70,15 +88,14 @@ def write_job_config(job_id, job_bucket, n_mappers, r_func, r_handler):
 config = json.loads(open('driverconfig.json', 'r').read())
 
 # 1. Get all keys to be processed
+xray_recorder.begin_subsegment('Get all keys to be processed')
 # init
 bucket = config["bucket"]
 job_bucket = config["jobBucket"]
 region = config["region"]
 lambda_memory = config["lambdaMemory"]
 concurrent_lambdas = config["concurrentLambdas"]
 
-#all_keys = s3_client.list_objects(Bucket=bucket, Prefix=config["prefix"])["Contents"]
-
 # Fetch all the keys that match the prefix
 all_keys = []
 for obj in s3.Bucket(bucket).objects.filter(Prefix=config["prefix"]).all():

@@ -87,9 +104,13 @@ def write_job_config(job_id, job_bucket, n_mappers, r_func, r_handler):
 bsize = lambdautils.compute_batch_size(all_keys, lambda_memory)
 batches = lambdautils.batch_creator(all_keys, bsize)
 n_mappers = len(batches)
+document = xray_recorder.current_subsegment()
+document.put_metadata("Batch size: ", bsize, "Processing initialization")
+document.put_metadata("Mappers: ", n_mappers, "Processing initialization")
+xray_recorder.end_subsegment()  # Get all keys to be processed
 
 # 2. Create the lambda functions
-
+xray_recorder.begin_subsegment('Prepare Lambda functions')
 L_PREFIX = "BL"
 
 # Lambda functions
@@ -103,18 +124,24 @@ def write_job_config(job_id, job_bucket, n_mappers, r_func, r_handler):
 zipLambda(config["mapper"]["name"], config["mapper"]["zip"])
 zipLambda(config["reducer"]["name"], config["reducer"]["zip"])
 zipLambda(config["reducerCoordinator"]["name"], config["reducerCoordinator"]["zip"])
+xray_recorder.end_subsegment()  # Prepare Lambda functions
 
 # mapper
+xray_recorder.begin_subsegment('Create mapper Lambda function')
 l_mapper = lambdautils.LambdaManager(lambda_client, s3_client, region, config["mapper"]["zip"], job_id,
                                      mapper_lambda_name, config["mapper"]["handler"])
 l_mapper.update_code_or_create_on_noexist()
+xray_recorder.end_subsegment()  # Create mapper Lambda function
 
 # Reducer func
+xray_recorder.begin_subsegment('Create reducer Lambda function')
 l_reducer = lambdautils.LambdaManager(lambda_client, s3_client, region, config["reducer"]["zip"], job_id,
                                       reducer_lambda_name, config["reducer"]["handler"])
 l_reducer.update_code_or_create_on_noexist()
+xray_recorder.end_subsegment()  # Create reducer Lambda function
 
 # Coordinator
+xray_recorder.begin_subsegment('Create reducer coordinator Lambda function')
 l_rc = lambdautils.LambdaManager(lambda_client, s3_client, region, config["reducerCoordinator"]["zip"], job_id,
                                  rc_lambda_name, config["reducerCoordinator"]["handler"])
 l_rc.update_code_or_create_on_noexist()
@@ -124,29 +151,36 @@ def write_job_config(job_id, job_bucket, n_mappers, r_func, r_handler):
 
 # create event source for coordinator
 l_rc.create_s3_eventsource_notification(job_bucket)
+xray_recorder.end_subsegment()  # Create reducer coordinator Lambda function
 
 # Write Jobdata to S3
+xray_recorder.begin_subsegment('Write job data to S3')
 j_key = job_id + "/jobdata";
 data = json.dumps({
     "mapCount": n_mappers,
     "totalS3Files": len(all_keys),
     "startTime": time.time()
 })
+xray_recorder.current_subsegment().put_metadata("Job data: ", data, "Write job data to S3");
 write_to_s3(job_bucket, j_key, data, {})
+xray_recorder.end_subsegment()  # Write job data to S3
 
 ### Execute ###
 
 mapper_outputs = []
 
 #2. Invoke Mappers
+xray_recorder.begin_subsegment('Invoke mappers')
 def invoke_lambda(batches, m_id):
+    xray_recorder.begin_segment('Invoke mapper Lambda')
     '''
     lambda invoke function
     '''
     # TODO: Increase timeout
 
     #batch = [k['Key'] for k in batches[m_id-1]]
     batch = [k.key for k in batches[m_id-1]]
+    xray_recorder.current_segment().put_annotation("batch_for_mapper_" + str(m_id), str(batch));
     #print "invoking", m_id, len(batch)
     resp = lambda_client.invoke(
         FunctionName = mapper_lambda_name,
@@ -162,7 +196,7 @@ def invoke_lambda(batches, m_id):
     out = eval(resp['Payload'].read())
     mapper_outputs.append(out)
     print "mapper output", out
-
+    xray_recorder.end_segment()
 # Exec Parallel
 print "# of Mappers ", n_mappers
 pool = ThreadPool(n_mappers)
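One detail worth noting in the hunk above: invoke_lambda runs on ThreadPool worker threads, and the X-Ray SDK keeps its current entity in thread-local context, so each worker opens and closes its own segment rather than reusing the driver's 'Map Reduce Driver' segment. A minimal sketch of the same pattern; the names and the doubling "work" are illustrative, not from this commit:

    from multiprocessing.dummy import Pool as ThreadPool
    from aws_xray_sdk.core import xray_recorder

    def traced_worker(task_id):
        # Each thread needs its own segment; the parent segment lives in the
        # main thread's context and is not visible here.
        xray_recorder.begin_segment('worker-task')
        try:
            xray_recorder.current_segment().put_annotation('task_id', task_id)
            return task_id * 2  # stand-in for the real Lambda invocation
        finally:
            xray_recorder.end_segment()

    pool = ThreadPool(4)
    results = pool.map(traced_worker, range(8))
    pool.close()
    pool.join()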
@@ -175,16 +209,20 @@ def invoke_lambda(batches, m_id):
     nm = min(concurrent_lambdas, n_mappers)
     results = pool.map(invoke_lambda_partial, Ids[mappers_executed: mappers_executed + nm])
     mappers_executed += nm
+    xray_recorder.current_subsegment().put_metadata("Mapper lambdas executed: ", mappers_executed, "Invoke mappers");
 
 pool.close()
 pool.join()
 
 print "all the mappers finished"
+xray_recorder.end_subsegment()  # Invoke mappers
 
 # Delete Mapper function
+xray_recorder.begin_subsegment('Delete mappers')
 l_mapper.delete_function()
+xray_recorder.end_subsegment()  # Delete mappers
 
-######## COST ######
+xray_recorder.begin_subsegment('Calculate cost')
 
 # Calculate costs - Approx (since we are using exec time reported by our func and not billed ms)
 total_lambda_secs = 0
@@ -247,8 +285,12 @@ def invoke_lambda(batches, m_id):
 print "S3 Cost", s3_cost
 print "Total Cost: ", lambda_cost + s3_cost
 print "Total Lines:", total_lines
-
+xray_recorder.end_subsegment()  # Calculate cost
 
 # Delete Reducer function
+xray_recorder.begin_subsegment('Delete reducers')
 l_reducer.delete_function()
 l_rc.delete_function()
+xray_recorder.end_subsegment()  # Delete reducers
+
+xray_recorder.end_segment()  # Map Reduce Driver

src/python/lambdautils.py (file mode changed: 100644 → 100755)
Lines changed: 4 additions & 1 deletion

@@ -30,6 +30,8 @@ def __init__ (self, l, s3, region, codepath, job_id, fname, handler, lmem=1536):
         self.timeout = 300
         self.function_arn = None  # set after creation
 
+    # TracingConfig parameter switches X-Ray tracing on/off.
+    # Change value to 'Mode': 'PassThrough' to switch it off.
     def create_lambda_function(self):
         runtime = 'python2.7';
         response = self.awslambda.create_function(

@@ -42,7 +44,8 @@ def create_lambda_function(self):
             Runtime = runtime,
             Description = self.function_name,
             MemorySize = self.memory,
-            Timeout = self.timeout
+            Timeout = self.timeout,
+            TracingConfig = {'Mode': 'Active'}
         )
         self.function_arn = response['FunctionArn']
         print response
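Since the tracing mode is set here only when a function is created, an already-deployed function keeps whatever mode it was created with. If needed, the mode can also be changed afterwards through the standard Lambda API; a minimal sketch, using a placeholder function name that is not part of this commit:

    import boto3

    lambda_client = boto3.client('lambda')

    # Flip X-Ray tracing for an existing function without redeploying its code.
    # 'my-mapper-function' is a placeholder; use 'Active' to turn tracing back on.
    lambda_client.update_function_configuration(
        FunctionName='my-mapper-function',
        TracingConfig={'Mode': 'PassThrough'}
    )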
