Skip to content

Commit 018da21

Browse files
committed
move files
1 parent 8da1092 commit 018da21

File tree

8 files changed

+177
-1
lines changed

8 files changed

+177
-1
lines changed

aws_lambda_app/__init__.py

Whitespace-only changes.

aws_lambda_app/lambda_functions/__init__.py

Whitespace-only changes.
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import json
2+
import boto3
3+
import os
4+
5+
import numpy as np
6+
from numpy.random import MT19937, RandomState
7+
import pandas as pd
8+
from sklearn.linear_model import *
9+
from sklearn.ensemble import *
10+
11+
12+
def lambda_cv_predict(event, context):
    """AWS Lambda handler: fit a learner and return out-of-sample predictions.

    Parameters
    ----------
    event : dict
        Invocation payload. Keys read by this handler:

        - ``data_backend``: ``'s3'`` (CSV object read from S3 via ``bucket``
          and ``file_key``) or ``'json'`` (data frame serialized with
          ``orient='columns'`` under ``data``).
        - ``learner_repr``: Python expression that evaluates to the learner.
        - ``pred_method``: ``'predict'`` or ``'predict_proba'``; for the
          latter the second column of the result is returned.
        - ``y_col`` / ``x_cols``: column label(s) of the target / features.
        - ``scaling``: ``'n_folds * n_rep'`` (one fold per invocation;
          ``test_ids``/``train_ids`` are index lists) or ``'n_rep'`` (all
          folds of a repetition; ``test_ids``/``train_ids`` are lists of
          index lists).
        - ``train_ids``: optional; if missing, training uses every row that
          is not in the corresponding test index set.
        - ``seed`` / ``seed_jumps``: optional; if ``seed`` is given the
          learner's ``random_state`` is set from a jumped MT19937 stream.
        - ``learner``, ``i_rep``, ``i_fold``: echoed back unchanged.
    context : object
        Lambda context object; not used.

    Returns
    -------
    dict
        ``statusCode``, ``message``, ``preds`` (list of floats; for
        ``scaling == 'n_rep'`` it has one entry per observation, NaN where no
        fold predicted), ``learner``, ``i_rep`` and ``i_fold``.

    Raises
    ------
    ValueError
        For an unsupported ``pred_method``, ``scaling`` or S3 file type.
    NotImplementedError
        For an unsupported ``data_backend``.
    """
    # Get variables from event
    lrn_repr = event.get('learner_repr')
    pred_method = event.get('pred_method')
    y_col = event.get('y_col')
    x_cols = event.get('x_cols')
    test_ids = event.get('test_ids')
    train_ids = event.get('train_ids')

    learner_name = event.get('learner')
    scaling = event.get('scaling')
    i_rep = event.get('i_rep')
    i_fold = event.get('i_fold')
    seed = event.get('seed')
    seed_jumps = event.get('seed_jumps')

    # Validate cheap inputs first, so a bad request fails before any data is
    # downloaded or any model is fitted (explicit raises also survive -O).
    if pred_method not in ('predict', 'predict_proba'):
        raise ValueError(f'Unsupported pred_method: {pred_method!r}')
    if scaling not in ('n_folds * n_rep', 'n_rep'):
        raise ValueError(f'Unsupported scaling: {scaling!r}')

    df = _load_event_data(event)
    y = df.loc[:, y_col].values
    x = df.loc[:, x_cols].values

    # create and fit learner
    # NOTE(security): eval() executes arbitrary code taken from the event.
    # This is only acceptable while every principal allowed to invoke the
    # function is fully trusted; otherwise replace it with a whitelist of
    # learner classes and parameters.
    learner = eval(lrn_repr)

    if scaling == 'n_folds * n_rep':
        # A single fold per invocation.
        if seed is not None:
            learner.set_params(random_state=_jumped_random_state(seed, seed_jumps))
        preds = _fit_and_predict(learner, x, y, train_ids, test_ids, pred_method)
    else:
        # All folds of one repetition; every fold gets its own jumped stream.
        preds = np.full(x.shape[0], np.nan)
        if train_ids is None:
            train_ids = [None] * len(test_ids)
        for idx, (train_index, test_index) in enumerate(zip(train_ids, test_ids)):
            if seed is not None:
                learner.set_params(
                    random_state=_jumped_random_state(seed, seed_jumps + idx))
            preds[test_index] = _fit_and_predict(
                learner, x, y, train_index, test_index, pred_method)

    return {
        'statusCode': 200,
        'message': 'Success!',
        'preds': preds.tolist(),
        'learner': learner_name,
        'i_rep': i_rep,
        'i_fold': i_fold
    }


def _load_event_data(event):
    """Load the data frame referenced by *event* from S3 (CSV) or inline JSON."""
    data_backend = event.get('data_backend')
    if data_backend == 's3':
        # s3 data backend
        bucket = event.get('bucket')
        key = event.get('file_key')
        file_ending = os.path.splitext(key)[1]
        # Check the file type before issuing the (billable) S3 request.
        if file_ending != '.csv':
            raise ValueError(f'Unsupported file type: {file_ending!r}')
        s3_client = boto3.client('s3')
        response = s3_client.get_object(Bucket=bucket,
                                        Key=key)
        # load csv as a pd.DataFrame
        return pd.read_csv(response["Body"])
    if data_backend == 'json':
        from io import StringIO

        df_json = event.get('data')
        # Recent pandas versions deprecate passing a literal JSON string to
        # read_json, so wrap strings in a file-like object.
        if isinstance(df_json, str):
            df_json = StringIO(df_json)
        return pd.read_json(df_json, orient='columns')
    raise NotImplementedError()


def _jumped_random_state(seed, jumps):
    """Return a RandomState on the MT19937 stream for *seed* jumped *jumps* times."""
    return RandomState(MT19937(seed).jumped(jumps))


def _fit_and_predict(learner, x, y, train_index, test_index, pred_method):
    """Fit *learner* on the training rows and predict the rows in *test_index*."""
    if train_index is None:
        # No explicit training set: train on everything outside the test set.
        x_train = np.delete(x, test_index, axis=0)
        y_train = np.delete(y, test_index)
    else:
        x_train, y_train = x[train_index], y[train_index]
    learner.fit(x_train, y_train)
    if pred_method == 'predict':
        return learner.predict(x[test_index])
    # pred_method == 'predict_proba' (validated by the caller): keep only the
    # second column of the returned probabilities.
    return learner.predict_proba(x[test_index])[:, 1]

aws_lambda_app/lambda_functions/requirements.txt

Whitespace-only changes.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
numpy
2+
pandas
3+
sklearn

aws_lambda_app/template.yaml

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
AWSTemplateFormatVersion: '2010-09-09'
2+
Transform: AWS::Serverless-2016-10-31
3+
Description: >
4+
doubleml-serverless
5+
6+
Sample SAM Template for doubleml-serverless
7+
8+
Metadata:
9+
AWS::ServerlessRepo::Application:
10+
Name: doubleml-serverless
11+
Description: Double Machine Learning with AWS Lambda
12+
Author: Malte S. Kurz
13+
SemanticVersion: 0.0.1
14+
15+
Globals:
16+
Function:
17+
Timeout: 180
18+
19+
Parameters:
20+
CreateS3BucketForDataTransfer:
21+
Type: String
22+
AllowedValues:
23+
- 'y'
24+
- 'n'
25+
Default: 'n'
26+
Description: 'Choose y to create the S3 bucket for data transfer to lambda'
27+
28+
S3BucketName:
29+
Type: String
30+
Default: doubleml-serverless-data
31+
Description: 'Name of the S3 bucket used for data transfer to lambda'
32+
33+
Conditions:
34+
CreateS3Bucket: !Equals [ !Ref CreateS3BucketForDataTransfer, y ]
35+
36+
Resources:
37+
LambdaCVPredict:
38+
Type: AWS::Serverless::Function
39+
Properties:
40+
CodeUri: lambda_functions/
41+
Handler: cv_predict.lambda_cv_predict
42+
FunctionName: LambdaCVPredict
43+
Layers:
44+
- Ref: SklearnPandasLayer
45+
Runtime: python3.8
46+
Timeout: 180
47+
MemorySize: 1024
48+
Policies:
49+
- AWSLambdaBasicExecutionRole
50+
- S3ReadPolicy:
51+
BucketName:
52+
Ref: S3BucketName
53+
54+
SklearnPandasLayer:
55+
Type: AWS::Serverless::LayerVersion
56+
Properties:
57+
ContentUri: lambda_layers/
58+
CompatibleRuntimes:
59+
- python3.8
60+
Metadata:
61+
BuildMethod: python3.8
62+
63+
DataS3Bucket:
64+
Type: 'AWS::S3::Bucket'
65+
Condition: CreateS3Bucket
66+
Description: 'S3 bucket for data transfer to lambda'
67+
Properties:
68+
BucketName:
69+
Ref: S3BucketName
70+
PublicAccessBlockConfiguration:
71+
BlockPublicAcls: true
72+
BlockPublicPolicy: true
73+
IgnorePublicAcls: true
74+
RestrictPublicBuckets: true

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ pandas
55
scipy
66
sklearn
77
statsmodels
8+
aiobotocore

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
long_description=long_description,
1919
long_description_content_type='text/markdown',
2020
url='http://doubleml.org',
21-
packages=find_packages(),
21+
packages=find_packages(exclude=['aws_lambda_app*']),
2222
install_requires=[
2323
'joblib',
2424
'numpy',

0 commit comments

Comments
 (0)