Skip to content

Commit 8fad52b

Browse files
committed
added some functionality for easy upload
1 parent ebf5c4b commit 8fad52b

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

doubleml_serverless/double_ml_data_aws.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from doubleml import DoubleMLData
2+
import os
3+
import boto3
4+
import pandas as pd
25

36

47
class DoubleMLDataS3(DoubleMLData):
@@ -18,6 +21,8 @@ def __init__(self,
1821
z_cols,
1922
use_other_treat_as_covariate)
2023
self._bucket = bucket
24+
self._file_ending = os.path.splitext(file_key)[1]
25+
assert self._file_ending in ['.csv']
2126
self._file_key = file_key
2227

2328
@property
@@ -36,6 +41,36 @@ def get_payload(self):
3641
}
3742
return payload
3843

44+
@classmethod
def from_s3(cls,
            bucket,
            file_key,
            y_col,
            d_cols,
            x_cols=None,
            z_cols=None,
            use_other_treat_as_covariate=True):
    """Alternate constructor: load a CSV object from S3 into a new instance.

    The object ``file_key`` is fetched from ``bucket`` with a default
    ``boto3`` S3 client, parsed with ``pandas.read_csv`` and handed to the
    class constructor together with the column specification.

    Parameters
    ----------
    bucket :
        Name of the S3 bucket that holds the data file.
    file_key :
        Key of the object inside ``bucket``. Only keys ending in ``.csv``
        are supported.
    y_col, d_cols, x_cols, z_cols, use_other_treat_as_covariate :
        Forwarded unchanged to the class constructor.

    Returns
    -------
    An instance of ``cls`` built from the downloaded data.

    Raises
    ------
    ValueError
        If ``file_key`` does not have a supported file ending.
    """
    # Validate before touching the network so an unsupported key fails fast,
    # and raise explicitly: a bare ``assert`` is removed under ``python -O``.
    file_ending = os.path.splitext(file_key)[1]
    if file_ending not in ['.csv']:
        raise ValueError(
            "Unsupported file ending {!r} for key {!r}; "
            "only '.csv' is supported.".format(file_ending, file_key))

    s3_client = boto3.client('s3')
    response = s3_client.get_object(Bucket=bucket,
                                    Key=file_key)
    body = response["Body"]
    try:
        # load csv as a pd.DataFrame
        data = pd.read_csv(body)
    finally:
        # Release the underlying HTTP connection once parsing is done.
        body.close()

    return cls(bucket, file_key, data, y_col, d_cols, x_cols, z_cols, use_other_treat_as_covariate)
63+
64+
def store_and_upload_to_s3(self):
    """Write the data to a local CSV file and upload that file to S3.

    The CSV is written to the current working directory under the base
    name of ``self.file_key`` and then uploaded to ``self.bucket`` under
    the full key ``self.file_key``. The local file is kept after the upload.

    Returns
    -------
    Whatever ``upload_file`` returns (boto3 documents this as ``None``;
    failures are reported via exceptions).
    """
    # Store the data locally as csv. The index is not written: ``from_s3``
    # reads the file back with a plain ``pd.read_csv``, which would otherwise
    # turn the index into an extra unnamed data column.
    file_name = os.path.split(self.file_key)[1]
    self.data.to_csv(file_name, index=False)

    s3_client = boto3.client('s3')
    response = s3_client.upload_file(Filename=file_name,
                                     Bucket=self.bucket,
                                     Key=self.file_key)
    return response
73+
3974

4075
class DoubleMLDataJson(DoubleMLData):
4176
def __init__(self,

0 commit comments

Comments
 (0)