Skip to content

Commit b8a5ee5

Browse files
author
Saul Frank
committed
S3 clean up
1 parent d2aaa2f commit b8a5ee5

File tree

8 files changed

+105
-209
lines changed

8 files changed

+105
-209
lines changed
File renamed without changes.
Lines changed: 19 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,35 @@
1-
""" Host: {name}.sharepoint.com
2-
TenantID: Directory or TenantID as per Azure portal
3-
ClientID: Azure Client ID or Application ID.
4-
Secret: Azure secret for client ID.
5-
SiteName: Name of the site to be looked up <- in url e.g https://{name}.sharepoint.com/sites/DataplanePython
1+
"""
2+
S3Client: Client for s3
3+
S3FilePath: /tmp/source.xlsx (needs UploadMethod="File")
4+
LocalFilePath: /General/hello.xlxs (where download method is File)
5+
Download Method: File or Object
66
FilePath: /General/hello.xlxs
7-
ProxyUse: Whether to use a proxy, true or false
8-
ProxyUrl: Proxy endpoint to use
9-
ProxyMethod: https or http, default https
107
"""
118

12-
def s3_download(Bucket, AccessKey, SecretKey, S3FilePath, DownloadMethod="Object", LocalFilePath="", ProxyUse=False, ProxyUrl="", ProxyMethod="https", EndPointUrl=None):
9+
def s3_download(S3Client, Bucket, S3FilePath, DownloadMethod="Object", LocalFilePath=""):
1310

14-
import boto3
15-
from botocore.exceptions import ClientError
1611
from datetime import datetime
1712
from io import BytesIO
1813

1914
# Start the timer
2015
start = datetime.now()
21-
22-
# Connect to S3 Bucket
23-
24-
if ProxyUse:
25-
proxies = {ProxyMethod: ProxyUrl}
26-
else:
27-
proxies = {}
2816

29-
s3_conn = boto3.resource(
30-
"s3",
31-
endpoint_url = EndPointUrl,
32-
aws_access_key_id=AccessKey,
33-
aws_secret_access_key=SecretKey,
34-
aws_session_token=None,
35-
config=boto3.session.Config(
36-
signature_version='s3v4',
37-
proxies=proxies
38-
),
39-
)
17+
# Download S3 file content to file - uses multi threaded download
18+
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_fileobj
4019

41-
bucket_conn = s3_conn.Bucket(Bucket)
20+
if DownloadMethod == "File":
21+
if LocalFilePath == "":
22+
duration = datetime.now() - start
23+
return {"result":"Fail", "duration": str(duration), "reason":"File method requires a local file file path"}
4224

43-
try:
44-
_ = bucket_conn.creation_date
45-
except ClientError as e:
46-
duration = datetime.now() - start
47-
return {
48-
"result":"Fail",
49-
"reason":"Auth fail",
50-
"duration": str(duration),
51-
"status": None,
52-
"error": e
53-
}
54-
55-
# Download S3 file content
56-
r=BytesIO()
57-
print(S3FilePath[1:])
58-
bucket_conn.download_fileobj(S3FilePath[1:], r)
25+
with open(LocalFilePath, 'wb') as data:
26+
S3Client.download_fileobj(Bucket=Bucket, Key=S3FilePath, Fileobj=data)
5927

60-
if DownloadMethod == "File":
61-
with open(LocalFilePath, 'wb') as f:
62-
r.seek(0)
63-
f.write(r.read())
6428
duration = datetime.now() - start
65-
return {"result":"OK", "duration": str(duration), "status": None, "FilePath": LocalFilePath}
29+
return {"result":"OK", "duration": str(duration), "FilePath": LocalFilePath}
30+
31+
# Download S3 file content to object
32+
objectGet = S3Client.get_object(Bucket=Bucket, Key=S3FilePath, ChecksumMode='ENABLED')["Body"].read()
6633

6734
duration = datetime.now() - start
68-
return {"result":"OK", "duration": str(duration), "status": None, "content": r}
35+
return {"result":"OK", "duration": str(duration), "content": objectGet}

src/dataplane/DataStorage/s3/s3_upload.py

Lines changed: 8 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,28 @@
11
"""
2-
AccessKey: S3 Access Key.
3-
SecretKey: S3 Secret Key.
2+
S3Client: Client for s3
43
SourceFilePath: /tmp/source.xlsx (needs UploadMethod="File")
54
TargetFilePath: /General/hello.xlxs
6-
UploadMethod
7-
ProxyUse: Whether to use a proxy, true or false
8-
ProxyUrl: Proxy endpoint to use
9-
ProxyMethod: https or http, default https
10-
EndPointUrl: Custom endpoint URL (eg. for minio)
5+
UploadMethod: Object or File
116
"""
127

13-
def s3_upload(Bucket, AccessKey, SecretKey, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject="", ProxyUse=False, ProxyUrl="", ProxyMethod="https", EndPointUrl=None):
8+
def s3_upload(S3Client, Bucket, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject=""):
149

15-
16-
import boto3
17-
from botocore.config import Config
18-
from botocore.exceptions import ClientError
1910
from datetime import datetime
2011
from io import BytesIO
21-
import os
2212

2313
start = datetime.now()
2414

2515
# Connect to S3 Bucket
2616

27-
if ProxyUse:
28-
proxies = {ProxyMethod: ProxyUrl}
29-
else:
30-
proxies = {}
31-
32-
s3_conn = boto3.resource(
33-
"s3",
34-
endpoint_url = EndPointUrl,
35-
aws_access_key_id=AccessKey,
36-
aws_secret_access_key=SecretKey,
37-
aws_session_token=None,
38-
config=boto3.session.Config(
39-
signature_version='s3v4',
40-
proxies=proxies
41-
),
42-
)
43-
44-
bucket_conn = s3_conn.Bucket(Bucket)
45-
46-
try:
47-
_ = bucket_conn.creation_date
48-
except ClientError as e:
49-
duration = datetime.now() - start
50-
return {
51-
"result":"Fail",
52-
"reason":"Auth fail",
53-
"duration": str(duration),
54-
"status": None,
55-
"error": e
56-
}
57-
5817
# ====== Obtain the file from disk ======
5918
if UploadMethod == "File":
60-
UploadObject = open(SourceFilePath, 'rb')
61-
else:
62-
temp_file = BytesIO()
63-
temp_file.write(UploadObject)
64-
temp_file.seek(0)
65-
UploadObject = temp_file
19+
with open(SourceFilePath, 'rb') as data:
20+
S3Client.upload_fileobj(Fileobj=data, Bucket=Bucket, Key=TargetFilePath)
21+
duration = datetime.now() - start
22+
return {"result":"OK", "duration": str(duration), "Path":TargetFilePath}
6623

6724
# ====== Upload file using boto3 upload_file ======
68-
bucket_conn.upload_fileobj(UploadObject, TargetFilePath)
25+
S3Client.upload_fileobj(Fileobj=BytesIO(UploadObject), Bucket=Bucket, Key=TargetFilePath)
6926

7027
duration = datetime.now() - start
7128

src/dataplane/DataStorage/s3/test_s3.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11

22
import os
3-
from dataplane.DataStorage.s3.s3_upload import s3_upload
4-
from dataplane.DataStorage.s3.s3_download import s3_download
3+
from .s3_upload import s3_upload
4+
from .s3_download import s3_download
55
from nanoid import generate
66
import os
77
from dotenv import load_dotenv
8+
import boto3
9+
from botocore.client import Config
810

911
def test_s3():
1012

@@ -15,11 +17,21 @@ def test_s3():
1517

1618
# Sharepoint connection
1719
load_dotenv()
18-
1920
RUN_ID = os.environ["DP_RUNID"]
20-
BUCKET = os.getenv('BUCKET')
21-
ACCESS_KEY = os.getenv('ACCESS_KEY')
22-
SECRET_KEY = os.getenv('SECRET_KEY')
21+
22+
# S3 connection
23+
S3Connect = boto3.client(
24+
's3',
25+
endpoint_url="http://minio:9000",
26+
aws_access_key_id="admin",
27+
aws_secret_access_key="hello123",
28+
config=Config(signature_version='s3v4'),
29+
region_name='us-east-1'
30+
31+
)
32+
33+
bucket = "dataplanebucket"
34+
2335
CURRENT_DIRECTORY = os.path.realpath(os.path.dirname(__file__))
2436

2537
if os.path.exists(CURRENT_DIRECTORY+"/test_cities_delete.csv"):
@@ -29,28 +41,23 @@ def test_s3():
2941
# s3_upload(Bucket, AccessKey, SecretKey, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject="", ProxyUse=False, ProxyUrl="", ProxyMethod="https", EndPointUrl=None)
3042
print(CURRENT_DIRECTORY)
3143
# Store the data with key hello - run id will be attached
32-
rs = s3_upload(Bucket=BUCKET,
33-
AccessKey=ACCESS_KEY,
34-
SecretKey=SECRET_KEY,
35-
TargetFilePath=f"/General/myfile {RUN_ID}.csv",
36-
SourceFilePath=CURRENT_DIRECTORY+"/test_cities.csv",
37-
UploadMethod="File",
38-
EndPointUrl="http://minio:9000"
44+
rs = s3_upload(Bucket=bucket,
45+
S3Client=S3Connect,
46+
TargetFilePath=f"/s3test/myfile {RUN_ID}.csv",
47+
SourceFilePath=CURRENT_DIRECTORY+"/test_s3_cities.csv",
48+
UploadMethod="File"
3949
)
4050
print(rs)
4151
assert rs["result"]=="OK"
4252

4353

4454
# ---------- RETRIEVE PARQUET FROM S3 ------------
4555

46-
rs = s3_download(Bucket=BUCKET,
47-
AccessKey=ACCESS_KEY,
48-
SecretKey=SECRET_KEY,
49-
S3FilePath=f"/General/myfile {RUN_ID}.csv",
56+
rs = s3_download(Bucket=bucket,
57+
S3Client=S3Connect,
58+
S3FilePath=f"/s3test/myfile {RUN_ID}.csv",
5059
LocalFilePath=CURRENT_DIRECTORY+"/test_cities_delete.csv",
51-
DownloadMethod="File",
52-
ProxyUse=False, ProxyUrl="", ProxyMethod="https",
53-
EndPointUrl='http://minio:9000'
60+
DownloadMethod="File"
5461
)
5562
print(rs)
5663
assert rs["result"]=="OK"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"LatD", "LatM", "LatS", "NS", "LonD", "LonM", "LonS", "EW", "City", "State"
2+
41, 5, 59, "N", 80, 39, 0, "W", "Youngstown", OH
3+
42, 52, 48, "N", 97, 23, 23, "W", "Yankton", SD
4+
46, 35, 59, "N", 120, 30, 36, "W", "Yakima", WA
5+
42, 16, 12, "N", 71, 48, 0, "W", "Worcester", MA
6+
43, 37, 48, "N", 89, 46, 11, "W", "Wisconsin Dells", WI
7+
36, 5, 59, "N", 80, 15, 0, "W", "Winston-Salem", NC
8+
49, 52, 48, "N", 97, 9, 0, "W", "Winnipeg", MB
9+
39, 11, 23, "N", 78, 9, 36, "W", "Winchester", VA
10+
34, 14, 24, "N", 77, 55, 11, "W", "Wilmington", NC
11+
39, 45, 0, "N", 75, 33, 0, "W", "Wilmington", DE
12+
48, 9, 0, "N", 103, 37, 12, "W", "Williston", ND
13+
41, 15, 0, "N", 77, 0, 0, "W", "Williamsport", PA
14+
37, 40, 48, "N", 82, 16, 47, "W", "Williamson", WV

src/dataplane/DataStorage/s3/test_s3_nonroot.py

Lines changed: 0 additions & 64 deletions
This file was deleted.

0 commit comments

Comments
 (0)