Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions filename
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"LatD", "LatM", "LatS", "NS", "LonD", "LonM", "LonS", "EW", "City", "State"
41, 5, 59, "N", 80, 39, 0, "W", "Youngstown", OH
42, 52, 48, "N", 97, 23, 23, "W", "Yankton", SD
46, 35, 59, "N", 120, 30, 36, "W", "Yakima", WA
42, 16, 12, "N", 71, 48, 0, "W", "Worcester", MA
43, 37, 48, "N", 89, 46, 11, "W", "Wisconsin Dells", WI
36, 5, 59, "N", 80, 15, 0, "W", "Winston-Salem", NC
49, 52, 48, "N", 97, 9, 0, "W", "Winnipeg", MB
39, 11, 23, "N", 78, 9, 36, "W", "Winchester", VA
34, 14, 24, "N", 77, 55, 11, "W", "Wilmington", NC
39, 45, 0, "N", 75, 33, 0, "W", "Wilmington", DE
48, 9, 0, "N", 103, 37, 12, "W", "Williston", ND
41, 15, 0, "N", 77, 0, 0, "W", "Williamsport", PA
37, 40, 48, "N", 82, 16, 47, "W", "Williamson", WV
Empty file.
35 changes: 35 additions & 0 deletions src/dataplane/DataStorage/s3/s3_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
S3Client: Client for s3
S3FilePath: /tmp/source.xlsx (needs UploadMethod="File")
LocalFilePath: /General/hello.xlxs (where download method is File)
Download Method: File or Object
FilePath: /General/hello.xlxs
"""

def s3_download(S3Client, Bucket, S3FilePath, DownloadMethod="Object", LocalFilePath=""):
    """Download an object from S3 either to a local file or into memory.

    Parameters:
        S3Client: boto3 S3 client used to perform the download.
        Bucket: Name of the S3 bucket.
        S3FilePath: Key of the object inside the bucket.
        DownloadMethod: "File" writes the object to LocalFilePath;
            "Object" (default) returns the raw bytes under "content".
        LocalFilePath: Local destination path, required when
            DownloadMethod == "File".

    Returns:
        dict with "result" ("OK"/"Fail"), "duration" (str), and either
        "FilePath" (File method), "content" (Object method) or "reason"
        (on failure).
    """

    from datetime import datetime

    # Start the timer so every exit path can report elapsed time
    start = datetime.now()

    # Download S3 file content to file - uses multi threaded download
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_fileobj
    if DownloadMethod == "File":
        if LocalFilePath == "":
            duration = datetime.now() - start
            return {"result":"Fail", "duration": str(duration), "reason":"File method requires a local file path"}

        with open(LocalFilePath, 'wb') as data:
            S3Client.download_fileobj(Bucket=Bucket, Key=S3FilePath, Fileobj=data)

        duration = datetime.now() - start
        return {"result":"OK", "duration": str(duration), "FilePath": LocalFilePath}

    # Download S3 file content into memory and return the raw bytes
    objectGet = S3Client.get_object(Bucket=Bucket, Key=S3FilePath, ChecksumMode='ENABLED')["Body"].read()

    duration = datetime.now() - start
    return {"result":"OK", "duration": str(duration), "content": objectGet}
29 changes: 29 additions & 0 deletions src/dataplane/DataStorage/s3/s3_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
S3Client: Client for s3
SourceFilePath: /tmp/source.xlsx (needs UploadMethod="File")
TargetFilePath: /General/hello.xlxs
UploadMethod: Object or File
"""

def s3_upload(S3Client, Bucket, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject=""):
    """Upload a local file or an in-memory object to S3.

    Parameters:
        S3Client: boto3 S3 client used to perform the upload.
        Bucket: Name of the target S3 bucket.
        TargetFilePath: Key to store the object under in the bucket.
        SourceFilePath: Path of the local file to upload (File method only).
        UploadMethod: "File" streams SourceFilePath from disk; "Object"
            (default) uploads the payload passed in UploadObject.
        UploadObject: Payload for the Object method; str values are
            UTF-8 encoded before upload.

    Returns:
        dict with "result", "duration" and, for the File method, "Path".
    """

    from datetime import datetime
    from io import BytesIO

    # Start the timer so both exit paths can report elapsed time
    start = datetime.now()

    # ====== Stream the file from disk to S3 ======
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.upload_fileobj
    if UploadMethod == "File":
        with open(SourceFilePath, 'rb') as data:
            S3Client.upload_fileobj(Fileobj=data, Bucket=Bucket, Key=TargetFilePath)
        duration = datetime.now() - start
        return {"result":"OK", "duration": str(duration), "Path":TargetFilePath}

    # ====== Upload in-memory object using boto3 upload_fileobj ======
    # upload_fileobj requires a bytes stream; encode str payloads so the
    # default UploadObject="" does not raise a TypeError inside BytesIO.
    payload = UploadObject.encode() if isinstance(UploadObject, str) else UploadObject
    S3Client.upload_fileobj(Fileobj=BytesIO(payload), Bucket=Bucket, Key=TargetFilePath)

    duration = datetime.now() - start

    return {"result":"OK", "duration": str(duration)}
71 changes: 71 additions & 0 deletions src/dataplane/DataStorage/s3/test_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

import os
from .s3_upload import s3_upload
from .s3_download import s3_download
from nanoid import generate
import os
from dotenv import load_dotenv
import boto3
from botocore.client import Config

def test_s3():
    """Integration test: upload the sample cities CSV to S3 (MinIO) using the
    File method, then download it back to disk and assert both calls succeed.

    Requires a reachable MinIO instance at http://minio:9000.
    """

    # ---------- DATAPLANE RUN ------------

    # Dataplane run id - random id so the uploaded key is unique per test run
    os.environ["DP_RUNID"] = generate('1234567890abcdef', 10)

    # Load environment variables from a local .env file, if present
    load_dotenv()
    RUN_ID = os.environ["DP_RUNID"]

    # S3 connection - MinIO endpoint with static test credentials
    S3Connect = boto3.client(
        's3',
        endpoint_url="http://minio:9000",
        aws_access_key_id="admin",
        aws_secret_access_key="hello123",
        config=Config(signature_version='s3v4'),
        region_name='us-east-1'

    )

    bucket = "dataplanebucket"

    CURRENT_DIRECTORY = os.path.realpath(os.path.dirname(__file__))

    # Remove any leftover download from a previous run so the test starts clean
    if os.path.exists(CURRENT_DIRECTORY+"/test_cities_delete.csv"):
        os.remove(CURRENT_DIRECTORY+"/test_cities_delete.csv")

    # ---------- STORE FILE TO S3 ------------
    # s3_upload(S3Client, Bucket, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject="")
    print(CURRENT_DIRECTORY)
    # Upload the sample CSV - the run id is embedded in the target key
    rs = s3_upload(Bucket=bucket,
    S3Client=S3Connect,
    TargetFilePath=f"/s3test/myfile {RUN_ID}.csv",
    SourceFilePath=CURRENT_DIRECTORY+"/test_s3_cities.csv",
    UploadMethod="File"
    )
    print(rs)
    assert rs["result"]=="OK"


    # ---------- RETRIEVE CSV FILE FROM S3 ------------

    # Download the just-uploaded object back to a throwaway local file
    rs = s3_download(Bucket=bucket,
    S3Client=S3Connect,
    S3FilePath=f"/s3test/myfile {RUN_ID}.csv",
    LocalFilePath=CURRENT_DIRECTORY+"/test_cities_delete.csv",
    DownloadMethod="File"
    )
    print(rs)
    assert rs["result"]=="OK"
14 changes: 14 additions & 0 deletions src/dataplane/DataStorage/s3/test_s3_cities.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"LatD", "LatM", "LatS", "NS", "LonD", "LonM", "LonS", "EW", "City", "State"
41, 5, 59, "N", 80, 39, 0, "W", "Youngstown", OH
42, 52, 48, "N", 97, 23, 23, "W", "Yankton", SD
46, 35, 59, "N", 120, 30, 36, "W", "Yakima", WA
42, 16, 12, "N", 71, 48, 0, "W", "Worcester", MA
43, 37, 48, "N", 89, 46, 11, "W", "Wisconsin Dells", WI
36, 5, 59, "N", 80, 15, 0, "W", "Winston-Salem", NC
49, 52, 48, "N", 97, 9, 0, "W", "Winnipeg", MB
39, 11, 23, "N", 78, 9, 36, "W", "Winchester", VA
34, 14, 24, "N", 77, 55, 11, "W", "Wilmington", NC
39, 45, 0, "N", 75, 33, 0, "W", "Wilmington", DE
48, 9, 0, "N", 103, 37, 12, "W", "Williston", ND
41, 15, 0, "N", 77, 0, 0, "W", "Williamsport", PA
37, 40, 48, "N", 82, 16, 47, "W", "Williamson", WV
77 changes: 77 additions & 0 deletions src/dataplane/DataStorage/s3/test_s3_object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

import os
from .s3_download import s3_download
from .s3_upload import s3_upload
from nanoid import generate
import os
from dotenv import load_dotenv
import boto3
from botocore.client import Config

def test_s3_object():
    """Integration test: upload an in-memory object to S3 (MinIO) using the
    Object method, download it back as bytes, and write it to a local file.

    Requires a reachable MinIO instance at http://minio:9000.
    """

    # ---------- DATAPLANE RUN ------------

    # Dataplane run id - random id so the uploaded key is unique per test run
    os.environ["DP_RUNID"] = generate('1234567890abcdef', 10)

    # Load environment variables from a local .env file, if present
    load_dotenv()

    RUN_ID = os.environ["DP_RUNID"]

    # S3 connection - MinIO endpoint with static test credentials
    S3Connect = boto3.client(
        's3',
        endpoint_url="http://minio:9000",
        aws_access_key_id="admin",
        aws_secret_access_key="hello123",
        config=Config(signature_version='s3v4'),
        region_name='us-east-1'

    )

    bucket = "dataplanebucket"

    CURRENT_DIRECTORY = os.path.realpath(os.path.dirname(__file__))

    # Remove any leftover download from a previous run so the test starts clean
    if os.path.exists(CURRENT_DIRECTORY+"/test_cities_delete_object.csv"):
        os.remove(CURRENT_DIRECTORY+"/test_cities_delete_object.csv")

    # ---------- STORE OBJECT TO S3 ------------
    # s3_upload(S3Client, Bucket, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject="")
    print(CURRENT_DIRECTORY)

    FileSize = os.path.getsize(CURRENT_DIRECTORY+"/test_s3_cities.csv")
    print("File size dir:", FileSize)
    # Read the sample CSV into memory; use a context manager so the file
    # handle is closed deterministically (the original leaked it).
    with open(CURRENT_DIRECTORY+"/test_s3_cities.csv", 'rb') as source:
        UploadObject = source.read()
    # Upload the in-memory bytes - the run id is embedded in the target key
    rs = s3_upload(Bucket=bucket,
    S3Client=S3Connect,
    TargetFilePath=f"/s3test/object myfile {RUN_ID}.csv",
    UploadObject=UploadObject,
    UploadMethod="Object"
    )
    print(rs)
    assert rs["result"]=="OK"


    # ---------- RETRIEVE OBJECT FROM S3 ------------
    # Object method returns the raw bytes under "content"
    rs = s3_download(Bucket=bucket,
    S3Client=S3Connect,
    S3FilePath=f"/s3test/object myfile {RUN_ID}.csv",
    DownloadMethod="Object"
    )
    print(rs)
    assert rs["result"]=="OK"

    # Persist the downloaded bytes so a human can inspect the round trip
    with open(CURRENT_DIRECTORY+"/test_cities_delete_object.csv", 'wb') as f:
        f.write(rs["content"])
7 changes: 7 additions & 0 deletions src/dataplane/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from dataplane.Microsoft.Sharepoint.sharepoint_download import sharepoint_download
from dataplane.Microsoft.Sharepoint.sharepoint_upload import sharepoint_upload

# Data storage
from dataplane.DataStorage.s3.s3_download import s3_download
from dataplane.DataStorage.s3.s3_upload import s3_upload

__all__ = [

Expand All @@ -34,4 +37,8 @@
"sharepoint_download",
"sharepoint_upload",

# Data storage providers
"s3_download",
"s3_upload"

]