Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions filename
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"LatD", "LatM", "LatS", "NS", "LonD", "LonM", "LonS", "EW", "City", "State"
41, 5, 59, "N", 80, 39, 0, "W", "Youngstown", OH
42, 52, 48, "N", 97, 23, 23, "W", "Yankton", SD
46, 35, 59, "N", 120, 30, 36, "W", "Yakima", WA
42, 16, 12, "N", 71, 48, 0, "W", "Worcester", MA
43, 37, 48, "N", 89, 46, 11, "W", "Wisconsin Dells", WI
36, 5, 59, "N", 80, 15, 0, "W", "Winston-Salem", NC
49, 52, 48, "N", 97, 9, 0, "W", "Winnipeg", MB
39, 11, 23, "N", 78, 9, 36, "W", "Winchester", VA
34, 14, 24, "N", 77, 55, 11, "W", "Wilmington", NC
39, 45, 0, "N", 75, 33, 0, "W", "Wilmington", DE
48, 9, 0, "N", 103, 37, 12, "W", "Williston", ND
41, 15, 0, "N", 77, 0, 0, "W", "Williamsport", PA
37, 40, 48, "N", 82, 16, 47, "W", "Williamson", WV
Empty file.
35 changes: 35 additions & 0 deletions src/dataplane/DataStorage/s3/s3_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
S3Client: Client for s3
S3FilePath: /tmp/source.xlsx (needs UploadMethod="File")
LocalFilePath: /General/hello.xlxs (where download method is File)
Download Method: File or Object
FilePath: /General/hello.xlxs
"""

def s3_download(S3Client, Bucket, S3FilePath, DownloadMethod="Object", LocalFilePath=""):
    """Download an object from S3 either to a local file or into memory.

    Parameters:
        S3Client: boto3 S3 client used to perform the download.
        Bucket: Name of the S3 bucket.
        S3FilePath: Key of the object inside the bucket.
        DownloadMethod: "File" writes the object to LocalFilePath;
            "Object" (default) returns the raw bytes under "content".
        LocalFilePath: Local destination path, required when
            DownloadMethod == "File".

    Returns:
        dict with "result" ("OK"/"Fail"), "duration" (str), and either
        "FilePath" (File method), "content" (Object method) or "reason"
        (on failure).
    """

    from datetime import datetime

    # Start the timer so every exit path can report elapsed time
    start = datetime.now()

    # Download S3 file content to file - uses multi threaded download
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_fileobj
    if DownloadMethod == "File":
        if LocalFilePath == "":
            duration = datetime.now() - start
            return {"result":"Fail", "duration": str(duration), "reason":"File method requires a local file path"}

        with open(LocalFilePath, 'wb') as data:
            S3Client.download_fileobj(Bucket=Bucket, Key=S3FilePath, Fileobj=data)

        duration = datetime.now() - start
        return {"result":"OK", "duration": str(duration), "FilePath": LocalFilePath}

    # Download S3 file content into memory and return the raw bytes
    objectGet = S3Client.get_object(Bucket=Bucket, Key=S3FilePath, ChecksumMode='ENABLED')["Body"].read()

    duration = datetime.now() - start
    return {"result":"OK", "duration": str(duration), "content": objectGet}
29 changes: 29 additions & 0 deletions src/dataplane/DataStorage/s3/s3_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
S3Client: Client for s3
SourceFilePath: /tmp/source.xlsx (needs UploadMethod="File")
TargetFilePath: /General/hello.xlxs
UploadMethod: Object or File
"""

def s3_upload(S3Client, Bucket, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject=""):
    """Upload a local file or an in-memory object to S3.

    Parameters:
        S3Client: boto3 S3 client used to perform the upload.
        Bucket: Name of the target S3 bucket.
        TargetFilePath: Key to store the object under in the bucket.
        SourceFilePath: Path of the local file to upload (File method only).
        UploadMethod: "File" streams SourceFilePath from disk; "Object"
            (default) uploads the payload passed in UploadObject.
        UploadObject: Payload for the Object method; str values are
            UTF-8 encoded before upload.

    Returns:
        dict with "result", "duration" and, for the File method, "Path".
    """

    from datetime import datetime
    from io import BytesIO

    # Start the timer so both exit paths can report elapsed time
    start = datetime.now()

    # ====== Stream the file from disk to S3 ======
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.upload_fileobj
    if UploadMethod == "File":
        with open(SourceFilePath, 'rb') as data:
            S3Client.upload_fileobj(Fileobj=data, Bucket=Bucket, Key=TargetFilePath)
        duration = datetime.now() - start
        return {"result":"OK", "duration": str(duration), "Path":TargetFilePath}

    # ====== Upload in-memory object using boto3 upload_fileobj ======
    # upload_fileobj requires a bytes stream; encode str payloads so the
    # default UploadObject="" does not raise a TypeError inside BytesIO.
    payload = UploadObject.encode() if isinstance(UploadObject, str) else UploadObject
    S3Client.upload_fileobj(Fileobj=BytesIO(payload), Bucket=Bucket, Key=TargetFilePath)

    duration = datetime.now() - start

    return {"result":"OK", "duration": str(duration)}
71 changes: 71 additions & 0 deletions src/dataplane/DataStorage/s3/test_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

import os
from .s3_upload import s3_upload
from .s3_download import s3_download
from nanoid import generate
import os
from dotenv import load_dotenv
import boto3
from botocore.client import Config

def test_s3():
    """Integration test: upload the sample cities CSV to S3 (MinIO) using the
    File method, then download it back to disk and assert both calls succeed.

    Requires a reachable MinIO instance at http://minio:9000.
    """

    # ---------- DATAPLANE RUN ------------

    # Dataplane run id - random id so the uploaded key is unique per test run
    os.environ["DP_RUNID"] = generate('1234567890abcdef', 10)

    # Load environment variables from a local .env file, if present
    load_dotenv()
    RUN_ID = os.environ["DP_RUNID"]

    # S3 connection - MinIO endpoint with static test credentials
    S3Connect = boto3.client(
        's3',
        endpoint_url="http://minio:9000",
        aws_access_key_id="admin",
        aws_secret_access_key="hello123",
        config=Config(signature_version='s3v4'),
        region_name='us-east-1'

    )

    bucket = "dataplanebucket"

    CURRENT_DIRECTORY = os.path.realpath(os.path.dirname(__file__))

    # Remove any leftover download from a previous run so the test starts clean
    if os.path.exists(CURRENT_DIRECTORY+"/test_cities_delete.csv"):
        os.remove(CURRENT_DIRECTORY+"/test_cities_delete.csv")

    # ---------- STORE FILE TO S3 ------------
    # s3_upload(S3Client, Bucket, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject="")
    print(CURRENT_DIRECTORY)
    # Upload the sample CSV - the run id is embedded in the target key
    rs = s3_upload(Bucket=bucket,
    S3Client=S3Connect,
    TargetFilePath=f"/s3test/myfile {RUN_ID}.csv",
    SourceFilePath=CURRENT_DIRECTORY+"/test_s3_cities.csv",
    UploadMethod="File"
    )
    print(rs)
    assert rs["result"]=="OK"


    # ---------- RETRIEVE CSV FILE FROM S3 ------------

    # Download the just-uploaded object back to a throwaway local file
    rs = s3_download(Bucket=bucket,
    S3Client=S3Connect,
    S3FilePath=f"/s3test/myfile {RUN_ID}.csv",
    LocalFilePath=CURRENT_DIRECTORY+"/test_cities_delete.csv",
    DownloadMethod="File"
    )
    print(rs)
    assert rs["result"]=="OK"
14 changes: 14 additions & 0 deletions src/dataplane/DataStorage/s3/test_s3_cities.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"LatD", "LatM", "LatS", "NS", "LonD", "LonM", "LonS", "EW", "City", "State"
41, 5, 59, "N", 80, 39, 0, "W", "Youngstown", OH
42, 52, 48, "N", 97, 23, 23, "W", "Yankton", SD
46, 35, 59, "N", 120, 30, 36, "W", "Yakima", WA
42, 16, 12, "N", 71, 48, 0, "W", "Worcester", MA
43, 37, 48, "N", 89, 46, 11, "W", "Wisconsin Dells", WI
36, 5, 59, "N", 80, 15, 0, "W", "Winston-Salem", NC
49, 52, 48, "N", 97, 9, 0, "W", "Winnipeg", MB
39, 11, 23, "N", 78, 9, 36, "W", "Winchester", VA
34, 14, 24, "N", 77, 55, 11, "W", "Wilmington", NC
39, 45, 0, "N", 75, 33, 0, "W", "Wilmington", DE
48, 9, 0, "N", 103, 37, 12, "W", "Williston", ND
41, 15, 0, "N", 77, 0, 0, "W", "Williamsport", PA
37, 40, 48, "N", 82, 16, 47, "W", "Williamson", WV
77 changes: 77 additions & 0 deletions src/dataplane/DataStorage/s3/test_s3_object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

import os
from .s3_download import s3_download
from .s3_upload import s3_upload
from nanoid import generate
import os
from dotenv import load_dotenv
import boto3
from botocore.client import Config

def test_s3_object():
    """Integration test: upload an in-memory object to S3 (MinIO) using the
    Object method, download it back as bytes, and write it to a local file.

    Requires a reachable MinIO instance at http://minio:9000.
    """

    # ---------- DATAPLANE RUN ------------

    # Dataplane run id - random id so the uploaded key is unique per test run
    os.environ["DP_RUNID"] = generate('1234567890abcdef', 10)

    # Load environment variables from a local .env file, if present
    load_dotenv()

    RUN_ID = os.environ["DP_RUNID"]

    # S3 connection - MinIO endpoint with static test credentials
    S3Connect = boto3.client(
        's3',
        endpoint_url="http://minio:9000",
        aws_access_key_id="admin",
        aws_secret_access_key="hello123",
        config=Config(signature_version='s3v4'),
        region_name='us-east-1'

    )

    bucket = "dataplanebucket"

    CURRENT_DIRECTORY = os.path.realpath(os.path.dirname(__file__))

    # Remove any leftover download from a previous run so the test starts clean
    if os.path.exists(CURRENT_DIRECTORY+"/test_cities_delete_object.csv"):
        os.remove(CURRENT_DIRECTORY+"/test_cities_delete_object.csv")

    # ---------- STORE OBJECT TO S3 ------------
    # s3_upload(S3Client, Bucket, TargetFilePath, SourceFilePath="/tmp/default.txt", UploadMethod="Object", UploadObject="")
    print(CURRENT_DIRECTORY)

    FileSize = os.path.getsize(CURRENT_DIRECTORY+"/test_s3_cities.csv")
    print("File size dir:", FileSize)
    # Read the sample CSV into memory; use a context manager so the file
    # handle is closed deterministically (the original leaked it).
    with open(CURRENT_DIRECTORY+"/test_s3_cities.csv", 'rb') as source:
        UploadObject = source.read()
    # Upload the in-memory bytes - the run id is embedded in the target key
    rs = s3_upload(Bucket=bucket,
    S3Client=S3Connect,
    TargetFilePath=f"/s3test/object myfile {RUN_ID}.csv",
    UploadObject=UploadObject,
    UploadMethod="Object"
    )
    print(rs)
    assert rs["result"]=="OK"


    # ---------- RETRIEVE OBJECT FROM S3 ------------
    # Object method returns the raw bytes under "content"
    rs = s3_download(Bucket=bucket,
    S3Client=S3Connect,
    S3FilePath=f"/s3test/object myfile {RUN_ID}.csv",
    DownloadMethod="Object"
    )
    print(rs)
    assert rs["result"]=="OK"

    # Persist the downloaded bytes so a human can inspect the round trip
    with open(CURRENT_DIRECTORY+"/test_cities_delete_object.csv", 'wb') as f:
        f.write(rs["content"])
7 changes: 7 additions & 0 deletions src/dataplane/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from dataplane.Microsoft.Sharepoint.sharepoint_download import sharepoint_download
from dataplane.Microsoft.Sharepoint.sharepoint_upload import sharepoint_upload

# Data storage
from dataplane.DataStorage.s3.s3_download import s3_download
from dataplane.DataStorage.s3.s3_upload import s3_upload

__all__ = [

Expand All @@ -34,4 +37,8 @@
"sharepoint_download",
"sharepoint_upload",

# Data storage providers
"s3_download",
"s3_upload"

]