-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Tyler
committed
Nov 16, 2021
1 parent
49c7209
commit 3eb8bfa
Showing
7 changed files
with
166 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/aws-config | ||
/Extractions |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
FROM python:3.8-slim-buster
WORKDIR /aws-s3

# Install the Python dependencies listed in requirements.txt.
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install the OS tools needed to fetch and unpack the AWS CLI v2 installer,
# run the installer, then remove the archive, the unpacked installer and the
# apt package lists in the same layer so none of them stay in the image.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise leave curl without a trust store for the HTTPS download.
RUN apt-get update \
    && apt-get install -yq --no-install-recommends ca-certificates curl unzip \
    && curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip \
    && unzip -q /tmp/awscliv2.zip -d /tmp \
    && /tmp/aws/install \
    && rm -rf /tmp/awscliv2.zip /tmp/aws /var/lib/apt/lists/*

# Exec form: python runs directly instead of under "/bin/sh -c", so signals
# sent to the container (e.g. on "docker stop") reach the Python process.
ENTRYPOINT ["python", "main.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# AWS S3 Bucket Extractor | ||
![s3-Extractor](demo_gif.gif) | ||
## Prerequisites | ||
|
||
- Docker | ||
- AWS Credentials (Access Key ID, Secret Access Key) | ||
|
||
## Existing AWS CLI credentials | ||
- If you have previously installed and configured the AWS CLI on your host, copy your configuration files (`config` and `credentials`) from `%UserProfile%/.aws` (Windows) or `$HOME/.aws` (Linux) into the `aws-config` folder
|
||
## Quick Start | ||
1. Run `docker-compose run --rm s3-extract-cli`
2. Enter AWS cli configuration details (follow interactive prompts) | ||
- AWS Access Key ID | ||
- AWS Secret Access Key | ||
- Default region name (ex: us-east-1) | ||
- Default output format (ex: json) | ||
3. Enter the target S3 URI (ex: `s3://bucket_name/subfolder`). NOTE: The URI can be copied to your clipboard from the AWS S3 Web Console.
|
||
## SubFolders | ||
- The Extractor preserves the last subfolder in the path and extracts recursively,
so all files beneath the given bucket/subdirectory will be downloaded
|
||
## Extracting All Files From a Bucket | ||
- To extract all files from a single S3 bucket, use the following S3 URI syntax (ex: `s3://bucket_name/*`)
- Nested Subdirectory structure is not currently preserved | ||
- *DISCLAIMER* - Downloading all files from a bucket is not recommended and can take a long time, depending
on the number of objects and their sizes. I recommend isolating files into a subfolder and targeting that folder and any subfolders that fall beneath it.
## Output | ||
- By default files will be downloaded into a normal directory in the `Extractions` folder | ||
- The Extractor will give you the option to compress the folder into a `.zip` archive
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
version: "2.1"

services:
  # Interactive CLI container built from the Dockerfile in this directory
  s3-extract-cli:
    build: .
    container_name: s3-extractor
    working_dir: /aws-s3
    # Keep stdin open so the interactive prompts can read user input
    stdin_open: true
    volumes:
      # Project directory mounted as the container's working directory
      - ".:/aws-s3"
      # AWS CLI configuration kept on the host in ./aws-config
      - "./aws-config:/root/.aws/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import boto3 | ||
import os | ||
from tqdm import tqdm | ||
from shutil import make_archive | ||
|
||
def confirm(prompt):
    """Ask a yes/no question until the user answers "y" or "n".

    Args:
        prompt (string): Text shown to the user on every attempt

    Returns:
        boolean: True when the answer is "y", False when it is "n"
    """
    while True:
        # Ignore surrounding whitespace and letter case so that answers such
        # as " Y" or "n " are accepted instead of triggering another prompt.
        answer = input(prompt).strip().lower()
        if answer in ("y", "n"):
            return answer == "y"
|
||
#Set AWS cli config | ||
def aws_login():
    """Ensure an AWS credentials file exists, configuring it via the aws cli.

    If a credentials file is already present, the current configuration is
    listed and the user may choose to delete it and configure again.
    Whenever the file is missing, `aws configure` is run until it exists.

    Returns:
        boolean: True once the flow has finished
    """
    credentials_file = './aws-config/credentials'

    # Existing credentials: show them and offer a fresh setup.
    while os.path.exists(credentials_file):
        print('\nCongrats you have pre-configured credentials 🔥👏\n')
        os.system('aws configure list')
        wants_reset = confirm('\nWould you like to logout and re-setup your configuration? [Y/N] -> ')
        if not wants_reset:
            return True
        os.unlink(credentials_file)

    # No credentials (never configured, or just removed): keep prompting
    # until the aws cli has written the file.
    while not os.path.exists(credentials_file):
        print('\n|-----⚙️ AWS Configuration⚙️-----|\n')
        os.system('aws configure')
    return True
|
||
# Run the interactive AWS credential setup before anything else
aws_login()

# S3 resource handle shared by the functions below
s3 = boto3.resource('s3')

# Name of the folder that receives the downloads
output_dir = 'Extractions'

# Absolute path of the download folder, resolved against the working
# directory at import time
base_dir = os.path.abspath(output_dir)
|
||
|
||
|
||
def extract_bucket_contents(bucket_name,folder_name):
    """Extracts the contents of a given AWS s3 bucket

    Every object whose key starts with ``folder_name`` is downloaded into
    ``<base_dir>/<bucket_name>/<folder_name>``. Only the last component of
    each key is used as the local file name, and a file that already exists
    locally is not downloaded again. Afterwards the user is offered the
    option to zip the bucket's download folder.

    The process working directory is left untouched; all paths are built
    from ``base_dir``.

    Args:
        bucket_name (string): Name of the bucket
        folder_name (string): Subfolder name (key prefix) within the bucket
    """
    bucket = s3.Bucket(bucket_name)
    # List the objects once and keep the result. Iterating the boto3
    # collection a second time would query S3 again, so the count shown to
    # the user could disagree with what is actually downloaded.
    objects = list(bucket.objects.filter(Prefix=folder_name))
    total_objects = len(objects)
    if not objects:
        print(f'\nNo objects found at the given location: s3://{bucket_name}/{folder_name}')
        return

    target_dir = f'{base_dir}/{bucket_name}/{folder_name}'
    os.makedirs(target_dir,exist_ok=True)
    print(f'\nExtracting {total_objects} object(s) from s3://{bucket_name}/{folder_name}...\n')
    with tqdm(total=total_objects,ncols=100,desc="Download Progress") as pbar:
        for obj in objects:
            pbar.update(1)
            filename = os.path.basename(obj.key)
            # Keys ending in "/" have no file name component; skip them.
            if not filename:
                continue
            destination = os.path.join(target_dir,filename)
            # Never overwrite a file that is already present locally.
            if not os.path.exists(destination):
                bucket.download_file(obj.key,destination)

    if confirm("\nWould you also like to compress the bucket contents to a zip file? [Y/N] -> "):
        # The archive is written next to the bucket folder: <base_dir>/<bucket_name>.zip
        archive_base = os.path.join(base_dir,bucket_name)
        print(f'\nWriting zip file {archive_base}.zip...')
        make_archive(archive_base,'zip',archive_base)
|
||
def get_s3_target():
    """Retrieves a s3 URI address from the user and
    parses it to return the associated bucket name and folder/prefix path

    The user is asked again until the input contains a bucket name followed
    by a "/" (ex: s3://bucket_name/subfolder or s3://bucket_name/*), so the
    function never returns without a usable target.

    Returns:
        tuple: bucket name | folder path
    """
    prompt = '\nEnter s3 URI (ex: s3://bucket_name/subfolder) -> '
    scheme = 's3://'
    while True:
        # Strip first so whitespace-only input counts as empty.
        s3_uri = input(prompt).strip()
        if not s3_uri:
            print('\n**s3 URI is required!**')
            continue
        # Remove the scheme only where it belongs, at the start of the URI.
        if s3_uri.startswith(scheme):
            s3_uri = s3_uri[len(scheme):]
        bucket_name, separator, folder_name = s3_uri.partition('/')
        if bucket_name and separator:
            # A "*" wildcard (s3://bucket_name/*) means "everything", which
            # is expressed as an empty prefix.
            return bucket_name,folder_name.strip('*')
        print('\n** Error parsing s3 URI please try again **')
|
||
|
||
def main():
    """Run the interactive extraction loop.

    Each round asks for an S3 target and extracts it. An error in one round
    is reported and the next round starts. The loop ends when the input
    stream is closed or the user presses Ctrl+C.
    """
    print("\n\n|------🪣 S3 Bucket Extractor🪣------|\n")
    while True:
        try:
            bucket_name, folder_name = get_s3_target()
            extract_bucket_contents(bucket_name,folder_name)
        except (EOFError, KeyboardInterrupt):
            # No more input will arrive (closed stdin) or the user asked to
            # quit; retrying would only fail again, so stop cleanly.
            print()
            return
        except Exception as e:
            # Report and continue with the next round. Looping here instead
            # of calling main() again keeps repeated failures from growing
            # the call stack.
            print(f'\n** Error extracting bucket contents: {e} **')


if __name__ == '__main__':
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
boto3 | ||
tqdm |