Skip to content

Commit

Permalink
Add Project files
Browse files Browse the repository at this point in the history
  • Loading branch information
Tyler committed Nov 16, 2021
1 parent 49c7209 commit 3eb8bfa
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Mounted AWS CLI config/credentials directory — contains secrets, never commit.
/aws-config
# Local download target for extracted bucket contents.
/Extractions
14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
FROM python:3.8-slim-buster
WORKDIR /aws-s3

# Install Python dependencies first so this layer stays cached until
# requirements.txt actually changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install the AWS CLI v2 (needs curl + unzip). Do the download, install and
# cleanup in ONE layer so the apt lists and the installer archive do not
# bloat the final image.
RUN apt-get update && apt-get install -yq curl unzip \
    && rm -rf /var/lib/apt/lists/* \
    && curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
    && unzip awscliv2.zip \
    && ./aws/install \
    && rm -rf awscliv2.zip aws

# Exec form so python is PID 1 and receives signals (Ctrl-C) directly,
# instead of being wrapped in /bin/sh by the shell form.
ENTRYPOINT ["python", "main.py"]
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# AWS S3 Bucket Extractor
![s3-Extractor](demo_gif.gif)
## Prerequisites

- Docker
- AWS Credentials (Access Key ID, Secret Access Key)

## Existing AWS CLI credentials
- If you have previously installed and configured the AWS CLI on your host, copy your configuration files (config & credentials) from `%UserProfile%/.aws` (Windows) or `$HOME/.aws` (Linux) to the respective files in `aws-config`

## Quick Start
1. RUN `docker-compose run --rm s3-extract-cli`
2. Enter AWS cli configuration details (follow interactive prompts)
- AWS Access Key ID
- AWS Secret Access Key
- Default region name (ex: us-east-1)
- Default output format (ex: json)
3. Enter the target S3 URI address (ex: s3://bucket_name/subfolder) `NOTE: This can be copied to your clipboard on the AWS S3 Web Console`

## SubFolders
- The Extractor preserves the last subfolder in the path and extracts recursively,
so all files beneath the given bucket/subdirectory will be downloaded

## Extracting All Files From a Bucket
- To extract all files from a single s3 bucket use the following s3 URI syntax (ex: s3://bucket_name/*)
- Nested subdirectory structure is not currently preserved
- *DISCLAIMER* - Downloading all files from a bucket is not recommended and can take a long time, depending
on the number and size of the objects. It is recommended to isolate files into a subfolder and target that folder and any subfolders that fall beneath it.
## Output
- By default files will be downloaded into a normal directory in the `Extractions` folder
- Extractor will give the option to `compress` the folder to a `.zip` format

Binary file added demo_gif.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: '2.1'
services:
  # Interactive CLI container; start with `docker-compose run --rm s3-extract-cli`.
  s3-extract-cli:
    build: .
    container_name: s3-extractor
    working_dir: /aws-s3
    # Keep stdin open so the script's interactive prompts work.
    stdin_open: true
    volumes:
      # Project dir mounted in so Extractions/ lands on the host.
      - .:/aws-s3
      # Persist AWS CLI config/credentials between runs.
      - ./aws-config:/root/.aws/
106 changes: 106 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import boto3
import os
from tqdm import tqdm
from shutil import make_archive

def confirm(prompt):
    """Ask a yes/no question until the user types 'y' or 'n' (any case).

    Returns:
        bool: True for 'y', False for 'n'.
    """
    while True:
        reply = input(prompt).lower()
        if reply in ("y", "n"):
            return reply == "y"

# Set AWS cli config
def aws_login():
    """Prompts user for aws credentials and default config
    via the aws cli
    Returns:
        boolean: status of login
    """
    while True:
        if not os.path.exists('./aws-config/credentials'):
            # No stored credentials yet: keep running the interactive
            # configure prompt until a credentials file appears.
            while not os.path.exists('./aws-config/credentials'):
                print('\n|-----⚙️ AWS Configuration⚙️-----|\n')
                os.system('aws configure')
            return True
        print('\nCongrats you have pre-configured credentials 🔥👏\n')
        os.system('aws configure list')
        if confirm('\nWould you like to logout and re-setup your configuration? [Y/N] -> '):
            # Drop the stored credentials and fall through to reconfigure.
            os.unlink('./aws-config/credentials')
            continue
        return True

# Prompt for / verify AWS credentials before touching S3 (runs at import).
aws_login()

# Module-level boto3 S3 resource shared by the extraction helpers below.
s3 = boto3.resource('s3')

# Name of the local directory downloaded objects are written into.
output_dir = 'Extractions'

# Absolute path to the output directory, anchored at the startup working dir.
base_dir = os.path.join(os.path.abspath(os.curdir),output_dir)



def extract_bucket_contents(bucket_name,folder_name):
    """Download every object under a key prefix of an AWS S3 bucket.

    Objects are written into ``<base_dir>/<bucket_name>/<folder_name>``,
    flattened to their base filename; already-present files are skipped.
    Afterwards the user may optionally zip the bucket folder.

    Args:
        bucket_name (string): Name of the bucket
        folder_name (string): Subfolder (key prefix) within the bucket
    """
    bucket = s3.Bucket(bucket_name)
    # Materialize the listing ONCE: iterating the lazy filtered collection
    # twice (once to count, once to download) re-issues the ListObjects
    # API calls against S3.
    objects = list(bucket.objects.filter(Prefix=folder_name))
    total_objects = len(objects)
    if total_objects > 0:
        target_dir = os.path.join(base_dir, bucket_name, folder_name)
        os.makedirs(target_dir, exist_ok=True)
        os.chdir(target_dir)
        print(f'\nExtracting {total_objects} object(s) from s3://{bucket_name}/{folder_name}...\n')
        with tqdm(total=total_objects,ncols=100,desc="Download Progress") as pbar:
            for obj in objects:
                pbar.update(1)
                path, filename = os.path.split(obj.key)
                # Skip "directory" placeholder keys (key ends with '/') and
                # files already downloaded by a previous run.
                if filename and not os.path.exists(filename):
                    bucket.download_file(obj.key,filename)
        os.chdir(base_dir)
        if confirm("\nWould you also like to compress the bucket contents to a zip file? [Y/N] -> "):
            print(f'\nWriting zip file {os.path.join(base_dir,bucket_name)}.zip...')
            make_archive(bucket_name,'zip',os.path.join(os.curdir,bucket_name))
    else:
        print(f'\nNo objects found at the given location: s3://{bucket_name}/{folder_name}')

def get_s3_target():
    """Retrieve an s3 URI address from the user and parse it into its
    bucket name and folder/prefix path, re-prompting until the input is valid.

    Returns:
        tuple: (bucket name, folder path with surrounding '*' stripped)
    """
    while True:
        s3_uri = input('\nEnter s3 URI (ex: s3://bucket_name/subfolder) -> ')
        if s3_uri == '':
            print('\n**s3 URI is required!**')
            continue
        try:
            bucket_name, folder_name = s3_uri.replace("s3://", "").split("/", 1)
        except ValueError:
            # URI had no '/' after the bucket name. The original fell
            # through here and implicitly returned None, which crashed
            # the caller's tuple unpacking — re-prompt instead.
            print('\n** Error parsing s3 URI please try again **')
            continue
        return bucket_name, folder_name.strip('*')


def main():
    """Run the interactive extraction loop until the process is interrupted."""
    print("\n\n|------🪣 S3 Bucket Extractor🪣------|\n")
    while True:
        try:
            bucket_name, folder_name = get_s3_target()
            extract_bucket_contents(bucket_name,folder_name)
        except Exception as e:
            # Report and keep looping. The original recursed into main()
            # from this handler, growing the call stack without bound on
            # repeated errors.
            print(f'\n** Error extracting bucket contents: {e} **')



# Start the interactive CLI only when executed as a script (not on import).
if __name__ == '__main__':
    main()

2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# AWS SDK for Python — S3 resource/bucket access in main.py.
boto3
# Terminal progress bar for the download loop.
tqdm

0 comments on commit 3eb8bfa

Please sign in to comment.