Skip to content

Commit

Permalink
image upload done
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunprakash027 committed Nov 13, 2023
1 parent 9584a6f commit 6543ea4
Show file tree
Hide file tree
Showing 15 changed files with 507 additions and 59 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ IMG_3046.jpg
airlift_linux
# Mac/OSX
.DS_Store
client_secrets.json
settings.yml
marker_data_demo
upload_test_copy.sh
Binary file modified airlift/__pycache__/airtable_client.cpython-310.pyc
Binary file not shown.
Binary file modified airlift/__pycache__/cli.cpython-310.pyc
Binary file not shown.
Binary file modified airlift/__pycache__/cli_args.cpython-310.pyc
Binary file not shown.
Binary file modified airlift/__pycache__/csv_data.cpython-310.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified airlift/__pycache__/json_data.cpython-310.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion airlift/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def cli(*argv: str) -> None:
suffix = pathlib.Path(args.csv_file.name).suffix

if "csv" in suffix:
data = csv_read(args.csv_file,args.fail_on_duplicate_csv_columns)
data = csv_read(args.csv_file,args.fail_on_duplicate_csv_columns,args.attachment_columns,args.dropbox_token)
elif "json" in suffix:
data = json_read(args.csv_file,args.fail_on_duplicate_csv_columns)
else:
Expand Down
79 changes: 43 additions & 36 deletions airlift/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,60 +20,67 @@ def parse_args(argv: Sequence[str]) -> argparse.Namespace:
)

schema: ArgSchema = {
"POSITIONAL":{
"csv_file":{
"type":Path,
"help":"CSV or JSON file to upload",
"metavar":"FILE",
"POSITIONAL": {
"csv_file": {
"type": Path,
"help": "CSV or JSON file to upload",
"metavar": "FILE",
}
},
"general_options":{
"--token":{
"help":"your Airtable personal access token",
"required":True,
"general_options": {
"--token": {
"help": "your Airtable personal access token",
"required": True,
},
"--base":{
"help":"your Airtable Base ID",
"required":True,
"--base": {
"help": "your Airtable Base ID",
"required": True,
},
"--table":{
"help":"your Airtable Table ID",
"required":True,
"--table": {
"help": "your Airtable Table ID",
"required": True,
},
"--log":{
"type":Path,
"metavar":"FILE",
"help":"file to store program log",
"--log": {
"type": Path,
"metavar": "FILE",
"help": "file to store program log",
},
"--verbose":{
"action":"store_true",
"help":"output debug information",
"--verbose": {
"action": "store_true",
"help": "output debug information",
},
"--version": {
"action": "version",
"version": f"%(prog)s {__version__}",
},
"--workers":{
"type":int,
"help":"total number of worker threads to upload your data (default: 1)"
"--workers": {
"type": int,
"help": "total number of worker threads to upload your data (default: 1)"
},
("-h", "--help"): {
"action": "help",
"help": "show this help message and exit",
},
"--dropbox-token":{
"help":"enter your dropbox token here",
},

},
"column options":{
"--disable-bypass-column-creation":{
"action":"store_true",
"help": (
"creates new columns that are not present in Airtable's table"
),
},
"column_options": {
"--disable-bypass-column-creation": {
"action": "store_true",
"help": "creates new columns that are not present in Airtable's table",
},
"--attachment-columns": {
"nargs": "+",
"help": "specify one or more arguments",
"metavar": "ATTTACHMENT_COLUMNS",
},
},
"validation options":{
"--fail-on-duplicate-csv-columns":{
"action":"store_true",
"help":(
"validation_options": {
"--fail-on-duplicate-csv-columns": {
"action": "store_true",
"help": (
"fail if CSV has duplicate columns"
"\notherwise first column will be used"
),
Expand Down
89 changes: 69 additions & 20 deletions airlift/csv_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
from typing import Any, Dict, Iterable, Iterator, List, Optional
import datetime
import email
import os
from airlift.dropbox_client import dropbox_client
from tqdm import tqdm
from queue import Queue, Empty
import multiprocessing
import concurrent.futures

from airlift.utils_exceptions import CriticalError
from airlift.airlift_data_guesser import guess_data_type
Expand All @@ -14,15 +20,22 @@
logger = logging.getLogger(__name__)


def csv_read(file_path: Path,fail_on_dup:bool) -> List[CSVRowType]:
def csv_read(file_path: Path,fail_on_dup:bool,attachment_columns:List[str],dropbox_token:str) -> List[CSVRowType]:
    """Parse the CSV file at *file_path* into Airtable-ready records.

    The CSV's own directory is passed down so attachment file names can be
    resolved relative to it.  Raises CriticalError when the file is missing.
    """
    base_dir = os.path.dirname(file_path)
    try:
        with open(file_path, "r", encoding="utf-8-sig") as stream:
            return _csv_read_rows(stream, fail_on_dup, base_dir, attachment_columns, dropbox_token)
    except FileNotFoundError as err:
        logger.debug(f"error : {err}")
        raise CriticalError(f"File {file_path} not found") from err

def _csv_read_rows(csv_file:Iterable[str],fail_on_dup:bool) -> List[CSVRowType]:
def _csv_read_rows(csv_file:Iterable[str],fail_on_dup:bool,dirname:str,attachment_columns:List[str],dropbox_token:str) -> List[CSVRowType]:

if dropbox_token:
dbx = dropbox_client(dropbox_token)
else:
dbx = None

reader = csv.DictReader(csv_file,restval="")

if not reader.fieldnames:
Expand All @@ -38,7 +51,7 @@ def _csv_read_rows(csv_file:Iterable[str],fail_on_dup:bool) -> List[CSVRowType]:
else:
rows = _remove_duplicates(rows)

converted_data = _convert_datatypes(rows)
converted_data = _convert_datatypes(rows,dirname,attachment_columns,dbx)

records = []

Expand All @@ -47,24 +60,60 @@ def _csv_read_rows(csv_file:Iterable[str],fail_on_dup:bool) -> List[CSVRowType]:

return records

def _convert_datatypes(rows:List[Dict]) -> List[CSVRowType]:

for row in rows:
for key, value in row.items():

data_type = guess_data_type(value)
if data_type == "number":
row[key] = float(value)
elif data_type == "date":
row[key] = datetime.datetime.strptime(value, "%Y-%m-%d")
elif data_type == "email":
row[key] = email.utils.parseaddr(value)[1]
elif data_type == "bool":
row[key] = False if value.lower() == "false" else True
def _convert_datatypes(rows:List[Dict],dirname:str,attachment_columns:List[str],dbx:dropbox_client) -> List[CSVRowType]:
    """Convert raw CSV string values in *rows* to typed values, in parallel.

    Rows are fanned out to a small pool of worker threads (attachment
    uploads are network-bound, so threads overlap the waits) and collected
    into a shared list.  NOTE(review): rows come back in completion order,
    not input order — output may be shuffled relative to the CSV.

    Raises CriticalError if any worker fails (propagated from _worker).
    """
    # A plain list suffices here: the workers are threads in this process
    # and share memory directly.  The original multiprocessing.Manager()
    # list spawned a proxy server process for no benefit.
    shared_list: List[Dict] = []

    if dbx:
        print("Uploading image to dropbox!")

    with tqdm(total=len(rows)) as progress_bar:
        data_queue: Queue = Queue()
        for row in rows:
            data_queue.put(row)

        worker_count = 6
        with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
            futures = [
                executor.submit(_worker, dirname, attachment_columns, dbx, data_queue, shared_list, progress_bar)
                for _ in range(worker_count)
            ]
            # Surface worker failures: the original fire-and-forget wait()
            # silently dropped rows when a worker raised.
            for future in concurrent.futures.as_completed(futures):
                future.result()

    return list(shared_list)

def _worker(dirname:str,attachment_columns:List[str],dbx:dropbox_client,data_queue:Queue,shared_list,progress_bar):
    """Drain *data_queue*, converting each row's values in place.

    Cells in *attachment_columns* are treated as file names: the file is
    uploaded via *dbx* and the cell is replaced with an Airtable attachment
    payload ([{"url": ...}]).  Every other cell is coerced to the type
    reported by guess_data_type.  Each converted row is appended to
    *shared_list* and *progress_bar* advances once per row.

    Raises CriticalError if attachment columns were requested without a
    Dropbox client, or if any row fails to convert.
    """
    while True:
        try:
            row = data_queue.get_nowait()
        except Empty:
            # Queue drained — this worker is done.
            break

        try:
            for key, value in row.items():
                if attachment_columns and key in attachment_columns:
                    if not dbx:
                        raise CriticalError("dropbox token not provided! aborting the upload")
                    # Resolve the file relative to the CSV's directory; a CSV
                    # in the current directory has an empty dirname, in which
                    # case the bare value already is the path.  (The original
                    # built "/{value}" here — a bogus absolute path.)
                    local_path = f"{dirname}/{value}" if dirname else value
                    row[key] = [{"url": dbx.upload_to_dropbox(local_path)}]
                    # Skip type coercion so the attachment payload built
                    # above is not clobbered by the branches below.
                    continue

                data_type = guess_data_type(value)
                if data_type == "number":
                    row[key] = float(value)
                elif data_type == "date":
                    row[key] = datetime.datetime.strptime(value, "%Y-%m-%d")
                elif data_type == "email":
                    row[key] = email.utils.parseaddr(value)[1]
                elif data_type == "bool":
                    row[key] = value.lower() != "false"

            shared_list.append(row)
            progress_bar.update(1)
        except CriticalError:
            # Already a domain error — don't re-wrap it.
            raise
        except Exception as e:
            raise CriticalError(e) from e


return list(rows)

def _list_duplicates(lst: List[str]) -> List[str]:
return [lst_item for lst_item, count in Counter(lst).items() if count > 1]

Expand Down
34 changes: 34 additions & 0 deletions airlift/dropbox_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from pydrive.drive import GoogleDrive
from pydrive.auth import GoogleAuth
import os
import dropbox
import logging
logger = logging.getLogger(__name__)
class dropbox_client:
    """Thin wrapper around the Dropbox SDK used to host attachment files.

    Uploads files into a top-level "/airlift" folder and returns a
    direct-download share URL for each uploaded file.
    """

    def __init__(self,access_token):
        # access_token: a Dropbox API access token — assumed to carry
        # file-write and sharing scopes (TODO confirm against app config).
        self.dbx = dropbox.Dropbox(access_token)
        logger.info("Created a dropbox client")

        try:
            # Make sure the upload target folder exists.
            self.dbx.files_create_folder("/airlift")
        except dropbox.exceptions.ApiError as e:
            # NOTE(review): this assumes the only ApiError here is
            # "folder already exists" — other folder-creation failures are
            # masked by the same message.
            print("The folder airlift already exists.")

    def upload_to_dropbox(self,filename):
        """Upload *filename* into /airlift and return a direct-download URL."""
        with open(filename, 'rb') as f:
            image_data = f.read()
            image_name = os.path.basename(filename)

        dropbox_path = f"/airlift/{image_name}"

        # Upload the image
        self.dbx.files_upload(image_data, dropbox_path)

        # Create a share link, then rewrite it into the direct-download
        # host/flag so consumers fetch the bytes, not the preview page.
        shared_link_metadata = self.dbx.sharing_create_shared_link(path=dropbox_path)
        shared_url = shared_link_metadata.url

        direct_download_url = shared_url.replace('www.dropbox.com', 'dl.dropboxusercontent.com').replace('?dl=0', '?dl=1')

        return direct_download_url
2 changes: 1 addition & 1 deletion airlift/json_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ def _json_read_rows(json_file:Iterable[str],fail_on_dup:bool) -> List[CSVRowType
records.append({"fields":each_data})

return records

Loading

0 comments on commit 6543ea4

Please sign in to comment.