Skip to content

Commit c35d1ce

Browse files
(feat:file_abstract) return storage metadata after upload
1 parent 0d3e615 commit c35d1ce

File tree

3 files changed

+77
-68
lines changed

3 files changed

+77
-68
lines changed

application/storage/base.py

+23-20
Original file line numberDiff line numberDiff line change
@@ -7,84 +7,87 @@ class BaseStorage(ABC):
77
"""Abstract base class for storage implementations."""
88

99
@abstractmethod
10-
def save_file(self, file_data: BinaryIO, path: str) -> dict:
    """
    Save a file to storage.

    Args:
        file_data: File-like object containing the data
        path: Path where the file should be stored

    Returns:
        dict: A dictionary containing metadata about the saved file, including:
            - 'path': The path where the file was saved
            - 'storage_type': The type of storage (e.g., 'local', 's3')
            - Other storage-specific metadata (e.g., 'uri', 'bucket_name', etc.)
    """
    pass
22-
25+
2326
@abstractmethod
2427
def get_file(self, path: str) -> BinaryIO:
    """
    Retrieve a file from storage.

    Args:
        path: Path to the file

    Returns:
        BinaryIO: File-like object containing the file data
    """
    pass
35-
38+
3639
@abstractmethod
3740
def process_file(self, path: str, processor_func: Callable, **kwargs):
    """
    Process a file using the provided processor function.

    This method handles the details of retrieving the file and providing
    it to the processor function in an appropriate way based on the storage type.

    Args:
        path: Path to the file
        processor_func: Function that processes the file
        **kwargs: Additional arguments to pass to the processor function

    Returns:
        The result of the processor function
    """
    pass
53-
56+
5457
@abstractmethod
5558
def delete_file(self, path: str) -> bool:
    """
    Delete a file from storage.

    Args:
        path: Path to the file

    Returns:
        bool: True if deletion was successful
    """
    pass
66-
69+
6770
@abstractmethod
6871
def file_exists(self, path: str) -> bool:
    """
    Check if a file exists.

    Args:
        path: Path to the file

    Returns:
        bool: True if the file exists
    """
    pass
79-
82+
8083
@abstractmethod
8184
def list_files(self, directory: str) -> List[str]:
8285
"""
8386
List all files in a directory.
84-
87+
8588
Args:
8689
directory: Directory path to list
87-
90+
8891
Returns:
8992
List[str]: List of file paths
9093
"""

application/storage/local.py

+27-29
Original file line numberDiff line numberDiff line change
@@ -8,98 +8,96 @@
88

99
class LocalStorage(BaseStorage):
    """Local file system storage implementation."""

    def __init__(self, base_dir: str = None):
        """
        Initialize local storage.

        Args:
            base_dir: Base directory for all operations. If None (or empty),
                defaults to the directory obtained by applying ``dirname``
                three times to this module's absolute file path.
        """
        self.base_dir = base_dir or os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        )

    def _get_full_path(self, path: str) -> str:
        """Get absolute path by combining base_dir and path.

        Absolute paths are returned unchanged; relative paths are joined
        onto ``self.base_dir``.
        """
        if os.path.isabs(path):
            return path
        return os.path.join(self.base_dir, path)

    def save_file(self, file_data: BinaryIO, path: str) -> dict:
        """
        Save a file to local storage.

        Args:
            file_data: File-like object to persist. Objects exposing a
                ``save`` method (e.g. Flask's FileStorage) are asked to
                write themselves; anything else is copied as a binary stream.
            path: Destination path, resolved against ``base_dir`` when relative.

        Returns:
            dict: Metadata about the saved file:
                - 'path': The path the caller supplied
                - 'storage_type': Always 'local'
        """
        full_path = self._get_full_path(path)

        # Ensure the destination directory exists
        os.makedirs(os.path.dirname(full_path), exist_ok=True)

        if hasattr(file_data, 'save'):
            # Handle Flask's FileStorage objects
            file_data.save(full_path)
        else:
            # Handle regular file-like objects
            with open(full_path, 'wb') as f:
                shutil.copyfileobj(file_data, f)

        # 'path' is part of the metadata contract documented on the base
        # class and was the sole return value before metadata was introduced.
        return {
            'path': path,
            'storage_type': 'local',
        }

    def get_file(self, path: str) -> BinaryIO:
        """
        Get a file from local storage.

        Args:
            path: Path to the file

        Returns:
            BinaryIO: The file opened in binary read mode; the caller is
            responsible for closing it.

        Raises:
            FileNotFoundError: If the resolved path does not exist
        """
        full_path = self._get_full_path(path)

        if not os.path.exists(full_path):
            raise FileNotFoundError(f"File not found: {full_path}")

        return open(full_path, 'rb')

    def delete_file(self, path: str) -> bool:
        """
        Delete a file from local storage.

        Args:
            path: Path to the file

        Returns:
            bool: True if the file was removed, False if it did not exist
        """
        full_path = self._get_full_path(path)

        if not os.path.exists(full_path):
            return False

        os.remove(full_path)
        return True

    def file_exists(self, path: str) -> bool:
        """Check if a file exists in local storage."""
        full_path = self._get_full_path(path)
        return os.path.exists(full_path)

    def list_files(self, directory: str) -> List[str]:
        """
        List all files in a directory in local storage.

        Walks the directory recursively.

        Args:
            directory: Directory path to list

        Returns:
            List[str]: File paths relative to ``base_dir``; empty when the
            directory does not exist
        """
        full_path = self._get_full_path(directory)

        if not os.path.exists(full_path):
            return []

        result = []
        for root, _, files in os.walk(full_path):
            for file in files:
                rel_path = os.path.relpath(os.path.join(root, file), self.base_dir)
                result.append(rel_path)

        return result

    def process_file(self, path: str, processor_func: Callable, **kwargs):
        """
        Process a file using the provided processor function.

        For local storage, we can directly pass the full path to the processor.

        Args:
            path: Path to the file
            processor_func: Function that processes the file; it is called
                with the resolved location as the ``file_path`` keyword
            **kwargs: Additional arguments to pass to the processor function

        Returns:
            The result of the processor function

        Raises:
            FileNotFoundError: If the resolved path does not exist
        """
        full_path = self._get_full_path(path)

        if not os.path.exists(full_path):
            raise FileNotFoundError(f"File not found: {full_path}")

        return processor_func(file_path=full_path, **kwargs)

application/storage/s3.py

+27-19
Original file line numberDiff line numberDiff line change
@@ -12,94 +12,102 @@
1212

1313
class S3Storage(BaseStorage):
1414
"""AWS S3 storage implementation."""
15-
15+
1616
def __init__(self, bucket_name=None):
    """
    Initialize S3 storage.

    Args:
        bucket_name: S3 bucket name (optional, defaults to settings)
    """
    self.bucket_name = bucket_name or getattr(settings, "S3_BUCKET_NAME", "docsgpt-test-bucket")

    # Get credentials from settings
    aws_access_key_id = getattr(settings, "SAGEMAKER_ACCESS_KEY", None)
    aws_secret_access_key = getattr(settings, "SAGEMAKER_SECRET_KEY", None)
    region_name = getattr(settings, "SAGEMAKER_REGION", None)

    self.s3 = boto3.client(
        's3',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        region_name=region_name
    )
36-
37-
def save_file(self, file_data: BinaryIO, path: str) -> dict:
    """
    Save a file to S3 storage.

    Args:
        file_data: File-like object containing the data to upload
        path: Object key under which the data is stored in the bucket

    Returns:
        dict: Metadata about the saved object:
            - 'path': The key the caller supplied
            - 'storage_type': Always 's3'
            - 'bucket_name': The bucket the object was uploaded to
            - 'uri': The ``s3://bucket/key`` form of the location
            - 'region': Value of the SAGEMAKER_REGION setting, or None if unset
    """
    self.s3.upload_fileobj(file_data, self.bucket_name, path)

    region = getattr(settings, "SAGEMAKER_REGION", None)

    # 'path' is part of the metadata contract documented on the base class
    # and was the sole return value before metadata was introduced.
    return {
        'path': path,
        'storage_type': 's3',
        'bucket_name': self.bucket_name,
        'uri': f's3://{self.bucket_name}/{path}',
        'region': region,
    }
49+
4250
def get_file(self, path: str) -> BinaryIO:
    """
    Get a file from S3 storage.

    Args:
        path: Object key of the file

    Returns:
        BinaryIO: An in-memory buffer holding the object's bytes,
        positioned at the start

    Raises:
        FileNotFoundError: If ``file_exists`` reports the key as missing
    """
    if not self.file_exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    file_obj = io.BytesIO()
    self.s3.download_fileobj(self.bucket_name, path, file_obj)
    # Rewind so callers read from the beginning of the downloaded data
    file_obj.seek(0)
    return file_obj
51-
59+
5260
def delete_file(self, path: str) -> bool:
    """
    Delete a file from S3 storage.

    Args:
        path: Object key of the file

    Returns:
        bool: True when the delete request completed without a
        ClientError, False otherwise
    """
    try:
        self.s3.delete_object(Bucket=self.bucket_name, Key=path)
        return True
    except ClientError:
        return False
59-
67+
6068
def file_exists(self, path: str) -> bool:
    """
    Check if a file exists in S3 storage.

    Args:
        path: Object key of the file

    Returns:
        bool: True when a HEAD request for the key succeeds; False when it
        raises ClientError. Note that any ClientError is reported as
        "missing", not only a not-found response.
    """
    try:
        self.s3.head_object(Bucket=self.bucket_name, Key=path)
        return True
    except ClientError:
        return False
67-
75+
6876
def list_files(self, directory: str) -> List[str]:
    """
    List all files in a directory in S3 storage.

    Args:
        directory: Key prefix to list; a trailing slash is appended when
            it is non-empty and lacks one

    Returns:
        List[str]: Keys of all objects found under the prefix, across
        all result pages
    """
    # Ensure directory ends with a slash if it's not empty
    if directory and not directory.endswith('/'):
        directory += '/'

    result = []
    paginator = self.s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=self.bucket_name, Prefix=directory)

    for page in pages:
        # Pages without matching objects carry no 'Contents' entry
        if 'Contents' in page:
            for obj in page['Contents']:
                result.append(obj['Key'])

    return result
8492

8593
def process_file(self, path: str, processor_func: Callable, **kwargs):
8694
"""
8795
Process a file using the provided processor function.
88-
96+
8997
Args:
9098
path: Path to the file
9199
processor_func: Function that processes the file
92100
**kwargs: Additional arguments to pass to the processor function
93-
101+
94102
Returns:
95103
The result of the processor function
96104
"""
97105
import tempfile
98106
import logging
99-
107+
100108
if not self.file_exists(path):
101109
raise FileNotFoundError(f"File not found in S3: {path}")
102-
110+
103111
with tempfile.NamedTemporaryFile(suffix=os.path.splitext(path)[1], delete=True) as temp_file:
104112
try:
105113
# Download the file from S3 to the temporary file

0 commit comments

Comments
 (0)