22import logging
33import re
44from abc import ABC , abstractmethod
5- from datetime import date , datetime
5+ from datetime import date , datetime , timezone
66from typing import Any , List , Optional , Tuple
77
88from bson import Timestamp
@@ -115,7 +115,7 @@ def extract_column_and_format(self, text: str) -> Tuple[str, Optional[str]]:
115115 return "" , None
116116
117117 def convert_value (self , value : Any , format_param : Optional [str ] = None ) -> Any :
118- """Convert to BSON Timestamp"""
118+ """Convert to BSON Timestamp using UTC for consistency """
119119 if value is None :
120120 return None
121121
@@ -130,33 +130,39 @@ def convert_value(self, value: Any, format_param: Optional[str] = None) -> Any:
130130 # Try ISO format first
131131 try :
132132 dt = datetime .fromisoformat (value )
133+ # Use UTC if no timezone info
134+ if dt .tzinfo is None :
135+ dt = dt .replace (tzinfo = timezone .utc )
133136 timestamp_int = int (dt .timestamp ())
134137 return Timestamp (timestamp_int , 0 )
135138 except (ValueError , TypeError ):
136139 pass
137140
138- # Try date-only format (YYYY-MM-DD)
141+ # Try date-only format (YYYY-MM-DD) - treat as UTC
139142 if re .match (r"^\d{4}-\d{2}-\d{2}$" , value ):
140143 try :
141144 dt = datetime .strptime (value , "%Y-%m-%d" )
145+ dt = dt .replace (tzinfo = timezone .utc ) # Treat as UTC
142146 timestamp_int = int (dt .timestamp ())
143147 return Timestamp (timestamp_int , 0 )
144148 except ValueError :
145149 pass
146150
147- # Try custom format if provided
151+ # Try custom format if provided - treat as UTC
148152 if format_param :
149153 try :
150154 dt = datetime .strptime (value , format_param )
155+ dt = dt .replace (tzinfo = timezone .utc ) # Treat as UTC
151156 timestamp_int = int (dt .timestamp ())
152157 return Timestamp (timestamp_int , 0 )
153158 except ValueError :
154159 pass
155160
156- # Try common formats
161+ # Try common formats - treat as UTC
157162 for fmt in ["%Y-%m-%dT%H:%M:%SZ" , "%Y-%m-%d %H:%M:%S" , "%Y-%m-%d" , "%d-%m-%Y" , "%m/%d/%Y" ]:
158163 try :
159164 dt = datetime .strptime (value , fmt )
165+ dt = dt .replace (tzinfo = timezone .utc ) # Treat as UTC
160166 timestamp_int = int (dt .timestamp ())
161167 return Timestamp (timestamp_int , 0 )
162168 except ValueError :
0 commit comments