diff --git a/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json b/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json
index 05d58dc12c4c7..e5acc5fdb9db9 100644
--- a/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json
+++ b/airbyte-config-oss/init-oss/src/main/resources/seed/oss_catalog.json
@@ -12862,7 +12862,7 @@
"sourceDefinitionId": "778daa7c-feaf-4db6-96f3-70fd645acc77",
"name": "File (CSV, JSON, Excel, Feather, Parquet)",
"dockerRepository": "airbyte/source-file",
- "dockerImageTag": "0.2.38",
+ "dockerImageTag": "0.3.0",
"documentationUrl": "https://docs.airbyte.com/integrations/sources/file",
"icon": "file.svg",
"sourceType": "file",
diff --git a/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml b/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml
index acf110451fe27..f0fda6cd38ea4 100644
--- a/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml
+++ b/airbyte-config-oss/init-oss/src/main/resources/seed/source_definitions.yaml
@@ -637,7 +637,7 @@
- name: File (CSV, JSON, Excel, Feather, Parquet)
sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerRepository: airbyte/source-file
- dockerImageTag: 0.2.38
+ dockerImageTag: 0.3.0
documentationUrl: https://docs.airbyte.com/integrations/sources/file
icon: file.svg
sourceType: file
diff --git a/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml b/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml
index 826b03081288e..0feb5289286cf 100644
--- a/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml
+++ b/airbyte-config-oss/init-oss/src/main/resources/seed/source_specs.yaml
@@ -4555,7 +4555,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
-- dockerImage: "airbyte/source-file:0.2.38"
+- dockerImage: "airbyte/source-file:0.3.0"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/file"
connectionSpecification:
diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile
index 94f23e7b42048..1d8ce1a64c70b 100644
--- a/airbyte-integrations/connectors/source-file-secure/Dockerfile
+++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile
@@ -9,5 +9,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
-LABEL io.airbyte.version=0.2.38
+LABEL io.airbyte.version=0.3.0
LABEL io.airbyte.name=airbyte/source-file-secure
diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile
index 108dcabda7b31..51bf5b47d7643 100644
--- a/airbyte-integrations/connectors/source-file/Dockerfile
+++ b/airbyte-integrations/connectors/source-file/Dockerfile
@@ -17,5 +17,5 @@ COPY source_file ./source_file
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
-LABEL io.airbyte.version=0.2.38
+LABEL io.airbyte.version=0.3.0
LABEL io.airbyte.name=airbyte/source-file
diff --git a/airbyte-integrations/connectors/source-file/integration_tests/sample_files/test_utf16.csv b/airbyte-integrations/connectors/source-file/integration_tests/sample_files/test_utf16.csv
index e786a4a6567ae..7d5a29f4ccc0b 100644
Binary files a/airbyte-integrations/connectors/source-file/integration_tests/sample_files/test_utf16.csv and b/airbyte-integrations/connectors/source-file/integration_tests/sample_files/test_utf16.csv differ
diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py
index ee78740ebed22..9280749911965 100644
--- a/airbyte-integrations/connectors/source-file/source_file/client.py
+++ b/airbyte-integrations/connectors/source-file/source_file/client.py
@@ -364,6 +364,8 @@ def dtype_to_json_type(current_type: str, dtype) -> str:
return "number"
if dtype == "bool" and (not current_type or current_type == "boolean"):
return "boolean"
+ if dtype == "datetime64[ns]":
+ return "datetime"
return "string"
@property
@@ -419,8 +421,14 @@ def _stream_properties(self, fp, empty_schema: bool = False, read_sample_chunk:
for col in df.columns:
# if data type of the same column differs in dataframes, we choose the broadest one
prev_frame_column_type = fields.get(col)
- fields[col] = self.dtype_to_json_type(prev_frame_column_type, df[col].dtype)
- return {field: {"type": [fields[field], "null"]} for field in fields}
+ df_type = df[col].dtype
+ fields[col] = self.dtype_to_json_type(prev_frame_column_type, df_type)
+ return {
+ field: (
+ {"type": ["string", "null"], "format": "datetime"} if fields[field] == "datetime" else {"type": [fields[field], "null"]}
+ )
+ for field in fields
+ }
def streams(self, empty_schema: bool = False) -> Iterable:
"""Discovers available streams"""
diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_client.py b/airbyte-integrations/connectors/source-file/unit_tests/test_client.py
index ac6a0fb16ddf1..74d09092afef5 100644
--- a/airbyte-integrations/connectors/source-file/unit_tests/test_client.py
+++ b/airbyte-integrations/connectors/source-file/unit_tests/test_client.py
@@ -93,6 +93,7 @@ def test_load_nested_json(client, absolute_path, test_files):
("boolean", "bool", "boolean"),
("number", "int64", "number"),
("number", "float64", "number"),
+ ("number", "datetime64[ns]", "datetime"),
],
)
def test_dtype_to_json_type(client, current_type, dtype, expected):
diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py
index f2365baa961db..dfd526b3ab950 100644
--- a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py
+++ b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py
@@ -42,7 +42,7 @@ def test_csv_with_utf16_encoding(absolute_path, test_files):
config_local_csv_utf16 = {
"dataset_name": "AAA",
"format": "csv",
- "reader_options": '{"encoding":"utf_16"}',
+ "reader_options": '{"encoding":"utf_16", "parse_dates": [\"header5\"]}',
"url": f"{absolute_path}/{test_files}/test_utf16.csv",
"provider": {"storage": "local"},
}
@@ -53,6 +53,7 @@ def test_csv_with_utf16_encoding(absolute_path, test_files):
"header2": {"type": ["number", "null"]},
"header3": {"type": ["number", "null"]},
"header4": {"type": ["boolean", "null"]},
+ "header5": {"type": ["string", "null"], "format": "datetime"},
},
"type": "object",
}
diff --git a/connectors.md b/connectors.md
index a4c53aa82e169..74a3e33d62f69 100644
--- a/connectors.md
+++ b/connectors.md
@@ -75,7 +75,7 @@
| **Facebook Pages** |
| Source | airbyte/source-facebook-pages:0.2.4 | beta | [docs](https://docs.airbyte.com/integrations/sources/facebook-pages) | [connectors/source/facebook-pages](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/facebook-pages) | [source-facebook-pages](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-facebook-pages) | `010eb12f-837b-4685-892d-0a39f76a98f5` |
| **Fastbill** |
| Source | airbyte/source-fastbill:0.1.0 | alpha | [docs](https://docs.airbyte.com/integrations/sources/fastbill) | [connectors/source/fastbill](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/fastbill) | [source-fastbill](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fastbill) | `eb3e9c1c-0467-4eb7-a172-5265e04ccd0a` |
| **Fauna** |
| Source | airbyte/source-fauna:0.1.1 | alpha | [docs](https://docs.airbyte.com/integrations/sources/fauna) | [connectors/source/fauna](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/fauna) | [source-fauna](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fauna) | `3825db3e-c94b-42ac-bd53-b5a9507ace2b` |
-| **File (CSV, JSON, Excel, Feather, Parquet)** |
| Source | airbyte/source-file:0.2.38 | generally_available | [docs](https://docs.airbyte.com/integrations/sources/file) | [connectors/source/file](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/file) | [source-file](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` |
+| **File (CSV, JSON, Excel, Feather, Parquet)** |
| Source | airbyte/source-file:0.3.0 | generally_available | [docs](https://docs.airbyte.com/integrations/sources/file) | [connectors/source/file](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/file) | [source-file](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` |
| **Firebase Realtime Database** | x | Source | airbyte/source-firebase-realtime-database:0.1.0 | alpha | [docs](https://docs.airbyte.io/integrations/sources/firebase-realtime-database) | [connectors/source/firebase-realtime-database](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/firebase-realtime-database) | [source-firebase-realtime-database](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebase-realtime-database) | `acb5f973-a565-441e-992f-4946f3e65662` |
| **Firebolt** |
| Source | airbyte/source-firebolt:0.2.0 | alpha | [docs](https://docs.airbyte.com/integrations/sources/firebolt) | [connectors/source/firebolt](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/firebolt) | [source-firebolt](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebolt) | `6f2ac653-8623-43c4-8950-19218c7caf3d` |
| **Flexport** | x | Source | airbyte/source-flexport:0.1.0 | alpha | [docs](https://docs.airbyte.com/integrations/sources/flexport) | [connectors/source/flexport](https://github.com/airbytehq/airbyte/issues?q=is:open+is:issue+label:connectors/source/flexport) | [source-flexport](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-flexport) | `f95337f1-2ad1-4baf-922f-2ca9152de630` |
diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md
index 1d9d7c1668bfd..a1828de16d906 100644
--- a/docs/integrations/sources/file.md
+++ b/docs/integrations/sources/file.md
@@ -79,12 +79,13 @@ For example, if the format `CSV` is selected, then options from the [read_csv](h
- It is therefore possible to customize the `delimiter` (or `sep`) to in case of tab separated files.
- Header line can be ignored with `header=0` and customized with `names`
+- Dates in specified columns can be parsed with `parse_dates`
- etc
We would therefore provide in the `reader_options` the following json:
```
-{ "sep" : "\t", "header" : 0, "names": ["column1", "column2"]}
+{ "sep" : "\t", "header" : 0, "names": ["column1", "column2"], "parse_dates": ["column2"]}
```
In case you select `JSON` format, then options from the [read_json](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-json-reader) reader are available.
@@ -190,17 +191,18 @@ In order to read large files from a remote location, this connector uses the [sm
| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------|
+| 0.3.0 | 2023-04-24 | [25445](https://github.com/airbytehq/airbyte/pull/25445) | Add datetime format parsing support for CSV files |
| 0.2.38 | 2023-04-12 | [23759](https://github.com/airbytehq/airbyte/pull/23759) | Fix column data types for numerical values |
| 0.2.37 | 2023-04-06 | [24525](https://github.com/airbytehq/airbyte/pull/24525) | Fix examples in spec |
| 0.2.36 | 2023-03-27 | [24588](https://github.com/airbytehq/airbyte/pull/24588) | Remove traceback from user messages. |
| 0.2.35 | 2023-03-03 | [24278](https://github.com/airbytehq/airbyte/pull/24278) | Read only file header when checking connectivity; read only a single chunk when discovering the schema. |
-| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. |
+| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. |
| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug |
| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s |
| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 |
| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command |
| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. |
-| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` |
+| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Add retry logic for `Connection reset error - 104` |
| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format |
| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link |
| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. |
@@ -212,7 +214,7 @@ In order to read large files from a remote location, this connector uses the [sm
| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' |
| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover |
| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file |
-| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | added support for encoding reader option |
+| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | Add support for encoding reader option |
| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 |
| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files |
| 0.2.11 | 2022-07-12 | [9974](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format |