|
7 | 7 | import pandas as pd
|
8 | 8 | from google.cloud import bigquery
|
9 | 9 |
|
10 |
| -try: |
11 |
| - import pandavro |
12 |
| - |
13 |
| - _avro_dependencies = True |
14 |
| -except ImportError: |
15 |
| - _avro_dependencies = False |
16 |
| - |
17 | 10 | from basedosdados.exceptions import BaseDosDadosMissingDependencyException
|
18 | 11 |
|
| 12 | +_avro_dependencies = False |
| 13 | +# try: |
| 14 | +# import pandavro |
| 15 | +# |
| 16 | +# _avro_dependencies = True |
| 17 | +# except ImportError: |
| 18 | +# _avro_dependencies = False |
| 19 | + |
19 | 20 |
|
20 | 21 | class Datatype:
|
21 | 22 | """
|
@@ -59,19 +60,18 @@ def header(self, data_sample_path, csv_delimiter: str = ","):
|
59 | 60 | return next(csv_reader)
|
60 | 61 |
|
61 | 62 | if self.source_format == "avro":
|
| 63 | + # TODO: Restore support for avro format |
| 64 | + # See https://github.com/ynqa/pandavro/issues/56 and https://github.com/basedosdados/sdk/issues/1728 |
62 | 65 | if not _avro_dependencies:
|
63 |
| - raise BaseDosDadosMissingDependencyException( |
64 |
| - "Optional dependencies for handling AVRO files are not installed. " |
65 |
| - 'Please install basedosdados with the "avro" extra, such as:' |
66 |
| - "\n\npip install basedosdados[avro]" |
67 |
| - ) |
68 |
| - dataframe = pandavro.read_avro(str(data_sample_path)) |
69 |
| - return list(dataframe.columns.values) |
| 66 | + msg = "Handling avro file is currently not supported due to a limitation. See https://github.com/ynqa/pandavro/issues/56" |
| 67 | + raise BaseDosDadosMissingDependencyException(msg) |
| 68 | + # dataframe = pandavro.read_avro(str(data_sample_path)) |
| 69 | + # return list(dataframe.columns.values) |
70 | 70 | if self.source_format == "parquet":
|
71 | 71 | dataframe = pd.read_parquet(str(data_sample_path))
|
72 | 72 | return list(dataframe.columns.values)
|
73 | 73 | raise NotImplementedError(
|
74 |
| - "Base dos Dados just supports comma separated csv, avro and parquet files" |
| 74 | + "Base dos Dados just supports comma separated csv and parquet files" |
75 | 75 | )
|
76 | 76 |
|
77 | 77 | def partition(self):
|
@@ -108,7 +108,7 @@ def external_config(self):
|
108 | 108 | _external_config = bigquery.ExternalConfig("PARQUET")
|
109 | 109 | else:
|
110 | 110 | raise NotImplementedError(
|
111 |
| - "Base dos Dados just supports csv, avro and parquet files" |
| 111 | + "Base dos Dados just supports csv and parquet files" |
112 | 112 | )
|
113 | 113 | _external_config.source_uris = self.uri
|
114 | 114 | if self.partitioned:
|
|
0 commit comments