|
9 | 9 |
|
10 | 10 | from dlt.sources.sql_database import sql_database, sql_table, Table |
11 | 11 |
|
12 | | -from sqlalchemy.sql.sqltypes import TypeEngine |
13 | 12 | import sqlalchemy as sa |
14 | 13 |
|
15 | 14 |
|
@@ -105,46 +104,13 @@ def load_standalone_table_resource() -> None: |
105 | 104 | defer_table_reflect=True, |
106 | 105 | ) |
107 | 106 |
|
108 | | - # Run the resources together |
109 | | - info = pipeline.extract([family, genome], write_disposition="merge") |
| 107 | + # Run the resources together (just take one page of results to make it faster) |
| 108 | + info = pipeline.extract([family.add_limit(1), genome.add_limit(1)], write_disposition="merge") |
110 | 109 | print(info) |
111 | 110 | # Show inferred columns |
112 | 111 | print(pipeline.default_schema.to_pretty_yaml()) |
113 | 112 |
|
114 | 113 |
|
115 | | -def select_columns() -> None: |
116 | | - """Uses table adapter callback to modify list of columns to be selected""" |
117 | | - pipeline = dlt.pipeline( |
118 | | - pipeline_name="rfam_database", |
119 | | - destination="duckdb", |
120 | | - dataset_name="rfam_data_cols", |
121 | | - dev_mode=True, |
122 | | - ) |
123 | | - |
124 | | - def table_adapter(table: Table) -> Table: |
125 | | - print(table.name) |
126 | | - if table.name == "family": |
127 | | - # this is SqlAlchemy table. _columns are writable |
128 | | - # let's drop updated column |
129 | | - table._columns.remove(table.columns["updated"]) # type: ignore |
130 | | - return table |
131 | | - |
132 | | - family = sql_table( |
133 | | - credentials="mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam", |
134 | | - table="family", |
135 | | - chunk_size=10, |
136 | | - reflection_level="full_with_precision", |
137 | | - table_adapter_callback=table_adapter, |
138 | | - ) |
139 | | - |
140 | | - # also we do not want the whole table, so we add limit to get just one chunk (10 records) |
141 | | - pipeline.run(family.add_limit(1)) |
142 | | - # only 10 rows |
143 | | - print(pipeline.last_trace.last_normalize_info) |
144 | | - # no "updated" column in "family" table |
145 | | - print(pipeline.default_schema.to_pretty_yaml()) |
146 | | - |
147 | | - |
148 | 114 | def select_with_end_value_and_row_order() -> None: |
149 | 115 | """Gets data from a table within a specified range and sorts rows descending""" |
150 | 116 | pipeline = dlt.pipeline( |
@@ -347,9 +313,6 @@ def specify_columns_to_load() -> None: |
347 | 313 | # Load selected tables with different settings |
348 | 314 | # load_select_tables_from_database() |
349 | 315 |
|
350 | | - # load a table and select columns |
351 | | - # select_columns() |
352 | | - |
353 | 316 | # load_entire_database() |
354 | 317 | # select_with_end_value_and_row_order() |
355 | 318 |
|
|
0 commit comments