From 0357a33c26a60d20923ce596dd98bcd04f9a03e1 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 21 Feb 2024 11:05:05 -0300 Subject: [PATCH 001/147] Get rid of rejects_recovery_columns --- .../table_function/csv_file_scanner.cpp | 14 ------ .../table_function/global_csv_state.cpp | 15 ------- .../csv_scanner/util/csv_reader_options.cpp | 7 --- .../operator/persistent/csv_rejects_table.cpp | 9 ---- src/function/table/read_csv.cpp | 23 +--------- .../csv_scanner/csv_reader_options.hpp | 4 -- .../duckdb/storage/serialization/nodes.json | 26 ++++------- src/storage/serialization/serialize_nodes.cpp | 44 +++++++++---------- 8 files changed, 30 insertions(+), 112 deletions(-) diff --git a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp index 27293e0736cb..7bdc4712fc02 100644 --- a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +++ b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp @@ -168,20 +168,6 @@ void CSVFileScan::InitializeFileNamesTypes() { projection_ids.emplace_back(result_idx, i); } - if (!projected_columns.empty()) { - // We might have to add recovery rejects column ids - for (idx_t i = 0; i < options.rejects_recovery_column_ids.size(); i++) { - idx_t col_id = options.rejects_recovery_column_ids[i]; - if (projected_columns.find(col_id) == projected_columns.end()) { - // We have to insert this column in our projection - projected_columns.insert(col_id); - file_types.emplace_back(LogicalType::VARCHAR); - projected_columns.insert(col_id); - projection_ids.emplace_back(col_id, col_id); - } - } - } - if (reader_data.column_ids.empty()) { file_types = types; } diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 764c7b057053..2f8e92e3718f 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -174,21 +174,6 @@ void CSVGlobalState::FillRejectsTable() { appender.Append(string_t("\"" + col_name + "\"")); appender.Append(error.row[col_idx]); - if (!options.rejects_recovery_columns.empty()) { - child_list_t recovery_key; - for (auto &key_idx : options.rejects_recovery_column_ids) { - // Figure out if the recovery key is valid. - // If not, error out for real. - auto &value = error.row[key_idx]; - if (value.IsNull()) { - throw InvalidInputException("%s at line %llu in column %s. 
Parser options:\n%s ", - "Could not parse recovery column", row_line, col_name, - options.ToString()); - } - recovery_key.emplace_back(bind_data.return_names[key_idx], value); - } - appender.Append(Value::STRUCT(recovery_key)); - } auto row_error_msg = StringUtil::Format("Could not convert string '%s' to '%s'", error.row[col_idx].ToString(), file->types[col_idx].ToString()); diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index d05db0bb8fb2..72c73a2e5bac 100644 --- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -213,13 +213,6 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, throw BinderException("REJECTS_TABLE option cannot be empty"); } rejects_table_name = table_name; - } else if (loption == "rejects_recovery_columns") { - // Get the list of columns to use as a recovery key - auto &children = ListValue::GetChildren(value); - for (auto &child : children) { - auto col_name = child.GetValue(); - rejects_recovery_columns.push_back(col_name); - } } else if (loption == "rejects_limit") { int64_t limit = ParseInteger(value, loption); if (limit < 0) { diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index a96bb2251aa4..7d01723a7718 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -29,15 +29,6 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT)); info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR)); info->columns.AddColumn(ColumnDefinition("parsed_value", LogicalType::VARCHAR)); - - if (!data.options.rejects_recovery_columns.empty()) { - child_list_t recovery_key_components; - for (auto &col_name : data.options.rejects_recovery_columns) { - recovery_key_components.emplace_back(col_name, LogicalType::VARCHAR); - } - info->columns.AddColumn(ColumnDefinition("recovery_columns", LogicalType::STRUCT(recovery_key_components))); - } - info->columns.AddColumn(ColumnDefinition("error", LogicalType::VARCHAR)); catalog.CreateTable(context, std::move(info)); diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index e7c3edb27276..1b8c2ffe7478 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -41,22 +41,6 @@ ReadCSVData::ReadCSVData() { void ReadCSVData::FinalizeRead(ClientContext &context) { BaseCSVData::Finalize(); - if (!options.rejects_recovery_columns.empty()) { - for (auto &recovery_col : options.rejects_recovery_columns) { - bool found = false; - for (idx_t col_idx = 0; col_idx < return_names.size(); col_idx++) { - if (StringUtil::CIEquals(return_names[col_idx], recovery_col)) { - options.rejects_recovery_column_ids.push_back(col_idx); - found = true; - break; - } - } - if (!found) { - throw BinderException("Unsupported parameter for REJECTS_RECOVERY_COLUMNS: column \"%s\" not found", - recovery_col); - } - } - } } static unique_ptr ReadCSVBind(ClientContext &context, TableFunctionBindInput &input, @@ -84,11 +68,6 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio } } - if (!options.rejects_recovery_columns.empty() && options.rejects_table_name.empty()) { - throw BinderException( - "REJECTS_RECOVERY_COLUMNS option is only 
supported when REJECTS_TABLE is set to a table name"); - } - options.file_options.AutoDetectHivePartitioning(result->files, context); if (!options.auto_detect && return_types.empty()) { @@ -143,7 +122,7 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio result->return_types = return_types; result->return_names = names; - result->FinalizeRead(context); + result->Finalize(); return std::move(result); } diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp index 6aa349bf823e..ee06436ed9d6 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp @@ -45,10 +45,6 @@ struct CSVReaderOptions { string rejects_table_name; //! Rejects table entry limit (0 = no limit) idx_t rejects_limit = 0; - //! Columns to use as recovery key for rejected rows when reading with ignore_errors = true - vector rejects_recovery_columns; - //! Index of the recovery columns - vector rejects_recovery_column_ids; //! Number of samples to buffer idx_t buffer_sample_size = (idx_t)STANDARD_VECTOR_SIZE * 50; //! Specifies the string that represents a null value diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index 97fda8405fdc..fadbc480cb22 100644 --- a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -584,50 +584,42 @@ "type": "idx_t" }, {"id": 119, - "name": "rejects_recovery_columns", - "type": "vector" - }, - {"id": 120, - "name": "rejects_recovery_column_ids", - "type": "vector" - }, - {"id": 121, "name": "dialect_options.state_machine_options.delimiter", "type": "CSVOption" }, - {"id": 122, + {"id": 120, "name": "dialect_options.state_machine_options.quote", "type": "CSVOption" }, - {"id": 123, + {"id": 121, "name": "dialect_options.state_machine_options.escape", "type": "CSVOption" }, - {"id": 124, + {"id": 122, "name": "dialect_options.header", "type": "CSVOption" }, - {"id": 125, + {"id": 123, "name": "dialect_options.num_cols", "type": "idx_t" }, - {"id": 126, + {"id": 124, "name": "dialect_options.state_machine_options.new_line", "type": "CSVOption" }, - {"id": 127, + {"id": 125, "name": "dialect_options.skip_rows", "type": "CSVOption" }, - {"id": 128, + {"id": 126, "name": "dialect_options.date_format", "type": "map>" }, - {"id": 129, + {"id": 127, "name": "sniffer_user_mismatch_error", "type": "string" }, - {"id": 130, + {"id": 128, "name": "parallel", "type": "bool" } diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index 13df905c94f4..714a465f4b92 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -121,18 +121,16 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault>(116, "force_quote", force_quote); serializer.WritePropertyWithDefault(117, "rejects_table_name", rejects_table_name); serializer.WritePropertyWithDefault(118, "rejects_limit", rejects_limit); - serializer.WritePropertyWithDefault>(119, "rejects_recovery_columns", rejects_recovery_columns); - serializer.WritePropertyWithDefault>(120, "rejects_recovery_column_ids", rejects_recovery_column_ids); - serializer.WriteProperty>(121, "dialect_options.state_machine_options.delimiter", 
dialect_options.state_machine_options.delimiter); - serializer.WriteProperty>(122, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); - serializer.WriteProperty>(123, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape); - serializer.WriteProperty>(124, "dialect_options.header", dialect_options.header); - serializer.WritePropertyWithDefault(125, "dialect_options.num_cols", dialect_options.num_cols); - serializer.WriteProperty>(126, "dialect_options.state_machine_options.new_line", dialect_options.state_machine_options.new_line); - serializer.WriteProperty>(127, "dialect_options.skip_rows", dialect_options.skip_rows); - serializer.WriteProperty>>(128, "dialect_options.date_format", dialect_options.date_format); - serializer.WritePropertyWithDefault(129, "sniffer_user_mismatch_error", sniffer_user_mismatch_error); - serializer.WritePropertyWithDefault(130, "parallel", parallel); + serializer.WriteProperty>(119, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter); + serializer.WriteProperty>(120, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); + serializer.WriteProperty>(121, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape); + serializer.WriteProperty>(122, "dialect_options.header", dialect_options.header); + serializer.WritePropertyWithDefault(123, "dialect_options.num_cols", dialect_options.num_cols); + serializer.WriteProperty>(124, "dialect_options.state_machine_options.new_line", dialect_options.state_machine_options.new_line); + serializer.WriteProperty>(125, "dialect_options.skip_rows", dialect_options.skip_rows); + serializer.WriteProperty>>(126, "dialect_options.date_format", dialect_options.date_format); + serializer.WritePropertyWithDefault(127, "sniffer_user_mismatch_error", sniffer_user_mismatch_error); + serializer.WritePropertyWithDefault(128, "parallel", parallel); } CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { @@ -156,18 +154,16 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { deserializer.ReadPropertyWithDefault>(116, "force_quote", result.force_quote); deserializer.ReadPropertyWithDefault(117, "rejects_table_name", result.rejects_table_name); deserializer.ReadPropertyWithDefault(118, "rejects_limit", result.rejects_limit); - deserializer.ReadPropertyWithDefault>(119, "rejects_recovery_columns", result.rejects_recovery_columns); - deserializer.ReadPropertyWithDefault>(120, "rejects_recovery_column_ids", result.rejects_recovery_column_ids); - deserializer.ReadProperty>(121, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter); - deserializer.ReadProperty>(122, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote); - deserializer.ReadProperty>(123, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape); - deserializer.ReadProperty>(124, "dialect_options.header", result.dialect_options.header); - deserializer.ReadPropertyWithDefault(125, "dialect_options.num_cols", result.dialect_options.num_cols); - deserializer.ReadProperty>(126, "dialect_options.state_machine_options.new_line", result.dialect_options.state_machine_options.new_line); - deserializer.ReadProperty>(127, "dialect_options.skip_rows", result.dialect_options.skip_rows); - 
deserializer.ReadProperty>>(128, "dialect_options.date_format", result.dialect_options.date_format); - deserializer.ReadPropertyWithDefault(129, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error); - deserializer.ReadPropertyWithDefault(130, "parallel", result.parallel); + deserializer.ReadProperty>(119, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter); + deserializer.ReadProperty>(120, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote); + deserializer.ReadProperty>(121, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape); + deserializer.ReadProperty>(122, "dialect_options.header", result.dialect_options.header); + deserializer.ReadPropertyWithDefault(123, "dialect_options.num_cols", result.dialect_options.num_cols); + deserializer.ReadProperty>(124, "dialect_options.state_machine_options.new_line", result.dialect_options.state_machine_options.new_line); + deserializer.ReadProperty>(125, "dialect_options.skip_rows", result.dialect_options.skip_rows); + deserializer.ReadProperty>>(126, "dialect_options.date_format", result.dialect_options.date_format); + deserializer.ReadPropertyWithDefault(127, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error); + deserializer.ReadPropertyWithDefault(128, "parallel", result.parallel); return result; } From 62d8dec545bc22e4dda08fb980b83ae1563be082 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 21 Feb 2024 11:06:39 -0300 Subject: [PATCH 002/147] pesky bee --- src/function/table/read_csv.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 1b8c2ffe7478..272c5f95a6b0 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -230,7 +230,6 @@ void ReadCSVTableFunction::ReadCSVAddNamedParameters(TableFunction &table_functi table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN; table_function.named_parameters["rejects_table"] = LogicalType::VARCHAR; table_function.named_parameters["rejects_limit"] = LogicalType::BIGINT; - table_function.named_parameters["rejects_recovery_columns"] = LogicalType::LIST(LogicalType::VARCHAR); table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT; table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR; table_function.named_parameters["parallel"] = LogicalType::BOOLEAN; From e7bfcd62104696b2e40a44346c007207646e9af2 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 22 Feb 2024 10:13:07 -0300 Subject: [PATCH 003/147] wip commit --- .../scanner/string_value_scanner.cpp | 40 ++++++++++++------- .../operator/csv_scanner/util/csv_error.cpp | 9 ++--- .../operator/csv_scanner/csv_error.hpp | 8 ++-- .../csv_scanner/string_value_scanner.hpp | 2 + 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 349168d74949..c43ee62d1d6e 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -322,6 +322,24 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() { } } +//! 
Reconstructs the current line to be used in error messages +string StringValueResult::ReconstructCurrentLine(){ + LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, + buffer_size}; + idx_t current_line_size = current_line_start - previous_line_start; + string result; + result.resize(current_line_size); + if (iterator.pos.buffer_idx == previous_line_start.buffer_idx){ + idx_t result_idx = 0; + for (idx_t i = previous_line_start.buffer_pos; i < iterator.pos.buffer_pos; i ++){ + result[result_idx++] = buffer_ptr[i]; + } + } else{ + throw InternalException("Oh no"); + } + return result; +} + bool StringValueResult::AddRowInternal() { if (ignore_current_row) { // An error occurred on this row, we are ignoring it and resetting our control flag @@ -330,17 +348,6 @@ bool StringValueResult::AddRowInternal() { } if (!cast_errors.empty()) { // A wild casting error appears - // Recreate row for rejects-table - vector row; - if (!state_machine.options.rejects_table_name.empty()) { - for (idx_t col = 0; col < parse_chunk.ColumnCount(); col++) { - if (cast_errors.find(col) != cast_errors.end()) { - row.push_back(cast_errors[col]); - } else { - row.push_back(parse_chunk.data[col].GetValue(number_of_rows)); - } - } - } for (auto &cast_error : cast_errors) { std::ostringstream error; // Casting Error Message @@ -348,9 +355,9 @@ bool StringValueResult::AddRowInternal() { << LogicalTypeIdToString(parse_types[cast_error.first]) << "\'"; auto error_string = error.str(); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - 1); - + auto borked_line = ReconstructCurrentLine(); auto csv_error = CSVError::CastError(state_machine.options, names[cast_error.first], error_string, - cast_error.first, row, lines_per_batch); + cast_error.first, borked_line, lines_per_batch); error_handler.Error(csv_error); } // If we got here it means we are ignoring errors, hence we need to signify to our result scanner to ignore this @@ -615,8 +622,10 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { } LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - parse_chunk.size() + line_error); +// auto borked_line = result.ReconstructCurrentLine(); + string empty; auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], - error_message, col_idx, row, lines_per_batch); + error_message, col_idx, empty, lines_per_batch); error_handler->Error(csv_error); } borked_lines.insert(line_error++); @@ -632,8 +641,9 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { } LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - parse_chunk.size() + line_error); + string empty; auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], - error_message, col_idx, row, lines_per_batch); + error_message, col_idx, empty, lines_per_batch); error_handler->Error(csv_error); } diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index 7867421d57d1..c96893ed4cc3 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -76,9 +76,9 @@ CSVError::CSVError(string error_message_p, CSVErrorType type_p, LinesPerBoundary : error_message(std::move(error_message_p)), type(type_p), error_info(error_info_p) { } -CSVError::CSVError(string error_message_p, CSVErrorType type_p, idx_t column_idx_p, vector row_p, +CSVError::CSVError(string error_message_p, CSVErrorType 
type_p, idx_t column_idx_p, string csv_row_p, LinesPerBoundary error_info_p) - : error_message(std::move(error_message_p)), type(type_p), column_idx(column_idx_p), row(std::move(row_p)), + : error_message(std::move(error_message_p)), type(type_p), column_idx(column_idx_p), csv_row(std::move(csv_row_p)), error_info(error_info_p) { } @@ -102,8 +102,7 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_ return CSVError(exception, CSVErrorType::COLUMN_NAME_TYPE_MISMATCH, {}); } -CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, - vector &row, LinesPerBoundary error_info) { +CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, string &csv_row, LinesPerBoundary error_info) { std::ostringstream error; // Which column error << "Error when converting column \"" << column_name << "\"." << std::endl; @@ -112,7 +111,7 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_nam error << std::endl; // What were the options error << options.ToString(); - return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, row, error_info); + return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info); } CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info) { diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index 4d5eeada36eb..c40045b74bc5 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -50,13 +50,13 @@ enum CSVErrorType : uint8_t { class CSVError { public: CSVError() {}; - CSVError(string error_message, CSVErrorType type, idx_t column_idx, vector row, LinesPerBoundary error_info); + CSVError(string error_message, CSVErrorType type, idx_t column_idx, string csv_row, LinesPerBoundary error_info); CSVError(string error_message, CSVErrorType type, LinesPerBoundary error_info); //! Produces error messages for column name -> type mismatch. static CSVError ColumnTypesError(case_insensitive_map_t sql_types_per_column, const vector &names); //! Produces error messages for casting errors static CSVError CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, - idx_t column_idx, vector &row, LinesPerBoundary error_info); + idx_t column_idx, string &csv_row, LinesPerBoundary error_info); //! Produces error for when the line size exceeds the maximum line size option static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info); //! Produces error for when the sniffer couldn't find viable options @@ -80,8 +80,8 @@ class CSVError { CSVErrorType type; //! Column Index where error happened idx_t column_idx; - //! Values from the row where error happened - vector row; + //! Original CSV row where error happened + string csv_row; //! 
Line information regarding this error LinesPerBoundary error_info; }; diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 2b1475f92861..4750da6b65db 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -117,6 +117,8 @@ class StringValueResult : public ScannerResult { //! Handles EmptyLine states static inline bool EmptyLine(StringValueResult &result, const idx_t buffer_pos); inline bool AddRowInternal(); + //! Reconstructs the current line to be used in error messages + string ReconstructCurrentLine(); void HandleOverLimitRows(); From bf320b4c87b52955b566a3864d0a3c5b84a94c37 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Fri, 23 Feb 2024 17:45:08 -0300 Subject: [PATCH 004/147] wip --- .../operator/csv_scanner/table_function/global_csv_state.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 2f8e92e3718f..4bd982f4571f 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -168,10 +168,15 @@ void CSVGlobalState::FillRejectsTable() { auto col_name = bind_data.return_names[col_idx]; // Add the row to the rejects table appender.BeginRow(); + // 1. File Name appender.Append(string_t(file_name)); + // 2. Row Line appender.Append(row_line); + // 3. Column Index appender.Append(col_idx); + // 4. Column Name appender.Append(string_t("\"" + col_name + "\"")); + // 5 Parsed Value appender.Append(error.row[col_idx]); auto row_error_msg = From ba93182badfe9c0a608d175308898803cef090cf Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 28 Feb 2024 09:16:54 -0300 Subject: [PATCH 005/147] Enum for CSV Errors, cleaning up output --- .../scanner/string_value_scanner.cpp | 13 ++++----- .../table_function/global_csv_state.cpp | 18 +++++------- .../operator/csv_scanner/util/csv_error.cpp | 3 +- .../operator/persistent/csv_rejects_table.cpp | 29 +++++++++++++++++-- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 3569bfab62ec..d5a295190706 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -323,18 +323,17 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() { } //! 
Reconstructs the current line to be used in error messages -string StringValueResult::ReconstructCurrentLine(){ - LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, - buffer_size}; +string StringValueResult::ReconstructCurrentLine() { + LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_size}; idx_t current_line_size = current_line_start - previous_line_start; string result; result.resize(current_line_size); - if (iterator.pos.buffer_idx == previous_line_start.buffer_idx){ + if (iterator.pos.buffer_idx == previous_line_start.buffer_idx) { idx_t result_idx = 0; - for (idx_t i = previous_line_start.buffer_pos; i < iterator.pos.buffer_pos; i ++){ + for (idx_t i = previous_line_start.buffer_pos; i < iterator.pos.buffer_pos; i++) { result[result_idx++] = buffer_ptr[i]; } - } else{ + } else { throw InternalException("Oh no"); } return result; @@ -622,7 +621,7 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { } LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - parse_chunk.size() + line_error); -// auto borked_line = result.ReconstructCurrentLine(); + // auto borked_line = result.ReconstructCurrentLine(); string empty; auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, empty, lines_per_batch); diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index f8b8000e9843..7e3a24d6f77c 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -154,7 +154,7 @@ void CSVGlobalState::FillRejectsTable() { for (auto &error_vector : errors) { for (auto &error : error_vector.second) { if (error.type != CSVErrorType::CAST_ERROR) { - // For now we only will use it for casting errors + // For now, we only will use it for casting errors continue; } // short circuit if we already have too many rejects @@ -168,21 +168,17 @@ void CSVGlobalState::FillRejectsTable() { auto col_name = bind_data.return_names[col_idx]; // Add the row to the rejects table appender.BeginRow(); - // 1. File Name + // 1. File Path appender.Append(string_t(file_name)); // 2. Row Line appender.Append(row_line); - // 3. Column Index + // 3. Column Index (If Applicable) appender.Append(col_idx); - // 4. Column Name + // 4. Column Name (If Applicable) appender.Append(string_t("\"" + col_name + "\"")); - // 5 Parsed Value - appender.Append(error.row[col_idx]); - - auto row_error_msg = - StringUtil::Format("Could not convert string '%s' to '%s'", error.row[col_idx].ToString(), - file->types[col_idx].ToString()); - appender.Append(string_t(row_error_msg)); + // 5. Error Type (ENUM?) + // 6. Full Error Message + // 7. 
Original CSV Line appender.EndRow(); } appender.Close(); diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index c96893ed4cc3..4bd08a7d7a8d 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -102,7 +102,8 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_ return CSVError(exception, CSVErrorType::COLUMN_NAME_TYPE_MISMATCH, {}); } -CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, string &csv_row, LinesPerBoundary error_info) { +CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, + string &csv_row, LinesPerBoundary error_info) { std::ostringstream error; // Which column error << "Error when converting column \"" << column_name << "\"." << std::endl; diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 7d01723a7718..2e64d637e0ec 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -3,6 +3,7 @@ #include "duckdb/function/table/read_csv.hpp" #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp" #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" +#include "duckdb/parser/parsed_data/create_type_info.hpp" namespace duckdb { @@ -21,15 +22,39 @@ shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) { // (Re)Create the temporary rejects table auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG); + + // Create CSV_ERROR_TYPE ENUM + string enum_name = "CSV_ERROR_TYPE"; + Vector order_errors(LogicalType::VARCHAR, 5); + order_errors.SetValue(0, "CAST"); + order_errors.SetValue(0, "MISSING COLUMNS"); + order_errors.SetValue(0, "TOO MANY COLUMNS"); + order_errors.SetValue(0, "UNQUOTED VALUE"); + order_errors.SetValue(0, "LINE SIZE OVER MAXIMUM"); + LogicalType enum_type = LogicalType::ENUM(enum_name, order_errors, 5); + auto type_info = make_uniq(enum_name, enum_type); + type_info->temporary = true; + type_info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; + catalog.CreateType(context, *type_info); + + // Create Rejects Table auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, name); info->temporary = true; info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; + // 1. File Path info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR)); + // 2. Row Line info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT)); + // 3. Column Index (If Applicable) info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT)); + // 4. Column Name (If Applicable) info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR)); - info->columns.AddColumn(ColumnDefinition("parsed_value", LogicalType::VARCHAR)); - info->columns.AddColumn(ColumnDefinition("error", LogicalType::VARCHAR)); + // 5. Error Type + info->columns.AddColumn(ColumnDefinition("error_type", enum_type)); + // 6. Full Error Message + info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR)); + // 7. 
Original CSV Line + info->columns.AddColumn(ColumnDefinition("csv_line", LogicalType::VARCHAR)); catalog.CreateTable(context, std::move(info)); From 551865f39d953c3de33e6ea0781940f5896d7e08 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 28 Feb 2024 10:11:18 -0300 Subject: [PATCH 006/147] Several tweaks for the tables --- .../scanner/string_value_scanner.cpp | 9 ++++----- .../table_function/global_csv_state.cpp | 8 +++++--- .../operator/persistent/csv_rejects_table.cpp | 18 ++++++++---------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index d5a295190706..1002d9e6b164 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -324,13 +324,12 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() { //! Reconstructs the current line to be used in error messages string StringValueResult::ReconstructCurrentLine() { - LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_size}; - idx_t current_line_size = current_line_start - previous_line_start; + idx_t current_line_size = previous_line_start - pre_previous_line_start; string result; - result.resize(current_line_size); - if (iterator.pos.buffer_idx == previous_line_start.buffer_idx) { + result.resize(current_line_size - 1); + if (previous_line_start.buffer_idx == pre_previous_line_start.buffer_idx) { idx_t result_idx = 0; - for (idx_t i = previous_line_start.buffer_pos; i < iterator.pos.buffer_pos; i++) { + for (idx_t i = pre_previous_line_start.buffer_pos + 1; i < previous_line_start.buffer_pos; i++) { result[result_idx++] = buffer_ptr[i]; } } else { diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 7e3a24d6f77c..f719a83521fd 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -147,7 +147,6 @@ void CSVGlobalState::FillRejectsTable() { lock_guard lock(rejects->write_lock); auto &table = rejects->GetTable(context); InternalAppender appender(context, table); - for (auto &file : file_scans) { auto file_name = file->file_path; auto &errors = file->error_handler->errors; @@ -177,8 +176,11 @@ void CSVGlobalState::FillRejectsTable() { // 4. Column Name (If Applicable) appender.Append(string_t("\"" + col_name + "\"")); // 5. Error Type (ENUM?) - // 6. Full Error Message - // 7. Original CSV Line + appender.Append(string_t("CAST")); + // 6. Original CSV Line + appender.Append(string_t(error.csv_row)); + // 7. 
Full Error Message + appender.Append(string_t(error.error_message)); appender.EndRow(); } appender.Close(); diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 2e64d637e0ec..d1c9f13169aa 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -27,10 +27,10 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData string enum_name = "CSV_ERROR_TYPE"; Vector order_errors(LogicalType::VARCHAR, 5); order_errors.SetValue(0, "CAST"); - order_errors.SetValue(0, "MISSING COLUMNS"); - order_errors.SetValue(0, "TOO MANY COLUMNS"); - order_errors.SetValue(0, "UNQUOTED VALUE"); - order_errors.SetValue(0, "LINE SIZE OVER MAXIMUM"); + order_errors.SetValue(1, "MISSING COLUMNS"); + order_errors.SetValue(2, "TOO MANY COLUMNS"); + order_errors.SetValue(3, "UNQUOTED VALUE"); + order_errors.SetValue(4, "LINE SIZE OVER MAXIMUM"); LogicalType enum_type = LogicalType::ENUM(enum_name, order_errors, 5); auto type_info = make_uniq(enum_name, enum_type); type_info->temporary = true; @@ -46,18 +46,16 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData // 2. Row Line info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT)); // 3. Column Index (If Applicable) - info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT)); + info->columns.AddColumn(ColumnDefinition("column_idx", LogicalType::BIGINT)); // 4. Column Name (If Applicable) info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR)); // 5. Error Type info->columns.AddColumn(ColumnDefinition("error_type", enum_type)); - // 6. Full Error Message - info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR)); - // 7. Original CSV Line + // 6. Original CSV Line info->columns.AddColumn(ColumnDefinition("csv_line", LogicalType::VARCHAR)); - + // 7. 
Full Error Message + info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR)); catalog.CreateTable(context, std::move(info)); - count = 0; } From 5eae50f43d7824b66785b0778d419bff06bcce65 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 28 Feb 2024 12:30:17 -0300 Subject: [PATCH 007/147] We can also store the global byte where an error shows up --- .../csv_scanner/buffer_manager/csv_buffer.cpp | 10 +++++----- .../csv_scanner/scanner/string_value_scanner.cpp | 9 +++++---- .../table_function/global_csv_state.cpp | 12 +++++++----- .../operator/csv_scanner/util/csv_error.cpp | 8 ++++---- .../operator/persistent/csv_rejects_table.cpp | 16 +++++++++------- .../operator/csv_scanner/csv_buffer.hpp | 12 +++++++----- .../execution/operator/csv_scanner/csv_error.hpp | 7 +++++-- .../csv_scanner/string_value_scanner.hpp | 3 +++ 8 files changed, 45 insertions(+), 32 deletions(-) diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index 8c29ae79fb43..e5a53bdeb1f3 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -5,7 +5,7 @@ namespace duckdb { CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, idx_t &global_csv_current_position, idx_t file_number_p) - : context(context), file_number(file_number_p), can_seek(file_handle.CanSeek()) { + : context(context), requested_size(buffer_size_p), file_number(file_number_p), can_seek(file_handle.CanSeek()) { AllocateBuffer(buffer_size_p); auto buffer = Ptr(); actual_buffer_size = file_handle.Read(buffer, buffer_size_p); @@ -19,8 +19,8 @@ CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, idx_t global_csv_current_position, idx_t file_number_p, idx_t buffer_idx_p) - : context(context), global_csv_start(global_csv_current_position), file_number(file_number_p), - can_seek(file_handle.CanSeek()), buffer_idx(buffer_idx_p) { + : context(context), requested_size(buffer_size), global_csv_start(global_csv_current_position), + file_number(file_number_p), can_seek(file_handle.CanSeek()), buffer_idx(buffer_idx_p) { AllocateBuffer(buffer_size); auto buffer = handle.Ptr(); actual_buffer_size = file_handle.Read(handle.Ptr(), buffer_size); @@ -73,8 +73,8 @@ shared_ptr CSVBuffer::Pin(CSVFileHandle &file_handle, bool &has Reload(file_handle); has_seeked = true; } - return make_shared(buffer_manager.Pin(block), actual_buffer_size, last_buffer, file_number, - buffer_idx); + return make_shared(buffer_manager.Pin(block), actual_buffer_size, requested_size, last_buffer, + file_number, buffer_idx); } void CSVBuffer::Unpin() { diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 1002d9e6b164..10bd3ce3029d 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -354,8 +354,9 @@ bool StringValueResult::AddRowInternal() { auto error_string = error.str(); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - 1); auto borked_line = ReconstructCurrentLine(); - auto csv_error = CSVError::CastError(state_machine.options, names[cast_error.first], error_string, - cast_error.first, borked_line, 
lines_per_batch); + auto csv_error = CSVError::CastError( + state_machine.options, names[cast_error.first], error_string, cast_error.first, borked_line, + lines_per_batch, pre_previous_line_start.GetGlobalPosition(buffer_handles.front()->requested_size)); error_handler.Error(csv_error); } // If we got here it means we are ignoring errors, hence we need to signify to our result scanner to ignore this @@ -623,7 +624,7 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { // auto borked_line = result.ReconstructCurrentLine(); string empty; auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], - error_message, col_idx, empty, lines_per_batch); + error_message, col_idx, empty, lines_per_batch, 0); error_handler->Error(csv_error); } borked_lines.insert(line_error++); @@ -641,7 +642,7 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { lines_read - parse_chunk.size() + line_error); string empty; auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], - error_message, col_idx, empty, lines_per_batch); + error_message, col_idx, empty, lines_per_batch, 0); error_handler->Error(csv_error); } diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index f719a83521fd..77982d83fab2 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -171,15 +171,17 @@ void CSVGlobalState::FillRejectsTable() { appender.Append(string_t(file_name)); // 2. Row Line appender.Append(row_line); - // 3. Column Index (If Applicable) + // 3. Byte Position where error occurred + appender.Append(error.byte_position); + // 4. Column Index (If Applicable) appender.Append(col_idx); - // 4. Column Name (If Applicable) + // 5. Column Name (If Applicable) appender.Append(string_t("\"" + col_name + "\"")); - // 5. Error Type (ENUM?) + // 6. Error Type (ENUM?) appender.Append(string_t("CAST")); - // 6. Original CSV Line + // 7. Original CSV Line appender.Append(string_t(error.csv_row)); - // 7. Full Error Message + // 8. 
Full Error Message appender.Append(string_t(error.error_message)); appender.EndRow(); } diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index 4bd08a7d7a8d..b77437c88f6b 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -77,9 +77,9 @@ CSVError::CSVError(string error_message_p, CSVErrorType type_p, LinesPerBoundary } CSVError::CSVError(string error_message_p, CSVErrorType type_p, idx_t column_idx_p, string csv_row_p, - LinesPerBoundary error_info_p) + LinesPerBoundary error_info_p, idx_t byte_position_p) : error_message(std::move(error_message_p)), type(type_p), column_idx(column_idx_p), csv_row(std::move(csv_row_p)), - error_info(error_info_p) { + error_info(error_info_p), byte_position(byte_position_p) { } CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_column, const vector &names) { @@ -103,7 +103,7 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_ } CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, - string &csv_row, LinesPerBoundary error_info) { + string &csv_row, LinesPerBoundary error_info, idx_t byte_position) { std::ostringstream error; // Which column error << "Error when converting column \"" << column_name << "\"." << std::endl; @@ -112,7 +112,7 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_nam error << std::endl; // What were the options error << options.ToString(); - return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info); + return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, byte_position); } CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info) { diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index d1c9f13169aa..3f2acf553f21 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -44,16 +44,18 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData // 1. File Path info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR)); // 2. Row Line - info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT)); - // 3. Column Index (If Applicable) - info->columns.AddColumn(ColumnDefinition("column_idx", LogicalType::BIGINT)); - // 4. Column Name (If Applicable) + info->columns.AddColumn(ColumnDefinition("line", LogicalType::UBIGINT)); + // 3. Byte Position where error occurred + info->columns.AddColumn(ColumnDefinition("byte_position", LogicalType::UBIGINT)); + // 4. Column Index (If Applicable) + info->columns.AddColumn(ColumnDefinition("column_idx", LogicalType::UBIGINT)); + // 5. Column Name (If Applicable) info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR)); - // 5. Error Type + // 6. Error Type info->columns.AddColumn(ColumnDefinition("error_type", enum_type)); - // 6. Original CSV Line + // 7. Original CSV Line info->columns.AddColumn(ColumnDefinition("csv_line", LogicalType::VARCHAR)); - // 7. Full Error Message + // 8. 
Full Error Message info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR)); catalog.CreateTable(context, std::move(info)); count = 0; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp index 72665ae2de54..e71b75e19553 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp @@ -18,16 +18,17 @@ namespace duckdb { class CSVBufferHandle { public: - CSVBufferHandle(BufferHandle handle_p, idx_t actual_size_p, const bool is_final_buffer_p, idx_t file_idx_p, - idx_t buffer_index_p) - : handle(std::move(handle_p)), actual_size(actual_size_p), is_last_buffer(is_final_buffer_p), - file_idx(file_idx_p), buffer_idx(buffer_index_p) {}; - CSVBufferHandle() : actual_size(0), is_last_buffer(false), file_idx(0), buffer_idx(0) {}; + CSVBufferHandle(BufferHandle handle_p, idx_t actual_size_p, idx_t requested_size_p, const bool is_final_buffer_p, + idx_t file_idx_p, idx_t buffer_index_p) + : handle(std::move(handle_p)), actual_size(actual_size_p), requested_size(requested_size_p), + is_last_buffer(is_final_buffer_p), file_idx(file_idx_p), buffer_idx(buffer_index_p) {}; + CSVBufferHandle() : actual_size(0), requested_size(0), is_last_buffer(false), file_idx(0), buffer_idx(0) {}; ~CSVBufferHandle() { } //! Handle created during allocation BufferHandle handle; const idx_t actual_size; + const idx_t requested_size; const bool is_last_buffer; const idx_t file_idx; const idx_t buffer_idx; @@ -86,6 +87,7 @@ class CSVBuffer { ClientContext &context; //! Actual size can be smaller than the buffer size in case we allocate it too optimistically. idx_t actual_buffer_size; + idx_t requested_size; //! Global position from the CSV File where this buffer starts idx_t global_csv_start = 0; //! Number of the file that is in this buffer diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index c40045b74bc5..44bd4f25913a 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -50,13 +50,14 @@ enum CSVErrorType : uint8_t { class CSVError { public: CSVError() {}; - CSVError(string error_message, CSVErrorType type, idx_t column_idx, string csv_row, LinesPerBoundary error_info); + CSVError(string error_message, CSVErrorType type, idx_t column_idx, string csv_row, LinesPerBoundary error_info, + idx_t byte_position); CSVError(string error_message, CSVErrorType type, LinesPerBoundary error_info); //! Produces error messages for column name -> type mismatch. static CSVError ColumnTypesError(case_insensitive_map_t sql_types_per_column, const vector &names); //! Produces error messages for casting errors static CSVError CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, - idx_t column_idx, string &csv_row, LinesPerBoundary error_info); + idx_t column_idx, string &csv_row, LinesPerBoundary error_info, idx_t byte_position); //! Produces error for when the line size exceeds the maximum line size option static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info); //! Produces error for when the sniffer couldn't find viable options @@ -84,6 +85,8 @@ class CSVError { string csv_row; //! Line information regarding this error LinesPerBoundary error_info; + //! 
Global Byte Position where error occurred. + idx_t byte_position; }; class CSVErrorHandler { diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 4750da6b65db..8087888393a4 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -42,6 +42,9 @@ class LinePosition { } return other.buffer_size - other.buffer_pos + buffer_pos; } + idx_t GetGlobalPosition(idx_t requested_buffer_size) { + return requested_buffer_size * buffer_idx + buffer_pos + 1; + } idx_t buffer_pos = 0; idx_t buffer_size = 0; idx_t buffer_idx = 0; From 5951413e0cd88990714a409854717755e6fcc158 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 28 Feb 2024 13:51:26 -0300 Subject: [PATCH 008/147] Fixing up old tests and fixing small bugs --- .../scanner/string_value_scanner.cpp | 33 +-- .../csv_scanner/string_value_scanner.hpp | 3 + .../copy/csv/rejects/csv_rejects_read.test | 232 +++++++++++------- .../csv/rejects/test_invalid_parameters.test | 56 +++++ 4 files changed, 222 insertions(+), 102 deletions(-) create mode 100644 test/sql/copy/csv/rejects/test_invalid_parameters.test diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 10bd3ce3029d..daa56dd209e0 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -26,6 +26,7 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m buffer_ptr = buffer_handle->Ptr(); buffer_size = buffer_handle->actual_size; last_position = buffer_position; + requested_size = buffer_handle->requested_size; // Current Result information previous_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_handle->actual_size}; @@ -354,9 +355,9 @@ bool StringValueResult::AddRowInternal() { auto error_string = error.str(); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - 1); auto borked_line = ReconstructCurrentLine(); - auto csv_error = CSVError::CastError( - state_machine.options, names[cast_error.first], error_string, cast_error.first, borked_line, - lines_per_batch, pre_previous_line_start.GetGlobalPosition(buffer_handles.front()->requested_size)); + auto csv_error = CSVError::CastError(state_machine.options, names[cast_error.first], error_string, + cast_error.first, borked_line, lines_per_batch, + pre_previous_line_start.GetGlobalPosition(requested_size)); error_handler.Error(csv_error); } // If we got here it means we are ignoring errors, hence we need to signify to our result scanner to ignore this @@ -414,20 +415,20 @@ bool StringValueResult::AddRowInternal() { } bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos) { + LinePosition current_line_start = {result.iterator.pos.buffer_idx, result.iterator.pos.buffer_pos, + result.buffer_size}; + idx_t current_line_size = current_line_start - result.previous_line_start; + if (result.store_line_size) { + result.error_handler.NewMaxLineSize(current_line_size); + } + if (current_line_size > result.state_machine.options.maximum_line_size) { + LinesPerBoundary lines_per_batch(result.iterator.GetBoundaryIdx(), result.number_of_rows); + auto csv_error = CSVError::LineSizeError(result.state_machine.options, current_line_size, 
lines_per_batch); + result.error_handler.Error(csv_error); + } + result.pre_previous_line_start = result.previous_line_start; + result.previous_line_start = current_line_start; if (result.last_position <= buffer_pos) { - LinePosition current_line_start = {result.iterator.pos.buffer_idx, result.iterator.pos.buffer_pos, - result.buffer_size}; - idx_t current_line_size = current_line_start - result.previous_line_start; - if (result.store_line_size) { - result.error_handler.NewMaxLineSize(current_line_size); - } - if (current_line_size > result.state_machine.options.maximum_line_size) { - LinesPerBoundary lines_per_batch(result.iterator.GetBoundaryIdx(), result.number_of_rows); - auto csv_error = CSVError::LineSizeError(result.state_machine.options, current_line_size, lines_per_batch); - result.error_handler.Error(csv_error); - } - result.pre_previous_line_start = result.previous_line_start; - result.previous_line_start = current_line_start; // We add the value if (result.quoted) { StringValueResult::AddQuotedValue(result, buffer_pos); diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 8087888393a4..00e0bb80b2c2 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -104,6 +104,9 @@ class StringValueResult : public ScannerResult { //! We must ensure that we keep the buffers alive until processing the query result vector> buffer_handles; + //! Requested size of buffers (i.e., either 32Mb or set by buffer_size parameter) + idx_t requested_size; + //! If the current row has an error, we have to skip it bool ignore_current_row = false; //! 
Specialized code for quoted values, makes sure to remove quotes and escapes diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test index 16b73ae7ac65..5713e91760a0 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_read.test +++ b/test/sql/copy/csv/rejects/csv_rejects_read.test @@ -6,54 +6,6 @@ require skip_reload # FIXME: https://github.com/duckdb/duckdb/issues/7755 require vector_size 2048 -# Test invalid arguments -statement error -SELECT * FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/bad.csv', - columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, - ignore_errors=false, - rejects_table='csv_rejects_table' -) ----- -only supported when IGNORE_ERRORS is set to true - -statement error -SELECT * FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/bad.csv', - columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, - ignore_errors=true, - rejects_table='') ----- -REJECTS_TABLE option cannot be empty - -statement error -SELECT * FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/bad.csv', - columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, - ignore_errors=true, - rejects_table='csv_rejects_table', - union_by_name=true) ----- -UNION_BY_NAME is set to true - -statement error -SELECT * FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/bad.csv', - columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, - ignore_errors=true, - rejects_limit=10) ----- -REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name - -statement error -SELECT * FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/bad.csv', - columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, - ignore_errors=true, - rejects_table='csv_rejects_table', - rejects_limit=-1) ----- -REJECTS_LIMIT: cannot be negative # Basic test query III rowsort @@ -66,11 +18,17 @@ SELECT * FROM read_csv( 1 2 AAA 6 7 CCC -query IIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g') +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -2 1 "col1" BBB Could not convert string 'BBB' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/bad.csv +test/sql/copy/csv/data/error/mismatch/bad.csv 2 1 "col1" CAST 4,BBB,9, 9 + +query I +SELECT error_message +FROM csv_rejects_table; +---- +:.*Could not convert string "BBB" to 'INTEGER'.* statement ok DROP TABLE csv_rejects_table; @@ -85,13 +43,31 @@ SELECT * FROM read_csv( ---- 4 5 9 -query IIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g') +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -1 2 "col2" DDD Could not convert string 'DDD' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/bad2.csv -3 0 "col0" EEE Could not convert string 'EEE' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/bad2.csv -3 2 "col2" FFF Could not convert string 'FFF' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/bad2.csv +test/sql/copy/csv/data/error/mismatch/bad2.csv 1 2 "col2" CAST ,2,DDD, 1 +test/sql/copy/csv/data/error/mismatch/bad2.csv 3 0 "col0" CAST EEE,7,FFF, 16 +test/sql/copy/csv/data/error/mismatch/bad2.csv 3 2 "col2" CAST EEE,7,FFF, 16 + +query I +SELECT error_message +FROM csv_rejects_table 
where line=1 and column_idx=2;
+----
+<REGEX>:.*Could not convert string "DDD" to 'INTEGER'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=3 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "EEE" to 'INTEGER'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=3 and column_idx=2;
+----
+<REGEX>:.*Could not convert string "FFF" to 'INTEGER'.*
 
 statement ok
 DROP TABLE csv_rejects_table;
@@ -110,12 +86,24 @@ SELECT * FROM read_csv(
 6 7 CCC
 
-query IIIIII rowsort
-SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g')
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
 FROM csv_rejects_table;
 ----
-2 1 "col1" BBB Could not convert string 'BBB' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/bad.csv
-3 0 "col0" EEE Could not convert string 'EEE' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/bad2.csv
+test/sql/copy/csv/data/error/mismatch/bad.csv 2 1 "col1" CAST 4,BBB,9, 9
+test/sql/copy/csv/data/error/mismatch/bad2.csv 3 0 "col0" CAST EEE,7,FFF, 16
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=2 and column_idx=1;
+----
+<REGEX>:.*Could not convert string "BBB" to 'INTEGER'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=3 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "EEE" to 'INTEGER'.*
 
 statement ok
 DROP TABLE csv_rejects_table;
@@ -154,12 +142,24 @@ SELECT SUM(num) FROM read_csv(
 ----
 4270
 
-query IIIIII rowsort
-SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g')
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
 FROM csv_rejects_table;
 ----
-2176 0 "num" B Could not convert string 'B' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad.csv
-4176 0 "num" C Could not convert string 'C' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad.csv
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "num" CAST B, A 10875
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "num" CAST C, A 20875
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=2176 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "B" to 'INTEGER'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=4176 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "C" to 'INTEGER'.*
 
 statement ok
 DROP TABLE csv_rejects_table;
@@ -173,12 +173,24 @@ SELECT SUM(num) FROM read_csv(
 ----
 6774
 
-query IIIIII rowsort
-SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g')
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
 FROM csv_rejects_table;
 ----
-3680 0 "num" B Could not convert string 'B' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad2.csv
-5680 0 "num" C Could not convert string 'C' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad2.csv
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "num" CAST B, A 18395
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "num" CAST C, A 28395
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=3680 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "B" to 'INTEGER'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=5680 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "C" to 'INTEGER'.*
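
# A free-standing sketch of the reworked rejects flow exercised above
# (illustrative editor's example, not part of this patch): read one of the
# same data files with ignore_errors and a rejects_table, then inspect every
# column the new schema records. The all-INTEGER column layout is assumed
# from the tests above.
#
#   SELECT * FROM read_csv(
#       'test/sql/copy/csv/data/error/mismatch/bad2.csv',
#       columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'INTEGER'},
#       ignore_errors = true,
#       rejects_table = 'csv_rejects_table');
#
#   SELECT file, line, column_idx, column_name, error_type,
#          csv_line, byte_position, error_message
#   FROM csv_rejects_table
#   ORDER BY line, column_idx;
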
statement ok DROP TABLE csv_rejects_table; @@ -193,14 +205,38 @@ SELECT SUM(num) FROM read_csv( ---- 11044 -query IIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g') +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -2176 0 "num" B Could not convert string 'B' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad.csv -3680 0 "num" B Could not convert string 'B' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad2.csv -4176 0 "num" C Could not convert string 'C' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad.csv -5680 0 "num" C Could not convert string 'C' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/big_bad2.csv +test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "num" CAST B, A 10875 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "num" CAST C, A 20875 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "num" CAST B, A 18395 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "num" CAST C, A 28395 + +query I +SELECT error_message +FROM csv_rejects_table where line=3680 and column_idx=0; +---- +:.*Could not convert string "B" to 'INTEGER'.* + +query I +SELECT error_message +FROM csv_rejects_table where line=5680 and column_idx=0; +---- +:.*Could not convert string "C" to 'INTEGER'.* + +query I +SELECT error_message +FROM csv_rejects_table where line=2176 and column_idx=0; +---- +:.*Could not convert string "B" to 'INTEGER'.* + +query I +SELECT error_message +FROM csv_rejects_table where line=4176 and column_idx=0; +---- +:.*Could not convert string "C" to 'INTEGER'.* statement ok DROP TABLE csv_rejects_table; @@ -223,19 +259,43 @@ ON L.num = R.num; 1 A 1 A 3 C 3 C -query IIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g') +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table_left; ---- -3 0 "num" X Could not convert string 'X' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/small1.csv -6 0 "num" X Could not convert string 'X' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/small1.csv +test/sql/copy/csv/data/error/mismatch/small1.csv 3 0 "num" CAST X,Y 14 +test/sql/copy/csv/data/error/mismatch/small1.csv 6 0 "num" CAST X,Y 26 -query IIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g') +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table_right; ---- -3 0 "num" X Could not convert string 'X' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/small2.csv -5 0 "num" X Could not convert string 'X' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/small2.csv +test/sql/copy/csv/data/error/mismatch/small2.csv 3 0 "num" CAST X,Y 14 +test/sql/copy/csv/data/error/mismatch/small2.csv 5 0 "num" CAST X,Y 22 + +query I +SELECT error_message +FROM csv_rejects_table_left where line=3 and column_idx=0; +---- +:.*Could not convert string "X" to 'INTEGER'.* + +query I +SELECT error_message +FROM csv_rejects_table_left where line=6 and column_idx=0; +---- +:.*Could not convert string "X" to 'INTEGER'.* + +query I +SELECT error_message +FROM csv_rejects_table_right where line=3 and column_idx=0; +---- +:.*Could not convert 
string "X" to 'INTEGER'.* + +query I +SELECT error_message +FROM csv_rejects_table_right where line=5 and column_idx=0; +---- +:.*Could not convert string "X" to 'INTEGER'.* statement ok DROP TABLE csv_rejects_table_left; @@ -264,12 +324,12 @@ ON L.num = R.num; 3 C 3 C -query IIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "error", regexp_replace("file", '\\', '/', 'g') +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table_left; ---- -3 0 "num" X Could not convert string 'X' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/small1.csv -6 0 "num" X Could not convert string 'X' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/small1.csv +test/sql/copy/csv/data/error/mismatch/small1.csv 3 0 "num" CAST X,Y 14 +test/sql/copy/csv/data/error/mismatch/small1.csv 6 0 "num" CAST X,Y 26 query I SELECT COUNT(*) diff --git a/test/sql/copy/csv/rejects/test_invalid_parameters.test b/test/sql/copy/csv/rejects/test_invalid_parameters.test new file mode 100644 index 000000000000..d403d0274948 --- /dev/null +++ b/test/sql/copy/csv/rejects/test_invalid_parameters.test @@ -0,0 +1,56 @@ +# name: test/sql/copy/csv/rejects/test_invalid_parameters.test +# group: [rejects] + +require skip_reload + +# FIXME: https://github.com/duckdb/duckdb/issues/7755 +require vector_size 2048 + +# Test invalid arguments +statement error +SELECT * FROM read_csv( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, + ignore_errors=false, + rejects_table='csv_rejects_table' +) +---- +only supported when IGNORE_ERRORS is set to true + +statement error +SELECT * FROM read_csv( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, + ignore_errors=true, + rejects_table='') +---- +REJECTS_TABLE option cannot be empty + +statement error +SELECT * FROM read_csv( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, + ignore_errors=true, + rejects_table='csv_rejects_table', + union_by_name=true) +---- +UNION_BY_NAME is set to true + +statement error +SELECT * FROM read_csv( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, + ignore_errors=true, + rejects_limit=10) +---- +REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name + +statement error +SELECT * FROM read_csv( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, + ignore_errors=true, + rejects_table='csv_rejects_table', + rejects_limit=-1) +---- +REJECTS_LIMIT: cannot be negative From a8f2dcd01d18af6b0680fd90c81b7d3b427a79ba Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 28 Feb 2024 14:15:41 -0300 Subject: [PATCH 009/147] remove old parameter, cleanup of older tests --- .../copy/csv/rejects/csv_rejects_auto.test | 146 +++--------------- .../copy/csv/rejects/csv_rejects_read.test | 1 - .../csv/rejects/csv_rejects_recovery.test | 97 ------------ .../csv/rejects/test_invalid_parameters.test | 57 +++++++ 4 files changed, 78 insertions(+), 223 deletions(-) delete mode 100644 test/sql/copy/csv/rejects/csv_rejects_recovery.test diff --git a/test/sql/copy/csv/rejects/csv_rejects_auto.test b/test/sql/copy/csv/rejects/csv_rejects_auto.test index 5dc4358708de..887bd282db73 100644 --- 
a/test/sql/copy/csv/rejects/csv_rejects_auto.test
+++ b/test/sql/copy/csv/rejects/csv_rejects_auto.test
@@ -6,63 +6,6 @@ require skip_reload
 # FIXME: https://github.com/duckdb/duckdb/issues/7755
 require vector_size 2048
 
-# Test invalid arguments
-statement error
-SELECT * FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/bad.csv',
-    ignore_errors=false,
-    rejects_table='csv_rejects_table'
-)
----
-only supported when IGNORE_ERRORS is set to true
-
-statement error
-SELECT * FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/bad.csv',
-    ignore_errors=true,
-    rejects_table='')
----
-REJECTS_TABLE option cannot be empty
-
-statement error
-SELECT * FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/bad.csv',
-    ignore_errors=true,
-    rejects_table='csv_rejects_table',
-    union_by_name=true)
----
-UNION_BY_NAME is set to true
-
-statement error
-SELECT * FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/bad.csv',
-    ignore_errors=true,
-    rejects_limit=10)
----
-REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name
-
-statement error
-SELECT * FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/bad.csv',
-    ignore_errors=true,
-    rejects_table='csv_rejects_table',
-    rejects_limit=-1)
----
-REJECTS_LIMIT: cannot be negative
-
-
-query III
-SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*) FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
-    sample_size=3000,
-    rejects_table='csv_rejects_table',
-    ignore_errors=true, header = 0);
----
-VARCHAR VARCHAR 11048
-
-statement ok
-DROP TABLE csv_rejects_table;
-
 # Ensure that we can get the schema if we reduce the sample size and ignore errors
 query IIIII
 SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
@@ -73,85 +16,38 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M
 ----
 BIGINT VARCHAR 11044 11044 2
 
-query IIIIII rowsort
-SELECT regexp_replace("file", '\\', '/', 'g') , "line", "column", "column_name", "parsed_value", "error"
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
 FROM csv_rejects_table;
 ----
-test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "column0" B Could not convert string 'B' to 'BIGINT'
-test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "column0" C Could not convert string 'C' to 'BIGINT'
-test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "column0" B Could not convert string 'B' to 'BIGINT'
-test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "column0" C Could not convert string 'C' to 'BIGINT'
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "column0" CAST B, A 10875
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "column0" CAST C, A 20875
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "column0" CAST B, A 18395
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "column0" CAST C, A 28395
 
-statement ok
-DROP TABLE csv_rejects_table;
-
-# Test with recovery columns
 query I
-SELECT SUM(COL1) FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/part1.csv',
-    header=true,
-    ignore_errors=true,
-    sample_size=1,
-    rejects_table='csv_rejects_table',
-    rejects_recovery_columns=['COL2']);
+SELECT error_message
+FROM csv_rejects_table where line=2176 and column_idx=0;
 ----
-5230
-
-statement ok
-DROP TABLE csv_rejects_table;
+<REGEX>:.*Could not convert string "B" to 'BIGINT'.*
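
# A hedged sketch (illustrative editor's example, not part of this patch): the
# recovery-columns tests removed below relied on rejects_recovery_columns; with
# the raw line now stored in csv_line, an equivalent recovery key can be pulled
# from the rejects table itself. This assumes ',' as the delimiter and COL2 as
# the second field, matching the part*.csv files used by the removed tests.
#
#   SELECT line, column_name,
#          string_split(csv_line, ',')[2] AS recovered_col2
#   FROM csv_rejects_table;
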
-# Test with recovery columns
 query I
-SELECT SUM(COL1) FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/part2.csv',
-    header=true,
-    ignore_errors=true,
-    sample_size=1,
-    rejects_table='csv_rejects_table',
-    rejects_recovery_columns=['COL2']);
+SELECT error_message
+FROM csv_rejects_table where line=4176 and column_idx=0;
 ----
-5418
-
-statement ok
-DROP TABLE csv_rejects_table;
+<REGEX>:.*Could not convert string "C" to 'BIGINT'.*
 
-# Test with recovery columns
 query I
-SELECT SUM(COL1) FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/part3.csv',
-    header=true,
-    ignore_errors=true,
-    sample_size=1,
-    rejects_table='csv_rejects_table',
-    rejects_recovery_columns=['COL2']);
+SELECT error_message
+FROM csv_rejects_table where line=3680 and column_idx=0;
 ----
-4151
-
-statement ok
-DROP TABLE csv_rejects_table;
+<REGEX>:.*Could not convert string "B" to 'BIGINT'.*
 
-# Test with recovery columns
 query I
-SELECT SUM(COL1) FROM read_csv_auto(
-    'test/sql/copy/csv/data/error/mismatch/part*.csv',
-    header=true,
-    ignore_errors=true,
-    sample_size=1,
-    rejects_table='csv_rejects_table',
-    rejects_recovery_columns=['COL2']);
+SELECT error_message
+FROM csv_rejects_table where line=5680 and column_idx=0;
 ----
-14799
-
-query IIIIIII rowsort
-SELECT "line", "column", "column_name", "parsed_value", "recovery_columns", "error", regexp_replace("file", '\\', '/', 'g')
-FROM csv_rejects_table;
----
-2058 0 "COL1" B {'COL2': BAD1B} Could not convert string 'B' to 'BIGINT' test/sql/copy/csv/data/error/mismatch/part3.csv
-2325 0 "COL1" B {'COL2': BAD2B} Could not convert string 'B' to 'BIGINT' test/sql/copy/csv/data/error/mismatch/part2.csv
-3137 0 "COL1" B {'COL2': BAD1B} Could not convert string 'B' to 'BIGINT' test/sql/copy/csv/data/error/mismatch/part1.csv
-4058 0 "COL1" C {'COL2': BAD1C} Could not convert string 'C' to 'BIGINT' test/sql/copy/csv/data/error/mismatch/part3.csv
-4325 0 "COL1" C {'COL2': BAD2C} Could not convert string 'C' to 'BIGINT' test/sql/copy/csv/data/error/mismatch/part2.csv
-5137 0 "COL1" C {'COL2': BAD1C} Could not convert string 'C' to 'BIGINT' test/sql/copy/csv/data/error/mismatch/part1.csv
+<REGEX>:.*Could not convert string "C" to 'BIGINT'.*
 
 statement ok
 DROP TABLE csv_rejects_table;
@@ -180,7 +76,7 @@ statement ok
 CREATE TABLE tbl1 (col1 BIGINT, col2 VARCHAR);
 
 statement ok
-COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half1.csv' 
+COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half1.csv'
 WITH (HEADER, IGNORE_ERRORS TRUE, SAMPLE_SIZE 1000, REJECTS_TABLE 'csv_rejects_table');
 
 query I
@@ -222,7 +118,7 @@ statement ok
 CREATE TABLE tbl1 (col1 BIGINT, col2 VARCHAR);
 
 statement ok
-COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half2.csv' 
+COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half2.csv'
 WITH (HEADER, IGNORE_ERRORS TRUE, SAMPLE_SIZE 1000, REJECTS_TABLE 'csv_rejects_table');
 
 query I
@@ -246,7 +142,7 @@ statement ok
 CREATE TABLE tbl1 (col1 BIGINT, col2 VARCHAR);
 
 statement ok
-COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half2.csv' 
+COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half2.csv'
 WITH (HEADER, IGNORE_ERRORS TRUE, SAMPLE_SIZE 1000, REJECTS_TABLE 'csv_rejects_table', REJECTS_LIMIT 1337);
 
 query I
diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test
index 5713e91760a0..458e485ffc75 100644
--- a/test/sql/copy/csv/rejects/csv_rejects_read.test
+++ b/test/sql/copy/csv/rejects/csv_rejects_read.test
@@ -7,7 +7,6 @@ require skip_reload
 
 require vector_size 2048
 
-# Basic test
query III rowsort SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/bad.csv', diff --git a/test/sql/copy/csv/rejects/csv_rejects_recovery.test b/test/sql/copy/csv/rejects/csv_rejects_recovery.test deleted file mode 100644 index 697e7c94a091..000000000000 --- a/test/sql/copy/csv/rejects/csv_rejects_recovery.test +++ /dev/null @@ -1,97 +0,0 @@ -# name: test/sql/copy/csv/rejects/csv_rejects_recovery.test -# group: [rejects] - -require skip_reload - -# Test invalid arguments - -# Should not work without rejects_table -statement error -SELECT SUM(COL1) + SUM(COL3) FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/part*.csv', - ignore_errors=true, - header=true, - columns = {COL3 :'INTEGER', COL1: 'INTEGER', COL2: 'VARCHAR'}, - rejects_recovery_columns=['COL2'] -); ----- -only supported when REJECTS_TABLE is set to a table name - -# Should not work without rejects_recovery_columns as list -statement error -SELECT SUM(COL1) + SUM(COL3) FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/part*.csv', - header=true, - columns = {COL3 :'INTEGER', COL1: 'INTEGER', COL2: 'VARCHAR'}, - rejects_table='csv_rejects_table', - rejects_recovery_columns=['NON_EXISTING_COLUMN'], - ignore_errors=true -); ----- -CSV options could not be auto-detected. Consider setting parser options manually. - -# Should not work without rejects_recovery_columns as list -statement error -SELECT SUM(COL1) + SUM(COL3) FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/part*.csv', - header=true, - columns = {COL3 :'INTEGER', COL1: 'INTEGER', COL2: 'VARCHAR'}, - rejects_table='csv_rejects_table', - rejects_recovery_columns=['NON_EXISTING_COLUMN'], - ignore_errors=true, - auto_detect=false -); ----- -REJECTS_RECOVERY_COLUMNS: column "NON_EXISTING_COLUMN" not found - -# Basic test -query IIII rowsort -SELECT * FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/example.tsv', - sep='\t', - columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'id': 'INTEGER', 'count': 'INTEGER'}, - rejects_table='csv_rejects_table', - rejects_recovery_columns=['name', 'age'], - ignore_errors=true, - auto_detect = false -); ----- -alice 10 1 20 -charlie 7 3 30 - -query IIIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "recovery_columns", "error", regexp_replace("file", '\\', '/', 'g') -FROM csv_rejects_table; ----- -2 3 "count" NOT_A_NUMBER {'name': bobby, 'age': 12} Could not convert string 'NOT_A_NUMBER' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/example.tsv - -statement ok -DROP TABLE csv_rejects_table; - -# We should not prune columns that are part of the rejects_recovery_columns -query I -SELECT SUM(COL1) FROM read_csv( - 'test/sql/copy/csv/data/error/mismatch/part*.csv', - columns = {COL1: 'INTEGER', COL2: 'VARCHAR'}, - header=true, - rejects_table='csv_rejects_table', - rejects_recovery_columns=['COL2'], - ignore_errors=true, - auto_detect = false -); ----- -14799 - -query IIIIIII rowsort -SELECT "line", "column", "column_name", "parsed_value", "recovery_columns", "error", regexp_replace("file", '\\', '/', 'g') -FROM csv_rejects_table; ----- -2058 0 "COL1" B {'COL2': BAD1B} Could not convert string 'B' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/part3.csv -2325 0 "COL1" B {'COL2': BAD2B} Could not convert string 'B' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/part2.csv -3137 0 "COL1" B {'COL2': BAD1B} Could not convert string 'B' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/part1.csv -4058 0 "COL1" C {'COL2': BAD1C} Could not convert string 'C' to 'INTEGER' 
test/sql/copy/csv/data/error/mismatch/part3.csv -4325 0 "COL1" C {'COL2': BAD2C} Could not convert string 'C' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/part2.csv -5137 0 "COL1" C {'COL2': BAD1C} Could not convert string 'C' to 'INTEGER' test/sql/copy/csv/data/error/mismatch/part1.csv - -statement ok -DROP TABLE csv_rejects_table; \ No newline at end of file diff --git a/test/sql/copy/csv/rejects/test_invalid_parameters.test b/test/sql/copy/csv/rejects/test_invalid_parameters.test index d403d0274948..5209960fef88 100644 --- a/test/sql/copy/csv/rejects/test_invalid_parameters.test +++ b/test/sql/copy/csv/rejects/test_invalid_parameters.test @@ -54,3 +54,60 @@ SELECT * FROM read_csv( rejects_limit=-1) ---- REJECTS_LIMIT: cannot be negative + +# Test invalid arguments +statement error +SELECT * FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + ignore_errors=false, + rejects_table='csv_rejects_table' +) +---- +only supported when IGNORE_ERRORS is set to true + +statement error +SELECT * FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + ignore_errors=true, + rejects_table='') +---- +REJECTS_TABLE option cannot be empty + +statement error +SELECT * FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + ignore_errors=true, + rejects_table='csv_rejects_table', + union_by_name=true) +---- +UNION_BY_NAME is set to true + +statement error +SELECT * FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + ignore_errors=true, + rejects_limit=10) +---- +REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name + +statement error +SELECT * FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/bad.csv', + ignore_errors=true, + rejects_table='csv_rejects_table', + rejects_limit=-1) +---- +REJECTS_LIMIT: cannot be negative + + +query III +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=3000, + rejects_table='csv_rejects_table', + ignore_errors=true, header = 0); +---- +VARCHAR VARCHAR 11048 + +statement ok +DROP TABLE csv_rejects_table; \ No newline at end of file From bcebeff2a8e5a0978c6527c73c616c501dc8624c Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 29 Feb 2024 08:46:47 -0300 Subject: [PATCH 010/147] Handle CSV Line Errors that fall over multiple buffers --- .../scanner/string_value_scanner.cpp | 28 ++++++++-- .../csv_scanner/string_value_scanner.hpp | 2 +- .../csv/rejects/csv_buffer_size_rejects.test | 55 +++++++++++++++++++ 3 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 test/sql/copy/csv/rejects/csv_buffer_size_rejects.test diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index daa56dd209e0..70da1ac6ee3a 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -21,7 +21,7 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m store_line_size(store_line_size_p), csv_file_scan(std::move(csv_file_scan_p)), lines_read(lines_read_p) { // Vector information D_ASSERT(number_of_columns > 0); - buffer_handles.push_back(buffer_handle); + buffer_handles[buffer_handle->buffer_idx] = buffer_handle; // Buffer Information buffer_ptr = buffer_handle->Ptr(); buffer_size = buffer_handle->actual_size; @@ -327,14 +327,27 @@ void 
StringValueResult::NullPaddingQuotedNewlineCheck() {
 string StringValueResult::ReconstructCurrentLine() {
 	idx_t current_line_size = previous_line_start - pre_previous_line_start;
 	string result;
-	result.resize(current_line_size - 1);
 	if (previous_line_start.buffer_idx == pre_previous_line_start.buffer_idx) {
+		result.resize(current_line_size - 1);
 		idx_t result_idx = 0;
 		for (idx_t i = pre_previous_line_start.buffer_pos + 1; i < previous_line_start.buffer_pos; i++) {
 			result[result_idx++] = buffer_ptr[i];
 		}
 	} else {
-		throw InternalException("Oh no");
+		result.resize(current_line_size);
+		if (buffer_handles.find(pre_previous_line_start.buffer_idx) == buffer_handles.end()) {
+			throw InternalException("CSV Buffer is not available to reconstruct CSV Line, please open an issue with "
+			                        "your query and dataset.");
+		}
+		idx_t result_idx = 0;
+		auto first_buffer = buffer_handles[pre_previous_line_start.buffer_idx]->Ptr();
+		auto first_buffer_size = buffer_handles[pre_previous_line_start.buffer_idx]->actual_size;
+		for (idx_t i = pre_previous_line_start.buffer_pos + 1; i < first_buffer_size; i++) {
+			result[result_idx++] = first_buffer[i];
+		}
+		for (idx_t i = 0; i < previous_line_start.buffer_pos; i++) {
+			result[result_idx++] = buffer_ptr[i];
+		}
 	}
 	return result;
 }
@@ -884,7 +897,6 @@ bool StringValueScanner::MoveToNextBuffer() {
 	if (iterator.pos.buffer_pos >= cur_buffer_handle->actual_size) {
 		previous_buffer_handle = cur_buffer_handle;
 		cur_buffer_handle = buffer_manager->GetBuffer(++iterator.pos.buffer_idx);
-		result.buffer_handles.push_back(cur_buffer_handle);
 		if (!cur_buffer_handle) {
 			iterator.pos.buffer_idx--;
 			buffer_handle_ptr = nullptr;
@@ -914,6 +926,8 @@ bool StringValueScanner::MoveToNextBuffer() {
 		}
 		return false;
 	}
+	result.buffer_handles[cur_buffer_handle->buffer_idx] = cur_buffer_handle;
+
 	iterator.pos.buffer_pos = 0;
 	buffer_handle_ptr = cur_buffer_handle->Ptr();
 	// Handle overbuffer value
@@ -1057,6 +1071,12 @@ void StringValueScanner::SetStart() {
 		}
 	}
 	if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size) {
+		// Propagate any errors
+		for (auto &error_vector : scan_finder->error_handler->errors) {
+			for (auto &error : error_vector.second) {
+				error_handler->Error(error);
+			}
+		}
 		// If things go terribly wrong, we never loop indefinitely.
 		iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx;
 		iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos;
diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
index 00e0bb80b2c2..d0e9124c81f2 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
@@ -102,7 +102,7 @@ class StringValueResult : public ScannerResult {
 	idx_t chunk_col_id = 0;
 
 	//! We must ensure that we keep the buffers alive until processing the query result
-	vector<shared_ptr<CSVBufferHandle>> buffer_handles;
+	unordered_map<idx_t, shared_ptr<CSVBufferHandle>> buffer_handles;
 
 	//! Requested size of buffers (i.e., either 32Mb or set by buffer_size parameter)
 	idx_t requested_size;
diff --git a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
new file mode 100644
index 000000000000..f0de6714f96a
--- /dev/null
+++ b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
@@ -0,0 +1,55 @@
+# name: test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
+# description: Force CSV lines with errors to fall mid-buffer
+# group: [rejects]
+
+require skip_reload
+
+# FIXME: https://github.com/duckdb/duckdb/issues/7755
+require vector_size 2048
+
+# Ensure that we can get the schema if we reduce the sample size and ignore errors
+query IIIII
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    buffer_size=5,
+    rejects_table='csv_rejects_table',
+    ignore_errors=true);
+----
+BIGINT VARCHAR 11044 11044 2
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "column0" CAST B, A 10875
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "column0" CAST C, A 20875
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "column0" CAST B, A 18395
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "column0" CAST C, A 28395
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=2176 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "B" to 'BIGINT'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=4176 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "C" to 'BIGINT'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=3680 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "B" to 'BIGINT'.*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where line=5680 and column_idx=0;
+----
+<REGEX>:.*Could not convert string "C" to 'BIGINT'.*
+
+statement ok
+DROP TABLE csv_rejects_table;
\ No newline at end of file
From 8ab1a9a3d67a0192e517d931d333cd1547a52626 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 29 Feb 2024 09:11:06 -0300
Subject: [PATCH 011/147] rounding off minor details

---
 .../scanner/string_value_scanner.cpp          | 26 +++++++++----------
 .../csv_scanner/string_value_scanner.hpp      |  3 +++
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 70da1ac6ee3a..4adf70bc3ac7 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -353,6 +353,18 @@ string StringValueResult::ReconstructCurrentLine() {
 }
 
 bool StringValueResult::AddRowInternal() {
+	LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_size};
+	idx_t current_line_size = current_line_start - previous_line_start;
+	if (store_line_size) {
+		error_handler.NewMaxLineSize(current_line_size);
+	}
+	if (current_line_size > state_machine.options.maximum_line_size) {
+		LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows);
+		auto csv_error = CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch);
+		error_handler.Error(csv_error);
+	}
+	
pre_previous_line_start = previous_line_start; + previous_line_start = current_line_start; if (ignore_current_row) { // An error occurred on this row, we are ignoring it and resetting our control flag ignore_current_row = false; @@ -428,19 +440,6 @@ bool StringValueResult::AddRowInternal() { } bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos) { - LinePosition current_line_start = {result.iterator.pos.buffer_idx, result.iterator.pos.buffer_pos, - result.buffer_size}; - idx_t current_line_size = current_line_start - result.previous_line_start; - if (result.store_line_size) { - result.error_handler.NewMaxLineSize(current_line_size); - } - if (current_line_size > result.state_machine.options.maximum_line_size) { - LinesPerBoundary lines_per_batch(result.iterator.GetBoundaryIdx(), result.number_of_rows); - auto csv_error = CSVError::LineSizeError(result.state_machine.options, current_line_size, lines_per_batch); - result.error_handler.Error(csv_error); - } - result.pre_previous_line_start = result.previous_line_start; - result.previous_line_start = current_line_start; if (result.last_position <= buffer_pos) { // We add the value if (result.quoted) { @@ -1082,6 +1081,7 @@ void StringValueScanner::SetStart() { iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos; result.last_position = iterator.pos.buffer_pos; iterator.done = scan_finder->iterator.done; + result.lines_read++; return; } } diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index d0e9124c81f2..74064587a621 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -43,6 +43,9 @@ class LinePosition { return other.buffer_size - other.buffer_pos + buffer_pos; } idx_t GetGlobalPosition(idx_t requested_buffer_size) { + if (buffer_pos == requested_buffer_size) { + return requested_buffer_size * buffer_idx + buffer_pos; + } return requested_buffer_size * buffer_idx + buffer_pos + 1; } idx_t buffer_pos = 0; From b6fd5674a628cb9234c984b3578af2563a31777a Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 29 Feb 2024 14:27:22 -0300 Subject: [PATCH 012/147] lots of adjustments to make the errors accurate for small buffer sizes --- .../scanner/string_value_scanner.cpp | 57 ++++++++++++------- .../csv_scanner/string_value_scanner.hpp | 9 +-- .../csv/rejects/csv_buffer_size_rejects.test | 28 +++++---- 3 files changed, 57 insertions(+), 37 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 4adf70bc3ac7..dcb58116595e 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -324,29 +324,35 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() { } //! 
Reconstructs the current line to be used in error messages -string StringValueResult::ReconstructCurrentLine() { - idx_t current_line_size = previous_line_start - pre_previous_line_start; +string StringValueResult::ReconstructCurrentLine(bool &first_char_nl) { string result; if (previous_line_start.buffer_idx == pre_previous_line_start.buffer_idx) { - result.resize(current_line_size - 1); - idx_t result_idx = 0; - for (idx_t i = pre_previous_line_start.buffer_pos + 1; i < previous_line_start.buffer_pos; i++) { - result[result_idx++] = buffer_ptr[i]; + if (buffer_handles.find(previous_line_start.buffer_idx) == buffer_handles.end()) { + throw InternalException("CSV Buffer is not available to reconstruct CSV Line, please open an issue with " + "your query and dataset."); + } + auto buffer = buffer_handles[pre_previous_line_start.buffer_idx]->Ptr(); + first_char_nl = + buffer[pre_previous_line_start.buffer_pos] == '\n' || buffer[pre_previous_line_start.buffer_pos] == '\r'; + for (idx_t i = pre_previous_line_start.buffer_pos + first_char_nl; i < previous_line_start.buffer_pos; i++) { + result += buffer[i]; } } else { - result.resize(current_line_size); - if (buffer_handles.find(pre_previous_line_start.buffer_idx) == buffer_handles.end()) { + if (buffer_handles.find(pre_previous_line_start.buffer_idx) == buffer_handles.end() || + buffer_handles.find(previous_line_start.buffer_idx) == buffer_handles.end()) { throw InternalException("CSV Buffer is not available to reconstruct CSV Line, please open an issue with " "your query and dataset."); } - idx_t result_idx = 0; auto first_buffer = buffer_handles[pre_previous_line_start.buffer_idx]->Ptr(); auto first_buffer_size = buffer_handles[pre_previous_line_start.buffer_idx]->actual_size; - for (idx_t i = pre_previous_line_start.buffer_pos + 1; i < first_buffer_size; i++) { - result[result_idx++] = first_buffer[i]; + auto second_buffer = buffer_handles[previous_line_start.buffer_idx]->Ptr(); + first_char_nl = first_buffer[pre_previous_line_start.buffer_pos] == '\n' || + first_buffer[pre_previous_line_start.buffer_pos] == '\r'; + for (idx_t i = pre_previous_line_start.buffer_pos + first_char_nl; i < first_buffer_size; i++) { + result += first_buffer[i]; } for (idx_t i = 0; i < previous_line_start.buffer_pos; i++) { - result[result_idx++] = buffer_ptr[i]; + result += second_buffer[i]; } } return result; @@ -379,10 +385,11 @@ bool StringValueResult::AddRowInternal() { << LogicalTypeIdToString(parse_types[cast_error.first]) << "\'"; auto error_string = error.str(); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - 1); - auto borked_line = ReconstructCurrentLine(); + bool first_nl; + auto borked_line = ReconstructCurrentLine(first_nl); auto csv_error = CSVError::CastError(state_machine.options, names[cast_error.first], error_string, cast_error.first, borked_line, lines_per_batch, - pre_previous_line_start.GetGlobalPosition(requested_size)); + pre_previous_line_start.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); } // If we got here it means we are ignoring errors, hence we need to signify to our result scanner to ignore this @@ -1069,23 +1076,35 @@ void StringValueScanner::SetStart() { return; } } - if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size) { + if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size || + scan_finder->iterator.GetBufferIdx() >= iterator.GetBufferIdx()) { // Propagate any errors - for (auto &error_vector : scan_finder->error_handler->errors) { - for (auto &error : 
error_vector.second) { - error_handler->Error(error); + if (!scan_finder->error_handler->errors.empty()) { + for (auto &error_vector : scan_finder->error_handler->errors) { + for (auto &error : error_vector.second) { + error_handler->Error(error); + } } + result.lines_read++; } // If things go terribly wrong, we never loop indefinetly. iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx; iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos; result.last_position = iterator.pos.buffer_pos; iterator.done = scan_finder->iterator.done; - result.lines_read++; return; } } } while (!line_found); + // Propagate any errors + if (!scan_finder->error_handler->errors.empty()) { + for (auto &error_vector : scan_finder->error_handler->errors) { + for (auto &error : error_vector.second) { + error_handler->Error(error); + } + } + result.lines_read++; + } iterator.pos.buffer_idx = scan_finder->result.pre_previous_line_start.buffer_idx; iterator.pos.buffer_pos = scan_finder->result.pre_previous_line_start.buffer_pos; result.last_position = iterator.pos.buffer_pos; diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 74064587a621..84ef406b30aa 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -42,11 +42,8 @@ class LinePosition { } return other.buffer_size - other.buffer_pos + buffer_pos; } - idx_t GetGlobalPosition(idx_t requested_buffer_size) { - if (buffer_pos == requested_buffer_size) { - return requested_buffer_size * buffer_idx + buffer_pos; - } - return requested_buffer_size * buffer_idx + buffer_pos + 1; + idx_t GetGlobalPosition(idx_t requested_buffer_size, bool first_char_nl) { + return requested_buffer_size * buffer_idx + buffer_pos + first_char_nl; } idx_t buffer_pos = 0; idx_t buffer_size = 0; @@ -127,7 +124,7 @@ class StringValueResult : public ScannerResult { static inline bool EmptyLine(StringValueResult &result, const idx_t buffer_pos); inline bool AddRowInternal(); //! 
Reconstructs the current line to be used in error messages
-	string ReconstructCurrentLine();
+	string ReconstructCurrentLine(bool &first_char_nl);
 
 	void HandleOverLimitRows();
diff --git a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
index f0de6714f96a..15461ee1c9f3 100644
--- a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
@@ -7,49 +7,53 @@ require skip_reload
 # FIXME: https://github.com/duckdb/duckdb/issues/7755
 require vector_size 2048
 
+loop buffer_size 5 10
+
 # Ensure that we can get the schema if we reduce the sample size and ignore errors
 query IIIII
 SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
     'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
     sample_size=1,
-    buffer_size=5,
+    buffer_size=${buffer_size},
     rejects_table='csv_rejects_table',
     ignore_errors=true);
 ----
 BIGINT VARCHAR 11044 11044 2
 
-query IIIIIII rowsort
-SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+query IIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), column_idx, column_name, error_type, csv_line, byte_position
 FROM csv_rejects_table;
 ----
-test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "column0" CAST B, A 10875
-test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "column0" CAST C, A 20875
-test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "column0" CAST B, A 18395
-test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "column0" CAST C, A 28395
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 0 "column0" CAST B, A 10875
+test/sql/copy/csv/data/error/mismatch/big_bad.csv 0 "column0" CAST C, A 20875
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 0 "column0" CAST B, A 18395
+test/sql/copy/csv/data/error/mismatch/big_bad2.csv 0 "column0" CAST C, A 28395
 
 query I
 SELECT error_message
-FROM csv_rejects_table where line=2176 and column_idx=0;
+FROM csv_rejects_table where byte_position = 10875;
 ----
 <REGEX>:.*Could not convert string "B" to 'BIGINT'.*
 
 query I
 SELECT error_message
-FROM csv_rejects_table where line=4176 and column_idx=0;
+FROM csv_rejects_table where byte_position = 20875;
 ----
 <REGEX>:.*Could not convert string "C" to 'BIGINT'.*
 
 query I
 SELECT error_message
-FROM csv_rejects_table where line=3680 and column_idx=0;
+FROM csv_rejects_table where byte_position = 18395;
 ----
 <REGEX>:.*Could not convert string "B" to 'BIGINT'.*
 
 query I
 SELECT error_message
-FROM csv_rejects_table where line=5680 and column_idx=0;
+FROM csv_rejects_table where byte_position = 28395;
 ----
 <REGEX>:.*Could not convert string "C" to 'BIGINT'.*
 
 statement ok
-DROP TABLE csv_rejects_table;
\ No newline at end of file
+DROP TABLE csv_rejects_table;
+
+endloop
\ No newline at end of file
From 7ad39f20b9d0462b28a5f15993d0c95288d652fa Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 29 Feb 2024 14:50:35 -0300
Subject: [PATCH 013/147] When resetting the buffer_handle we might have to keep the last one

---
 .../operator/csv_scanner/scanner/string_value_scanner.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index dcb58116595e..da67d5f1bd4a 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -252,7 +252,15 @@ void StringValueResult::Reset() {
 	for (auto &v : validity_mask) {
 		v->SetAllValid(result_size);
 	}
+	// We keep a reference to the buffer from our current iteration if it already exists
+	shared_ptr<CSVBufferHandle> cur_buffer;
+	if (buffer_handles.find(iterator.GetBufferIdx()) != buffer_handles.end()) {
+		cur_buffer = buffer_handles[iterator.GetBufferIdx()];
+	}
 	buffer_handles.clear();
+	if (cur_buffer) {
+		buffer_handles[cur_buffer->buffer_idx] = cur_buffer;
+	}
 }
 
 void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t buffer_pos) {

From 2826f951f537aa106b0a23c2d01266a6bf81ddb0 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 29 Feb 2024 15:02:21 -0300
Subject: [PATCH 014/147] fix test

---
 test/sql/copy/csv/rejects/csv_rejects_read.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test
index 458e485ffc75..ab5b12db949a 100644
--- a/test/sql/copy/csv/rejects/csv_rejects_read.test
+++ b/test/sql/copy/csv/rejects/csv_rejects_read.test
@@ -46,7 +46,7 @@ query IIIIIII rowsort
 SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
 FROM csv_rejects_table;
 ----
-test/sql/copy/csv/data/error/mismatch/bad2.csv 1 2 "col2" CAST ,2,DDD, 1
+test/sql/copy/csv/data/error/mismatch/bad2.csv 1 2 "col2" CAST 1,2,DDD, 0

From f6496fc22504f5d3ba62476203e436f21661e1ae Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 29 Feb 2024 15:20:05 -0300
Subject: [PATCH 015/147] We only care about propagating errors if we are ignoring them, as weird as this sounds

---
 .../scanner/string_value_scanner.cpp          |  4 +-
 .../parallel/csv_parallel_buffer_size.test    | 90 +++++++++----------
 2 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index da67d5f1bd4a..4785f54129b8 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -1087,7 +1087,7 @@ void StringValueScanner::SetStart() {
 		if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size ||
 		    scan_finder->iterator.GetBufferIdx() >= iterator.GetBufferIdx()) {
 			// Propagate any errors
-			if (!scan_finder->error_handler->errors.empty()) {
+			if (!scan_finder->error_handler->errors.empty() && state_machine->options.ignore_errors) {
 				for (auto &error_vector : scan_finder->error_handler->errors) {
 					for (auto &error : error_vector.second) {
 						error_handler->Error(error);
@@ -1105,7 +1105,7 @@ void StringValueScanner::SetStart() {
 	} while (!line_found);
 	// Propagate any errors
-	if (!scan_finder->error_handler->errors.empty()) {
+	if (!scan_finder->error_handler->errors.empty() && state_machine->options.ignore_errors) {
 		for (auto &error_vector : scan_finder->error_handler->errors) {
 			for (auto &error : error_vector.second) {
 				error_handler->Error(error);
diff --git a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test
index d1c9d8ce5ee8..f6e1d7ada93a 100644
--- a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test
+++ b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test
@@ -7,51 +7,51 @@
 statement ok
 PRAGMA
verify_parallelism -query III -SELECT sum(a), sum(b), sum(c) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) ----- -111111111 51866 3195 - -query I -SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) ----- -111111111 - -query I -SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) ----- -111111111 - -query IIII -select * from read_csv('test/sql/copy/csv/data/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) ----- -1 6370 371 p1 -10 214 465 p2 -100 2403 160 p3 -1000 1564 67 p4 -10000 10617 138 p5 -100000 430 181 p6 -1000000 1904 658 p7 -10000000 12845 370 p8 -100000000 15519 785 p9 - -query IIII -select * from read_csv('test/sql/copy/csv/data/test/multi_column_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) ----- -1 6370 371 p1 -10 214 465 p2 -100 2403 160 p3 -1000 1564 67 p4 -10000 10617 138 p5 -100000 430 181 p6 -1000000 1904 658 p7 -10000000 12845 370 p8 -100000000 15519 785 p9 - -query I -SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|') ----- -111 +#query III +#SELECT sum(a), sum(b), sum(c) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) +#---- +#111111111 51866 3195 +# +#query I +#SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) +#---- +#111111111 +# +#query I +#SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) +#---- +#111111111 +# +#query IIII +#select * from read_csv('test/sql/copy/csv/data/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) +#---- +#1 6370 371 p1 +#10 214 465 p2 +#100 2403 160 p3 +#1000 1564 67 p4 +#10000 10617 138 p5 +#100000 430 181 p6 +#1000000 1904 658 p7 +#10000000 12845 370 p8 +#100000000 15519 785 p9 +# +#query IIII +#select * from read_csv('test/sql/copy/csv/data/test/multi_column_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) +#---- +#1 6370 371 p1 +#10 214 465 p2 +#100 2403 160 p3 +#1000 1564 67 p4 +#10000 10617 138 p5 +#100000 430 181 p6 +#1000000 1904 658 p7 +#10000000 12845 370 p8 +#100000000 15519 785 p9 +# +#query I +#SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', 
delim = '|') +#---- +#111 query I SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=80) From 404c4da61b052555865554aaa89a441091cbb88f Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 29 Feb 2024 15:20:32 -0300 Subject: [PATCH 016/147] woopsie-doopsie --- .../parallel/csv_parallel_buffer_size.test | 90 +++++++++---------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test index f6e1d7ada93a..d1c9d8ce5ee8 100644 --- a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test +++ b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test @@ -7,51 +7,51 @@ statement ok PRAGMA verify_parallelism -#query III -#SELECT sum(a), sum(b), sum(c) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) -#---- -#111111111 51866 3195 -# -#query I -#SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) -#---- -#111111111 -# -#query I -#SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) -#---- -#111111111 -# -#query IIII -#select * from read_csv('test/sql/copy/csv/data/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) -#---- -#1 6370 371 p1 -#10 214 465 p2 -#100 2403 160 p3 -#1000 1564 67 p4 -#10000 10617 138 p5 -#100000 430 181 p6 -#1000000 1904 658 p7 -#10000000 12845 370 p8 -#100000000 15519 785 p9 -# -#query IIII -#select * from read_csv('test/sql/copy/csv/data/test/multi_column_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) -#---- -#1 6370 371 p1 -#10 214 465 p2 -#100 2403 160 p3 -#1000 1564 67 p4 -#10000 10617 138 p5 -#100000 430 181 p6 -#1000000 1904 658 p7 -#10000000 12845 370 p8 -#100000000 15519 785 p9 -# -#query I -#SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|') -#---- -#111 +query III +SELECT sum(a), sum(b), sum(c) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) +---- +111111111 51866 3195 + +query I +SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) +---- +111111111 + +query I +SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/multi_column_integer_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) +---- +111111111 + +query IIII +select * from read_csv('test/sql/copy/csv/data/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 
'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) +---- +1 6370 371 p1 +10 214 465 p2 +100 2403 160 p3 +1000 1564 67 p4 +10000 10617 138 p5 +100000 430 181 p6 +1000000 1904 658 p7 +10000000 12845 370 p8 +100000000 15519 785 p9 + +query IIII +select * from read_csv('test/sql/copy/csv/data/test/multi_column_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) +---- +1 6370 371 p1 +10 214 465 p2 +100 2403 160 p3 +1000 1564 67 p4 +10000 10617 138 p5 +100000 430 181 p6 +1000000 1904 658 p7 +10000000 12845 370 p8 +100000000 15519 785 p9 + +query I +SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|') +---- +111 query I SELECT sum(a) FROM read_csv('test/sql/copy/csv/data/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=80) From ecf76d42a23c69d385b70b8b3a05001636242b14 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Fri, 1 Mar 2024 08:49:03 -0300 Subject: [PATCH 017/147] wip on rejects from flush cast --- data/csv/error/flush_cast.csv | 2814 +++++++++++++++++ .../csv/rejects/csv_rejects_flush_cast.test | 35 + 2 files changed, 2849 insertions(+) create mode 100644 data/csv/error/flush_cast.csv create mode 100644 test/sql/copy/csv/rejects/csv_rejects_flush_cast.test diff --git a/data/csv/error/flush_cast.csv b/data/csv/error/flush_cast.csv new file mode 100644 index 000000000000..33a2cc1af7be --- /dev/null +++ b/data/csv/error/flush_cast.csv @@ -0,0 +1,2814 @@ +a,b +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla +25-09-2001, bla 
+[... ~2,600 identical rows elided: the fixture is 2,814 lines long, and after the "a,b" header every row repeats "25-09-2001, bla" except "B, bla" at line 439 and "c, bla" at line 2813 ...]
+c, bla
+25-09-2001, bla
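The new test below drives this fixture end to end. Only two of its 2,813 data rows fail the %d-%m-%Y DATE cast, and because values are parsed as strings and cast only when a full chunk is flushed, the errors surface at flush time rather than during scanning, which is the code path the later patches in this series instrument. As a rough sketch (assuming a build with this series applied and the fixture at the path above), the same rejects flow can be reproduced interactively:

    -- Illustrative only; mirrors what the test below asserts.
    SELECT count(*)
    FROM read_csv('data/csv/error/flush_cast.csv',
                  columns = {'a': 'DATE', 'b': 'VARCHAR'},
                  dateformat = '%d-%m-%Y',
                  rejects_table = 'csv_rejects_table',
                  ignore_errors = true);
    -- returns 2811: the rows holding 'B' and 'c' were rejected

    SELECT line, column_name, error_message
    FROM csv_rejects_table;

diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test
new file mode 100644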
index 000000000000..05c9af1cb02d --- /dev/null +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -0,0 +1,35 @@ +# name: test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +# description: Test that Flush Cast functions properly for the rejects tables +# group: [rejects] + +require skip_reload + +# FIXME: https://github.com/duckdb/duckdb/issues/7755 +require vector_size 2048 + +query III +SELECT typeof(first(a)), typeof(first(b)), COUNT(*) FROM read_csv( + 'data/csv/error/flush_cast.csv', + columns = {'a': 'DATE', 'b': 'VARCHAR'}, + rejects_table='csv_rejects_table', + delim = ',', + dateformat = '%d-%m-%Y', + ignore_errors=true); +---- +DATE VARCHAR 2811 + + +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position +FROM csv_rejects_table; +---- +test/sql/copy/csv/data/error/mismatch/bad.csv 2 1 "col1" CAST 4,BBB,9, 9 + +query I +SELECT error_message +FROM csv_rejects_table; +---- +:.*Could not convert string "BBB" to 'INTEGER'.* + +statement ok +DROP TABLE csv_rejects_table; \ No newline at end of file From 53f612bbe056c3f12c7ab0c3256d8130a64b38a9 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Fri, 1 Mar 2024 10:27:51 -0300 Subject: [PATCH 018/147] introduce FullLinePosition --- .../scanner/string_value_scanner.cpp | 59 ++++++++++--------- .../csv_scanner/string_value_scanner.hpp | 21 +++++-- 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 4785f54129b8..516bf437edbb 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -29,8 +29,8 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m requested_size = buffer_handle->requested_size; // Current Result information - previous_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_handle->actual_size}; - pre_previous_line_start = previous_line_start; + current_line_position.begin = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_handle->actual_size}; + current_line_position.end = current_line_position.begin; // Fill out Parse Types vector logical_types; parse_types = make_unsafe_uniq_array(number_of_columns); @@ -332,34 +332,33 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() { } //! 
Reconstructs the current line to be used in error messages -string StringValueResult::ReconstructCurrentLine(bool &first_char_nl) { +string FullLinePosition::ReconstructCurrentLine(bool &first_char_nl, + unordered_map> &buffer_handles) { string result; - if (previous_line_start.buffer_idx == pre_previous_line_start.buffer_idx) { - if (buffer_handles.find(previous_line_start.buffer_idx) == buffer_handles.end()) { + if (end.buffer_idx == begin.buffer_idx) { + if (buffer_handles.find(end.buffer_idx) == buffer_handles.end()) { throw InternalException("CSV Buffer is not available to reconstruct CSV Line, please open an issue with " "your query and dataset."); } - auto buffer = buffer_handles[pre_previous_line_start.buffer_idx]->Ptr(); - first_char_nl = - buffer[pre_previous_line_start.buffer_pos] == '\n' || buffer[pre_previous_line_start.buffer_pos] == '\r'; - for (idx_t i = pre_previous_line_start.buffer_pos + first_char_nl; i < previous_line_start.buffer_pos; i++) { + auto buffer = buffer_handles[begin.buffer_idx]->Ptr(); + first_char_nl = buffer[begin.buffer_pos] == '\n' || buffer[begin.buffer_pos] == '\r'; + for (idx_t i = begin.buffer_pos + first_char_nl; i < end.buffer_pos; i++) { result += buffer[i]; } } else { - if (buffer_handles.find(pre_previous_line_start.buffer_idx) == buffer_handles.end() || - buffer_handles.find(previous_line_start.buffer_idx) == buffer_handles.end()) { + if (buffer_handles.find(begin.buffer_idx) == buffer_handles.end() || + buffer_handles.find(end.buffer_idx) == buffer_handles.end()) { throw InternalException("CSV Buffer is not available to reconstruct CSV Line, please open an issue with " "your query and dataset."); } - auto first_buffer = buffer_handles[pre_previous_line_start.buffer_idx]->Ptr(); - auto first_buffer_size = buffer_handles[pre_previous_line_start.buffer_idx]->actual_size; - auto second_buffer = buffer_handles[previous_line_start.buffer_idx]->Ptr(); - first_char_nl = first_buffer[pre_previous_line_start.buffer_pos] == '\n' || - first_buffer[pre_previous_line_start.buffer_pos] == '\r'; - for (idx_t i = pre_previous_line_start.buffer_pos + first_char_nl; i < first_buffer_size; i++) { + auto first_buffer = buffer_handles[begin.buffer_idx]->Ptr(); + auto first_buffer_size = buffer_handles[begin.buffer_idx]->actual_size; + auto second_buffer = buffer_handles[end.buffer_idx]->Ptr(); + first_char_nl = first_buffer[begin.buffer_pos] == '\n' || first_buffer[begin.buffer_pos] == '\r'; + for (idx_t i = begin.buffer_pos + first_char_nl; i < first_buffer_size; i++) { result += first_buffer[i]; } - for (idx_t i = 0; i < previous_line_start.buffer_pos; i++) { + for (idx_t i = 0; i < end.buffer_pos; i++) { result += second_buffer[i]; } } @@ -368,7 +367,7 @@ string StringValueResult::ReconstructCurrentLine(bool &first_char_nl) { bool StringValueResult::AddRowInternal() { LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, buffer_size}; - idx_t current_line_size = current_line_start - previous_line_start; + idx_t current_line_size = current_line_start - current_line_position.end; if (store_line_size) { error_handler.NewMaxLineSize(current_line_size); } @@ -377,8 +376,8 @@ bool StringValueResult::AddRowInternal() { auto csv_error = CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch); error_handler.Error(csv_error); } - pre_previous_line_start = previous_line_start; - previous_line_start = current_line_start; + current_line_position.begin = current_line_position.end; + current_line_position.end = 
current_line_start; if (ignore_current_row) { // An error occurred on this row, we are ignoring it and resetting our control flag ignore_current_row = false; @@ -394,10 +393,10 @@ bool StringValueResult::AddRowInternal() { auto error_string = error.str(); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - 1); bool first_nl; - auto borked_line = ReconstructCurrentLine(first_nl); - auto csv_error = CSVError::CastError(state_machine.options, names[cast_error.first], error_string, - cast_error.first, borked_line, lines_per_batch, - pre_previous_line_start.GetGlobalPosition(requested_size, first_nl)); + auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); + auto csv_error = CSVError::CastError( + state_machine.options, names[cast_error.first], error_string, cast_error.first, borked_line, + lines_per_batch, current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); } // If we got here it means we are ignoring errors, hence we need to signify to our result scanner to ignore this @@ -444,6 +443,7 @@ bool StringValueResult::AddRowInternal() { number_of_rows--; } } + line_positions_per_row[number_of_rows] = current_line_position; cur_col_id = 0; chunk_col_id = 0; number_of_rows++; @@ -699,9 +699,10 @@ void StringValueScanner::Initialize() { SetStart(); } result.last_position = iterator.pos.buffer_pos; - result.previous_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, cur_buffer_handle->actual_size}; + result.current_line_position.begin = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, + cur_buffer_handle->actual_size}; - result.pre_previous_line_start = result.previous_line_start; + result.current_line_position.end = result.current_line_position.begin; } void StringValueScanner::ProcessExtraRow() { @@ -1113,8 +1114,8 @@ void StringValueScanner::SetStart() { } result.lines_read++; } - iterator.pos.buffer_idx = scan_finder->result.pre_previous_line_start.buffer_idx; - iterator.pos.buffer_pos = scan_finder->result.pre_previous_line_start.buffer_pos; + iterator.pos.buffer_idx = scan_finder->result.current_line_position.begin.buffer_idx; + iterator.pos.buffer_pos = scan_finder->result.current_line_position.begin.buffer_pos; result.last_position = iterator.pos.buffer_pos; } diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 84ef406b30aa..3c7ba2a28660 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -50,6 +50,18 @@ class LinePosition { idx_t buffer_idx = 0; }; +//! Keeps track of start and end of line positions in regard to the CSV file +class FullLinePosition { +public: + FullLinePosition() {}; + LinePosition begin; + LinePosition end; + + //! Reconstructs the current line to be used in error messages + string ReconstructCurrentLine(bool &first_char_nl, + unordered_map> &buffer_handles); +}; + class StringValueResult : public ScannerResult { public: StringValueResult(CSVStates &states, CSVStateMachine &state_machine, @@ -83,9 +95,10 @@ class StringValueResult : public ScannerResult { //! Information to properly handle errors CSVErrorHandler &error_handler; CSVIterator &iterator; - //! Where the previous line started, used to validate the maximum_line_size option - LinePosition previous_line_start; - LinePosition pre_previous_line_start; + //! 
Line position of the current line + FullLinePosition current_line_position; + //! Used for CSV line reconstruction on flushed errors + unordered_map line_positions_per_row; bool store_line_size = false; bool added_last_line = false; bool quoted_new_line = false; @@ -123,8 +136,6 @@ class StringValueResult : public ScannerResult { //! Handles EmptyLine states static inline bool EmptyLine(StringValueResult &result, const idx_t buffer_pos); inline bool AddRowInternal(); - //! Reconstructs the current line to be used in error messages - string ReconstructCurrentLine(bool &first_char_nl); void HandleOverLimitRows(); From 4403d16353b3cddd7995408a4c049af2a3d743ef Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Fri, 1 Mar 2024 10:36:20 -0300 Subject: [PATCH 019/147] Errors during flush being properly propagated --- .../scanner/string_value_scanner.cpp | 22 +++++++++++++------ .../csv/rejects/csv_rejects_flush_cast.test | 15 +++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 516bf437edbb..d08a4a06f009 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -649,10 +649,13 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { } LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - parse_chunk.size() + line_error); - // auto borked_line = result.ReconstructCurrentLine(); - string empty; - auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], - error_message, col_idx, empty, lines_per_batch, 0); + bool first_nl; + auto borked_line = + result.line_positions_per_row[line_error].ReconstructCurrentLine(first_nl, result.buffer_handles); + auto csv_error = CSVError::CastError( + state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, borked_line, + lines_per_batch, + result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl)); error_handler->Error(csv_error); } borked_lines.insert(line_error++); @@ -668,9 +671,14 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { } LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read - parse_chunk.size() + line_error); - string empty; - auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], - error_message, col_idx, empty, lines_per_batch, 0); + bool first_nl; + auto borked_line = result.line_positions_per_row[line_error].ReconstructCurrentLine( + first_nl, result.buffer_handles); + auto csv_error = + CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], error_message, + col_idx, borked_line, lines_per_batch, + result.line_positions_per_row[line_error].begin.GetGlobalPosition( + result.result_size, first_nl)); error_handler->Error(csv_error); } diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test index 05c9af1cb02d..87768b805d8a 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -21,15 +21,22 @@ DATE VARCHAR 2811 query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +FROM csv_rejects_table order by all; ---- 
-test/sql/copy/csv/data/error/mismatch/bad.csv 2 1 "col1" CAST 4,BBB,9, 9
+data/csv/error/flush_cast.csv 2813 0 "a" CAST c, bla 44971
+data/csv/error/flush_cast.csv 439 0 "a" CAST B, bla 6996
 
 query I
 SELECT error_message
-FROM csv_rejects_table;
+FROM csv_rejects_table where byte_position = 6996;
 ----
-:.*Could not convert string "BBB" to 'INTEGER'.*
+:.*Could not parse string "B" according to format specifier "%d-%m-%Y".*
+
+query I
+SELECT error_message
+FROM csv_rejects_table where byte_position = 44971;
+----
+:.*Could not parse string "c" according to format specifier "%d-%m-%Y".*
 
 statement ok
 DROP TABLE csv_rejects_table;
\ No newline at end of file

From 7129aaf150e0889c975ff29a27270bfc4151567f Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Fri, 1 Mar 2024 12:02:42 -0300
Subject: [PATCH 020/147] Preparing the ground for the other errors

---
 .../scanner/string_value_scanner.cpp          | 29 ++++++++++++++-----
 .../operator/csv_scanner/util/csv_error.cpp   | 19 ++++++------
 .../operator/csv_scanner/csv_error.hpp        | 12 ++++----
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index d08a4a06f009..3852cd338d17 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -307,8 +307,11 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p
 
 void StringValueResult::HandleOverLimitRows() {
 	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows + 1);
-	auto csv_error = CSVError::IncorrectColumnAmountError(state_machine.options, nullptr, number_of_columns,
-	                                                      cur_col_id + 1, lines_per_batch);
+	bool first_nl;
+	auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
+	auto csv_error =
+	    CSVError::IncorrectColumnAmountError(state_machine.options, cur_col_id + 1, lines_per_batch, borked_line,
+	                                         current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
 	error_handler.Error(csv_error);
 	// If we get here we need to remove the last line
 	cur_col_id = 0;
@@ -372,8 +375,12 @@ bool StringValueResult::AddRowInternal() {
 			error_handler.NewMaxLineSize(current_line_size);
 		}
 		if (current_line_size > state_machine.options.maximum_line_size) {
+			bool first_nl;
+			auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
 			LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows);
-			auto csv_error = CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch);
+			auto csv_error =
+			    CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch, borked_line,
+			                            current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
 			error_handler.Error(csv_error);
 		}
 		current_line_position.begin = current_line_position.end;
@@ -435,9 +442,12 @@ bool StringValueResult::AddRowInternal() {
 			}
 		} else {
 			// If we are not null-padding, this is an error
+			bool first_nl;
+			auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
 			LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows + 1);
-			auto csv_error = CSVError::IncorrectColumnAmountError(state_machine.options, nullptr, number_of_columns,
-			                                                      cur_col_id, lines_per_batch);
+			auto csv_error = CSVError::IncorrectColumnAmountError(
+			    state_machine.options, cur_col_id, lines_per_batch, borked_line,
+			    
current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
 			error_handler.Error(csv_error);
 			// If we are here, ignore_errors is set, so we delete this line
 			number_of_rows--;
 		}
@@ -481,9 +491,12 @@ bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos
 void StringValueResult::InvalidState(StringValueResult &result) {
 	// FIXME: How do we recover from an invalid state? Can we restart the state machine and jump to the next row?
 	LinesPerBoundary lines_per_batch(result.iterator.GetBoundaryIdx(), result.number_of_rows);
-	auto csv_error = CSVError::UnterminatedQuotesError(result.state_machine.options,
-	                                                   static_cast<string_t *>(result.vector_ptr[result.chunk_col_id]),
-	                                                   result.number_of_rows, result.cur_col_id, lines_per_batch);
+	bool first_nl;
+	auto borked_line = result.current_line_position.ReconstructCurrentLine(first_nl, result.buffer_handles);
+
+	auto csv_error = CSVError::UnterminatedQuotesError(
+	    result.state_machine.options, result.cur_col_id, lines_per_batch, borked_line,
+	    result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl));
 	result.error_handler.Error(csv_error);
 }
 
diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp
index b77437c88f6b..3174ff2d7723 100644
--- a/src/execution/operator/csv_scanner/util/csv_error.cpp
+++ b/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -115,12 +115,13 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_nam
 	return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, byte_position);
 }
 
-CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info) {
+CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info,
+                                 string &csv_row, idx_t byte_position) {
 	std::ostringstream error;
 	error << "Maximum line size of " << options.maximum_line_size << " bytes exceeded. ";
 	error << "Actual Size:" << actual_size << " bytes." << std::endl;
 	error << options.ToString();
-	return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, error_info);
+	return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position);
 }
 
 CSVError CSVError::SniffingError(string &file_path) {
@@ -141,26 +142,26 @@ CSVError CSVError::NullPaddingFail(const CSVReaderOptions &options, LinesPerBoun
 	return CSVError(error.str(), CSVErrorType::NULLPADDED_QUOTED_NEW_VALUE, error_info);
 }
 
-CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, string_t *vector_ptr,
-                                           idx_t vector_line_start, idx_t current_column, LinesPerBoundary error_info) {
+CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_t current_column,
+                                           LinesPerBoundary error_info, string &csv_row, idx_t byte_position) {
 	std::ostringstream error;
 	error << "Value with unterminated quote found."
<< std::endl;
 	error << std::endl;
 	// What were the options
 	error << options.ToString();
-	return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, error_info);
+	return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, byte_position);
 }
 
-CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, string_t *vector_ptr,
-                                              idx_t vector_line_start, idx_t actual_columns,
-                                              LinesPerBoundary error_info) {
+CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns,
+                                              LinesPerBoundary error_info, string &csv_row, idx_t byte_position) {
 	std::ostringstream error;
 	// How many columns were expected and how many were found
 	error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns
 	      << std::endl;
 	// What were the options
 	error << options.ToString();
-	return CSVError(error.str(), CSVErrorType::INCORRECT_COLUMN_AMOUNT, error_info);
+	return CSVError(error.str(), CSVErrorType::INCORRECT_COLUMN_AMOUNT, actual_columns, csv_row, error_info,
+	                byte_position);
 }
 
 bool CSVErrorHandler::PrintLineNumber(CSVError &error) {
diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
index 44bd4f25913a..d8f755600e95 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
@@ -59,18 +59,18 @@ class CSVError {
 	static CSVError CastError(const CSVReaderOptions &options, string &column_name, string &cast_error,
 	                          idx_t column_idx, string &csv_row, LinesPerBoundary error_info, idx_t byte_position);
 	//! Produces error for when the line size exceeds the maximum line size option
-	static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info);
+	static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info,
+	                              string &csv_row, idx_t byte_position);
 	//! Produces error for when the sniffer couldn't find viable options
 	static CSVError SniffingError(string &file_path);
 	//! Produces error messages for unterminated quoted values
-	static CSVError UnterminatedQuotesError(const CSVReaderOptions &options, string_t *vector_ptr,
-	                                        idx_t vector_line_start, idx_t current_column, LinesPerBoundary error_info);
+	static CSVError UnterminatedQuotesError(const CSVReaderOptions &options, idx_t current_column,
+	                                        LinesPerBoundary error_info, string &csv_row, idx_t byte_position);
 	//! Produces error messages for when the null_padding option is set and we have quoted new values in parallel
 	static CSVError NullPaddingFail(const CSVReaderOptions &options, LinesPerBoundary error_info);
 	//! 
Produces error for incorrect (e.g., higher or lower than the predefined) number of columns in a CSV line
-	static CSVError IncorrectColumnAmountError(const CSVReaderOptions &options, string_t *vector_ptr,
-	                                           idx_t vector_line_start, idx_t actual_columns,
-	                                           LinesPerBoundary error_info);
+	static CSVError IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns,
+	                                           LinesPerBoundary error_info, string &csv_row, idx_t byte_position);
 	idx_t GetBoundaryIndex() {
 		return error_info.boundary_idx;
 	}
 
From 7cecc4b983a9e1830abb8a77aee5bf7c97f5c11c Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 6 Mar 2024 11:02:10 +0100
Subject: [PATCH 021/147] All rejects tests pass with vector_size=2

---
 .../duckdb/execution/operator/csv_scanner/csv_error.hpp   | 2 +-
 test/sql/copy/csv/rejects/csv_buffer_size_rejects.test    | 3 ---
 .../csv/rejects/csv_incorrect_columns_amount_rejects.test | 0
 test/sql/copy/csv/rejects/csv_rejects_auto.test           | 3 ---
 test/sql/copy/csv/rejects/csv_rejects_flush_cast.test     | 5 -----
 test/sql/copy/csv/rejects/csv_rejects_read.test           | 4 ----
 test/sql/copy/csv/rejects/test_invalid_parameters.test    | 3 ---
 7 files changed, 1 insertion(+), 19 deletions(-)
 create mode 100644 test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test

diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
index d8f755600e95..9331d62c34f5 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
@@ -43,7 +43,7 @@ enum CSVErrorType : uint8_t {
 	UNTERMINATED_QUOTES = 3, // If a quote is not terminated
 	SNIFFING = 4, // If something went wrong during sniffing and it was not possible to find suitable candidates
 	MAXIMUM_LINE_SIZE = 5, // Maximum line size was exceeded by a line in the CSV file
-	NULLPADDED_QUOTED_NEW_VALUE = 6, // If the null_padding option is set and we have quoted new values in parallel
+	NULLPADDED_QUOTED_NEW_VALUE = 6, // If the null_padding option is set, and we have quoted new values in parallel
 };
 
diff --git a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
index 15461ee1c9f3..dcef91e814ee 100644
--- a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
@@ -4,9 +4,6 @@
 
 require skip_reload
 
-# FIXME: https://github.com/duckdb/duckdb/issues/7755
-require vector_size 2048
-
 loop buffer_size 5 10
 
 # Ensure that we can get the schema if we reduce the sample size and ignore errors
diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/test/sql/copy/csv/rejects/csv_rejects_auto.test b/test/sql/copy/csv/rejects/csv_rejects_auto.test
index 887bd282db73..841ed42465f3 100644
--- a/test/sql/copy/csv/rejects/csv_rejects_auto.test
+++ b/test/sql/copy/csv/rejects/csv_rejects_auto.test
@@ -3,9 +3,6 @@
 
 require skip_reload
 
-# FIXME: https://github.com/duckdb/duckdb/issues/7755
-require vector_size 2048
-
 # Ensure that we can get the schema if we reduce the sample size and ignore errors
 query IIIII
 SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test 
b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test index 87768b805d8a..6b2f5e59d7f5 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -2,11 +2,6 @@ # description: Test that Flush Cast functions properly for the rejects tables # group: [rejects] -require skip_reload - -# FIXME: https://github.com/duckdb/duckdb/issues/7755 -require vector_size 2048 - query III SELECT typeof(first(a)), typeof(first(b)), COUNT(*) FROM read_csv( 'data/csv/error/flush_cast.csv', diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test index ab5b12db949a..a0b2e751289d 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_read.test +++ b/test/sql/copy/csv/rejects/csv_rejects_read.test @@ -3,10 +3,6 @@ require skip_reload -# FIXME: https://github.com/duckdb/duckdb/issues/7755 -require vector_size 2048 - - query III rowsort SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/bad.csv', diff --git a/test/sql/copy/csv/rejects/test_invalid_parameters.test b/test/sql/copy/csv/rejects/test_invalid_parameters.test index 5209960fef88..2e343a30765d 100644 --- a/test/sql/copy/csv/rejects/test_invalid_parameters.test +++ b/test/sql/copy/csv/rejects/test_invalid_parameters.test @@ -3,9 +3,6 @@ require skip_reload -# FIXME: https://github.com/duckdb/duckdb/issues/7755 -require vector_size 2048 - # Test invalid arguments statement error SELECT * FROM read_csv( From 8222dd4f0888703e1ad2883671087e5e0976d239 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 6 Mar 2024 13:32:28 +0100 Subject: [PATCH 022/147] WIP on column amount incorrect --- .../rejects/incorrect_columns/few_columns.csv | 2773 +++++++++++++++++ .../incorrect_columns/many_columns.csv | 2773 +++++++++++++++++ .../rejects/incorrect_columns/mix_columns.csv | 2773 +++++++++++++++++ .../scanner/string_value_scanner.cpp | 4 +- .../table_function/global_csv_state.cpp | 36 +- .../operator/csv_scanner/util/csv_error.cpp | 12 +- .../operator/csv_scanner/csv_error.hpp | 13 +- .../csv_incorrect_columns_amount_rejects.test | 22 + 8 files changed, 8392 insertions(+), 14 deletions(-) create mode 100644 data/csv/rejects/incorrect_columns/few_columns.csv create mode 100644 data/csv/rejects/incorrect_columns/many_columns.csv create mode 100644 data/csv/rejects/incorrect_columns/mix_columns.csv diff --git a/data/csv/rejects/incorrect_columns/few_columns.csv b/data/csv/rejects/incorrect_columns/few_columns.csv new file mode 100644 index 000000000000..9b9d66a642d3 --- /dev/null +++ b/data/csv/rejects/incorrect_columns/few_columns.csv @@ -0,0 +1,2773 @@ +a,b,c,d +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 diff --git a/data/csv/rejects/incorrect_columns/many_columns.csv b/data/csv/rejects/incorrect_columns/many_columns.csv new file mode 100644 index 000000000000..e8611f730f05 --- /dev/null +++ b/data/csv/rejects/incorrect_columns/many_columns.csv @@ -0,0 +1,2773 @@ +a,b,c,d +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4,5,6 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4,5,6 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4,5 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 +1,2,3,4 
+1,2,3,4
[... several hundred further "1,2,3,4" rows of many_columns.csv elided; the file is well-formed except for a handful of over-long rows ("1,2,3,4,5", "1,2,3,4,5,6") at the line numbers the tests below expect ...]
+1,2,3,4,5
+1,2,3,4
+1,2,3,4
+1,2,3,4
+1,2,3,4
diff --git a/data/csv/rejects/incorrect_columns/mix_columns.csv b/data/csv/rejects/incorrect_columns/mix_columns.csv
new file mode 100644
index 000000000000..1217d34524cd
--- /dev/null
+++ b/data/csv/rejects/incorrect_columns/mix_columns.csv
@@ -0,0 +1,2773 @@
+a,b,c,d
+1,2,3,4
[... the remaining 2,771 rows of mix_columns.csv elided; almost all are "1,2,3,4", mixed with a few malformed rows ("1", "1,2", "1,2,3,4,5,6") that the rejects tests below pick up ...]
diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 1d8c1ef2864e..a1ef8c54448d 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -308,7 +308,7 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p
 }
 
 void StringValueResult::HandleOverLimitRows() {
-	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows + 1);
+	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read + 1);
 	bool first_nl;
 	auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
 	auto csv_error =
@@ -446,7 +446,7 @@ bool StringValueResult::AddRowInternal() {
 			// If we are not null-padding this is an error
 			bool first_nl;
 			auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
-			LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows + 1);
+			LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read + 1);
 			auto csv_error = CSVError::IncorrectColumnAmountError(
 			    state_machine.options, cur_col_id, lines_per_batch, borked_line,
 			    current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index 77982d83fab2..73a3a2fe420c 100644
--- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -137,6 +137,36 @@ void CSVGlobalState::DecrementThread() {
 	}
 }
 
+bool IsCSVErrorAcceptedReject(CSVErrorType type) {
+	switch (type) {
+	case CSVErrorType::CAST_ERROR:
+	case CSVErrorType::TOO_MANY_COLUMNS:
+	case CSVErrorType::TOO_FEW_COLUMNS:
+	case CSVErrorType::MAXIMUM_LINE_SIZE:
+	case CSVErrorType::UNTERMINATED_QUOTES:
+		return true;
+	default:
+		return false;
+	}
+}
+
+string CSVErrorTypeToEnum(CSVErrorType type) {
+	switch (type) {
+	case CSVErrorType::CAST_ERROR:
+		return "CAST";
+	case CSVErrorType::TOO_FEW_COLUMNS:
+		return "MISSING COLUMNS";
+	case CSVErrorType::TOO_MANY_COLUMNS:
+		return "TOO MANY COLUMNS";
+	case CSVErrorType::MAXIMUM_LINE_SIZE:
+		return "LINE SIZE OVER MAXIMUM";
+	case CSVErrorType::UNTERMINATED_QUOTES:
+		return "UNQUOTED VALUE";
+	default:
+		throw InternalException("CSV Error is not valid to be stored in a Rejects Table");
+	}
+}
+
 void CSVGlobalState::FillRejectsTable() {
 	auto &options = bind_data.options;
 
@@ -152,7 +182,7 @@
 		auto &errors = file->error_handler->errors;
 		for (auto &error_vector : errors) {
 			for (auto &error : error_vector.second) {
-				if (error.type != CSVErrorType::CAST_ERROR) {
+				if (!IsCSVErrorAcceptedReject(error.type)) {
 					// For now, we only will use it for casting errors
 					continue;
 				}
@@ -177,8 +207,8 @@
 				appender.Append(col_idx);
 				// 5. Column Name (If Applicable)
 				appender.Append(string_t("\"" + col_name + "\""));
-				// 6. Error Type (ENUM?)
-				appender.Append(string_t("CAST"));
+				// 6. Error Type
+				appender.Append(string_t(CSVErrorTypeToEnum(error.type)));
+				// 7. Original CSV Line
+				appender.Append(string_t(error.csv_row));
+				// 8. Full Error Message
diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp
index 3174ff2d7723..af7d45864812 100644
--- a/src/execution/operator/csv_scanner/util/csv_error.cpp
+++ b/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -1,5 +1,6 @@
 #include "duckdb/execution/operator/csv_scanner/csv_error.hpp"
 #include "duckdb/common/exception/conversion_exception.hpp"
+
 #include <sstream>
 
 namespace duckdb {
@@ -160,8 +161,12 @@ CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, i
 	      << std::endl;
 	// What were the options
 	error << options.ToString();
-	return CSVError(error.str(), CSVErrorType::INCORRECT_COLUMN_AMOUNT, actual_columns, csv_row, error_info,
-	                byte_position);
+	if (actual_columns > options.dialect_options.num_cols) {
+		return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info,
+		                byte_position);
+	} else {
+		return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, byte_position);
+	}
 }
 
 bool CSVErrorHandler::PrintLineNumber(CSVError &error) {
@@ -171,7 +176,8 @@
 	switch (error.type) {
 	case CSVErrorType::CAST_ERROR:
 	case CSVErrorType::UNTERMINATED_QUOTES:
-	case CSVErrorType::INCORRECT_COLUMN_AMOUNT:
+	case CSVErrorType::TOO_FEW_COLUMNS:
+	case CSVErrorType::TOO_MANY_COLUMNS:
 	case CSVErrorType::MAXIMUM_LINE_SIZE:
 	case CSVErrorType::NULLPADDED_QUOTED_NEW_VALUE:
 		return true;
diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
index 9331d62c34f5..95436017f2a1 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
@@ -39,11 +39,12 @@ class LinesPerBoundary {
 enum CSVErrorType : uint8_t {
 	CAST_ERROR = 0,                  // If when casting a value from string to the column type fails
 	COLUMN_NAME_TYPE_MISMATCH = 1,   // If there is a mismatch between Column Names and Types
-	INCORRECT_COLUMN_AMOUNT = 2,     // If the CSV is missing a column
-	UNTERMINATED_QUOTES = 3,         // If a quote is not terminated
-	SNIFFING = 4,        // If something went wrong during sniffing and was not possible to find suitable candidates
-	MAXIMUM_LINE_SIZE = 5,           // Maximum line size was exceeded by a line in the CSV File
-	NULLPADDED_QUOTED_NEW_VALUE = 6, // If the null_padding option is set, and we have quoted new values in parallel
+	TOO_FEW_COLUMNS = 2,             // If the CSV has too few columns
+	TOO_MANY_COLUMNS = 3,            // If the CSV has too many columns
+	UNTERMINATED_QUOTES = 4,         // If a quote is not terminated
+	SNIFFING = 5,        // If something went wrong during sniffing and was not possible to find suitable candidates
+	MAXIMUM_LINE_SIZE = 6,           // Maximum line size was exceeded by a line in the CSV File
+	NULLPADDED_QUOTED_NEW_VALUE = 7, // If the null_padding option is set, and we have quoted new values in parallel
 };
 
@@ -69,7 +70,7 @@ class CSVError {
 	//! Produces error messages for null_padding option is set and we have quoted new values in parallel
 	static CSVError NullPaddingFail(const CSVReaderOptions &options, LinesPerBoundary error_info);
 	//! Produces error for an incorrect (e.g., smaller or larger than the predefined) number of columns in a CSV Line
-	static CSVError IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns,
+	static CSVError IncorrectColumnAmountError(const CSVReaderOptions &state_machine, idx_t actual_columns,
 	                                           LinesPerBoundary error_info, string &csv_row, idx_t byte_position);
 
 	idx_t GetBoundaryIndex() {
 		return error_info.boundary_idx;
diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
index e69de29bb2d1..7f899e860de0 100644
--- a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
@@ -0,0 +1,22 @@
+# name: test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
+# description: Test that incorrect column amounts return correct info as rejects tables
+# group: [rejects]
+
+require skip_reload
+
+
+statement ok
+SELECT * FROM read_csv(
+    'data/csv/rejects/incorrect_columns/few_columns.csv',
+    columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
+    rejects_table='csv_rejects_table',
+    ignore_errors=true, auto_detect=false, header = 1);
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+data/csv/rejects/incorrect_columns/few_columns.csv 1816 3 "d" MISSING COLUMNS 1,2,3 14504
+data/csv/rejects/incorrect_columns/few_columns.csv 1825 1 "b" MISSING COLUMNS 1 14574
+data/csv/rejects/incorrect_columns/few_columns.csv 2380 1 "b" MISSING COLUMNS 1 19008
+data/csv/rejects/incorrect_columns/few_columns.csv 2764 2 "c" MISSING COLUMNS 1,2 22074
\ No newline at end of file
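Taken together, the patch above turns the rejects table from a cast-error log into a general malformed-line log: IsCSVErrorAcceptedReject gates which error types get recorded, and CSVErrorTypeToEnum renders them as the strings the test checks for. A minimal usage sketch follows; the input file name is hypothetical, while the table name and columns are the ones the test above queries:

    SELECT * FROM read_csv(
        'flaky_input.csv',
        columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
        rejects_table = 'csv_rejects_table',
        ignore_errors = true);

    -- only the lines that were dropped for having too few values
    SELECT line, column_name, csv_line, byte_position
    FROM csv_rejects_table
    WHERE error_type = 'MISSING COLUMNS';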
From d9ffcb922d214dd366f046517e81027dfacfd8c3 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 6 Mar 2024 13:42:03 +0100
Subject: [PATCH 023/147] remaining merge

---
 src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
index 1427b1e840c4..82160a8490d8 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
@@ -74,7 +74,6 @@ class CSVError {
 	                                           LinesPerBoundary error_info, string &csv_row, idx_t byte_position);
 
 	static CSVError InvalidUTF8(const CSVReaderOptions &options, LinesPerBoundary error_info);
-
 	idx_t GetBoundaryIndex() {
 		return error_info.boundary_idx;
 	}
From d54bf5b7c590803ea5707881325b9fc205ae4694 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 6 Mar 2024 13:47:22 +0100
Subject: [PATCH 024/147] line error fix

---
 .../operator/csv_scanner/scanner/string_value_scanner.cpp | 4 ++--
 .../csv/rejects/csv_incorrect_columns_amount_rejects.test | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 6a7586ee5b4e..4d50b2ec7a8e 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -325,7 +325,7 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p
 }
 
 void StringValueResult::HandleOverLimitRows() {
-	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read + 1);
+	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
 	bool first_nl;
 	auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
 	auto csv_error =
@@ -465,7 +465,7 @@ bool StringValueResult::AddRowInternal() {
 			// If we are not null-padding this is an error
 			bool first_nl;
 			auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
-			LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read + 1);
+			LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
 			auto csv_error = CSVError::IncorrectColumnAmountError(
 			    state_machine.options, cur_col_id, lines_per_batch, borked_line,
 			    current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
index 7f899e860de0..b7d50fca8b7a 100644
--- a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
@@ -16,7 +16,7 @@ query IIIIIII rowsort
 SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
 FROM csv_rejects_table;
 ----
-data/csv/rejects/incorrect_columns/few_columns.csv 1816 3 "d" MISSING COLUMNS 1,2,3 14504
-data/csv/rejects/incorrect_columns/few_columns.csv 1825 1 "b" MISSING COLUMNS 1 14574
-data/csv/rejects/incorrect_columns/few_columns.csv 2380 1 "b" MISSING COLUMNS 1 19008
-data/csv/rejects/incorrect_columns/few_columns.csv 2764 2 "c" MISSING COLUMNS 1,2 22074
+data/csv/rejects/incorrect_columns/few_columns.csv 1814 3 "d" MISSING COLUMNS 1,2,3 14504
+data/csv/rejects/incorrect_columns/few_columns.csv 1823 1 "b" MISSING COLUMNS 1 14574
+data/csv/rejects/incorrect_columns/few_columns.csv 2378 1 "b" MISSING COLUMNS 1 19008
+data/csv/rejects/incorrect_columns/few_columns.csv 2762 2 "c" MISSING COLUMNS 1,2 22074
\ No newline at end of file
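The dropped "+ 1" above appears to have been double-counting: lines_read already covers the current line by the time the error is materialized, so the recorded numbers drifted past the real physical line, and the expected values in the test shift down to match. The intended behavior, sketched on a hypothetical three-line file bad.csv (the file and its contents are illustrative only; the reported line is 1-based and counts the header, as the small_mix.csv expectations later in the series show):

    -- bad.csv          -- expected reject after this patch
    -- a,b       (1)
    -- 1,2       (2)
    -- 1         (3)    -- line = 3, error_type = 'MISSING COLUMNS', csv_line = '1'

    SELECT * FROM read_csv('bad.csv',
        columns = {'a': 'INTEGER', 'b': 'INTEGER'},
        rejects_table = 'csv_rejects_table',
        ignore_errors = true, auto_detect = false, header = 1);

    SELECT line, error_type, csv_line FROM csv_rejects_table;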
From 67703ebf6062b66969d8d8794f8622358c2f3cd0 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 6 Mar 2024 14:04:45 +0100
Subject: [PATCH 025/147] Get information on too many columns right

---
 .../scanner/string_value_scanner.cpp          | 16 ++++++++------
 .../table_function/global_csv_state.cpp       | 14 +++++++++---
 .../csv_incorrect_columns_amount_rejects.test | 22 +++++++++++++++++--
 3 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 4d50b2ec7a8e..caf511b4a7c4 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -111,6 +111,10 @@ inline bool IsValueNull(const char *null_str_ptr, const char *value_ptr, const i
 }
 
 void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size, bool allocate) {
+	if (ignore_current_row) {
+		cur_col_id++;
+		return;
+	}
 	if (cur_col_id >= number_of_columns) {
 		bool error = true;
 		if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) {
 			error = !IsValueNull(null_str_ptr, value_ptr, size);
 		}
 		if (error) {
-			HandleOverLimitRows();
+			ignore_current_row = true;
 		}
-	}
-	if (ignore_current_row) {
 		return;
 	}
+
 	if (projecting_columns) {
 		if (!projected_columns[cur_col_id]) {
 			cur_col_id++;
 			return;
 		}
@@ -332,10 +335,6 @@ void StringValueResult::HandleOverLimitRows() {
 	    CSVError::IncorrectColumnAmountError(state_machine.options, cur_col_id + 1, lines_per_batch, borked_line,
 	                                         current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
 	error_handler.Error(csv_error);
-	// If we get here we need to remove the last line
-	cur_col_id = 0;
-	chunk_col_id = 0;
-	ignore_current_row = true;
 }
 
 void StringValueResult::QuotedNewLine(StringValueResult &result) {
 	result.quoted_new_line = true;
 }
@@ -405,6 +404,9 @@ bool StringValueResult::AddRowInternal() {
 	current_line_position.begin = current_line_position.end;
 	current_line_position.end = current_line_start;
 	if (ignore_current_row) {
+		if (cur_col_id >= number_of_columns) {
+			HandleOverLimitRows();
+		}
 		cur_col_id = 0;
 		chunk_col_id = 0;
 		// An error occurred on this row, we are ignoring it and resetting our control flag
diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index 8e5bc26f411c..e412e21f8c4c 100644
--- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -194,7 +194,11 @@
 				rejects->count++;
 				auto row_line = file->error_handler->GetLine(error.error_info);
 				auto col_idx = error.column_idx;
-				auto col_name = bind_data.return_names[col_idx];
+				string col_name;
+				if (error.type != CSVErrorType::TOO_MANY_COLUMNS) {
+					// Too many columns does not have a name; all other errors do
+					col_name = bind_data.return_names[col_idx];
+				}
 				// Add the row to the rejects table
 				appender.BeginRow();
 				// 1. File Path
 				appender.Append(string_t(file->file_path));
 				// 2. Row Line
 				appender.Append(row_line);
 				// 3. Byte Position where error occurred
 				appender.Append(error.byte_position);
-				// 4. Column Index (If Applicable)
+				// 4. Column Index
 				appender.Append(col_idx);
 				// 5. Column Name (If Applicable)
-				appender.Append(string_t("\"" + col_name + "\""));
+				if (col_name.empty()) {
+					appender.Append(Value());
+				} else {
+					appender.Append(string_t("\"" + col_name + "\""));
+				}
 				// 6. Error Type
 				appender.Append(string_t(CSVErrorTypeToEnum(error.type)));
 				// 7. Original CSV Line
diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
index b7d50fca8b7a..f0ea6c9eda57 100644
--- a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
@@ -4,7 +4,6 @@
 
 require skip_reload
 
-
 statement ok
 SELECT * FROM read_csv(
     'data/csv/rejects/incorrect_columns/few_columns.csv',
     columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
     rejects_table='csv_rejects_table',
     ignore_errors=true, auto_detect=false, header = 1);
@@ -19,4 +18,23 @@ FROM csv_rejects_table;
 ----
 data/csv/rejects/incorrect_columns/few_columns.csv 1814 3 "d" MISSING COLUMNS 1,2,3 14504
 data/csv/rejects/incorrect_columns/few_columns.csv 1823 1 "b" MISSING COLUMNS 1 14574
 data/csv/rejects/incorrect_columns/few_columns.csv 2378 1 "b" MISSING COLUMNS 1 19008
-data/csv/rejects/incorrect_columns/few_columns.csv 2762 2 "c" MISSING COLUMNS 1,2 22074
\ No newline at end of file
+data/csv/rejects/incorrect_columns/few_columns.csv 2762 2 "c" MISSING COLUMNS 1,2 22074
+
+statement ok
+DROP TABLE csv_rejects_table;
+
+statement ok
+SELECT * FROM read_csv(
+    'data/csv/rejects/incorrect_columns/many_columns.csv',
+    columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
+    rejects_table='csv_rejects_table',
+    ignore_errors=true, auto_detect=false, header = 1);
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+data/csv/rejects/incorrect_columns/many_columns.csv 1096 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 8760
+data/csv/rejects/incorrect_columns/many_columns.csv 1159 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 9268
+data/csv/rejects/incorrect_columns/many_columns.csv 1206 5 NULL TOO MANY COLUMNS 1,2,3,4,5 9648
+data/csv/rejects/incorrect_columns/many_columns.csv 2769 5 NULL TOO MANY COLUMNS 1,2,3,4,5 22154
\ No newline at end of file
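One consequence worth noting: a TOO MANY COLUMNS reject has no declared column to blame, so column_name is now NULL for those rows, while every other error type still records the quoted name of the column it failed on. That distinction is directly queryable; a small sketch over the rejects table the tests use:

    SELECT error_type,
           count(*)           AS rejected_rows,
           count(column_name) AS rows_with_named_column  -- count(expr) skips NULLs
    FROM csv_rejects_table
    GROUP BY error_type;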
From 12542a2cdbeed1bfbd868802bcafa6979db521f1 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 6 Mar 2024 14:10:52 +0100
Subject: [PATCH 026/147] More tests for different incorrect column amounts

---
 .../rejects/incorrect_columns/small_mix.csv   |  5 ++
 .../csv_incorrect_columns_amount_rejects.test | 73 ++++++++++++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 data/csv/rejects/incorrect_columns/small_mix.csv

diff --git a/data/csv/rejects/incorrect_columns/small_mix.csv b/data/csv/rejects/incorrect_columns/small_mix.csv
new file mode 100644
index 000000000000..1cfae5653bcd
--- /dev/null
+++ b/data/csv/rejects/incorrect_columns/small_mix.csv
@@ -0,0 +1,5 @@
+a,b,c,d
+1,2,3,4
+1,2,3,4,5
+1,2,3
+1,2,3,4
\ No newline at end of file
diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
index f0ea6c9eda57..414e9fdd8594 100644
--- a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
@@ -37,4 +37,75 @@ FROM csv_rejects_table;
 ----
 data/csv/rejects/incorrect_columns/many_columns.csv 1096 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 8760
 data/csv/rejects/incorrect_columns/many_columns.csv 1159 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 9268
 data/csv/rejects/incorrect_columns/many_columns.csv 1206 5 NULL TOO MANY COLUMNS 1,2,3,4,5 9648
-data/csv/rejects/incorrect_columns/many_columns.csv 2769 5 NULL TOO MANY COLUMNS 1,2,3,4,5 22154
+data/csv/rejects/incorrect_columns/many_columns.csv 2769 5 NULL TOO MANY COLUMNS 1,2,3,4,5 22154
+
+statement ok
+DROP TABLE csv_rejects_table;
+
+statement ok
+SELECT * FROM read_csv(
+    'data/csv/rejects/incorrect_columns/mix_columns.csv',
+    columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
+    rejects_table='csv_rejects_table',
+    ignore_errors=true, auto_detect=false, header = 1);
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+data/csv/rejects/incorrect_columns/mix_columns.csv 1604 1 "b" MISSING COLUMNS 1 12824
+data/csv/rejects/incorrect_columns/mix_columns.csv 1671 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 13354
+data/csv/rejects/incorrect_columns/mix_columns.csv 2751 2 "c" MISSING COLUMNS 1,2 21998
+data/csv/rejects/incorrect_columns/mix_columns.csv 2768 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 22130
+
+# Different Buffer Sizes
+loop buffer_size 10 15
+
+statement ok
+DROP TABLE csv_rejects_table;
+
+statement ok
+SELECT * FROM read_csv(
+    'data/csv/rejects/incorrect_columns/small_mix.csv',
+    columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
+    rejects_table='csv_rejects_table',
+    ignore_errors=true, auto_detect=false, header = 1);
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+data/csv/rejects/incorrect_columns/small_mix.csv 3 5 NULL TOO MANY COLUMNS 1,2,3,4,5 16
+data/csv/rejects/incorrect_columns/small_mix.csv 4 3 "d" MISSING COLUMNS 1,2,3 26
+
+endloop
+
+# All files
+statement ok
+DROP TABLE csv_rejects_table;
+
+statement ok
+SELECT * FROM read_csv(
+    'data/csv/rejects/incorrect_columns/*.csv',
+    columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
+    rejects_table='csv_rejects_table',
+    ignore_errors=true, auto_detect=false, header = 1);
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+data/csv/rejects/incorrect_columns/few_columns.csv 1814 3 "d" MISSING COLUMNS 1,2,3 14504
+data/csv/rejects/incorrect_columns/few_columns.csv 1823 1 "b" MISSING COLUMNS 1 14574
+data/csv/rejects/incorrect_columns/few_columns.csv 2378 1 "b" MISSING COLUMNS 1 19008
+data/csv/rejects/incorrect_columns/few_columns.csv 2762 2 "c" MISSING COLUMNS 1,2 22074
+data/csv/rejects/incorrect_columns/many_columns.csv 1096 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 8760
+data/csv/rejects/incorrect_columns/many_columns.csv 1159 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 9268
+data/csv/rejects/incorrect_columns/many_columns.csv 1206 5 NULL TOO MANY COLUMNS 1,2,3,4,5 9648
+data/csv/rejects/incorrect_columns/many_columns.csv 2769 5 NULL TOO MANY COLUMNS 1,2,3,4,5 22154
+data/csv/rejects/incorrect_columns/mix_columns.csv 1604 1 "b" MISSING COLUMNS 1 12824
+data/csv/rejects/incorrect_columns/mix_columns.csv 1671 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 13354
+data/csv/rejects/incorrect_columns/mix_columns.csv 2751 2 "c" MISSING COLUMNS 1,2 21998
+data/csv/rejects/incorrect_columns/mix_columns.csv 2768 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 22130
+data/csv/rejects/incorrect_columns/small_mix.csv 3 5 NULL TOO MANY COLUMNS 1,2,3,4,5 16
+data/csv/rejects/incorrect_columns/small_mix.csv 4 3 "d" MISSING COLUMNS 1,2,3 26
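The buffer-size loop above is presumably there to exercise boundary handling, since with a five-line file a tiny scan buffer forces the malformed rows to straddle buffer boundaries; note the loop variable is not referenced inside the block, so each iteration currently runs an identical scan. A single iteration can be reproduced by hand. This sketch assumes buffer_size is the regular read_csv option of that name, and the value is only an illustration (it may be subject to a minimum):

    SELECT * FROM read_csv(
        'data/csv/rejects/incorrect_columns/small_mix.csv',
        columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
        rejects_table = 'csv_rejects_table',
        ignore_errors = true, auto_detect = false, header = 1,
        buffer_size = 10);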
From 8ea5f0a7d0255a3317a2e2152c29f0abaf42031b Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 6 Mar 2024 16:56:57 +0100
Subject: [PATCH 027/147] WIP on sanitizing invalid utfs and more on utf
 rejects tables

---
 .../scanner/string_value_scanner.cpp          | 45 ++++++++++++++-----
 .../table_function/global_csv_state.cpp       |  7 ++-
 .../operator/csv_scanner/util/csv_error.cpp   |  5 ++-
 .../operator/persistent/csv_rejects_table.cpp |  3 +-
 .../operator/csv_scanner/csv_error.hpp        |  3 +-
 .../csv_scanner/string_value_scanner.hpp      | 16 ++++++-
 .../csv/rejects/test_invalid_utf_rejects.test | 15 +++++++
 .../utf8proc/include/utf8proc_wrapper.hpp     |  2 +
 third_party/utf8proc/utf8proc_wrapper.cpp     | 33 ++++++++++++++
 9 files changed, 109 insertions(+), 20 deletions(-)
 create mode 100644 test/sql/copy/csv/rejects/test_invalid_utf_rejects.test

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index caf511b4a7c4..a87f43917e1b 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -111,7 +111,7 @@ inline bool IsValueNull(const char *null_str_ptr, const char *value_ptr, const i
 }
 
 void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size, bool allocate) {
-	if (ignore_current_row) {
+	if (current_error.is_set) {
 		cur_col_id++;
 		return;
 	}
 	if (cur_col_id >= number_of_columns) {
 		bool error = true;
 		if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) {
 			error = !IsValueNull(null_str_ptr, value_ptr, size);
 		}
 		if (error) {
-			ignore_current_row = true;
+			current_error = {CSVErrorType::TOO_MANY_COLUMNS};
 		}
 		return;
 	}
 
 	if (projecting_columns) {
 		if (!projected_columns[cur_col_id]) {
 			cur_col_id++;
 			return;
 		}
@@ -221,11 +221,11 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 	if (parse_types[chunk_col_id].second && !Utf8Proc::IsValid(value_ptr, UnsafeNumericCast<idx_t>(size))) {
 		bool force_error = !state_machine.options.ignore_errors && sniffing;
 		// Invalid unicode, we must error
-		LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
-		auto csv_error = CSVError::InvalidUTF8(state_machine.options, lines_per_batch);
-		error_handler.Error(csv_error, force_error);
+		if (force_error) {
+			HandleUnicodeError(force_error);
+		}
 		// If we got here, we are ignoring errors, hence we must ignore this line.
-		ignore_current_row = true;
+		current_error = {CSVErrorType::INVALID_UNICODE};
 		break;
 	}
 	if (allocate) {
@@ -282,7 +282,7 @@ void StringValueResult::Reset() {
 	if (cur_buffer) {
 		buffer_handles[cur_buffer->buffer_idx] = cur_buffer;
 	}
-	ignore_current_row = false;
+	current_error.Reset();
 }
 
 void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t buffer_pos) {
@@ -337,6 +337,20 @@ void StringValueResult::HandleOverLimitRows() {
 	    CSVError::IncorrectColumnAmountError(state_machine.options, cur_col_id + 1, lines_per_batch, borked_line,
 	                                         current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
 	error_handler.Error(csv_error);
 }
 
+void StringValueResult::HandleUnicodeError(bool force_error) {
+	bool first_nl;
+	auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
+	// sanitize borked line
+	std::vector<char> charArray(borked_line.begin(), borked_line.end());
+	charArray.push_back('\0'); // Null-terminate the character array
+	Utf8Proc::MakeValid(&charArray[0], charArray.size());
+	borked_line = {charArray.begin(), charArray.end() - 1};
+	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
+	auto csv_error = CSVError::InvalidUTF8(state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
+	                                       current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
+	error_handler.Error(csv_error, force_error);
+}
+
 void StringValueResult::QuotedNewLine(StringValueResult &result) {
 	result.quoted_new_line = true;
 }
@@ -403,14 +417,21 @@ bool StringValueResult::AddRowInternal() {
 	current_line_position.begin = current_line_position.end;
 	current_line_position.end = current_line_start;
-	if (ignore_current_row) {
+	if (current_error.is_set) {
+		switch (current_error.type) {
+		case CSVErrorType::TOO_MANY_COLUMNS:
 			HandleOverLimitRows();
+			break;
+		case CSVErrorType::INVALID_UNICODE:
+			HandleUnicodeError();
+			break;
+		default:
+			InvalidInputException("CSV Error not allowed when inserting row");
+		}
 		cur_col_id = 0;
 		chunk_col_id = 0;
 		// An error occurred on this row, we are ignoring it and resetting our control flag
-		ignore_current_row = false;
+		current_error.Reset();
 		return false;
 	}
 	if (!cast_errors.empty()) {
@@ -766,14 +787,14 @@ void StringValueScanner::ProcessExtraRow() {
 			return;
 		case CSVState::RECORD_SEPARATOR:
 			if (states.states[0] == CSVState::RECORD_SEPARATOR) {
-				lines_read++;
 				result.EmptyLine(result, iterator.pos.buffer_pos);
 				iterator.pos.buffer_pos++;
+				lines_read++;
 				return;
 			} else if (states.states[0] != CSVState::CARRIAGE_RETURN) {
-				lines_read++;
 				result.AddRow(result, iterator.pos.buffer_pos);
 				iterator.pos.buffer_pos++;
+				lines_read++;
 				return;
 			}
 			lines_read++;
diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index e412e21f8c4c..6380d2e24c6c 100644
--- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -144,6 +144,7 @@ bool IsCSVErrorAcceptedReject(CSVErrorType type) {
 	case CSVErrorType::CAST_ERROR:
 	case CSVErrorType::TOO_MANY_COLUMNS:
 	case CSVErrorType::TOO_FEW_COLUMNS:
 	case CSVErrorType::MAXIMUM_LINE_SIZE:
 	case CSVErrorType::UNTERMINATED_QUOTES:
+	case CSVErrorType::INVALID_UNICODE:
 		return true;
 	default:
 		return false;
 	}
 }
@@ -162,6 +163,8 @@ string CSVErrorTypeToEnum(CSVErrorType type) {
 		return "LINE SIZE OVER MAXIMUM";
 	case CSVErrorType::UNTERMINATED_QUOTES:
 		return "UNQUOTED VALUE";
+	case CSVErrorType::INVALID_UNICODE:
+		return "INVALID UNICODE";
 	default:
 		throw InternalException("CSV Error is not valid to be stored in a Rejects Table");
 	}
 }
@@ -195,7 +198,7 @@
- ignore_current_row = true; + current_error = {CSVErrorType::INVALID_UNICODE}; break; } if (allocate) { @@ -282,7 +282,7 @@ void StringValueResult::Reset() { if (cur_buffer) { buffer_handles[cur_buffer->buffer_idx] = cur_buffer; } - ignore_current_row = false; + current_error.Reset(); } void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t buffer_pos) { @@ -337,6 +337,20 @@ void StringValueResult::HandleOverLimitRows() { error_handler.Error(csv_error); } +void StringValueResult::HandleUnicodeError(bool force_error) { + bool first_nl; + auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); + // sanitize borked line + std::vector charArray(borked_line.begin(), borked_line.end()); + charArray.push_back('\0'); // Null-terminate the character array + Utf8Proc::MakeValid(&charArray[0], charArray.size()); + borked_line = {charArray.begin(), charArray.end() - 1}; + LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); + auto csv_error = CSVError::InvalidUTF8(state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); + error_handler.Error(csv_error, force_error); +} + void StringValueResult::QuotedNewLine(StringValueResult &result) { result.quoted_new_line = true; } @@ -403,14 +417,21 @@ bool StringValueResult::AddRowInternal() { } current_line_position.begin = current_line_position.end; current_line_position.end = current_line_start; - if (ignore_current_row) { - if (cur_col_id >= number_of_columns) { + if (current_error.is_set) { + switch (current_error.type) { + case CSVErrorType::TOO_MANY_COLUMNS: HandleOverLimitRows(); + break; + case CSVErrorType::INVALID_UNICODE: + HandleUnicodeError(); + break; + default: + InvalidInputException("CSV Error not allowed when inserting row"); } cur_col_id = 0; chunk_col_id = 0; // An error occurred on this row, we are ignoring it and resetting our control flag - ignore_current_row = false; + current_error.Reset(); return false; } if (!cast_errors.empty()) { @@ -766,14 +787,14 @@ void StringValueScanner::ProcessExtraRow() { return; case CSVState::RECORD_SEPARATOR: if (states.states[0] == CSVState::RECORD_SEPARATOR) { - lines_read++; result.EmptyLine(result, iterator.pos.buffer_pos); iterator.pos.buffer_pos++; + lines_read++; return; } else if (states.states[0] != CSVState::CARRIAGE_RETURN) { - lines_read++; result.AddRow(result, iterator.pos.buffer_pos); iterator.pos.buffer_pos++; + lines_read++; return; } lines_read++; diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index e412e21f8c4c..6380d2e24c6c 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -144,6 +144,7 @@ bool IsCSVErrorAcceptedReject(CSVErrorType type) { case CSVErrorType::TOO_FEW_COLUMNS: case CSVErrorType::MAXIMUM_LINE_SIZE: case CSVErrorType::UNTERMINATED_QUOTES: + case CSVErrorType::INVALID_UNICODE: return true; default: return false; @@ -162,6 +163,8 @@ string CSVErrorTypeToEnum(CSVErrorType type) { return "LINE SIZE OVER MAXIMUM"; case CSVErrorType::UNTERMINATED_QUOTES: return "UNQUOTED VALUE"; + case CSVErrorType::INVALID_UNICODE: + return "INVALID UNICODE"; default: throw InternalException("CSV Error is not valid to be stored in a Rejects Table"); } @@ -195,7 +198,7 @@ void CSVGlobalState::FillRejectsTable() { 
auto row_line = file->error_handler->GetLine(error.error_info); auto col_idx = error.column_idx; string col_name; - if (error.type != CSVErrorType::TOO_MANY_COLUMNS){ + if (error.type != CSVErrorType::TOO_MANY_COLUMNS) { // Too many columns does not have a name, all other errors have col_name = bind_data.return_names[col_idx]; } @@ -210,7 +213,7 @@ void CSVGlobalState::FillRejectsTable() { // 4. Column Index appender.Append(col_idx); // 5. Column Name (If Applicable) - if (col_name.empty()){ + if (col_name.empty()) { appender.Append(Value()); } else { appender.Append(string_t("\"" + col_name + "\"")); diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index 217bf0ac9fec..9c1eaffd394f 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -169,13 +169,14 @@ CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, i } } -CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, LinesPerBoundary error_info) { +CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info, + string &csv_row, idx_t byte_position) { std::ostringstream error; // How many columns were expected and how many were found error << "Invalid unicode (byte sequence mismatch) detected." << std::endl; // What were the options error << options.ToString(); - return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, error_info); + return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, byte_position); } bool CSVErrorHandler::PrintLineNumber(CSVError &error) { diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 3f2acf553f21..4140b5d3eac5 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -31,7 +31,8 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData order_errors.SetValue(2, "TOO MANY COLUMNS"); order_errors.SetValue(3, "UNQUOTED VALUE"); order_errors.SetValue(4, "LINE SIZE OVER MAXIMUM"); - LogicalType enum_type = LogicalType::ENUM(enum_name, order_errors, 5); + order_errors.SetValue(5, "INVALID UNICODE"); + LogicalType enum_type = LogicalType::ENUM(enum_name, order_errors, 6); auto type_info = make_uniq(enum_name, enum_type); type_info->temporary = true; type_info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index 82160a8490d8..3ced0619aa67 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -72,7 +72,8 @@ class CSVError { //! 
Produces error for incorrect (e.g., smaller and lower than the predefined) number of columns in a CSV Line static CSVError IncorrectColumnAmountError(const CSVReaderOptions &state_machine, idx_t actual_columns, LinesPerBoundary error_info, string &csv_row, idx_t byte_position); - static CSVError InvalidUTF8(const CSVReaderOptions &options, LinesPerBoundary error_info); + static CSVError InvalidUTF8(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info, + string &csv_row, idx_t byte_position); idx_t GetBoundaryIndex() { return error_info.boundary_idx; diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index d88fc76e7390..18d1776b7a71 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -62,6 +62,17 @@ class FullLinePosition { unordered_map> &buffer_handles); }; +class CurrentError { +public: + CurrentError() : is_set(false) {}; + CurrentError(CSVErrorType type) : is_set(true), type(type) {}; + void Reset() { + is_set = false; + } + bool is_set; + CSVErrorType type; +}; + class StringValueResult : public ScannerResult { public: StringValueResult(CSVStates &states, CSVStateMachine &state_machine, @@ -120,8 +131,8 @@ class StringValueResult : public ScannerResult { //! Requested size of buffers (i.e., either 32Mb or set by buffer_size parameter) idx_t requested_size; - //! If the current row has an error, we have to skip it - bool ignore_current_row = false; + //! Current Error if any + CurrentError current_error; bool sniffing; //! Specialized code for quoted values, makes sure to remove quotes and escapes @@ -140,6 +151,7 @@ class StringValueResult : public ScannerResult { inline bool AddRowInternal(); void HandleOverLimitRows(); + void HandleUnicodeError(bool force_error = false); inline void AddValueToVector(const char *value_ptr, const idx_t size, bool allocate = false); diff --git a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test new file mode 100644 index 000000000000..bcf1b6f64ecb --- /dev/null +++ b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test @@ -0,0 +1,15 @@ +# name: test/sql/copy/csv/rejects/test_invalid_utf_rejects.test +# description: Test that invalid unicodes return correct info as rejects tables +# group: [rejects] + +require skip_reload + +statement ok +from read_csv('test/sql/copy/csv/data/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, + auto_detect=false, rejects_table='csv_rejects_table', header = 0, delim = ',', ignore_errors=true) + +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position +FROM csv_rejects_table; +---- + diff --git a/third_party/utf8proc/include/utf8proc_wrapper.hpp b/third_party/utf8proc/include/utf8proc_wrapper.hpp index fb988b254b76..a7fb8c662b24 100644 --- a/third_party/utf8proc/include/utf8proc_wrapper.hpp +++ b/third_party/utf8proc/include/utf8proc_wrapper.hpp @@ -26,6 +26,8 @@ class Utf8Proc { static char* Normalize(const char* s, size_t len); //! Returns whether or not the UTF8 string is valid static bool IsValid(const char *s, size_t len); + //! 
Makes Invalid Unicode valid by replacing invalid parts with a given character + static void MakeValid(char *s, size_t len, char special_flag = '?'); //! Returns the position (in bytes) of the next grapheme cluster static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos); //! Returns the position (in bytes) of the previous grapheme cluster diff --git a/third_party/utf8proc/utf8proc_wrapper.cpp b/third_party/utf8proc/utf8proc_wrapper.cpp index c47472a39eb6..02f6c0efc5de 100644 --- a/third_party/utf8proc/utf8proc_wrapper.cpp +++ b/third_party/utf8proc/utf8proc_wrapper.cpp @@ -102,6 +102,39 @@ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *i return type; } +void Utf8Proc::MakeValid(char *s, size_t len, char special_flag){ + UnicodeType type = UnicodeType::ASCII; + for (size_t i = 0; i < len; i++) { + int c = (int) s[i]; + if ((c & 0x80) == 0) { + continue; + } + int first_pos_seq = i; + if ((c & 0xE0) == 0xC0) { + /* 2 byte sequence */ + int utf8char = c & 0x1F; + type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, nullptr, nullptr); + } else if ((c & 0xF0) == 0xE0) { + /* 3 byte sequence */ + int utf8char = c & 0x0F; + type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, nullptr, nullptr); + } else if ((c & 0xF8) == 0xF0) { + /* 4 byte sequence */ + int utf8char = c & 0x07; + type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, nullptr, nullptr); + } else { + /* invalid UTF-8 start byte */ + s[i] = special_flag; // Rewrite invalid byte + } + if (type == UnicodeType::INVALID) { + for (size_t j = first_pos_seq; j <= i; j++) { + s[j] = special_flag; // Rewrite each byte of the invalid sequence + } + type = UnicodeType::ASCII; + } + } +} + char* Utf8Proc::Normalize(const char *s, size_t len) { assert(s); assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID); From 4ad73e518e0f945bf2760712d6cc91a009024e58 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 6 Mar 2024 18:41:45 +0100 Subject: [PATCH 028/147] invalid utf working --- src/execution/operator/persistent/csv_rejects_table.cpp | 2 +- test/sql/copy/csv/rejects/test_invalid_utf_rejects.test | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 4140b5d3eac5..a2f80d855d15 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -25,7 +25,7 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData // Create CSV_ERROR_TYPE ENUM string enum_name = "CSV_ERROR_TYPE"; - Vector order_errors(LogicalType::VARCHAR, 5); + Vector order_errors(LogicalType::VARCHAR, 6); order_errors.SetValue(0, "CAST"); order_errors.SetValue(1, "MISSING COLUMNS"); order_errors.SetValue(2, "TOO MANY COLUMNS"); diff --git a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test index bcf1b6f64ecb..b2082773ae5e 100644 --- a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test +++ b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test @@ -12,4 +12,7 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- - +test/sql/copy/csv/data/test/invalid_utf_big.csv 3001 2 "col3" INVALID UNICODE valid,invalid_??_part,valid 54000 
+test/sql/copy/csv/data/test/invalid_utf_big.csv	3012	2	"col3"	INVALID UNICODE	valid,valid,invalid_??_part	54208
+test/sql/copy/csv/data/test/invalid_utf_big.csv	3023	2	"col3"	INVALID UNICODE	valid,invalid_??_part,valid	54416
+test/sql/copy/csv/data/test/invalid_utf_big.csv	3034	2	"col3"	INVALID UNICODE	valid,valid,invalid_??_part	54624

From cd1c2d70b28a4f94f9d77cbc107f6b510fcb1aee Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 7 Mar 2024 11:51:24 +0100
Subject: [PATCH 029/147] make tidy happy and skip tests on windows due to
 byte_position mismatch

---
 .../csv_scanner/scanner/string_value_scanner.cpp       | 10 +++++-----
 test/sql/copy/csv/rejects/csv_buffer_size_rejects.test |  3 +++
 .../rejects/csv_incorrect_columns_amount_rejects.test  |  3 +++
 test/sql/copy/csv/rejects/csv_rejects_auto.test        |  3 +++
 test/sql/copy/csv/rejects/csv_rejects_flush_cast.test  |  5 +++++
 test/sql/copy/csv/rejects/csv_rejects_read.test        |  3 +++
 test/sql/copy/csv/rejects/test_invalid_parameters.test |  3 +++
 .../sql/copy/csv/rejects/test_invalid_utf_rejects.test |  3 +++
 8 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index a87f43917e1b..6e9ecdd1d1c3 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -341,10 +341,10 @@ void StringValueResult::HandleUnicodeError(bool force_error) {
 	bool first_nl;
 	auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
 	// sanitize borked line
-	std::vector<char> charArray(borked_line.begin(), borked_line.end());
-	charArray.push_back('\0'); // Null-terminate the character array
-	Utf8Proc::MakeValid(&charArray[0], charArray.size());
-	borked_line = {charArray.begin(), charArray.end() - 1};
+	std::vector<char> char_array(borked_line.begin(), borked_line.end());
+	char_array.push_back('\0'); // Null-terminate the character array
+	Utf8Proc::MakeValid(&char_array[0], char_array.size());
+	borked_line = {char_array.begin(), char_array.end() - 1};
 	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
 	auto csv_error = CSVError::InvalidUTF8(state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
 	                                       current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
@@ -426,7 +426,7 @@ bool StringValueResult::AddRowInternal() {
 			HandleUnicodeError();
 			break;
 		default:
-			InvalidInputException("CSV Error not allowed when inserting row");
+			throw InvalidInputException("CSV Error not allowed when inserting row");
 		}
 		cur_col_id = 0;
 		chunk_col_id = 0;
diff --git a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
index dcef91e814ee..a8fd11728781 100644
--- a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test
@@ -4,6 +4,9 @@
 
 require skip_reload
 
+# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n
+require notwindows
+
 loop buffer_size 5 10
 
 # Ensure that we can get the schema if we reduce the sample size and ignore errors
diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
index 414e9fdd8594..070b413a8497 100644
--- a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
+++ 
b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test @@ -4,6 +4,9 @@ require skip_reload +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + statement ok SELECT * FROM read_csv( 'data/csv/rejects/incorrect_columns/few_columns.csv', diff --git a/test/sql/copy/csv/rejects/csv_rejects_auto.test b/test/sql/copy/csv/rejects/csv_rejects_auto.test index 841ed42465f3..febda7d1c0fc 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_auto.test +++ b/test/sql/copy/csv/rejects/csv_rejects_auto.test @@ -3,6 +3,9 @@ require skip_reload +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + # Ensure that we can get the schema if we reduce the sample size and ignore errors query IIIII SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test index 6b2f5e59d7f5..20ff320b36a5 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -2,6 +2,11 @@ # description: Test that Flush Cast functions properly for the rejects tables # group: [rejects] +require skip_reload + +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + query III SELECT typeof(first(a)), typeof(first(b)), COUNT(*) FROM read_csv( 'data/csv/error/flush_cast.csv', diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test index a0b2e751289d..8cf3d5ac883f 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_read.test +++ b/test/sql/copy/csv/rejects/csv_rejects_read.test @@ -3,6 +3,9 @@ require skip_reload +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + query III rowsort SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/bad.csv', diff --git a/test/sql/copy/csv/rejects/test_invalid_parameters.test b/test/sql/copy/csv/rejects/test_invalid_parameters.test index 2e343a30765d..9325f3780f24 100644 --- a/test/sql/copy/csv/rejects/test_invalid_parameters.test +++ b/test/sql/copy/csv/rejects/test_invalid_parameters.test @@ -3,6 +3,9 @@ require skip_reload +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + # Test invalid arguments statement error SELECT * FROM read_csv( diff --git a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test index b2082773ae5e..52ff0ac19823 100644 --- a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test +++ b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test @@ -4,6 +4,9 @@ require skip_reload +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + statement ok from read_csv('test/sql/copy/csv/data/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, rejects_table='csv_rejects_table', header = 0, delim = ',', ignore_errors=true) From 2bd5af56ffb1dc9d3dd7ef64259f0bb23969d795 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 7 Mar 2024 12:12:00 +0100 Subject: [PATCH 030/147] Adding tests and fixes for rejects over maximum line size --- 
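Note: the byte_position values asserted by the tests in this patch count raw bytes
from the start of the file. A minimal sketch of that arithmetic, assuming uniform
line endings (the helper name below is illustrative, not a DuckDB API):

#include <cstddef>
#include <string>
#include <vector>

// Byte offset at which the (0-based) line_idx-th line starts: every preceding
// line contributes its own bytes plus the newline width, 1 for "\n", 2 for "\r\n".
static std::size_t LineStartOffset(const std::vector<std::string> &lines, std::size_t line_idx,
                                   std::size_t newline_width) {
	std::size_t offset = 0;
	for (std::size_t i = 0; i < line_idx; i++) {
		offset += lines[i].size() + newline_width;
	}
	return offset;
}

For max_10.csv below, the rejected line 5 starts after "a,b" (3 bytes), three "bla,N"
rows (5 bytes each), and four newline bytes: 3 + 15 + 4 = 22, exactly the byte_position
the test asserts. Under "\r\n" the same line would start at 26, which is why these
tests carry "require notwindows".
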
data/csv/rejects/maximum_line/max_10.csv | 8 + data/csv/rejects/maximum_line/over_vector.csv | 2926 +++++++++++++++++ .../scanner/string_value_scanner.cpp | 6 +- .../csv/rejects/csv_rejects_maximum_line.test | 86 + 4 files changed, 3023 insertions(+), 3 deletions(-) create mode 100644 data/csv/rejects/maximum_line/max_10.csv create mode 100644 data/csv/rejects/maximum_line/over_vector.csv create mode 100644 test/sql/copy/csv/rejects/csv_rejects_maximum_line.test diff --git a/data/csv/rejects/maximum_line/max_10.csv b/data/csv/rejects/maximum_line/max_10.csv new file mode 100644 index 000000000000..02a3683c09e0 --- /dev/null +++ b/data/csv/rejects/maximum_line/max_10.csv @@ -0,0 +1,8 @@ +a,b +bla,1 +bla,2 +bla,3 +blaaaaaaaaaaaaaa,4 +bla,1 +bla,2 +bla,3 \ No newline at end of file diff --git a/data/csv/rejects/maximum_line/over_vector.csv b/data/csv/rejects/maximum_line/over_vector.csv new file mode 100644 index 000000000000..c8fe70a412a4 --- /dev/null +++ b/data/csv/rejects/maximum_line/over_vector.csv @@ -0,0 +1,2926 @@ +a,b +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 
+bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 
+bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 
+bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 
+bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +blaaaaaaaaaaaaaaaaaaaa,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 
+bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +blaaaaaaaaaaaaaaaaaaaa,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 
+bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +bla,3 +bla,1 +bla,2 +blaaaaaaaaaaaaaaaaaaaa,3 +bla,1 +bla,2 +bla,3 diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 6e9ecdd1d1c3..c9207c2298ee 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -406,17 +406,17 @@ bool StringValueResult::AddRowInternal() { if (store_line_size) { error_handler.NewMaxLineSize(current_line_size); } + current_line_position.begin = current_line_position.end; + current_line_position.end = current_line_start; if (current_line_size > state_machine.options.maximum_line_size) { bool first_nl; auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); - LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows); + LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); auto csv_error = CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch, borked_line, current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); } - current_line_position.begin = current_line_position.end; - current_line_position.end = current_line_start; if (current_error.is_set) { switch (current_error.type) { case CSVErrorType::TOO_MANY_COLUMNS: diff --git a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test new file mode 100644 index 000000000000..f7bb5447485c --- /dev/null +++ b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test @@ -0,0 +1,86 @@ +# name: test/sql/copy/csv/rejects/csv_rejects_maximum_line.test +# description: Tests rejects tables on max line size parameter +# group: [rejects] + +require skip_reload + +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + +statement ok +SELECT * FROM read_csv( + 'data/csv/rejects/maximum_line/max_10.csv', + columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, + rejects_table='csv_rejects_table', + ignore_errors=true, auto_detect=false, header = 1, max_line_size=10); + +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position +FROM csv_rejects_table; +---- +data/csv/rejects/maximum_line/max_10.csv 5 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 + +statement ok +DROP TABLE csv_rejects_table; + +# Test with buffer sizes + +loop buffer_size 22 27 + +statement ok +SELECT * FROM read_csv( + 'data/csv/rejects/maximum_line/max_10.csv', + columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, + rejects_table='csv_rejects_table', + ignore_errors=true, auto_detect=false, header = 1, max_line_size=10, buffer_size=${buffer_size}); + +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position +FROM csv_rejects_table; +---- +data/csv/rejects/maximum_line/max_10.csv 5 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 + +statement ok +DROP TABLE csv_rejects_table; + +endloop + +# Test over vector size file +statement ok +SELECT * FROM read_csv( + 'data/csv/rejects/maximum_line/over_vector.csv', + columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, + 
rejects_table='csv_rejects_table', + ignore_errors=true, auto_detect=false, header = 1, max_line_size=20); + +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position +FROM csv_rejects_table; +---- +data/csv/rejects/maximum_line/over_vector.csv 2282 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 +data/csv/rejects/maximum_line/over_vector.csv 2591 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 +data/csv/rejects/maximum_line/over_vector.csv 2923 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 + +statement ok +DROP TABLE csv_rejects_table; + +# Read Multiple Files + +statement ok +SELECT * FROM read_csv( + 'data/csv/rejects/maximum_line/*.csv', + columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, + rejects_table='csv_rejects_table', + ignore_errors=true, auto_detect=false, header = 1, max_line_size=10); + +query IIIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position +FROM csv_rejects_table; +---- +data/csv/rejects/maximum_line/max_10.csv 5 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 +data/csv/rejects/maximum_line/over_vector.csv 2282 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 +data/csv/rejects/maximum_line/over_vector.csv 2591 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 +data/csv/rejects/maximum_line/over_vector.csv 2923 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 + +statement ok +DROP TABLE csv_rejects_table; \ No newline at end of file From 73e8c36c6b537aa3df0bed2b2980fd2b2e6ef75b Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 7 Mar 2024 15:29:07 +0100 Subject: [PATCH 031/147] Properly handling unquoted values w ignore_errors --- data/csv/rejects/unquoted/basic.csv | 8 ++ .../scanner/string_value_scanner.cpp | 73 ++++++++++++------- .../state_machine/csv_state_machine_cache.cpp | 20 ++--- .../operator/csv_scanner/csv_error.hpp | 18 ++--- .../csv_scanner/string_value_scanner.hpp | 5 ++ .../csv/rejects/csv_unquoted_rejects.test | 31 ++++++++ 6 files changed, 110 insertions(+), 45 deletions(-) create mode 100644 data/csv/rejects/unquoted/basic.csv create mode 100644 test/sql/copy/csv/rejects/csv_unquoted_rejects.test diff --git a/data/csv/rejects/unquoted/basic.csv b/data/csv/rejects/unquoted/basic.csv new file mode 100644 index 000000000000..8f7dc567086a --- /dev/null +++ b/data/csv/rejects/unquoted/basic.csv @@ -0,0 +1,8 @@ +a,b +"bla",1 +"bla",2 +"bla",3 +"blaaaaaaaaaaaaaa"bla,4 +"bla",1 +"bla",2 +"bla",3 \ No newline at end of file diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index c9207c2298ee..180563271a74 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -351,6 +351,40 @@ void StringValueResult::HandleUnicodeError(bool force_error) { error_handler.Error(csv_error, force_error); } +void StringValueResult::HandleUnterminatedQuotes(bool force_error) { + LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); + bool first_nl; + auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); + auto csv_error = + CSVError::UnterminatedQuotesError(state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); + 
error_handler.Error(csv_error, force_error); +} + +bool StringValueResult::HandleError() { + if (current_error.is_set) { + switch (current_error.type) { + case CSVErrorType::TOO_MANY_COLUMNS: + HandleOverLimitRows(); + break; + case CSVErrorType::INVALID_UNICODE: + HandleUnicodeError(); + break; + case CSVErrorType::UNTERMINATED_QUOTES: + HandleUnterminatedQuotes(); + break; + default: + throw InvalidInputException("CSV Error not allowed when inserting row"); + } + cur_col_id = 0; + chunk_col_id = 0; + // An error occurred on this row, we are ignoring it and resetting our control flag + current_error.Reset(); + return true; + } + return false; +} + void StringValueResult::QuotedNewLine(StringValueResult &result) { result.quoted_new_line = true; } @@ -360,7 +394,7 @@ void StringValueResult::NullPaddingQuotedNewlineCheck() { if (state_machine.options.null_padding && iterator.IsBoundarySet() && quoted_new_line && iterator.done) { // If we have null_padding set, we found a quoted new line, we are scanning the file in parallel and it's the // last row of this thread. - LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), number_of_rows + 1); + LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch); error_handler.Error(csv_error); } @@ -417,21 +451,7 @@ bool StringValueResult::AddRowInternal() { current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); } - if (current_error.is_set) { - switch (current_error.type) { - case CSVErrorType::TOO_MANY_COLUMNS: - HandleOverLimitRows(); - break; - case CSVErrorType::INVALID_UNICODE: - HandleUnicodeError(); - break; - default: - throw InvalidInputException("CSV Error not allowed when inserting row"); - } - cur_col_id = 0; - chunk_col_id = 0; - // An error occurred on this row, we are ignoring it and resetting our control flag - current_error.Reset(); + if (HandleError()) { return false; } if (!cast_errors.empty()) { @@ -533,15 +553,12 @@ bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos } void StringValueResult::InvalidState(StringValueResult &result) { - // FIXME: How do we recover from an invalid state? Can we restart the state machine and jump to the next row? - LinesPerBoundary lines_per_batch(result.iterator.GetBoundaryIdx(), result.number_of_rows); - bool first_nl; - auto borked_line = result.current_line_position.ReconstructCurrentLine(first_nl, result.buffer_handles); - - auto csv_error = CSVError::UnterminatedQuotesError( - result.state_machine.options, result.cur_col_id, lines_per_batch, borked_line, - result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl)); - result.error_handler.Error(csv_error); + bool force_error = !result.state_machine.options.ignore_errors && result.sniffing; + // Invalid unicode, we must error + if (force_error) { + result.HandleUnicodeError(force_error); + } + result.current_error = {CSVErrorType::UNTERMINATED_QUOTES}; } bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_pos) { @@ -1205,7 +1222,9 @@ void StringValueScanner::FinalizeChunkProcess() { // If we are not done we have two options. // 1) If a boundary is set. if (iterator.IsBoundarySet()) { - iterator.done = true; + if (!(result.current_error == CSVErrorType::UNTERMINATED_QUOTES)) { + iterator.done = true; + } // We read until the next line or until we have nothing else to read. 
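 		// Assumed intent of the change above: an unterminated quote at a scanner
 		// boundary may just be a quoted value that continues into the next thread's
 		// range, so this boundary is not marked done and scanning continues; if
 		// nothing is left to read, HandleError() below records the reject instead.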
// Move to next buffer if (!cur_buffer_handle) { @@ -1221,6 +1240,8 @@ void StringValueScanner::FinalizeChunkProcess() { if (cur_buffer_handle->is_last_buffer && iterator.pos.buffer_pos >= cur_buffer_handle->actual_size) { MoveToNextBuffer(); } + } else { + result.HandleError(); } } else { // 2) If a boundary is not set diff --git a/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp b/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp index 1f9768826edd..fbe0752311df 100644 --- a/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +++ b/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp @@ -21,7 +21,6 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op InitializeTransitionArray(transition_array, cur_state, CSVState::QUOTED); break; case CSVState::UNQUOTED: - case CSVState::INVALID: case CSVState::ESCAPE: InitializeTransitionArray(transition_array, cur_state, CSVState::INVALID); break; @@ -38,15 +37,16 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op auto new_line_id = state_machine_options.new_line.GetValue(); // Now set values depending on configuration - // 1) Standard State - transition_array[delimiter][static_cast(static_cast(CSVState::STANDARD))] = CSVState::DELIMITER; - transition_array[static_cast('\n')][static_cast(CSVState::STANDARD)] = CSVState::RECORD_SEPARATOR; - if (new_line_id == NewLineIdentifier::CARRY_ON) { - transition_array[static_cast('\r')][static_cast(CSVState::STANDARD)] = - CSVState::CARRIAGE_RETURN; - } else { - transition_array[static_cast('\r')][static_cast(CSVState::STANDARD)] = - CSVState::RECORD_SEPARATOR; + // 1) Standard/Invalid State + vector std_inv {static_cast(CSVState::STANDARD), static_cast(CSVState::INVALID)}; + for (auto &state : std_inv) { + transition_array[delimiter][state] = CSVState::DELIMITER; + transition_array[static_cast('\n')][state] = CSVState::RECORD_SEPARATOR; + if (new_line_id == NewLineIdentifier::CARRY_ON) { + transition_array[static_cast('\r')][state] = CSVState::CARRIAGE_RETURN; + } else { + transition_array[static_cast('\r')][state] = CSVState::RECORD_SEPARATOR; + } } // 2) Field Separator State transition_array[delimiter][static_cast(CSVState::DELIMITER)] = CSVState::DELIMITER; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index 3ced0619aa67..befc3a669219 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -37,15 +37,15 @@ class LinesPerBoundary { }; enum CSVErrorType : uint8_t { - CAST_ERROR = 0, // If when casting a value from string to the column type fails - COLUMN_NAME_TYPE_MISMATCH = 1, // If there is a mismatch between Column Names and Types - TOO_FEW_COLUMNS = 2, // If the CSV has too few columns - TOO_MANY_COLUMNS = 3, // If the CSV has too many column - UNTERMINATED_QUOTES = 4, // If a quote is not terminated - SNIFFING = 5, // If something went wrong during sniffing and was not possible to find suitable candidates - MAXIMUM_LINE_SIZE = 6, // Maximum line size was exceeded by a line in the CSV File - NULLPADDED_QUOTED_NEW_VALUE = 7, // If the null_padding option is set, and we have quoted new values in parallel - INVALID_UNICODE = 8 + CAST_ERROR = 0, //! If when casting a value from string to the column type fails + COLUMN_NAME_TYPE_MISMATCH = 1, //! 
If there is a mismatch between Column Names and Types
+	TOO_FEW_COLUMNS = 2,     //! If the CSV has too few columns
+	TOO_MANY_COLUMNS = 3,    //! If the CSV has too many columns
+	UNTERMINATED_QUOTES = 4, //! If a quote is not terminated
+	SNIFFING = 5,     //! If something went wrong during sniffing and was not possible to find suitable candidates
+	MAXIMUM_LINE_SIZE = 6,   //! Maximum line size was exceeded by a line in the CSV File
+	NULLPADDED_QUOTED_NEW_VALUE = 7, //! If the null_padding option is set, and we have quoted new values in parallel
+	INVALID_UNICODE = 8              //! If we have invalid unicode values
 };
 
 class CSVError {
diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
index 18d1776b7a71..3869f0b94d5d 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
@@ -71,6 +71,9 @@ class CurrentError {
 	}
 	bool is_set;
 	CSVErrorType type;
+	friend bool operator==(const CurrentError &error, CSVErrorType other) {
+		return error.is_set && error.type == other;
+	}
 };
 
 class StringValueResult : public ScannerResult {
@@ -152,6 +155,8 @@ class StringValueResult : public ScannerResult {
 
 	void HandleOverLimitRows();
 	void HandleUnicodeError(bool force_error = false);
+	void HandleUnterminatedQuotes(bool force_error = false);
+	bool HandleError();
 
 	inline void AddValueToVector(const char *value_ptr, const idx_t size, bool allocate = false);
 
diff --git a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test
new file mode 100644
index 000000000000..a50aef9c60da
--- /dev/null
+++ b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test
@@ -0,0 +1,31 @@
+# name: test/sql/copy/csv/rejects/csv_unquoted_rejects.test
+# description: Tests rejects tables on unquoted values
+# group: [rejects]
+
+require skip_reload
+
+# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n
+require notwindows
+
+query II
+SELECT * FROM read_csv(
+    'data/csv/rejects/unquoted/basic.csv',
+    columns = {'a': 'VARCHAR', 'b': 'INTEGER'},
+    rejects_table='csv_rejects_table',
+    ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"');
+----
+bla	1
+bla	2
+bla	3
+bla	1
+bla	2
+bla	3
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+data/csv/rejects/unquoted/basic.csv	5	1	"b"	UNQUOTED VALUE	"blaaaaaaaaaaaaaa"bla,4	28
+
+statement ok
+DROP TABLE csv_rejects_table;

From bc26aa060be9a85c56e3a3ef34b6353214020dac Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 7 Mar 2024 18:40:08 +0100
Subject: [PATCH 032/147] More work on rejects + unquoted

---
 .../rejects/unquoted/unquoted_last_value.csv  |  5 ++
 .../rejects/unquoted/unquoted_new_line.csv    |  9 +++
 .../scanner/string_value_scanner.cpp          | 28 ++++----
 .../operator/csv_scanner/base_scanner.hpp     |  2 +-
 .../csv_scanner/string_value_scanner.hpp      |  9 +--
 .../csv/rejects/csv_unquoted_rejects.test     | 68 ++++++++++++++++++-
 6 files changed, 101 insertions(+), 20 deletions(-)
 create mode 100644 data/csv/rejects/unquoted/unquoted_last_value.csv
 create mode 100644 data/csv/rejects/unquoted/unquoted_new_line.csv

diff --git a/data/csv/rejects/unquoted/unquoted_last_value.csv b/data/csv/rejects/unquoted/unquoted_last_value.csv
new file mode 100644
index 
000000000000..0d714083e9c8
--- /dev/null
+++ b/data/csv/rejects/unquoted/unquoted_last_value.csv
@@ -0,0 +1,5 @@
+"blaaaaaaaaaaaaaa"
+"bla"
+"bla"
+"bla"
+"bla
diff --git a/data/csv/rejects/unquoted/unquoted_new_line.csv b/data/csv/rejects/unquoted/unquoted_new_line.csv
new file mode 100644
index 000000000000..e42978c5565e
--- /dev/null
+++ b/data/csv/rejects/unquoted/unquoted_new_line.csv
@@ -0,0 +1,9 @@
+a,b
+"bla",1
+"bla",2
+"bla",3
+"blaaaaaaaaaaaaaa
+"bla,4
+"bla",1
+"bla",2
+"bla",3
\ No newline at end of file
diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 180563271a74..b2fc8baa8aa4 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -122,7 +122,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 error = !IsValueNull(null_str_ptr, value_ptr, size);
 }
 if (error) {
- current_error = {CSVErrorType::TOO_MANY_COLUMNS};
+ current_error = {CSVErrorType::TOO_MANY_COLUMNS, cur_col_id};
 }
 return;
 }
@@ -222,10 +222,10 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 bool force_error = !state_machine.options.ignore_errors && sniffing;
 // Invalid unicode, we must error
 if (force_error) {
- HandleUnicodeError(force_error);
+ HandleUnicodeError(cur_col_id, force_error);
 }
 // If we got here, we are ignoring errors, hence we must ignore this line.
- current_error = {CSVErrorType::INVALID_UNICODE};
+ current_error = {CSVErrorType::INVALID_UNICODE, cur_col_id};
 break;
 }
 if (allocate) {
@@ -327,17 +327,17 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p
 result.last_position = buffer_pos + 1;
 }
-void StringValueResult::HandleOverLimitRows() {
+void StringValueResult::HandleOverLimitRows(idx_t col_idx) {
 LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
 bool first_nl;
 auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
 auto csv_error =
- CSVError::IncorrectColumnAmountError(state_machine.options, cur_col_id + 1, lines_per_batch, borked_line,
+ CSVError::IncorrectColumnAmountError(state_machine.options, col_idx, lines_per_batch, borked_line,
 current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
 error_handler.Error(csv_error);
 }
-void StringValueResult::HandleUnicodeError(bool force_error) {
+void StringValueResult::HandleUnicodeError(idx_t col_idx, bool force_error) {
 bool first_nl;
 auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
 // sanitize borked line
 Utf8Proc::MakeValid(&char_array[0], char_array.size());
 borked_line = {char_array.begin(), char_array.end() - 1};
 LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
- auto csv_error = CSVError::InvalidUTF8(state_machine.options, cur_col_id - 1, lines_per_batch, borked_line,
+ auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
 current_line_position.begin.GetGlobalPosition(requested_size, first_nl));
 error_handler.Error(csv_error, force_error);
 }
-void StringValueResult::HandleUnterminatedQuotes(bool force_error) {
+void StringValueResult::HandleUnterminatedQuotes(idx_t col_idx, bool force_error) {
 LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(),
lines_read); bool first_nl; auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); auto csv_error = - CSVError::UnterminatedQuotesError(state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, + CSVError::UnterminatedQuotesError(state_machine.options, col_idx, lines_per_batch, borked_line, current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error, force_error); } @@ -365,13 +365,13 @@ bool StringValueResult::HandleError() { if (current_error.is_set) { switch (current_error.type) { case CSVErrorType::TOO_MANY_COLUMNS: - HandleOverLimitRows(); + HandleOverLimitRows(current_error.col_idx); break; case CSVErrorType::INVALID_UNICODE: - HandleUnicodeError(); + HandleUnicodeError(current_error.col_idx); break; case CSVErrorType::UNTERMINATED_QUOTES: - HandleUnterminatedQuotes(); + HandleUnterminatedQuotes(current_error.col_idx); break; default: throw InvalidInputException("CSV Error not allowed when inserting row"); @@ -556,9 +556,9 @@ void StringValueResult::InvalidState(StringValueResult &result) { bool force_error = !result.state_machine.options.ignore_errors && result.sniffing; // Invalid unicode, we must error if (force_error) { - result.HandleUnicodeError(force_error); + result.HandleUnicodeError(result.cur_col_id, force_error); } - result.current_error = {CSVErrorType::UNTERMINATED_QUOTES}; + result.current_error = {CSVErrorType::UNTERMINATED_QUOTES, result.cur_col_id}; } bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_pos) { diff --git a/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp index c8a2f886fa9f..29a62b8e79ae 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp @@ -252,7 +252,7 @@ class BaseScanner { Initialize(); initialized = true; } - if (!iterator.done) { + if (!iterator.done && cur_buffer_handle) { Process(result); } FinalizeChunkProcess(); diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 3869f0b94d5d..e36266d90f69 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -65,12 +65,13 @@ class FullLinePosition { class CurrentError { public: CurrentError() : is_set(false) {}; - CurrentError(CSVErrorType type) : is_set(true), type(type) {}; + CurrentError(CSVErrorType type, idx_t col_idx_p) : is_set(true), type(type), col_idx(col_idx_p) {}; void Reset() { is_set = false; } bool is_set; CSVErrorType type; + idx_t col_idx; friend bool operator==(const CurrentError &error, CSVErrorType other) { return error.is_set && error.type == other; } @@ -153,9 +154,9 @@ class StringValueResult : public ScannerResult { static inline bool EmptyLine(StringValueResult &result, const idx_t buffer_pos); inline bool AddRowInternal(); - void HandleOverLimitRows(); - void HandleUnicodeError(bool force_error = false); - void HandleUnterminatedQuotes(bool force_error = false); + void HandleOverLimitRows(idx_t col_idx); + void HandleUnicodeError(idx_t col_idx, bool force_error = false); + void HandleUnterminatedQuotes(idx_t col_idx, bool force_error = false); bool HandleError(); inline void AddValueToVector(const char *value_ptr, const idx_t size, bool allocate = 
false); diff --git a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test index a50aef9c60da..976b66d0ff05 100644 --- a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test +++ b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test @@ -25,7 +25,73 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -data/csv/rejects/unquoted/basic.csv 5 1 "b" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28 +data/csv/rejects/unquoted/basic.csv 5 0 "a" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28 statement ok DROP TABLE csv_rejects_table; + +query II +SELECT * FROM read_csv( + 'data/csv/rejects/unquoted/unquoted_new_line.csv', + columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, + rejects_table='csv_rejects_table', + ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"'); +---- +bla 1 +bla 2 +bla 3 +bla 1 +bla 2 +bla 3 + +query IIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, byte_position +FROM csv_rejects_table; +---- +data/csv/rejects/unquoted/unquoted_new_line.csv 5 0 "a" UNQUOTED VALUE 28 + +statement ok +DROP TABLE csv_rejects_table; + +query I +SELECT * FROM read_csv( + 'data/csv/rejects/unquoted/unquoted_last_value.csv', + columns = {'a': 'VARCHAR'}, + rejects_table='csv_rejects_table', + ignore_errors=true, auto_detect=false, header = 0, quote = '"', escape = '"'); +---- +blaaaaaaaaaaaaaa +bla +bla +bla + +query IIIIII rowsort +SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, byte_position +FROM csv_rejects_table; +---- +data/csv/rejects/unquoted/unquoted_last_value.csv 5 0 "a" UNQUOTED VALUE 31 + +statement ok +DROP TABLE csv_rejects_table; + +# Test buffer sizes (borked :( ) +# +#loop buffer_size 35 1001 +# +#statement ok +#SELECT * FROM read_csv( +# 'data/csv/rejects/unquoted/basic.csv', +# columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, +# rejects_table='csv_rejects_table', +# ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"', buffer_size=${buffer_size}); +# +#query IIIIIII rowsort +#SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position +#FROM csv_rejects_table; +#---- +#data/csv/rejects/unquoted/basic.csv 5 0 "a" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28 +# +#statement ok +#DROP TABLE csv_rejects_table; +# +#endloop \ No newline at end of file From 0a87b47e63197896bc3361a805d514840de3e1b8 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Fri, 8 Mar 2024 14:27:22 +0100 Subject: [PATCH 033/147] Make column idx 1-indexes --- .../scanner/string_value_scanner.cpp | 4 +- .../table_function/global_csv_state.cpp | 19 ++--- .../operator/csv_scanner/util/csv_error.cpp | 4 +- .../csv/rejects/csv_buffer_size_rejects.test | 8 +- .../copy/csv/rejects/csv_rejects_auto.test | 16 ++-- .../csv/rejects/csv_rejects_flush_cast.test | 4 +- .../csv/rejects/csv_rejects_maximum_line.test | 18 ++--- .../copy/csv/rejects/csv_rejects_read.test | 74 +++++++++---------- .../csv/rejects/csv_unquoted_rejects.test | 6 +- .../csv/rejects/test_invalid_utf_rejects.test | 8 +- 10 files changed, 81 insertions(+), 80 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index b2fc8baa8aa4..0dea719fb455 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp 
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -365,7 +365,7 @@ bool StringValueResult::HandleError() { if (current_error.is_set) { switch (current_error.type) { case CSVErrorType::TOO_MANY_COLUMNS: - HandleOverLimitRows(current_error.col_idx); + HandleOverLimitRows(cur_col_id); break; case CSVErrorType::INVALID_UNICODE: HandleUnicodeError(current_error.col_idx); @@ -510,7 +510,7 @@ bool StringValueResult::AddRowInternal() { auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); auto csv_error = CSVError::IncorrectColumnAmountError( - state_machine.options, cur_col_id, lines_per_batch, borked_line, + state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); // If we are here we ignore_errors, so we delete this line diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 6380d2e24c6c..1d012349da21 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -197,11 +197,6 @@ void CSVGlobalState::FillRejectsTable() { rejects->count++; auto row_line = file->error_handler->GetLine(error.error_info); auto col_idx = error.column_idx; - string col_name; - if (error.type != CSVErrorType::TOO_MANY_COLUMNS) { - // Too many columns does not have a name, all other errors have - col_name = bind_data.return_names[col_idx]; - } // Add the row to the rejects table appender.BeginRow(); // 1. File Path @@ -211,12 +206,18 @@ void CSVGlobalState::FillRejectsTable() { // 3. Byte Position where error occurred appender.Append(error.byte_position); // 4. Column Index - appender.Append(col_idx); + appender.Append(col_idx + 1); // 5. Column Name (If Applicable) - if (col_name.empty()) { + switch (error.type) { + case CSVErrorType::TOO_MANY_COLUMNS: appender.Append(Value()); - } else { - appender.Append(string_t("\"" + col_name + "\"")); + break; + case CSVErrorType::TOO_FEW_COLUMNS: + D_ASSERT(bind_data.return_names.size() > col_idx + 1); + appender.Append(string_t("\"" + bind_data.return_names[col_idx + 1] + "\"")); + break; + default: + appender.Append(string_t("\"" + bind_data.return_names[col_idx] + "\"")); } // 6. 
Error Type appender.Append(string_t(CSVErrorTypeToEnum(error.type))); diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index 9c1eaffd394f..fadfad6716ff 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -157,11 +157,11 @@ CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, i LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { std::ostringstream error; // How many columns were expected and how many were found - error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1 << std::endl; // What were the options error << options.ToString(); - if (actual_columns > options.dialect_options.num_cols) { + if (actual_columns >= options.dialect_options.num_cols) { return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, byte_position); } else { diff --git a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test index a8fd11728781..76b95cfbe731 100644 --- a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test +++ b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test @@ -24,10 +24,10 @@ query IIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 0 "column0" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 0 "column0" CAST C, A 20875 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 0 "column0" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 0 "column0" CAST C, A 28395 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 1 "column0" CAST B, A 10875 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 1 "column0" CAST C, A 20875 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 1 "column0" CAST B, A 18395 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 1 "column0" CAST C, A 28395 query I SELECT error_message diff --git a/test/sql/copy/csv/rejects/csv_rejects_auto.test b/test/sql/copy/csv/rejects/csv_rejects_auto.test index febda7d1c0fc..bfa8073a6567 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_auto.test +++ b/test/sql/copy/csv/rejects/csv_rejects_auto.test @@ -20,32 +20,32 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "column0" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "column0" CAST C, A 20875 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "column0" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "column0" CAST C, A 28395 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 1 "column0" CAST B, A 10875 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 1 "column0" CAST C, A 20875 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 1 "column0" CAST B, A 18395 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 1 "column0" CAST C, A 28395 query I SELECT error_message -FROM csv_rejects_table where line=2176 and column_idx=0; +FROM csv_rejects_table where line=2176 and column_idx=1; ---- :.*Could not 
convert string "B" to 'BIGINT'.* query I SELECT error_message -FROM csv_rejects_table where line=4176 and column_idx=0; +FROM csv_rejects_table where line=4176 and column_idx=1; ---- :.*Could not convert string "C" to 'BIGINT'.* query I SELECT error_message -FROM csv_rejects_table where line=3680 and column_idx=0; +FROM csv_rejects_table where line=3680 and column_idx=1; ---- :.*Could not convert string "B" to 'BIGINT'.* query I SELECT error_message -FROM csv_rejects_table where line=5680 and column_idx=0; +FROM csv_rejects_table where line=5680 and column_idx=1; ---- :.*Could not convert string "C" to 'BIGINT'.* diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test index 20ff320b36a5..69530026555e 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -23,8 +23,8 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table order by all; ---- -data/csv/error/flush_cast.csv 2813 0 "a" CAST c, bla 44971 -data/csv/error/flush_cast.csv 439 0 "a" CAST B, bla 6996 +data/csv/error/flush_cast.csv 2813 1 "a" CAST c, bla 44971 +data/csv/error/flush_cast.csv 439 1 "a" CAST B, bla 6996 query I SELECT error_message diff --git a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test index f7bb5447485c..f6214aab0906 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test +++ b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test @@ -18,7 +18,7 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -data/csv/rejects/maximum_line/max_10.csv 5 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 +data/csv/rejects/maximum_line/max_10.csv 5 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 statement ok DROP TABLE csv_rejects_table; @@ -38,7 +38,7 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -data/csv/rejects/maximum_line/max_10.csv 5 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 +data/csv/rejects/maximum_line/max_10.csv 5 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 statement ok DROP TABLE csv_rejects_table; @@ -57,9 +57,9 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -data/csv/rejects/maximum_line/over_vector.csv 2282 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 -data/csv/rejects/maximum_line/over_vector.csv 2591 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 -data/csv/rejects/maximum_line/over_vector.csv 2923 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 +data/csv/rejects/maximum_line/over_vector.csv 2282 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 +data/csv/rejects/maximum_line/over_vector.csv 2591 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 +data/csv/rejects/maximum_line/over_vector.csv 2923 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 statement ok DROP TABLE csv_rejects_table; @@ -77,10 +77,10 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM 
csv_rejects_table; ---- -data/csv/rejects/maximum_line/max_10.csv 5 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 -data/csv/rejects/maximum_line/over_vector.csv 2282 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 -data/csv/rejects/maximum_line/over_vector.csv 2591 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 -data/csv/rejects/maximum_line/over_vector.csv 2923 0 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 +data/csv/rejects/maximum_line/max_10.csv 5 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 +data/csv/rejects/maximum_line/over_vector.csv 2282 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 +data/csv/rejects/maximum_line/over_vector.csv 2591 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 +data/csv/rejects/maximum_line/over_vector.csv 2923 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 statement ok DROP TABLE csv_rejects_table; \ No newline at end of file diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test index 8cf3d5ac883f..9917965558ba 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_read.test +++ b/test/sql/copy/csv/rejects/csv_rejects_read.test @@ -20,7 +20,7 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/bad.csv 2 1 "col1" CAST 4,BBB,9, 9 +test/sql/copy/csv/data/error/mismatch/bad.csv 2 2 "col1" CAST 4,BBB,9, 9 query I SELECT error_message @@ -45,25 +45,25 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/bad2.csv 1 2 "col2" CAST 1,2,DDD, 0 -test/sql/copy/csv/data/error/mismatch/bad2.csv 3 0 "col0" CAST EEE,7,FFF, 16 -test/sql/copy/csv/data/error/mismatch/bad2.csv 3 2 "col2" CAST EEE,7,FFF, 16 +test/sql/copy/csv/data/error/mismatch/bad2.csv 1 3 "col2" CAST 1,2,DDD, 0 +test/sql/copy/csv/data/error/mismatch/bad2.csv 3 1 "col0" CAST EEE,7,FFF, 16 +test/sql/copy/csv/data/error/mismatch/bad2.csv 3 3 "col2" CAST EEE,7,FFF, 16 query I SELECT error_message -FROM csv_rejects_table where line=1 and column_idx=2; +FROM csv_rejects_table where line=1 and column_idx=3; ---- :.*Could not convert string "DDD" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=3 and column_idx=0; +FROM csv_rejects_table where line=3 and column_idx=1; ---- :.*Could not convert string "EEE" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=3 and column_idx=2; +FROM csv_rejects_table where line=3 and column_idx=3; ---- :.*Could not convert string "FFF" to 'INTEGER'.* @@ -88,18 +88,18 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/bad.csv 2 1 "col1" CAST 4,BBB,9, 9 -test/sql/copy/csv/data/error/mismatch/bad2.csv 3 0 "col0" CAST EEE,7,FFF, 16 +test/sql/copy/csv/data/error/mismatch/bad.csv 2 2 "col1" CAST 4,BBB,9, 9 +test/sql/copy/csv/data/error/mismatch/bad2.csv 3 1 "col0" CAST EEE,7,FFF, 16 query I SELECT error_message -FROM csv_rejects_table where line=2 and column_idx=1; +FROM csv_rejects_table where line=2 and column_idx=2; ---- :.*Could not convert string "BBB" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=3 and 
column_idx=0; +FROM csv_rejects_table where line=3 and column_idx=1; ---- :.*Could not convert string "EEE" to 'INTEGER'.* @@ -144,18 +144,18 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "num" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "num" CAST C, A 20875 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 1 "num" CAST B, A 10875 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 1 "num" CAST C, A 20875 query I SELECT error_message -FROM csv_rejects_table where line=2176 and column_idx=0; +FROM csv_rejects_table where line=2176 and column_idx=1; ---- :.*Could not convert string "B" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=4176 and column_idx=0; +FROM csv_rejects_table where line=4176 and column_idx=1; ---- :.*Could not convert string "C" to 'INTEGER'.* @@ -175,18 +175,18 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "num" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "num" CAST C, A 28395 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 1 "num" CAST B, A 18395 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 1 "num" CAST C, A 28395 query I SELECT error_message -FROM csv_rejects_table where line=3680 and column_idx=0; +FROM csv_rejects_table where line=3680 and column_idx=1; ---- :.*Could not convert string "B" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=5680 and column_idx=0; +FROM csv_rejects_table where line=5680 and column_idx=1; ---- :.*Could not convert string "C" to 'INTEGER'.* @@ -207,32 +207,32 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 0 "num" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 0 "num" CAST C, A 20875 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 0 "num" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 0 "num" CAST C, A 28395 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 1 "num" CAST B, A 10875 +test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 1 "num" CAST C, A 20875 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 1 "num" CAST B, A 18395 +test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 1 "num" CAST C, A 28395 query I SELECT error_message -FROM csv_rejects_table where line=3680 and column_idx=0; +FROM csv_rejects_table where line=3680 and column_idx=1; ---- :.*Could not convert string "B" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=5680 and column_idx=0; +FROM csv_rejects_table where line=5680 and column_idx=1; ---- :.*Could not convert string "C" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=2176 and column_idx=0; +FROM csv_rejects_table where line=2176 and column_idx=1; ---- :.*Could not convert string "B" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table where line=4176 and column_idx=0; +FROM csv_rejects_table where line=4176 and column_idx=1; ---- :.*Could not convert string "C" to 'INTEGER'.* @@ -261,37 
+261,37 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table_left; ---- -test/sql/copy/csv/data/error/mismatch/small1.csv 3 0 "num" CAST X,Y 14 -test/sql/copy/csv/data/error/mismatch/small1.csv 6 0 "num" CAST X,Y 26 +test/sql/copy/csv/data/error/mismatch/small1.csv 3 1 "num" CAST X,Y 14 +test/sql/copy/csv/data/error/mismatch/small1.csv 6 1 "num" CAST X,Y 26 query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table_right; ---- -test/sql/copy/csv/data/error/mismatch/small2.csv 3 0 "num" CAST X,Y 14 -test/sql/copy/csv/data/error/mismatch/small2.csv 5 0 "num" CAST X,Y 22 +test/sql/copy/csv/data/error/mismatch/small2.csv 3 1 "num" CAST X,Y 14 +test/sql/copy/csv/data/error/mismatch/small2.csv 5 1 "num" CAST X,Y 22 query I SELECT error_message -FROM csv_rejects_table_left where line=3 and column_idx=0; +FROM csv_rejects_table_left where line=3 and column_idx=1; ---- :.*Could not convert string "X" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table_left where line=6 and column_idx=0; +FROM csv_rejects_table_left where line=6 and column_idx=1; ---- :.*Could not convert string "X" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table_right where line=3 and column_idx=0; +FROM csv_rejects_table_right where line=3 and column_idx=1; ---- :.*Could not convert string "X" to 'INTEGER'.* query I SELECT error_message -FROM csv_rejects_table_right where line=5 and column_idx=0; +FROM csv_rejects_table_right where line=5 and column_idx=1; ---- :.*Could not convert string "X" to 'INTEGER'.* @@ -326,8 +326,8 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table_left; ---- -test/sql/copy/csv/data/error/mismatch/small1.csv 3 0 "num" CAST X,Y 14 -test/sql/copy/csv/data/error/mismatch/small1.csv 6 0 "num" CAST X,Y 26 +test/sql/copy/csv/data/error/mismatch/small1.csv 3 1 "num" CAST X,Y 14 +test/sql/copy/csv/data/error/mismatch/small1.csv 6 1 "num" CAST X,Y 26 query I SELECT COUNT(*) diff --git a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test index 976b66d0ff05..0ce1d845df64 100644 --- a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test +++ b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test @@ -25,7 +25,7 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -data/csv/rejects/unquoted/basic.csv 5 0 "a" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28 +data/csv/rejects/unquoted/basic.csv 5 1 "a" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28 statement ok DROP TABLE csv_rejects_table; @@ -48,7 +48,7 @@ query IIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, byte_position FROM csv_rejects_table; ---- -data/csv/rejects/unquoted/unquoted_new_line.csv 5 0 "a" UNQUOTED VALUE 28 +data/csv/rejects/unquoted/unquoted_new_line.csv 5 1 "a" UNQUOTED VALUE 28 statement ok DROP TABLE csv_rejects_table; @@ -69,7 +69,7 @@ query IIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, byte_position FROM csv_rejects_table; ---- -data/csv/rejects/unquoted/unquoted_last_value.csv 5 0 "a" UNQUOTED VALUE 31 
+data/csv/rejects/unquoted/unquoted_last_value.csv 5 1 "a" UNQUOTED VALUE 31 statement ok DROP TABLE csv_rejects_table; diff --git a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test index 52ff0ac19823..94c56cc71562 100644 --- a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test +++ b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test @@ -15,7 +15,7 @@ query IIIIIII rowsort SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position FROM csv_rejects_table; ---- -test/sql/copy/csv/data/test/invalid_utf_big.csv 3001 2 "col3" INVALID UNICODE valid,invalid_??_part,valid 54000 -test/sql/copy/csv/data/test/invalid_utf_big.csv 3012 2 "col3" INVALID UNICODE valid,valid,invalid_??_part 54208 -test/sql/copy/csv/data/test/invalid_utf_big.csv 3023 2 "col3" INVALID UNICODE valid,invalid_??_part,valid 54416 -test/sql/copy/csv/data/test/invalid_utf_big.csv 3034 2 "col3" INVALID UNICODE valid,valid,invalid_??_part 54624 +test/sql/copy/csv/data/test/invalid_utf_big.csv 3001 2 "col2" INVALID UNICODE valid,invalid_??_part,valid 54000 +test/sql/copy/csv/data/test/invalid_utf_big.csv 3012 3 "col3" INVALID UNICODE valid,valid,invalid_??_part 54208 +test/sql/copy/csv/data/test/invalid_utf_big.csv 3023 2 "col2" INVALID UNICODE valid,invalid_??_part,valid 54416 +test/sql/copy/csv/data/test/invalid_utf_big.csv 3034 3 "col3" INVALID UNICODE valid,valid,invalid_??_part 54624 \ No newline at end of file From ba1da6a688803300c39610fe03be73b145e86af4 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Fri, 8 Mar 2024 14:38:32 +0100 Subject: [PATCH 034/147] Handle invalid states in overbuffer values --- .../scanner/string_value_scanner.cpp | 6 +++ .../csv_scanner/csv_state_machine.hpp | 4 ++ .../csv/rejects/csv_unquoted_rejects.test | 47 ++++++++++--------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 0dea719fb455..ffdf15c5d523 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -926,6 +926,9 @@ void StringValueScanner::ProcessOverbufferValue() { if (states.IsEscaped()) { result.escaped = true; } + if (states.IsInvalid()) { + result.InvalidState(result); + } j++; } if (overbuffer_string.empty() && @@ -955,6 +958,9 @@ void StringValueScanner::ProcessOverbufferValue() { if (states.IsEscaped()) { result.escaped = true; } + if (states.IsInvalid()) { + result.InvalidState(result); + } j++; } string_t value; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp index 49542782f56a..a1628e100f63 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp @@ -54,6 +54,10 @@ struct CSVStates { return states[1] == CSVState::CARRIAGE_RETURN; } + inline bool IsInvalid() { + return states[1] == CSVState::INVALID; + } + inline bool IsQuoted() { return states[0] == CSVState::QUOTED; } diff --git a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test index 0ce1d845df64..13c13b8b9fa7 100644 --- a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test +++ 
b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test
@@ -74,24 +74,29 @@ data/csv/rejects/unquoted/unquoted_last_value.csv 5 1 "a" UNQUOTED VALUE 31
 statement ok
 DROP TABLE csv_rejects_table;
-# Test buffer sizes (borked :( )
-#
-#loop buffer_size 35 1001
-#
-#statement ok
-#SELECT * FROM read_csv(
-# 'data/csv/rejects/unquoted/basic.csv',
-# columns = {'a': 'VARCHAR', 'b': 'INTEGER'},
-# rejects_table='csv_rejects_table',
-# ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"', buffer_size=${buffer_size});
-#
-#query IIIIIII rowsort
-#SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
-#FROM csv_rejects_table;
-#----
-#data/csv/rejects/unquoted/basic.csv 5 0 "a" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28
-#
-#statement ok
-#DROP TABLE csv_rejects_table;
-#
-#endloop
\ No newline at end of file
+loop buffer_size 35 40
+
+query II
+SELECT * FROM read_csv(
+ 'data/csv/rejects/unquoted/basic.csv',
+ columns = {'a': 'VARCHAR', 'b': 'INTEGER'},
+ rejects_table='csv_rejects_table', buffer_size=${buffer_size},
+ ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"');
+----
+bla 1
+bla 2
+bla 3
+bla 1
+bla 2
+bla 3
+
+query IIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
+FROM csv_rejects_table;
+----
+data/csv/rejects/unquoted/basic.csv 5 1 "a" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28
+
+statement ok
+DROP TABLE csv_rejects_table;
+
+endloop
\ No newline at end of file
From 9aa5d2a2088fad1da5b337677f777f0513ebc7fe Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Fri, 8 Mar 2024 15:22:22 +0100
Subject: [PATCH 035/147] Add a mixed test with a bunch of different errors
---
 data/csv/rejects/frankstein/nightmare.csv | 48 +++++++++++++
 .../scanner/string_value_scanner.cpp | 1 +
 .../operator/csv_scanner/util/csv_error.cpp | 50 +++++++-------
 .../operator/csv_scanner/csv_error.hpp | 4 +-
 test/sql/copy/csv/rejects/test_mixed.test | 68 +++++++++++++++++++
 5 files changed, 144 insertions(+), 27 deletions(-)
 create mode 100644 data/csv/rejects/frankstein/nightmare.csv
 create mode 100644 test/sql/copy/csv/rejects/test_mixed.test
diff --git a/data/csv/rejects/frankstein/nightmare.csv b/data/csv/rejects/frankstein/nightmare.csv
new file mode 100644
index 000000000000..579f46a359b7
--- /dev/null
+++ b/data/csv/rejects/frankstein/nightmare.csv
@@ -0,0 +1,48 @@
+a,b,c
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro",5
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,bla,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"bla
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro thiago timbo holanda"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedroÿÿ"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
+1,2,"pedro"
diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index ffdf15c5d523..e568a904b19c 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -450,6 +450,7 @@ bool StringValueResult::AddRowInternal() {
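// Descriptive note on the one-line fix below: when a line exceeds maximum_line_size,
// the row has already been counted in number_of_rows, so after the reject is reported
// the counter is rolled back (number_of_rows--) and the oversized line stays out of
// the emitted chunk instead of appearing both as a reject and as a result row.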
CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch, borked_line, current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); + number_of_rows--; } if (HandleError()) { return false; diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index fadfad6716ff..cfab97c9e55d 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -19,7 +19,11 @@ void CSVErrorHandler::ThrowError(CSVError csv_error) { if (PrintLineNumber(csv_error)) { error << "CSV Error on Line: " << GetLine(csv_error.error_info) << std::endl; } - error << csv_error.error_message; + if (csv_error.error_message_with_options.empty()){ + error << csv_error.error_message; + } else{ + error << csv_error.error_message_with_options; + } switch (csv_error.type) { case CSVErrorType::CAST_ERROR: throw ConversionException(error.str()); @@ -78,9 +82,15 @@ CSVError::CSVError(string error_message_p, CSVErrorType type_p, LinesPerBoundary } CSVError::CSVError(string error_message_p, CSVErrorType type_p, idx_t column_idx_p, string csv_row_p, - LinesPerBoundary error_info_p, idx_t byte_position_p) + LinesPerBoundary error_info_p, idx_t byte_position_p, const CSVReaderOptions &reader_options) : error_message(std::move(error_message_p)), type(type_p), column_idx(column_idx_p), csv_row(std::move(csv_row_p)), error_info(error_info_p), byte_position(byte_position_p) { + // What were the options + std::ostringstream error; + error << error_message << std::endl; + error << reader_options.ToString(); + error << std::endl; + error_message_with_options = error.str(); } CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_column, const vector &names) { @@ -107,22 +117,18 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_nam string &csv_row, LinesPerBoundary error_info, idx_t byte_position) { std::ostringstream error; // Which column - error << "Error when converting column \"" << column_name << "\"." << std::endl; + error << "Error when converting column \"" << column_name << "\". "; // What was the cast error - error << cast_error << std::endl; - error << std::endl; - // What were the options - error << options.ToString(); - return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, byte_position); + error << cast_error; + return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, byte_position, options); } CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { std::ostringstream error; error << "Maximum line size of " << options.maximum_line_size << " bytes exceeded. "; - error << "Actual Size:" << actual_size << " bytes." 
<< std::endl; - error << options.ToString(); - return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position); + error << "Actual Size:" << actual_size << " bytes."; + return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position, options); } CSVError CSVError::SniffingError(string &file_path) { @@ -146,26 +152,20 @@ CSVError CSVError::NullPaddingFail(const CSVReaderOptions &options, LinesPerBoun CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { std::ostringstream error; - error << "Value with unterminated quote found." << std::endl; - error << std::endl; - // What were the options - error << options.ToString(); - return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, byte_position); + error << "Value with unterminated quote found."; + return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, byte_position, options); } CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns, LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { std::ostringstream error; // How many columns were expected and how many were found - error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1 - << std::endl; - // What were the options - error << options.ToString(); + error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1; if (actual_columns >= options.dialect_options.num_cols) { return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, - byte_position); + byte_position, options); } else { - return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, byte_position); + return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, byte_position, options); } } @@ -173,10 +173,8 @@ CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, idx_t current_co string &csv_row, idx_t byte_position) { std::ostringstream error; // How many columns were expected and how many were found - error << "Invalid unicode (byte sequence mismatch) detected." << std::endl; - // What were the options - error << options.ToString(); - return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, byte_position); + error << "Invalid unicode (byte sequence mismatch) detected."; + return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, byte_position, options); } bool CSVErrorHandler::PrintLineNumber(CSVError &error) { diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index befc3a669219..98f460127d83 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -52,7 +52,7 @@ class CSVError { public: CSVError() {}; CSVError(string error_message, CSVErrorType type, idx_t column_idx, string csv_row, LinesPerBoundary error_info, - idx_t byte_position); + idx_t byte_position, const CSVReaderOptions &reader_options); CSVError(string error_message, CSVErrorType type, LinesPerBoundary error_info); //! 
Produces error messages for column name -> type mismatch.
 static CSVError ColumnTypesError(case_insensitive_map_t sql_types_per_column, const vector &names);
@@ -81,6 +81,8 @@ class CSVError {
 //! Actual error message
 string error_message;
+ //! Error message with the reader options appended
+ string error_message_with_options;
 //! Error Type
 CSVErrorType type;
 //! Column Index where error happened
diff --git a/test/sql/copy/csv/rejects/test_mixed.test b/test/sql/copy/csv/rejects/test_mixed.test
new file mode 100644
index 000000000000..45001a5e4b05
--- /dev/null
+++ b/test/sql/copy/csv/rejects/test_mixed.test
@@ -0,0 +1,68 @@
+# name: test/sql/copy/csv/rejects/test_mixed.test
+# description: Tests a mix of all possible CSV Errors
+# group: [rejects]
+
+require skip_reload
+
+# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n
+require notwindows
+
+query III
+SELECT * FROM read_csv(
+ 'data/csv/rejects/frankstein/nightmare.csv',
+ columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'VARCHAR'},
+ rejects_table='csv_rejects_table',
+ ignore_errors=true, auto_detect=false, header = 1, max_line_size=20);
+----
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+1 2 pedro
+
+query IIIIIIII rowsort
+SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position, error_message
+FROM csv_rejects_table;
+----
+data/csv/rejects/frankstein/nightmare.csv 10 2 "c" MISSING COLUMNS 1,2 102 Expected Number of Columns: 3 Found: 2
+data/csv/rejects/frankstein/nightmare.csv 14 4 NULL TOO MANY COLUMNS 1,2,"pedro",5 142 Expected Number of Columns: 3 Found: 4
+data/csv/rejects/frankstein/nightmare.csv 19 2 "b" CAST 1,bla,"pedro" 204 Error when converting column "b". Could not convert string "bla" to 'INTEGER'
+data/csv/rejects/frankstein/nightmare.csv 22 3 "c" UNQUOTED VALUE 1,2,"pedro"bla 242 Value with unterminated quote found.
+data/csv/rejects/frankstein/nightmare.csv 32 1 "a" LINE SIZE OVER MAXIMUM 1,2,"pedro thiago timbo holanda" 365 Maximum line size of 20 bytes exceeded. Actual Size:33 bytes.
+data/csv/rejects/frankstein/nightmare.csv 38 3 "c" INVALID UNICODE 1,2,"pedro??" 458 Invalid unicode (byte sequence mismatch) detected.
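+# Note on the expected rejects above: each row exercises a distinct error_type;
+# column_idx is reported 1-based (see PATCH 033), and column_name is NULL for the
+# TOO MANY COLUMNS case because the surplus value has no matching column in the schema.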
\ No newline at end of file From 65cf2d4c9d94fab39e5cd3b791390a01964471a1 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 11 Mar 2024 14:56:14 +0100 Subject: [PATCH 036/147] [wip] changing to store rejects into two tables, add store_rejects flag --- .../scanner/string_value_scanner.cpp | 14 ++-- .../csv_scanner/sniffer/csv_sniffer.cpp | 4 +- .../csv_scanner/sniffer/dialect_detection.cpp | 12 +-- .../csv_scanner/sniffer/type_detection.cpp | 2 +- .../table_function/csv_file_scanner.cpp | 8 +- .../table_function/global_csv_state.cpp | 5 +- .../operator/csv_scanner/util/csv_error.cpp | 17 ++-- .../csv_scanner/util/csv_reader_options.cpp | 13 +-- .../operator/persistent/csv_rejects_table.cpp | 84 +++++++++++++------ src/function/table/read_csv.cpp | 20 ++--- .../operator/csv_scanner/csv_option.hpp | 2 +- .../csv_scanner/csv_reader_options.hpp | 6 +- .../operator/persistent/csv_rejects_table.hpp | 6 +- .../duckdb/storage/serialization/nodes.json | 6 +- src/storage/serialization/serialize_nodes.cpp | 8 +- .../csv/rejects/csv_rejects_double_table.test | 0 16 files changed, 119 insertions(+), 88 deletions(-) create mode 100644 test/sql/copy/csv/rejects/csv_rejects_double_table.test diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index e568a904b19c..ae9d470a32ce 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -17,7 +17,7 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m shared_ptr csv_file_scan_p, idx_t &lines_read_p, bool sniffing_p) : ScannerResult(states, state_machine), number_of_columns(NumericCast(state_machine.dialect_options.num_cols)), - null_padding(state_machine.options.null_padding), ignore_errors(state_machine.options.ignore_errors), + null_padding(state_machine.options.null_padding), ignore_errors(state_machine.options.ignore_errors.GetValue()), null_str_ptr(state_machine.options.null_str.c_str()), null_str_size(state_machine.options.null_str.size()), result_size(result_size_p), error_handler(error_hander_p), iterator(iterator_p), store_line_size(store_line_size_p), csv_file_scan(std::move(csv_file_scan_p)), lines_read(lines_read_p), @@ -219,7 +219,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size // By default we add a string // We only evaluate if a string is utf8 valid, if it's actually a varchar if (parse_types[chunk_col_id].second && !Utf8Proc::IsValid(value_ptr, UnsafeNumericCast(size))) { - bool force_error = !state_machine.options.ignore_errors && sniffing; + bool force_error = !state_machine.options.ignore_errors.GetValue() && sniffing; // Invalid unicode, we must error if (force_error) { HandleUnicodeError(cur_col_id, force_error); @@ -554,7 +554,7 @@ bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos } void StringValueResult::InvalidState(StringValueResult &result) { - bool force_error = !result.state_machine.options.ignore_errors && result.sniffing; + bool force_error = !result.state_machine.options.ignore_errors.GetValue() && result.sniffing; // Invalid unicode, we must error if (force_error) { result.HandleUnicodeError(result.cur_col_id, force_error); @@ -721,7 +721,7 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { { vector row; - if (state_machine->options.ignore_errors) { + if (state_machine->options.ignore_errors.GetValue()) { for (idx_t col 
= 0; col < parse_chunk.ColumnCount(); col++) { row.push_back(parse_chunk.GetValue(col, line_error)); } @@ -739,7 +739,7 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { error_handler->Error(csv_error); } borked_lines.insert(line_error++); - D_ASSERT(state_machine->options.ignore_errors); + D_ASSERT(state_machine->options.ignore_errors.GetValue()); // We are ignoring errors. We must continue but ignoring borked rows for (; line_error < parse_chunk.size(); line_error++) { if (!inserted_column_data.validity.RowIsValid(line_error) && @@ -1184,7 +1184,7 @@ void StringValueScanner::SetStart() { if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size || scan_finder->iterator.GetBufferIdx() >= iterator.GetBufferIdx()) { // Propagate any errors - if (!scan_finder->error_handler->errors.empty() && state_machine->options.ignore_errors) { + if (!scan_finder->error_handler->errors.empty() && state_machine->options.ignore_errors.GetValue()) { for (auto &error_vector : scan_finder->error_handler->errors) { for (auto &error : error_vector.second) { error_handler->Error(error); @@ -1202,7 +1202,7 @@ void StringValueScanner::SetStart() { } } while (!line_found); // Propagate any errors - if (!scan_finder->error_handler->errors.empty() && state_machine->options.ignore_errors) { + if (!scan_finder->error_handler->errors.empty() && state_machine->options.ignore_errors.GetValue()) { for (auto &error_vector : scan_finder->error_handler->errors) { for (auto &error : error_vector.second) { error_handler->Error(error); diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index 9009210359f1..238b56426b52 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -13,7 +13,7 @@ CSVSniffer::CSVSniffer(CSVReaderOptions &options_p, shared_ptr } // Initialize max columns found to either 0 or however many were set max_columns_found = set_columns.Size(); - error_handler = make_shared(options.ignore_errors); + error_handler = make_shared(options.ignore_errors.GetValue()); detection_error_handler = make_shared(true); } @@ -93,7 +93,7 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) { DetectHeader(); // 5. 
Type Replacement ReplaceTypes(); - if (!best_candidate->error_handler->errors.empty() && !options.ignore_errors) { + if (!best_candidate->error_handler->errors.empty() && !options.ignore_errors.GetValue()) { for (auto &error_vector : best_candidate->error_handler->errors) { for (auto &error : error_vector.second) { if (error.type == CSVErrorType::MAXIMUM_LINE_SIZE) { diff --git a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp index 7e23a6d1cc4b..0f5a485adff3 100644 --- a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +++ b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp @@ -101,18 +101,19 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr scanner, if (sniffed_column_counts.result_position > rows_read) { rows_read = sniffed_column_counts.result_position; } - if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding, options.ignore_errors, + if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding, options.ignore_errors.GetValue(), sniffed_column_counts.last_value_always_empty)) { // Not acceptable return; } for (idx_t row = start_row; row < sniffed_column_counts.result_position; row++) { - if (set_columns.IsCandidateUnacceptable(sniffed_column_counts[row], options.null_padding, options.ignore_errors, + if (set_columns.IsCandidateUnacceptable(sniffed_column_counts[row], options.null_padding, + options.ignore_errors.GetValue(), sniffed_column_counts.last_value_always_empty)) { // Not acceptable return; } - if (sniffed_column_counts[row] == num_cols || options.ignore_errors) { + if (sniffed_column_counts[row] == num_cols || options.ignore_errors.GetValue()) { consistent_rows++; } else if (num_cols < sniffed_column_counts[row] && !options.dialect_options.skip_rows.IsSetByUser() && (!set_columns.IsSet() || options.null_padding)) { @@ -212,10 +213,11 @@ bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) { for (idx_t i = 0; i < sniffed_column_counts.result_position; i++) { if (set_columns.IsSet()) { return !set_columns.IsCandidateUnacceptable(sniffed_column_counts[i], options.null_padding, - options.ignore_errors, + options.ignore_errors.GetValue(), sniffed_column_counts.last_value_always_empty); } else { - if (max_columns_found != sniffed_column_counts[i] && (!options.null_padding && !options.ignore_errors)) { + if (max_columns_found != sniffed_column_counts[i] && + (!options.null_padding && !options.ignore_errors.GetValue())) { return false; } } diff --git a/src/execution/operator/csv_scanner/sniffer/type_detection.cpp b/src/execution/operator/csv_scanner/sniffer/type_detection.cpp index 717472b3c211..fe1bf8644776 100644 --- a/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +++ b/src/execution/operator/csv_scanner/sniffer/type_detection.cpp @@ -273,7 +273,7 @@ void CSVSniffer::DetectTypes() { // it's good if the dialect creates more non-varchar columns, but only if we sacrifice < 30% of // best_num_cols. 
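// Worked example of the guard below (illustrative numbers, not from the patch): with
// max_columns_found = 10, info_sql_types_candidates.size() > (max_columns_found * 0.7)
// only holds when a candidate keeps more than 7 columns, i.e. at least 8 of the 10,
// so at most roughly 30% of the columns may be traded away for non-varchar types.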
if (varchar_cols < min_varchar_cols && info_sql_types_candidates.size() > (max_columns_found * 0.7) && - (!options.ignore_errors || candidate->error_handler->errors.size() < min_errors)) { + (!options.ignore_errors.GetValue() || candidate->error_handler->errors.size() < min_errors)) { min_errors = candidate->error_handler->errors.size(); best_header_row.clear(); // we have a new best_options candidate diff --git a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp index 641cbdd06818..807fe700a402 100644 --- a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +++ b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp @@ -10,7 +10,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, shared_ptr bu vector &file_schema) : file_path(options_p.file_path), file_idx(0), buffer_manager(std::move(buffer_manager_p)), state_machine(std::move(state_machine_p)), file_size(buffer_manager->file_handle->FileSize()), - error_handler(make_shared(options_p.ignore_errors)), + error_handler(make_shared(options_p.ignore_errors.GetValue())), on_disk_file(buffer_manager->file_handle->OnDiskFile()), options(options_p) { if (bind_data.initial_reader.get()) { auto &union_reader = *bind_data.initial_reader; @@ -43,7 +43,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, cons const idx_t file_idx_p, const ReadCSVData &bind_data, const vector &column_ids, const vector &file_schema) : file_path(file_path_p), file_idx(file_idx_p), - error_handler(make_shared(options_p.ignore_errors)), options(options_p) { + error_handler(make_shared(options_p.ignore_errors.GetValue())), options(options_p) { if (file_idx < bind_data.union_readers.size()) { // we are doing UNION BY NAME - fetch the options from the union reader for this file optional_ptr union_reader_ptr; @@ -129,8 +129,8 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, cons } CSVFileScan::CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options_p) - : file_path(file_name), file_idx(0), error_handler(make_shared(options_p.ignore_errors)), - options(options_p) { + : file_path(file_name), file_idx(0), + error_handler(make_shared(options_p.ignore_errors.GetValue())), options(options_p) { buffer_manager = make_shared(context, options, file_path, file_idx); // Initialize On Disk and Size of file on_disk_file = buffer_manager->file_handle->OnDiskFile(); diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 1d012349da21..4446a670e5a5 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -173,10 +173,9 @@ string CSVErrorTypeToEnum(CSVErrorType type) { void CSVGlobalState::FillRejectsTable() { auto &options = bind_data.options; - if (!options.rejects_table_name.empty()) { + if (options.store_rejects) { auto limit = options.rejects_limit; - - auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name); + auto rejects = CSVRejectsTable::GetOrCreate(context); lock_guard lock(rejects->write_lock); auto &table = rejects->GetTable(context); InternalAppender appender(context, table); diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index cfab97c9e55d..7a8349288c07 
100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -19,9 +19,9 @@ void CSVErrorHandler::ThrowError(CSVError csv_error) { if (PrintLineNumber(csv_error)) { error << "CSV Error on Line: " << GetLine(csv_error.error_info) << std::endl; } - if (csv_error.error_message_with_options.empty()){ + if (csv_error.error_message_with_options.empty()) { error << csv_error.error_message; - } else{ + } else { error << csv_error.error_message_with_options; } switch (csv_error.type) { @@ -153,7 +153,8 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_ LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { std::ostringstream error; error << "Value with unterminated quote found."; - return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, byte_position, options); + return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, byte_position, + options); } CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns, @@ -162,10 +163,11 @@ CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, i // How many columns were expected and how many were found error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1; if (actual_columns >= options.dialect_options.num_cols) { - return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, - byte_position, options); + return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, byte_position, + options); } else { - return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, byte_position, options); + return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, byte_position, + options); } } @@ -174,7 +176,8 @@ CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, idx_t current_co std::ostringstream error; // How many columns were expected and how many were found error << "Invalid unicode (byte sequence mismatch) detected."; - return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, byte_position, options); + return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, byte_position, + options); } bool CSVErrorHandler::PrintLineNumber(CSVError &error) { diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index 72c73a2e5bac..849c0e97ec52 100644 --- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -189,7 +189,7 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, string format = ParseString(value, loption); SetDateFormat(LogicalTypeId::TIMESTAMP, format, true); } else if (loption == "ignore_errors") { - ignore_errors = ParseBoolean(value, loption); + ignore_errors.Set(ParseBoolean(value, loption)); } else if (loption == "buffer_size") { buffer_size = ParseInteger(value, loption); if (buffer_size == 0) { @@ -206,13 +206,8 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, parallel = ParseBoolean(value, loption); } else if (loption == "allow_quoted_nulls") { allow_quoted_nulls = 
ParseBoolean(value, loption); - } else if (loption == "rejects_table") { - // skip, handled in SetRejectsOptions - auto table_name = ParseString(value, loption); - if (table_name.empty()) { - throw BinderException("REJECTS_TABLE option cannot be empty"); - } - rejects_table_name = table_name; + } else if (loption == "store_rejects") { + store_rejects = ParseBoolean(value, loption); } else if (loption == "rejects_limit") { int64_t limit = ParseInteger(value, loption); if (limit < 0) { @@ -323,7 +318,7 @@ string CSVReaderOptions::ToString() const { // sample_size error += "sample_size=" + std::to_string(sample_size_chunks * STANDARD_VECTOR_SIZE) + "\n "; // ignore_errors - error += "ignore_errors=" + std::to_string(ignore_errors) + "\n "; + error += "ignore_errors=" + ignore_errors.FormatValue() + "\n "; // all_varchar error += "all_varchar=" + std::to_string(all_varchar) + "\n"; diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index a2f80d855d15..939ce125f39a 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -9,14 +9,14 @@ namespace duckdb { TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) { auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG); - auto &table_entry = temp_catalog.GetEntry(context, TEMP_CATALOG, DEFAULT_SCHEMA, name); + auto &table_entry = temp_catalog.GetEntry(context, TEMP_CATALOG, DEFAULT_SCHEMA, "reject_scans"); return table_entry; } -shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) { - auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name); +shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context) { + auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY"; auto &cache = ObjectCache::GetObjectCache(context); - return cache.GetOrCreate(key, name); + return cache.GetOrCreate(key); } void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) { @@ -38,27 +38,61 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData type_info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; catalog.CreateType(context, *type_info); - // Create Rejects Table - auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, name); - info->temporary = true; - info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; - // 1. File Path - info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR)); - // 2. Row Line - info->columns.AddColumn(ColumnDefinition("line", LogicalType::UBIGINT)); - // 3. Byte Position where error occurred - info->columns.AddColumn(ColumnDefinition("byte_position", LogicalType::UBIGINT)); - // 4. Column Index (If Applicable) - info->columns.AddColumn(ColumnDefinition("column_idx", LogicalType::UBIGINT)); - // 5. Column Name (If Applicable) - info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR)); - // 6. Error Type - info->columns.AddColumn(ColumnDefinition("error_type", enum_type)); - // 7. Original CSV Line - info->columns.AddColumn(ColumnDefinition("csv_line", LogicalType::VARCHAR)); - // 8. Full Error Message - info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR)); - catalog.CreateTable(context, std::move(info)); + // Create Rejects Scans Table + { + auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, "reject_scans"); + info->temporary = true; + info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; + // 0. 
Scan ID
+		info->columns.AddColumn(ColumnDefinition("scan_id", LogicalType::UBIGINT));
+		// 1. File Path
+		info->columns.AddColumn(ColumnDefinition("file_path", LogicalType::VARCHAR));
+		// 2. Delimiter
+		info->columns.AddColumn(ColumnDefinition("delimiter", LogicalType::VARCHAR));
+		// 3. Quote
+		info->columns.AddColumn(ColumnDefinition("quote", LogicalType::VARCHAR));
+		// 4. Escape
+		info->columns.AddColumn(ColumnDefinition("escape", LogicalType::VARCHAR));
+		// 5. NewLine Delimiter
+		info->columns.AddColumn(ColumnDefinition("newline_delimiter", LogicalType::VARCHAR));
+		// 6. Skip Rows
+		info->columns.AddColumn(ColumnDefinition("skip_rows", LogicalType::UINTEGER));
+		// 7. Has Header
+		info->columns.AddColumn(ColumnDefinition("has_header", LogicalType::BOOLEAN));
+		// 8. List<Struct<name, type>>
+		info->columns.AddColumn(ColumnDefinition("columns", LogicalType::VARCHAR));
+		// 9. Date Format
+		info->columns.AddColumn(ColumnDefinition("date_format", LogicalType::VARCHAR));
+		// 10. Timestamp Format
+		info->columns.AddColumn(ColumnDefinition("timestamp_format", LogicalType::VARCHAR));
+		// 11. CSV read function with all the options used
+		info->columns.AddColumn(ColumnDefinition("user_arguments", LogicalType::VARCHAR));
+		// 12. CSV read function with all the options used
+		info->columns.AddColumn(ColumnDefinition("prompt", LogicalType::VARCHAR));
+		catalog.CreateTable(context, std::move(info));
+	}
+	{
+		// Create Rejects Error Table
+		auto info = make_uniq<CreateTableInfo>(TEMP_CATALOG, DEFAULT_SCHEMA, "reject_errors");
+		info->temporary = true;
+		info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT;
+		// 1. Row Line
+		info->columns.AddColumn(ColumnDefinition("line", LogicalType::UBIGINT));
+		// 2. Byte Position where error occurred
+		info->columns.AddColumn(ColumnDefinition("byte_position", LogicalType::UBIGINT));
+		// 3. Column Index (If Applicable)
+		info->columns.AddColumn(ColumnDefinition("column_idx", LogicalType::UBIGINT));
+		// 4. Column Name (If Applicable)
+		info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR));
+		// 5. Error Type
+		info->columns.AddColumn(ColumnDefinition("error_type", enum_type));
+		// 6. Original CSV Line
+		info->columns.AddColumn(ColumnDefinition("csv_line", LogicalType::VARCHAR));
+		// 7. 
Full Error Message + info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR)); + catalog.CreateTable(context, std::move(info)); + } + count = 0; } diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 1b5d33f4df5b..2c691aa8bd21 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -53,19 +53,18 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio options.FromNamedParameters(input.named_parameters, context, return_types, names); // Validate rejects_table options - if (!options.rejects_table_name.empty()) { - if (!options.ignore_errors) { + if (options.store_rejects) { + if (!options.ignore_errors.GetValue() && options.ignore_errors.IsSetByUser()) { throw BinderException("REJECTS_TABLE option is only supported when IGNORE_ERRORS is set to true"); } + // Ensure we set ignore errors to true automagically + options.ignore_errors.Set(true, false); if (options.file_options.union_by_name) { throw BinderException("REJECTS_TABLE option is not supported when UNION_BY_NAME is set to true"); } } - - if (options.rejects_limit != 0) { - if (options.rejects_table_name.empty()) { - throw BinderException("REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name"); - } + if (options.rejects_limit != 0 && !options.store_rejects) { + throw BinderException("REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name"); } options.file_options.AutoDetectHivePartitioning(result->files, context); @@ -146,9 +145,8 @@ static unique_ptr ReadCSVInitGlobal(ClientContext &con auto &bind_data = input.bind_data->Cast(); // Create the temporary rejects table - auto rejects_table = bind_data.options.rejects_table_name; - if (!rejects_table.empty()) { - CSVRejectsTable::GetOrCreate(context, rejects_table)->InitializeTable(context, bind_data); + if (bind_data.options.store_rejects) { + CSVRejectsTable::GetOrCreate(context)->InitializeTable(context, bind_data); } if (bind_data.files.empty()) { // This can happen when a filename based filter pushdown has eliminated all possible files for this scan. @@ -228,7 +226,7 @@ void ReadCSVTableFunction::ReadCSVAddNamedParameters(TableFunction &table_functi table_function.named_parameters["max_line_size"] = LogicalType::VARCHAR; table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR; table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN; - table_function.named_parameters["rejects_table"] = LogicalType::VARCHAR; + table_function.named_parameters["store_rejects"] = LogicalType::BOOLEAN; table_function.named_parameters["rejects_limit"] = LogicalType::BIGINT; table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT; table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp index 8c13e2c9f15f..57386f857963 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp @@ -73,7 +73,7 @@ struct CSVOption { return value != other; } //! 
Returns CSV Option value - const T GetValue() const { + inline const T GetValue() const { return value; } bool IsSetByUser() const { diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp index ee06436ed9d6..436802909c82 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp @@ -40,9 +40,9 @@ struct CSVReaderOptions { //! See struct above. DialectOptions dialect_options; //! Whether or not we should ignore InvalidInput errors - bool ignore_errors = false; - //! Rejects table name - string rejects_table_name; + CSVOption ignore_errors = false; + //! Whether we store CSV Errors or not + bool store_rejects = false; //! Rejects table entry limit (0 = no limit) idx_t rejects_limit = 0; //! Number of samples to buffer diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp index 12c9bc61345e..bb4ff62fa4ae 100644 --- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp @@ -14,14 +14,14 @@ class ClientContext; class CSVRejectsTable : public ObjectCacheEntry { public: - CSVRejectsTable(string name) : name(name), count(0) { + CSVRejectsTable() : count(0) { } ~CSVRejectsTable() override = default; mutex write_lock; - string name; + idx_t count; - static shared_ptr GetOrCreate(ClientContext &context, const string &name); + static shared_ptr GetOrCreate(ClientContext &context); void InitializeTable(ClientContext &context, const ReadCSVData &options); TableCatalogEntry &GetTable(ClientContext &context); diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index c601768a93cd..6dfb0b003f15 100644 --- a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -537,7 +537,7 @@ "members": [ {"id": 100, "name": "ignore_errors", - "type": "bool" + "type": "CSVOption" }, {"id": 101, "name": "buffer_sample_size", @@ -604,8 +604,8 @@ "type": "vector" }, {"id": 117, - "name": "rejects_table_name", - "type": "string" + "name": "store_rejects", + "type": "bool" }, {"id": 118, "name": "rejects_limit", diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index 96b233b8d6ca..c274e2a2b2ac 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -118,7 +118,7 @@ CSVOption CSVOption::Deserialize(Deserializer &deserializer) { } void CSVReaderOptions::Serialize(Serializer &serializer) const { - serializer.WritePropertyWithDefault(100, "ignore_errors", ignore_errors); + serializer.WriteProperty>(100, "ignore_errors", ignore_errors); serializer.WritePropertyWithDefault(101, "buffer_sample_size", buffer_sample_size); serializer.WritePropertyWithDefault(102, "null_str", null_str); serializer.WriteProperty(103, "compression", compression); @@ -135,7 +135,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(114, "buffer_size", buffer_size); serializer.WriteProperty(115, "file_options", file_options); serializer.WritePropertyWithDefault>(116, "force_quote", force_quote); - serializer.WritePropertyWithDefault(117, "rejects_table_name", 
rejects_table_name); + serializer.WritePropertyWithDefault(117, "store_rejects", store_rejects); serializer.WritePropertyWithDefault(118, "rejects_limit", rejects_limit); serializer.WriteProperty>(119, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter); serializer.WriteProperty>(120, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); @@ -151,7 +151,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { CSVReaderOptions result; - deserializer.ReadPropertyWithDefault(100, "ignore_errors", result.ignore_errors); + deserializer.ReadProperty>(100, "ignore_errors", result.ignore_errors); deserializer.ReadPropertyWithDefault(101, "buffer_sample_size", result.buffer_sample_size); deserializer.ReadPropertyWithDefault(102, "null_str", result.null_str); deserializer.ReadProperty(103, "compression", result.compression); @@ -168,7 +168,7 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { deserializer.ReadPropertyWithDefault(114, "buffer_size", result.buffer_size); deserializer.ReadProperty(115, "file_options", result.file_options); deserializer.ReadPropertyWithDefault>(116, "force_quote", result.force_quote); - deserializer.ReadPropertyWithDefault(117, "rejects_table_name", result.rejects_table_name); + deserializer.ReadPropertyWithDefault(117, "store_rejects", result.store_rejects); deserializer.ReadPropertyWithDefault(118, "rejects_limit", result.rejects_limit); deserializer.ReadProperty>(119, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter); deserializer.ReadProperty>(120, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote); diff --git a/test/sql/copy/csv/rejects/csv_rejects_double_table.test b/test/sql/copy/csv/rejects/csv_rejects_double_table.test new file mode 100644 index 000000000000..e69de29bb2d1 From 05bb2db09a4f309d4882335f77271e8cc3afc255 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 11 Mar 2024 15:32:13 +0100 Subject: [PATCH 037/147] More on making options magically work --- .../table_function/global_csv_state.cpp | 4 ++-- .../csv_scanner/util/csv_reader_options.cpp | 7 +++++++ .../operator/persistent/csv_rejects_table.cpp | 12 +++++------ src/function/table/read_csv.cpp | 21 +++++++++++++------ .../csv_scanner/csv_reader_options.hpp | 6 ++++-- .../operator/persistent/csv_rejects_table.hpp | 15 ++++++++++--- .../duckdb/storage/serialization/nodes.json | 6 +++++- src/storage/serialization/serialize_nodes.cpp | 6 ++++-- 8 files changed, 55 insertions(+), 22 deletions(-) diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 4446a670e5a5..00ba18b82f00 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -173,9 +173,9 @@ string CSVErrorTypeToEnum(CSVErrorType type) { void CSVGlobalState::FillRejectsTable() { auto &options = bind_data.options; - if (options.store_rejects) { + if (options.store_rejects.GetValue()) { auto limit = options.rejects_limit; - auto rejects = CSVRejectsTable::GetOrCreate(context); + auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name); lock_guard lock(rejects->write_lock); auto &table = 
rejects->GetTable(context); InternalAppender appender(context, table); diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index 849c0e97ec52..7fcb7f3383b1 100644 --- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -208,6 +208,13 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, allow_quoted_nulls = ParseBoolean(value, loption); } else if (loption == "store_rejects") { store_rejects = ParseBoolean(value, loption); + } else if (loption == "rejects_table") { + // skip, handled in SetRejectsOptions + auto table_name = ParseString(value, loption); + if (table_name.empty()) { + throw BinderException("REJECTS_TABLE option cannot be empty"); + } + rejects_table_name = table_name; } else if (loption == "rejects_limit") { int64_t limit = ParseInteger(value, loption); if (limit < 0) { diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 939ce125f39a..7d8094659377 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -9,14 +9,14 @@ namespace duckdb { TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) { auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG); - auto &table_entry = temp_catalog.GetEntry(context, TEMP_CATALOG, DEFAULT_SCHEMA, "reject_scans"); + auto &table_entry = temp_catalog.GetEntry(context, TEMP_CATALOG, DEFAULT_SCHEMA, errors_table); return table_entry; } -shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context) { - auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY"; +shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) { + auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name); auto &cache = ObjectCache::GetObjectCache(context); - return cache.GetOrCreate(key); + return cache.GetOrCreate(key, name); } void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) { @@ -40,7 +40,7 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData // Create Rejects Scans Table { - auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, "reject_scans"); + auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, scan_table); info->temporary = true; info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; // 0. Scan ID @@ -73,7 +73,7 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData } { // Create Rejects Error Table - auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, "reject_errors"); + auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, errors_table); info->temporary = true; info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; // 1. 
Row Line diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 2c691aa8bd21..b7f865fc718e 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -51,11 +51,18 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV"); options.FromNamedParameters(input.named_parameters, context, return_types, names); - + if (!options.rejects_table_name.empty() && !options.store_rejects.GetValue() && + options.store_rejects.IsSetByUser()) { + throw BinderException( + "rejects_table_name option is only supported when store_rejects is not manually set to false"); + } + // Ensure we set ignore errors to true automagically + options.store_rejects.Set(true, false); // Validate rejects_table options - if (options.store_rejects) { + if (options.store_rejects.GetValue()) { if (!options.ignore_errors.GetValue() && options.ignore_errors.IsSetByUser()) { - throw BinderException("REJECTS_TABLE option is only supported when IGNORE_ERRORS is set to true"); + throw BinderException( + "STORE_REJECTS option is only supported when IGNORE_ERRORS is not manually set to false"); } // Ensure we set ignore errors to true automagically options.ignore_errors.Set(true, false); @@ -63,7 +70,7 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio throw BinderException("REJECTS_TABLE option is not supported when UNION_BY_NAME is set to true"); } } - if (options.rejects_limit != 0 && !options.store_rejects) { + if (options.rejects_limit != 0 && !options.store_rejects.GetValue()) { throw BinderException("REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name"); } @@ -145,8 +152,9 @@ static unique_ptr ReadCSVInitGlobal(ClientContext &con auto &bind_data = input.bind_data->Cast(); // Create the temporary rejects table - if (bind_data.options.store_rejects) { - CSVRejectsTable::GetOrCreate(context)->InitializeTable(context, bind_data); + if (bind_data.options.store_rejects.GetValue()) { + CSVRejectsTable::GetOrCreate(context, bind_data.options.rejects_table_name) + ->InitializeTable(context, bind_data); } if (bind_data.files.empty()) { // This can happen when a filename based filter pushdown has eliminated all possible files for this scan. @@ -227,6 +235,7 @@ void ReadCSVTableFunction::ReadCSVAddNamedParameters(TableFunction &table_functi table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR; table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN; table_function.named_parameters["store_rejects"] = LogicalType::BOOLEAN; + table_function.named_parameters["rejects_table"] = LogicalType::VARCHAR; table_function.named_parameters["rejects_limit"] = LogicalType::BIGINT; table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT; table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp index 436802909c82..a7db5aeb06f4 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp @@ -41,8 +41,10 @@ struct CSVReaderOptions { DialectOptions dialect_options; //! Whether or not we should ignore InvalidInput errors CSVOption ignore_errors = false; - //! Whether we store CSV Errors or not - bool store_rejects = false; + //! 
Whether we store CSV Errors in the rejects table or not + CSVOption store_rejects = false; + //! Rejects table name + string rejects_table_name; //! Rejects table entry limit (0 = no limit) idx_t rejects_limit = 0; //! Number of samples to buffer diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp index bb4ff62fa4ae..f88eff8028ea 100644 --- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp @@ -14,14 +14,23 @@ class ClientContext; class CSVRejectsTable : public ObjectCacheEntry { public: - CSVRejectsTable() : count(0) { + CSVRejectsTable(string name) : name(name), count(0) { + if (name.empty()) { + scan_table = "reject_scan"; + errors_table = "reject_errors"; + } else { + scan_table = name + "_scan"; + errors_table = name; + } } ~CSVRejectsTable() override = default; mutex write_lock; - + string name; idx_t count; + string scan_table; + string errors_table; - static shared_ptr GetOrCreate(ClientContext &context); + static shared_ptr GetOrCreate(ClientContext &context, const string &name); void InitializeTable(ClientContext &context, const ReadCSVData &options); TableCatalogEntry &GetTable(ClientContext &context); diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index 6dfb0b003f15..39961131cad2 100644 --- a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -605,7 +605,7 @@ }, {"id": 117, "name": "store_rejects", - "type": "bool" + "type": "CSVOption" }, {"id": 118, "name": "rejects_limit", @@ -650,6 +650,10 @@ {"id": 128, "name": "parallel", "type": "bool" + }, + {"id": 129, + "name": "rejects_table_name", + "type": "string" } ], "pointer_type": "none" diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index c274e2a2b2ac..b7f0d3078810 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -135,7 +135,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(114, "buffer_size", buffer_size); serializer.WriteProperty(115, "file_options", file_options); serializer.WritePropertyWithDefault>(116, "force_quote", force_quote); - serializer.WritePropertyWithDefault(117, "store_rejects", store_rejects); + serializer.WriteProperty>(117, "store_rejects", store_rejects); serializer.WritePropertyWithDefault(118, "rejects_limit", rejects_limit); serializer.WriteProperty>(119, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter); serializer.WriteProperty>(120, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); @@ -147,6 +147,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WriteProperty>>(126, "dialect_options.date_format", dialect_options.date_format); serializer.WritePropertyWithDefault(127, "sniffer_user_mismatch_error", sniffer_user_mismatch_error); serializer.WritePropertyWithDefault(128, "parallel", parallel); + serializer.WritePropertyWithDefault(129, "rejects_table_name", rejects_table_name); } CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { @@ -168,7 +169,7 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { 
deserializer.ReadPropertyWithDefault(114, "buffer_size", result.buffer_size); deserializer.ReadProperty(115, "file_options", result.file_options); deserializer.ReadPropertyWithDefault>(116, "force_quote", result.force_quote); - deserializer.ReadPropertyWithDefault(117, "store_rejects", result.store_rejects); + deserializer.ReadProperty>(117, "store_rejects", result.store_rejects); deserializer.ReadPropertyWithDefault(118, "rejects_limit", result.rejects_limit); deserializer.ReadProperty>(119, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter); deserializer.ReadProperty>(120, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote); @@ -180,6 +181,7 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { deserializer.ReadProperty>>(126, "dialect_options.date_format", result.dialect_options.date_format); deserializer.ReadPropertyWithDefault(127, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error); deserializer.ReadPropertyWithDefault(128, "parallel", result.parallel); + deserializer.ReadPropertyWithDefault(129, "rejects_table_name", result.rejects_table_name); return result; } From 779ab7f36f9088d54e1f1662ac03daded8d3ed26 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 11 Mar 2024 15:51:26 +0100 Subject: [PATCH 038/147] got rejects table right --- .../table_function/global_csv_state.cpp | 53 +++++++++++-------- .../operator/persistent/csv_rejects_table.cpp | 52 +++++++++++------- .../operator/persistent/csv_rejects_table.hpp | 3 +- 3 files changed, 64 insertions(+), 44 deletions(-) diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 00ba18b82f00..917a581bcbcc 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -177,11 +177,16 @@ void CSVGlobalState::FillRejectsTable() { auto limit = options.rejects_limit; auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name); lock_guard lock(rejects->write_lock); - auto &table = rejects->GetTable(context); - InternalAppender appender(context, table); + auto &errors_table = rejects->GetErrorsTable(context); + auto &scans_table = rejects->GetScansTable(context); + InternalAppender errors_appender(context, errors_table); + InternalAppender scans_appender(context, scans_table); + idx_t scan_id = context.transaction.GetActiveQuery(); + idx_t file_id = 0; for (auto &file : file_scans) { auto file_name = file->file_path; auto &errors = file->error_handler->errors; + // We first insert the file into the file scans table for (auto &error_vector : errors) { for (auto &error : error_vector.second) { if (!IsCSVErrorAcceptedReject(error.type)) { @@ -197,36 +202,38 @@ void CSVGlobalState::FillRejectsTable() { auto row_line = file->error_handler->GetLine(error.error_info); auto col_idx = error.column_idx; // Add the row to the rejects table - appender.BeginRow(); - // 1. File Path - appender.Append(string_t(file_name)); - // 2. Row Line - appender.Append(row_line); - // 3. Byte Position where error occurred - appender.Append(error.byte_position); - // 4. Column Index - appender.Append(col_idx + 1); - // 5. Column Name (If Applicable) + errors_appender.BeginRow(); + // 1. Scan Id + errors_appender.Append(scan_id); + // 2. File Id + errors_appender.Append(file_id); + // 3. 
Row Line + errors_appender.Append(row_line); + // 4. Byte Position where error occurred + errors_appender.Append(error.byte_position); + // 5. Column Index + errors_appender.Append(col_idx + 1); + // 6. Column Name (If Applicable) switch (error.type) { case CSVErrorType::TOO_MANY_COLUMNS: - appender.Append(Value()); + errors_appender.Append(Value()); break; case CSVErrorType::TOO_FEW_COLUMNS: D_ASSERT(bind_data.return_names.size() > col_idx + 1); - appender.Append(string_t("\"" + bind_data.return_names[col_idx + 1] + "\"")); + errors_appender.Append(string_t("\"" + bind_data.return_names[col_idx + 1] + "\"")); break; default: - appender.Append(string_t("\"" + bind_data.return_names[col_idx] + "\"")); + errors_appender.Append(string_t("\"" + bind_data.return_names[col_idx] + "\"")); } - // 6. Error Type - appender.Append(string_t(CSVErrorTypeToEnum(error.type))); - // 7. Original CSV Line - appender.Append(string_t(error.csv_row)); - // 8. Full Error Message - appender.Append(string_t(error.error_message)); - appender.EndRow(); + // 7. Error Type + errors_appender.Append(string_t(CSVErrorTypeToEnum(error.type))); + // 8. Original CSV Line + errors_appender.Append(string_t(error.csv_row)); + // 9. Full Error Message + errors_appender.Append(string_t(error.error_message)); + errors_appender.EndRow(); } - appender.Close(); + errors_appender.Close(); } } } diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 7d8094659377..50ddcedc7b54 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -7,12 +7,18 @@ namespace duckdb { -TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) { +TableCatalogEntry &CSVRejectsTable::GetErrorsTable(ClientContext &context) { auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG); auto &table_entry = temp_catalog.GetEntry(context, TEMP_CATALOG, DEFAULT_SCHEMA, errors_table); return table_entry; } +TableCatalogEntry &CSVRejectsTable::GetScansTable(ClientContext &context) { + auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG); + auto &table_entry = temp_catalog.GetEntry(context, TEMP_CATALOG, DEFAULT_SCHEMA, scan_table); + return table_entry; +} + shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) { auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name); auto &cache = ObjectCache::GetObjectCache(context); @@ -45,29 +51,31 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; // 0. Scan ID info->columns.AddColumn(ColumnDefinition("scan_id", LogicalType::UBIGINT)); - // 1. File Path + // 1. File ID (within the scan) + info->columns.AddColumn(ColumnDefinition("file_id", LogicalType::UBIGINT)); + // 2. File Path info->columns.AddColumn(ColumnDefinition("file_path", LogicalType::VARCHAR)); - // 2. Delimiter + // 3. Delimiter info->columns.AddColumn(ColumnDefinition("delimiter", LogicalType::VARCHAR)); - // 3. Quote + // 4. Quote info->columns.AddColumn(ColumnDefinition("quote", LogicalType::VARCHAR)); - // 4. Escape + // 5. Escape info->columns.AddColumn(ColumnDefinition("escape", LogicalType::VARCHAR)); - // 5. NewLine Delimiter + // 6. NewLine Delimiter info->columns.AddColumn(ColumnDefinition("newline_delimiter", LogicalType::VARCHAR)); - // 6. Skip Rows + // 7. 
Skip Rows
		info->columns.AddColumn(ColumnDefinition("skip_rows", LogicalType::UINTEGER));
-		// 7. Has Header
+		// 8. Has Header
		info->columns.AddColumn(ColumnDefinition("has_header", LogicalType::BOOLEAN));
-		// 8. List<Struct<name, type>>
+		// 9. List<Struct<name, type>>
		info->columns.AddColumn(ColumnDefinition("columns", LogicalType::VARCHAR));
-		// 9. Date Format
+		// 10. Date Format
		info->columns.AddColumn(ColumnDefinition("date_format", LogicalType::VARCHAR));
-		// 10. Timestamp Format
+		// 11. Timestamp Format
		info->columns.AddColumn(ColumnDefinition("timestamp_format", LogicalType::VARCHAR));
-		// 11. CSV read function with all the options used
-		info->columns.AddColumn(ColumnDefinition("user_arguments", LogicalType::VARCHAR));
		// 12. CSV read function with all the options used
+		info->columns.AddColumn(ColumnDefinition("user_arguments", LogicalType::VARCHAR));
+		// 13. CSV read function with all the options used
		info->columns.AddColumn(ColumnDefinition("prompt", LogicalType::VARCHAR));
		catalog.CreateTable(context, std::move(info));
	}
	{
		// Create Rejects Error Table
		auto info = make_uniq<CreateTableInfo>(TEMP_CATALOG, DEFAULT_SCHEMA, errors_table);
		info->temporary = true;
		info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT;
-		// 1. Row Line
+		// 0. Scan ID
+		info->columns.AddColumn(ColumnDefinition("scan_id", LogicalType::UBIGINT));
+		// 1. File ID (within the scan)
+		info->columns.AddColumn(ColumnDefinition("file_id", LogicalType::UBIGINT));
+		// 2. Row Line
		info->columns.AddColumn(ColumnDefinition("line", LogicalType::UBIGINT));
-		// 2. Byte Position where error occurred
+		// 3. Byte Position where error occurred
		info->columns.AddColumn(ColumnDefinition("byte_position", LogicalType::UBIGINT));
-		// 3. Column Index (If Applicable)
+		// 4. Column Index (If Applicable)
		info->columns.AddColumn(ColumnDefinition("column_idx", LogicalType::UBIGINT));
-		// 4. Column Name (If Applicable)
+		// 5. Column Name (If Applicable)
		info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR));
-		// 5. Error Type
+		// 6. Error Type
		info->columns.AddColumn(ColumnDefinition("error_type", enum_type));
-		// 6. Original CSV Line
+		// 7. Original CSV Line
		info->columns.AddColumn(ColumnDefinition("csv_line", LogicalType::VARCHAR));
-		// 7. Full Error Message
+		// 8. 
Full Error Message
		info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR));
		catalog.CreateTable(context, std::move(info));
	}
diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
index f88eff8028ea..2a17f0b61851 100644
--- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
+++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
@@ -33,7 +33,8 @@ class CSVRejectsTable : public ObjectCacheEntry {
	static shared_ptr<CSVRejectsTable> GetOrCreate(ClientContext &context, const string &name);
	void InitializeTable(ClientContext &context, const ReadCSVData &options);
-	TableCatalogEntry &GetTable(ClientContext &context);
+	TableCatalogEntry &GetErrorsTable(ClientContext &context);
+	TableCatalogEntry &GetScansTable(ClientContext &context);
 public:
	static string ObjectType() {

From 77bfe80f6124e7a950f8b95363e19e7002e84480 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Mon, 11 Mar 2024 16:25:31 +0100
Subject: [PATCH 039/147] First version of rejects_scans and reject_errors
 tables

---
 .../table_function/global_csv_state.cpp       | 76 +++++++++++++++++--
 .../operator/persistent/csv_rejects_table.cpp |  2 -
 src/function/table/sniff_csv.cpp              | 14 +---
 .../csv_scanner/csv_reader_options.hpp        | 11 +++
 .../operator/persistent/csv_rejects_table.hpp |  2 +-
 .../csv/rejects/csv_rejects_double_table.test | 34 +++++++++
 6 files changed, 117 insertions(+), 22 deletions(-)

diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index 917a581bcbcc..b1fef47e4d1b 100644
--- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -170,6 +170,65 @@ string CSVErrorTypeToEnum(CSVErrorType type) {
	}
 }
+void FillScanErrorTable(InternalAppender &scan_appender, idx_t scan_idx, idx_t file_idx, CSVFileScan &file) {
+	CSVReaderOptions &options = file.options;
+	// Add the row to the rejects table
+	scan_appender.BeginRow();
+	// 1. Scan Idx
+	scan_appender.Append(scan_idx);
+	// 2. File Idx
+	scan_appender.Append(file_idx);
+	// 3. File Path
+	scan_appender.Append(string_t(file.file_path));
+	// 4. Delimiter
+	scan_appender.Append(string_t(options.dialect_options.state_machine_options.delimiter.FormatValue()));
+	// 5. Quote
+	scan_appender.Append(string_t(options.dialect_options.state_machine_options.quote.FormatValue()));
+	// 6. Escape
+	scan_appender.Append(string_t(options.dialect_options.state_machine_options.escape.FormatValue()));
+	// 7. NewLine Delimiter
+	scan_appender.Append(string_t(options.NewLineIdentifierToString()));
+	// 8. Skip Rows
+	scan_appender.Append(Value::UINTEGER(NumericCast<uint32_t>(options.dialect_options.skip_rows.GetValue())));
+	// 9. Has Header
+	scan_appender.Append(Value::BOOLEAN(options.dialect_options.header.GetValue()));
+	// 10. List<Struct<name, type>> {'col1': 'INTEGER', 'col2': 'VARCHAR'}
+	std::ostringstream columns;
+	columns << "{";
+	for (idx_t i = 0; i < file.types.size(); i++) {
+		columns << "'" << file.names[i] << "': '" << file.types[i].ToString() << "'";
+		if (i != file.types.size() - 1) {
+			columns << ",";
+		}
+	}
+	columns << "}";
+	scan_appender.Append(string_t(columns.str()));
+	// 11. 
Date Format + auto date_format = options.dialect_options.date_format[LogicalType::DATE].GetValue(); + if (!date_format.Empty()) { + scan_appender.Append(string_t(date_format.format_specifier)); + } else { + scan_appender.Append(Value()); + } + + // 12. Timestamp Format + auto timestamp_format = options.dialect_options.date_format[LogicalType::TIMESTAMP].GetValue(); + if (!timestamp_format.Empty()) { + scan_appender.Append(string_t(timestamp_format.format_specifier)); + } else { + scan_appender.Append(Value()); + } + + // 13. The Extra User Arguments + if (options.user_defined_parameters.empty()) { + scan_appender.Append(Value()); + } else { + scan_appender.Append(string_t(options.user_defined_parameters)); + } + // Finish the row to the rejects table + scan_appender.EndRow(); +} + void CSVGlobalState::FillRejectsTable() { auto &options = bind_data.options; @@ -181,8 +240,8 @@ void CSVGlobalState::FillRejectsTable() { auto &scans_table = rejects->GetScansTable(context); InternalAppender errors_appender(context, errors_table); InternalAppender scans_appender(context, scans_table); - idx_t scan_id = context.transaction.GetActiveQuery(); - idx_t file_id = 0; + idx_t scan_idx = context.transaction.GetActiveQuery(); + idx_t file_idx = 0; for (auto &file : file_scans) { auto file_name = file->file_path; auto &errors = file->error_handler->errors; @@ -190,7 +249,6 @@ void CSVGlobalState::FillRejectsTable() { for (auto &error_vector : errors) { for (auto &error : error_vector.second) { if (!IsCSVErrorAcceptedReject(error.type)) { - // For now, we only will use it for casting errors continue; } // short circuit if we already have too many rejects @@ -204,9 +262,9 @@ void CSVGlobalState::FillRejectsTable() { // Add the row to the rejects table errors_appender.BeginRow(); // 1. Scan Id - errors_appender.Append(scan_id); + errors_appender.Append(scan_idx); // 2. File Id - errors_appender.Append(file_id); + errors_appender.Append(file_idx); // 3. Row Line errors_appender.Append(row_line); // 4. Byte Position where error occurred @@ -233,10 +291,16 @@ void CSVGlobalState::FillRejectsTable() { errors_appender.Append(string_t(error.error_message)); errors_appender.EndRow(); } - errors_appender.Close(); } } + if (rejects->count != 0) { + rejects->count = 0; + FillScanErrorTable(scans_appender, scan_idx, file_idx, *file); + } + file_idx++; } + errors_appender.Close(); + scans_appender.Close(); } } diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 50ddcedc7b54..e74a7806a4ef 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -75,8 +75,6 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData info->columns.AddColumn(ColumnDefinition("timestamp_format", LogicalType::VARCHAR)); // 12. CSV read function with all the options used info->columns.AddColumn(ColumnDefinition("user_arguments", LogicalType::VARCHAR)); - // 13. 
CSV read function with all the options used - info->columns.AddColumn(ColumnDefinition("prompt", LogicalType::VARCHAR)); catalog.CreateTable(context, std::move(info)); } { diff --git a/src/function/table/sniff_csv.cpp b/src/function/table/sniff_csv.cpp index f135b15c615d..28f248d4f459 100644 --- a/src/function/table/sniff_csv.cpp +++ b/src/function/table/sniff_csv.cpp @@ -83,17 +83,6 @@ static unique_ptr CSVSniffBind(ClientContext &context, TableFuncti return std::move(result); } -string NewLineIdentifierToString(NewLineIdentifier identifier) { - switch (identifier) { - case NewLineIdentifier::SINGLE: - return "\\n"; - case NewLineIdentifier::CARRY_ON: - return "\\r\\n"; - default: - return ""; - } -} - string FormatOptions(char opt) { if (opt == '\'') { return "''"; @@ -138,8 +127,7 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p, str_opt = sniffer_options.dialect_options.state_machine_options.escape.GetValue(); output.SetValue(2, 0, str_opt); // 4. NewLine Delimiter - auto new_line_identifier = - NewLineIdentifierToString(sniffer_options.dialect_options.state_machine_options.new_line.GetValue()); + auto new_line_identifier = sniffer_options.NewLineIdentifierToString(); output.SetValue(3, 0, new_line_identifier); // 5. Skip Rows output.SetValue(4, 0, Value::UINTEGER(NumericCast(sniffer_options.dialect_options.skip_rows.GetValue()))); diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp index a7db5aeb06f4..faabfe62f23e 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp @@ -157,5 +157,16 @@ struct CSVReaderOptions { vector &names); string ToString() const; + + string NewLineIdentifierToString() { + switch (dialect_options.state_machine_options.new_line.GetValue()) { + case NewLineIdentifier::SINGLE: + return "\\n"; + case NewLineIdentifier::CARRY_ON: + return "\\r\\n"; + default: + return ""; + } + } }; } // namespace duckdb diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp index 2a17f0b61851..6254d7cecc01 100644 --- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp @@ -16,7 +16,7 @@ class CSVRejectsTable : public ObjectCacheEntry { public: CSVRejectsTable(string name) : name(name), count(0) { if (name.empty()) { - scan_table = "reject_scan"; + scan_table = "reject_scans"; errors_table = "reject_errors"; } else { scan_table = name + "_scan"; diff --git a/test/sql/copy/csv/rejects/csv_rejects_double_table.test b/test/sql/copy/csv/rejects/csv_rejects_double_table.test index e69de29bb2d1..1d82bc77e2b8 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_double_table.test +++ b/test/sql/copy/csv/rejects/csv_rejects_double_table.test @@ -0,0 +1,34 @@ +# name: test/sql/copy/csv/rejects/csv_rejects_double_table.test +# group: [rejects] + +require skip_reload + +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + +# Ensure that we can get the schema if we reduce the sample size and ignore errors +query IIIII +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', 
+ sample_size=1, + store_rejects=true, + ignore_errors=true); +---- +BIGINT VARCHAR 11044 11044 2 + + +query IIIIIIIIIIIII +SELECT * +FROM reject_scans order by all; +---- +3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL ignore_errors=true, store_rejects=true, sample_size=1 +3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL ignore_errors=true, store_rejects=true, sample_size=1 + +query IIIIIIIII +SELECT * +FROM reject_errors order by all; +---- +3 0 2176 10875 1 "column0" CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 0 4176 20875 1 "column0" CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +3 1 3680 18395 1 "column0" CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 1 5680 28395 1 "column0" CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' \ No newline at end of file From b6479d951ff573130543153374669038716e3c1c Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 11 Mar 2024 17:20:51 +0100 Subject: [PATCH 040/147] More adjustments --- .../table_function/global_csv_state.cpp | 4 ++-- .../csv/rejects/csv_rejects_double_table.test | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index b1fef47e4d1b..e59d12963348 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -278,10 +278,10 @@ void CSVGlobalState::FillRejectsTable() { break; case CSVErrorType::TOO_FEW_COLUMNS: D_ASSERT(bind_data.return_names.size() > col_idx + 1); - errors_appender.Append(string_t("\"" + bind_data.return_names[col_idx + 1] + "\"")); + errors_appender.Append(string_t(bind_data.return_names[col_idx + 1])); break; default: - errors_appender.Append(string_t("\"" + bind_data.return_names[col_idx] + "\"")); + errors_appender.Append(string_t(bind_data.return_names[col_idx])); } // 7. 
Error Type errors_appender.Append(string_t(CSVErrorTypeToEnum(error.type))); diff --git a/test/sql/copy/csv/rejects/csv_rejects_double_table.test b/test/sql/copy/csv/rejects/csv_rejects_double_table.test index 1d82bc77e2b8..d2714bd3be55 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_double_table.test +++ b/test/sql/copy/csv/rejects/csv_rejects_double_table.test @@ -11,8 +11,7 @@ query IIIII SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', sample_size=1, - store_rejects=true, - ignore_errors=true); + store_rejects=true); ---- BIGINT VARCHAR 11044 11044 2 @@ -21,14 +20,14 @@ query IIIIIIIIIIIII SELECT * FROM reject_scans order by all; ---- -3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL ignore_errors=true, store_rejects=true, sample_size=1 -3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL ignore_errors=true, store_rejects=true, sample_size=1 +3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 +3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 query IIIIIIIII SELECT * FROM reject_errors order by all; ---- -3 0 2176 10875 1 "column0" CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 0 4176 20875 1 "column0" CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -3 1 3680 18395 1 "column0" CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 1 5680 28395 1 "column0" CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' \ No newline at end of file +3 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +3 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' \ No newline at end of file From 34db0eee2f02b69e9d518bbeb9f7482593898c54 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 12 Mar 2024 15:03:42 +0100 Subject: [PATCH 041/147] Alright lets have different options for different tables --- .../table_function/global_csv_state.cpp | 3 ++- .../csv_scanner/util/csv_reader_options.cpp | 9 ++++++++- .../operator/persistent/csv_rejects_table.cpp | 8 +++++--- src/function/table/read_csv.cpp | 17 +++++++++++++---- .../operator/csv_scanner/csv_reader_options.hpp | 6 ++++-- .../operator/persistent/csv_rejects_table.hpp | 13 ++++--------- .../duckdb/storage/serialization/nodes.json | 6 +++++- src/storage/serialization/serialize_nodes.cpp | 6 ++++-- 8 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index e59d12963348..ae2fddf9df0b 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -234,7 +234,8 @@ void CSVGlobalState::FillRejectsTable() { if (options.store_rejects.GetValue()) { auto limit = options.rejects_limit; - auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name); + auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_scan_name.GetValue(), + options.rejects_table_name.GetValue()); lock_guard lock(rejects->write_lock); auto &errors_table = rejects->GetErrorsTable(context); auto &scans_table = rejects->GetScansTable(context); diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index 7fcb7f3383b1..b06f58779328 100644 --- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -214,7 +214,14 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, if (table_name.empty()) { throw BinderException("REJECTS_TABLE option cannot be empty"); } - rejects_table_name = table_name; + rejects_table_name.Set(table_name); + } else if (loption == "rejects_scan") { + // skip, handled in SetRejectsOptions + auto table_name = ParseString(value, loption); + if (table_name.empty()) { + throw BinderException("rejects_scan option cannot be empty"); + } + rejects_scan_name.Set(table_name); } else if (loption == "rejects_limit") { int64_t limit = ParseInteger(value, loption); if (limit < 0) { diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index e74a7806a4ef..f9672135e4e9 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -19,10 +19,12 @@ TableCatalogEntry &CSVRejectsTable::GetScansTable(ClientContext &context) { return table_entry; } -shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) { - auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name); +shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, const string &rejects_scan, + const string &rejects_error) { + auto key = + "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(rejects_scan) + "_" + StringUtil::Upper(rejects_error); auto &cache = ObjectCache::GetObjectCache(context); - return cache.GetOrCreate(key, name); + return cache.GetOrCreate(key, rejects_scan, 
rejects_error);
 }
 
 void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) {
diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp
index b7f865fc718e..258d69d3871a 100644
--- a/src/function/table/read_csv.cpp
+++ b/src/function/table/read_csv.cpp
@@ -51,13 +51,20 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio
 	result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV");
 
 	options.FromNamedParameters(input.named_parameters, context, return_types, names);
-	if (!options.rejects_table_name.empty() && !options.store_rejects.GetValue() &&
+	if (options.rejects_table_name.IsSetByUser() && !options.store_rejects.GetValue() &&
 	    options.store_rejects.IsSetByUser()) {
 		throw BinderException(
 		    "rejects_table_name option is only supported when store_rejects is not manually set to false");
 	}
-	// Ensure we set ignore errors to true automagically
-	options.store_rejects.Set(true, false);
+	if (options.rejects_scan_name.IsSetByUser() && !options.store_rejects.GetValue() &&
+	    options.store_rejects.IsSetByUser()) {
+		throw BinderException(
+		    "rejects_scan_name option is only supported when store_rejects is not manually set to false");
+	}
+	if (options.rejects_scan_name.IsSetByUser() || options.rejects_table_name.IsSetByUser()) {
+		// Ensure we set store_rejects to true automagically
+		options.store_rejects.Set(true, false);
+	}
 	// Validate rejects_table options
 	if (options.store_rejects.GetValue()) {
 		if (!options.ignore_errors.GetValue() && options.ignore_errors.IsSetByUser()) {
@@ -153,7 +160,8 @@ static unique_ptr ReadCSVInitGlobal(ClientContext &con
 	// Create the temporary rejects table
 	if (bind_data.options.store_rejects.GetValue()) {
-		CSVRejectsTable::GetOrCreate(context, bind_data.options.rejects_table_name)
+		CSVRejectsTable::GetOrCreate(context, bind_data.options.rejects_scan_name.GetValue(),
+		                             bind_data.options.rejects_table_name.GetValue())
 		    ->InitializeTable(context, bind_data);
 	}
 	if (bind_data.files.empty()) {
@@ -236,6 +244,7 @@ void ReadCSVTableFunction::ReadCSVAddNamedParameters(TableFunction &table_functi
 	table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
 	table_function.named_parameters["store_rejects"] = LogicalType::BOOLEAN;
 	table_function.named_parameters["rejects_table"] = LogicalType::VARCHAR;
+	table_function.named_parameters["rejects_scan"] = LogicalType::VARCHAR;
 	table_function.named_parameters["rejects_limit"] = LogicalType::BIGINT;
 	table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT;
 	table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR;
diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
index faabfe62f23e..4b69d9aad222 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
@@ -43,8 +43,10 @@ struct CSVReaderOptions {
 	CSVOption ignore_errors = false;
 	//! Whether we store CSV Errors in the rejects table or not
 	CSVOption store_rejects = false;
-	//! Rejects table name
-	string rejects_table_name;
+	//! Rejects table name (name of the table that stores the reject errors)
+	CSVOption rejects_table_name = {"reject_errors"};
+	//! Rejects scan name (name of the table that stores the reject scans)
+	CSVOption rejects_scan_name = {"reject_scans"};
 	//! Rejects table entry limit (0 = no limit)
 	idx_t rejects_limit = 0;
 	//!
Number of samples to buffer diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp index 6254d7cecc01..ee1d2092660b 100644 --- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp @@ -14,14 +14,8 @@ class ClientContext; class CSVRejectsTable : public ObjectCacheEntry { public: - CSVRejectsTable(string name) : name(name), count(0) { - if (name.empty()) { - scan_table = "reject_scans"; - errors_table = "reject_errors"; - } else { - scan_table = name + "_scan"; - errors_table = name; - } + CSVRejectsTable(string rejects_scan, string rejects_error) + : count(0), scan_table(rejects_scan), errors_table(rejects_error) { } ~CSVRejectsTable() override = default; mutex write_lock; @@ -30,7 +24,8 @@ class CSVRejectsTable : public ObjectCacheEntry { string scan_table; string errors_table; - static shared_ptr GetOrCreate(ClientContext &context, const string &name); + static shared_ptr GetOrCreate(ClientContext &context, const string &rejects_scan, + const string &rejects_error); void InitializeTable(ClientContext &context, const ReadCSVData &options); TableCatalogEntry &GetErrorsTable(ClientContext &context); diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index 39961131cad2..e3bec298114c 100644 --- a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -653,7 +653,11 @@ }, {"id": 129, "name": "rejects_table_name", - "type": "string" + "type": "CSVOption" + }, + {"id": 130, + "name": "rejects_scan_name", + "type": "CSVOption" } ], "pointer_type": "none" diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index b7f0d3078810..bc9ee449a8d5 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -147,7 +147,8 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WriteProperty>>(126, "dialect_options.date_format", dialect_options.date_format); serializer.WritePropertyWithDefault(127, "sniffer_user_mismatch_error", sniffer_user_mismatch_error); serializer.WritePropertyWithDefault(128, "parallel", parallel); - serializer.WritePropertyWithDefault(129, "rejects_table_name", rejects_table_name); + serializer.WriteProperty>(129, "rejects_table_name", rejects_table_name); + serializer.WriteProperty>(130, "rejects_scan_name", rejects_scan_name); } CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { @@ -181,7 +182,8 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { deserializer.ReadProperty>>(126, "dialect_options.date_format", result.dialect_options.date_format); deserializer.ReadPropertyWithDefault(127, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error); deserializer.ReadPropertyWithDefault(128, "parallel", result.parallel); - deserializer.ReadPropertyWithDefault(129, "rejects_table_name", result.rejects_table_name); + deserializer.ReadProperty>(129, "rejects_table_name", result.rejects_table_name); + deserializer.ReadProperty>(130, "rejects_scan_name", result.rejects_scan_name); return result; } From d877701dd895d2809c029cb1c32232ff43f12017 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 12 Mar 2024 17:43:15 +0100 Subject: [PATCH 042/147] Lots more tests --- 
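For context on the hunks above: the CSVOption wrapper is what lets the binder tell the built-in defaults ("reject_errors", "reject_scans") apart from names the user typed. Below is a minimal sketch of the semantics these patches rely on. It is illustrative only; CSVOptionSketch is a made-up stand-in name, and the real template lives in DuckDB's CSV scanner headers.

#include <utility>

// Minimal model of the CSVOption wrapper used above: a value plus a flag
// recording whether the user supplied it explicitly. Set(v) marks the option
// as user-provided; Set(v, false) changes the value while keeping it
// "default", which is how store_rejects gets switched on automatically.
template <typename T>
struct CSVOptionSketch {
	T value;
	bool set_by_user = false;

	CSVOptionSketch(T default_value) : value(std::move(default_value)) {
	}
	void Set(T new_value, bool by_user = true) {
		value = std::move(new_value);
		set_by_user = by_user;
	}
	const T &GetValue() const {
		return value;
	}
	bool IsSetByUser() const {
		return set_by_user;
	}
};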
src/catalog/catalog.cpp | 11 + .../csv_scanner/util/csv_reader_options.cpp | 2 +- .../operator/persistent/csv_rejects_table.cpp | 18 +- src/function/table/read_csv.cpp | 6 +- src/include/duckdb/catalog/catalog.hpp | 3 + .../csv/rejects/csv_rejects_double_table.test | 33 --- .../csv/rejects/csv_rejects_two_tables.test | 241 ++++++++++++++++++ 7 files changed, 274 insertions(+), 40 deletions(-) delete mode 100644 test/sql/copy/csv/rejects/csv_rejects_double_table.test create mode 100644 test/sql/copy/csv/rejects/csv_rejects_two_tables.test diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 7294427a2942..1674128837f3 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -758,6 +758,17 @@ CatalogEntry &Catalog::GetEntry(ClientContext &context, const string &schema, co throw CatalogException("CatalogElement \"%s.%s\" does not exist!", schema, name); } +bool Catalog::EntryExists(ClientContext &context, const string &schema, const string &name) { + vector entry_types {CatalogType::TABLE_ENTRY, CatalogType::SEQUENCE_ENTRY}; + for (auto entry_type : entry_types) { + auto result = GetEntry(context, entry_type, schema, name, OnEntryNotFound::RETURN_NULL); + if (result) { + return true; + } + } + return false; +} + optional_ptr Catalog::GetEntry(ClientContext &context, CatalogType type, const string &schema_name, const string &name, OnEntryNotFound if_not_found, QueryErrorContext error_context) { diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index b06f58779328..9ea7bb80992d 100644 --- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -207,7 +207,7 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, } else if (loption == "allow_quoted_nulls") { allow_quoted_nulls = ParseBoolean(value, loption); } else if (loption == "store_rejects") { - store_rejects = ParseBoolean(value, loption); + store_rejects.Set(ParseBoolean(value, loption)); } else if (loption == "rejects_table") { // skip, handled in SetRejectsOptions auto table_name = ParseString(value, loption); diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index f9672135e4e9..429d385553e2 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -24,6 +24,20 @@ shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(rejects_scan) + "_" + StringUtil::Upper(rejects_error); auto &cache = ObjectCache::GetObjectCache(context); + auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG); + bool rejects_scan_exist = catalog.EntryExists(context, DEFAULT_SCHEMA, rejects_scan); + bool rejects_error_exist = catalog.EntryExists(context, DEFAULT_SCHEMA, rejects_error); + if ((rejects_scan_exist || rejects_error_exist) && !cache.Get(key)) { + std::ostringstream error; + if (rejects_scan_exist) { + error << "Reject Scan Table name \"" << rejects_scan << "\" is already in use. "; + } + if (rejects_error_exist) { + error << "Reject Error Table name \"" << rejects_error << "\" is already in use. 
"; + } + error << "Either drop the used name(s), or give other name options in the CSV Reader function.\n"; + throw BinderException(error.str()); + } return cache.GetOrCreate(key, rejects_scan, rejects_error); } @@ -50,7 +64,7 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData { auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, scan_table); info->temporary = true; - info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; + info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; // 0. Scan ID info->columns.AddColumn(ColumnDefinition("scan_id", LogicalType::UBIGINT)); // 1. File ID (within the scan) @@ -83,7 +97,7 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData // Create Rejects Error Table auto info = make_uniq(TEMP_CATALOG, DEFAULT_SCHEMA, errors_table); info->temporary = true; - info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; + info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; // 0. Scan ID info->columns.AddColumn(ColumnDefinition("scan_id", LogicalType::UBIGINT)); // 1. File ID (within the scan) diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 258d69d3871a..60d942b3c8a7 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -53,13 +53,11 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio options.FromNamedParameters(input.named_parameters, context, return_types, names); if (options.rejects_table_name.IsSetByUser() && !options.store_rejects.GetValue() && options.store_rejects.IsSetByUser()) { - throw BinderException( - "rejects_table_name option is only supported when store_rejects is not manually set to false"); + throw BinderException("REJECTS_TABLE option is only supported when store_rejects is not manually set to false"); } if (options.rejects_scan_name.IsSetByUser() && !options.store_rejects.GetValue() && options.store_rejects.IsSetByUser()) { - throw BinderException( - "rejects_scan_name option is only supported when store_rejects is not manually set to false"); + throw BinderException("REJECTS_SCAN option is only supported when store_rejects is not manually set to false"); } if (options.rejects_scan_name.IsSetByUser() || options.rejects_table_name.IsSetByUser()) { // Ensure we set store_rejects to true automagically diff --git a/src/include/duckdb/catalog/catalog.hpp b/src/include/duckdb/catalog/catalog.hpp index 0bbf322c628b..ead5f183c75b 100644 --- a/src/include/duckdb/catalog/catalog.hpp +++ b/src/include/duckdb/catalog/catalog.hpp @@ -228,6 +228,9 @@ class Catalog { //! Gets the "schema.name" entry without a specified type, if entry does not exist an exception is thrown DUCKDB_API CatalogEntry &GetEntry(ClientContext &context, const string &schema, const string &name); + //! Returns true if the "schema.name" entry without a specified type exists + DUCKDB_API bool EntryExists(ClientContext &context, const string &schema, const string &name); + //! 
Fetches a logical type from the catalog DUCKDB_API LogicalType GetType(ClientContext &context, const string &schema, const string &names, OnEntryNotFound if_not_found); diff --git a/test/sql/copy/csv/rejects/csv_rejects_double_table.test b/test/sql/copy/csv/rejects/csv_rejects_double_table.test deleted file mode 100644 index d2714bd3be55..000000000000 --- a/test/sql/copy/csv/rejects/csv_rejects_double_table.test +++ /dev/null @@ -1,33 +0,0 @@ -# name: test/sql/copy/csv/rejects/csv_rejects_double_table.test -# group: [rejects] - -require skip_reload - -# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n -require notwindows - -# Ensure that we can get the schema if we reduce the sample size and ignore errors -query IIIII -SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( - 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', - sample_size=1, - store_rejects=true); ----- -BIGINT VARCHAR 11044 11044 2 - - -query IIIIIIIIIIIII -SELECT * -FROM reject_scans order by all; ----- -3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 -3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 - -query IIIIIIIII -SELECT * -FROM reject_errors order by all; ----- -3 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -3 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' \ No newline at end of file diff --git a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test new file mode 100644 index 000000000000..e9ad454f6052 --- /dev/null +++ b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test @@ -0,0 +1,241 @@ +# name: test/sql/copy/csv/rejects/csv_rejects_two_tables.test +# group: [rejects] + +require skip_reload + +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + +# Ensure that we can get the schema if we reduce the sample size and ignore errors +query IIIII +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + store_rejects=true); +---- +BIGINT VARCHAR 11044 11044 2 + + +query IIIIIIIIIIIII +SELECT * +FROM reject_scans order by all; +---- +3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 +3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 + +query IIIIIIIII +SELECT * +FROM reject_errors order by all; +---- +3 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' +3 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' + +# Test giving the name of errors table +statement error +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_table = 'rejects_errors_2'); +---- +Reject Scan Table name "reject_scans" is already in use. Either drop the used name(s), or give other name options in the CSV Reader function. + +statement ok +drop table reject_scans; + +query IIIII +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_table = 'rejects_errors_2' + ); +---- +BIGINT VARCHAR 11044 11044 2 + +query IIIIIIIIIIIII +SELECT * +FROM reject_scans order by all; +---- +8 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 +8 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 + +query IIIIIIIII +SELECT * +FROM rejects_errors_2 order by all; +---- +8 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +8 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +8 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +8 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' + +statement ok +drop table reject_errors; + +# Test giving the name of scans table +query IIIII +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_scan = 'rejects_scan_2'); +---- +BIGINT VARCHAR 11044 11044 2 + +query IIIIIIIIIIIII +SELECT * +FROM rejects_scan_2 order by all; +---- +12 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 +12 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 + +query IIIIIIIII +SELECT * +FROM reject_errors order by all; +---- +12 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +12 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +12 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +12 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' + + +# Test giving the name of both tables +query IIIII +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_scan = 'rejects_scan_3', + rejects_table = 'rejects_errors_3' + ); +---- +BIGINT VARCHAR 11044 11044 2 + +query IIIIIIIIIIIII +SELECT * +FROM rejects_scan_3 order by all; +---- +15 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_3', rejects_scan='rejects_scan_3', sample_size=1 +15 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_3', rejects_scan='rejects_scan_3', sample_size=1 + +query IIIIIIIII +SELECT * +FROM rejects_errors_3 order by all; +---- +15 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +15 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +15 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +15 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' + +statement ok +drop table reject_errors; + +statement ok +drop table reject_scans; + + +# Test giving the name of an existing table to the errors table +statement ok +create temporary table t (a integer); + +statement error +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_table = 't' + ); +---- +Reject Error Table name "t" is already in use. Either drop the used name(s), or give other name options in the CSV Reader function. + +# Test giving the name of an existing table to the scans table + +statement error +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_scan = 't' + ); +---- +Reject Scan Table name "t" is already in use. Either drop the used name(s), or give other name options in the CSV Reader function. + +statement error +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_table = 't', + rejects_scan = 't' + ); +---- +Reject Scan Table name "t" is already in use. Reject Error Table name "t" is already in use. Either drop the used name(s), or give other name options in the CSV Reader function. 
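# (Illustrative aside, not part of this test.) Once both tables are
# populated, errors pair up with their scan metadata on (scan_id, file_id),
# e.g.:
#   SELECT s.file_path, e.line, e.error_type, e.error_message
#   FROM reject_errors e
#   JOIN reject_scans s USING (scan_id, file_id)
#   ORDER BY ALL;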
+
+
+# Test giving the name of the tables with store_rejects and/or ignore_errors set to false throws
+statement error
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    rejects_scan = 'rejects_scan_3',
+    rejects_table = 'rejects_errors_3',
+    ignore_errors = false
+    );
+----
+STORE_REJECTS option is only supported when IGNORE_ERRORS is not manually set to false
+
+statement error
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    store_rejects = true,
+    ignore_errors = false
+    );
+----
+STORE_REJECTS option is only supported when IGNORE_ERRORS is not manually set to false
+
+statement error
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    rejects_table = 'rejects_errors_3',
+    ignore_errors = false
+    );
+----
+STORE_REJECTS option is only supported when IGNORE_ERRORS is not manually set to false
+
+statement error
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    rejects_scan = 'rejects_scan_3',
+    ignore_errors = false
+    );
+----
+STORE_REJECTS option is only supported when IGNORE_ERRORS is not manually set to false
+
+statement error
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    rejects_scan = 'rejects_scan_3',
+    rejects_table = 'rejects_errors_3',
+    store_rejects = false
+    );
+----
+REJECTS_TABLE option is only supported when store_rejects is not manually set to false
+
+statement error
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    rejects_table = 'rejects_errors_3',
+    store_rejects = false
+    );
+----
+REJECTS_TABLE option is only supported when store_rejects is not manually set to false
+
+statement error
+SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto(
+    'test/sql/copy/csv/data/error/mismatch/big_bad*.csv',
+    sample_size=1,
+    rejects_scan = 'rejects_scan_3',
+    store_rejects = false
+    );
+----
+REJECTS_SCAN option is only supported when store_rejects is not manually set to false
\ No newline at end of file

From baca88167e12a3233aaa7aaee83a2b7ff1339c9e Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 13 Mar 2024 14:34:14 +0100
Subject: [PATCH 043/147] Lots of adjustments to make it possible to have
 cur_pos be the exact place where an error happened, and to produce multiple
 errors in the same row

---
 .../scanner/string_value_scanner.cpp          | 121 +++++++++---------
 .../csv_scanner/string_value_scanner.hpp      |  28 ++--
 2 files changed, 70 insertions(+), 79 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index ae9d470a32ce..277924d8c01a 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++
b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -111,10 +111,6 @@ inline bool IsValueNull(const char *null_str_ptr, const char *value_ptr, const i
 }
 
 void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size, bool allocate) {
-	if (current_error.is_set) {
-		cur_col_id++;
-		return;
-	}
 	if (cur_col_id >= number_of_columns) {
 		bool error = true;
 		if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) {
@@ -122,7 +118,9 @@
 			error = !IsValueNull(null_str_ptr, value_ptr, size);
 		}
 		if (error) {
-			current_error = {CSVErrorType::TOO_MANY_COLUMNS, cur_col_id};
+			// We error pointing to the current value error.
+			current_errors.push_back(
+			    {CSVErrorType::TOO_MANY_COLUMNS, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}});
 		}
 		return;
 	}
@@ -143,7 +141,9 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 		if (empty) {
 			if (parse_types[chunk_col_id].first != LogicalTypeId::VARCHAR) {
 				// If it is not a varchar, empty values are not accepted, we must error.
-				cast_errors[chunk_col_id] = std::string("");
+				current_errors.push_back({CSVErrorType::CAST_ERROR,
+				                          cur_col_id,
+				                          {iterator.pos.buffer_idx, last_position, buffer_size}});
 			}
 			static_cast(vector_ptr[chunk_col_id])[number_of_rows] = string_t();
 		} else {
@@ -225,7 +225,8 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 				HandleUnicodeError(cur_col_id, force_error);
 			}
 			// If we got here, we are ignoring errors, hence we must ignore this line.
-			current_error = {CSVErrorType::INVALID_UNICODE, cur_col_id};
+			current_errors.push_back(
+			    {CSVErrorType::INVALID_UNICODE, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}});
 			break;
 		}
 		if (allocate) {
@@ -241,7 +242,13 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 	}
 	if (!success) {
 		// We had a casting error, we push it here because we can only error when finishing the line read.
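// (Illustrative sketch, not part of the patch; PendingCSVError is a made-up
// stand-in for the CurrentError type introduced below.) The control flow this
// commit moves to: instead of erroring mid-value, each problem is queued
// together with the exact buffer position and replayed once the full line has
// been read, which is what lets one CSV row report several errors.
#include <cstddef>
#include <string>
#include <vector>

struct PendingCSVError {
	int type;             // stands in for CSVErrorType
	std::size_t col_idx;  // column where the problem was found
	std::size_t position; // byte offset of the offending value
	std::string message;  // human-readable description, filled for cast errors
};

// While parsing:  pending.push_back({kCastError, col, pos, "Could not convert ..."});
// At end of row:  report every queued entry, clear the list, and drop the row.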
- cast_errors[cur_col_id] = std::string(value_ptr, size); + std::ostringstream error; + // Casting Error Message + error << "Could not convert string \"" << std::string(value_ptr, size) << "\" to \'" + << LogicalTypeIdToString(parse_types[cur_col_id].first) << "\'"; + current_errors.push_back( + {CSVErrorType::INVALID_UNICODE, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}}); + current_errors.back().error_message = error.str(); } cur_col_id++; chunk_col_id++; @@ -282,7 +289,7 @@ void StringValueResult::Reset() { if (cur_buffer) { buffer_handles[cur_buffer->buffer_idx] = cur_buffer; } - current_error.Reset(); + current_errors.clear(); } void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t buffer_pos) { @@ -327,16 +334,6 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p result.last_position = buffer_pos + 1; } -void StringValueResult::HandleOverLimitRows(idx_t col_idx) { - LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); - bool first_nl; - auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); - auto csv_error = - CSVError::IncorrectColumnAmountError(state_machine.options, col_idx, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); - error_handler.Error(csv_error); -} - void StringValueResult::HandleUnicodeError(idx_t col_idx, bool force_error) { bool first_nl; auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); @@ -351,35 +348,50 @@ void StringValueResult::HandleUnicodeError(idx_t col_idx, bool force_error) { error_handler.Error(csv_error, force_error); } -void StringValueResult::HandleUnterminatedQuotes(idx_t col_idx, bool force_error) { - LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); - bool first_nl; - auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); - auto csv_error = - CSVError::UnterminatedQuotesError(state_machine.options, col_idx, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); - error_handler.Error(csv_error, force_error); -} - bool StringValueResult::HandleError() { - if (current_error.is_set) { - switch (current_error.type) { + // Reconstruct CSV Line + for (auto &cur_error : current_errors) { + LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); + bool first_nl; + auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); + CSVError csv_error; + auto col_idx = cur_error.col_idx; + auto &line_pos = cur_error.error_position; + + switch (cur_error.type) { case CSVErrorType::TOO_MANY_COLUMNS: - HandleOverLimitRows(cur_col_id); + csv_error = + CSVError::IncorrectColumnAmountError(state_machine.options, col_idx, lines_per_batch, borked_line, + line_pos.GetGlobalPosition(requested_size, first_nl)); break; - case CSVErrorType::INVALID_UNICODE: - HandleUnicodeError(current_error.col_idx); + case CSVErrorType::INVALID_UNICODE: { + // We have to sanitize the CSV line + std::vector char_array(borked_line.begin(), borked_line.end()); + char_array.push_back('\0'); // Null-terminate the character array + Utf8Proc::MakeValid(&char_array[0], char_array.size()); + borked_line = {char_array.begin(), char_array.end() - 1}; + csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line, + line_pos.GetGlobalPosition(requested_size, first_nl)); break; + } case 
CSVErrorType::UNTERMINATED_QUOTES: - HandleUnterminatedQuotes(current_error.col_idx); + csv_error = CSVError::UnterminatedQuotesError(state_machine.options, col_idx, lines_per_batch, borked_line, + line_pos.GetGlobalPosition(requested_size, first_nl)); + break; + case CSVErrorType::CAST_ERROR: + csv_error = CSVError::CastError(state_machine.options, names[cur_error.col_idx], cur_error.error_message, + cur_error.col_idx, borked_line, lines_per_batch, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); break; default: throw InvalidInputException("CSV Error not allowed when inserting row"); } + error_handler.Error(csv_error); + } + if (!current_errors.empty()) { + current_errors.clear(); cur_col_id = 0; chunk_col_id = 0; - // An error occurred on this row, we are ignoring it and resetting our control flag - current_error.Reset(); return true; } return false; @@ -455,30 +467,6 @@ bool StringValueResult::AddRowInternal() { if (HandleError()) { return false; } - if (!cast_errors.empty()) { - // A wild casting error appears - for (auto &cast_error : cast_errors) { - std::ostringstream error; - // Casting Error Message - error << "Could not convert string \"" << cast_error.second << "\" to \'" - << LogicalTypeIdToString(parse_types[cast_error.first].first) << "\'"; - auto error_string = error.str(); - LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); - bool first_nl; - auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); - auto csv_error = CSVError::CastError( - state_machine.options, names[cast_error.first], error_string, cast_error.first, borked_line, - lines_per_batch, current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); - error_handler.Error(csv_error); - } - // If we got here it means we are ignoring errors, hence we need to signify to our result scanner to ignore this - // row - // Cleanup this line and continue - cast_errors.clear(); - cur_col_id = 0; - chunk_col_id = 0; - return false; - } NullPaddingQuotedNewlineCheck(); quoted_new_line = false; // We need to check if we are getting the correct number of columns here. @@ -559,7 +547,9 @@ void StringValueResult::InvalidState(StringValueResult &result) { if (force_error) { result.HandleUnicodeError(result.cur_col_id, force_error); } - result.current_error = {CSVErrorType::UNTERMINATED_QUOTES, result.cur_col_id}; + result.current_errors.push_back({CSVErrorType::INVALID_UNICODE, + result.cur_col_id, + {result.iterator.pos.buffer_idx, result.last_position, result.buffer_size}}); } bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_pos) { @@ -1229,9 +1219,12 @@ void StringValueScanner::FinalizeChunkProcess() { // If we are not done we have two options. // 1) If a boundary is set. if (iterator.IsBoundarySet()) { - if (!(result.current_error == CSVErrorType::UNTERMINATED_QUOTES)) { - iterator.done = true; + for (auto &cur_error : result.current_errors) { + if (!(cur_error == CSVErrorType::UNTERMINATED_QUOTES)) { + iterator.done = true; + } } + // We read until the next line or until we have nothing else to read. 
// Move to next buffer if (!cur_buffer_handle) { diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index e36266d90f69..4a54bc7dcccc 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -64,16 +64,17 @@ class FullLinePosition { class CurrentError { public: - CurrentError() : is_set(false) {}; - CurrentError(CSVErrorType type, idx_t col_idx_p) : is_set(true), type(type), col_idx(col_idx_p) {}; - void Reset() { - is_set = false; - } - bool is_set; + CurrentError(CSVErrorType type, idx_t col_idx_p, LinePosition error_position_p) + : type(type), col_idx(col_idx_p), error_position(error_position_p) {}; + CSVErrorType type; idx_t col_idx; + string error_message; + //! Exact Position where the error happened + LinePosition error_position; + friend bool operator==(const CurrentError &error, CSVErrorType other) { - return error.is_set && error.type == other; + return error.type == other; } }; @@ -81,8 +82,8 @@ class StringValueResult : public ScannerResult { public: StringValueResult(CSVStates &states, CSVStateMachine &state_machine, const shared_ptr &buffer_handle, Allocator &buffer_allocator, idx_t result_size, - idx_t buffer_position, CSVErrorHandler &error_hander, CSVIterator &iterator, bool store_line_size, - shared_ptr csv_file_scan, idx_t &lines_read, bool sniffing); + idx_t buffer_position, CSVErrorHandler &error_handler, CSVIterator &iterator, + bool store_line_size, shared_ptr csv_file_scan, idx_t &lines_read, bool sniffing); ~StringValueResult(); @@ -120,7 +121,6 @@ class StringValueResult : public ScannerResult { unsafe_unique_array> parse_types; vector names; - unordered_map cast_errors; shared_ptr csv_file_scan; idx_t &lines_read; @@ -135,8 +135,8 @@ class StringValueResult : public ScannerResult { //! Requested size of buffers (i.e., either 32Mb or set by buffer_size parameter) idx_t requested_size; - //! Current Error if any - CurrentError current_error; + //! Errors happening in the current line (if any) + vector current_errors; bool sniffing; //! Specialized code for quoted values, makes sure to remove quotes and escapes @@ -153,10 +153,8 @@ class StringValueResult : public ScannerResult { //! Handles EmptyLine states static inline bool EmptyLine(StringValueResult &result, const idx_t buffer_pos); inline bool AddRowInternal(); - - void HandleOverLimitRows(idx_t col_idx); void HandleUnicodeError(idx_t col_idx, bool force_error = false); - void HandleUnterminatedQuotes(idx_t col_idx, bool force_error = false); + //! Certain errors should only be handled when adding the line, to ensure proper error propagation. 
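// (Illustrative, not part of the patch.) Given the operator== defined on
// CurrentError above, asking whether the pending list already holds an error
// of a given type reduces to a one-liner:
#include <algorithm>

template <class ErrorList, class ErrorType>
bool ContainsErrorType(const ErrorList &errors, ErrorType type) {
	return std::any_of(errors.begin(), errors.end(),
	                   [&](const auto &entry) { return entry == type; });
}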
bool HandleError(); inline void AddValueToVector(const char *value_ptr, const idx_t size, bool allocate = false); From 2fe296f606f1543c152d493a0449a27cc6672025 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 13 Mar 2024 14:46:23 +0100 Subject: [PATCH 044/147] More adjustments --- .../scanner/string_value_scanner.cpp | 26 ++++++++++++------- .../csv_scanner/string_value_scanner.hpp | 3 ++- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 277924d8c01a..144e8ea9d679 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -216,17 +216,17 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size break; } default: { - // By default we add a string + // By default, we add a string // We only evaluate if a string is utf8 valid, if it's actually a varchar if (parse_types[chunk_col_id].second && !Utf8Proc::IsValid(value_ptr, UnsafeNumericCast(size))) { bool force_error = !state_machine.options.ignore_errors.GetValue() && sniffing; + LinePosition error_position {iterator.pos.buffer_idx, last_position, buffer_size}; // Invalid unicode, we must error if (force_error) { - HandleUnicodeError(cur_col_id, force_error); + HandleUnicodeError(cur_col_id, error_position); } // If we got here, we are ingoring errors, hence we must ignore this line. - current_errors.push_back( - {CSVErrorType::INVALID_UNICODE, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}}); + current_errors.push_back({CSVErrorType::INVALID_UNICODE, cur_col_id, error_position}); break; } if (allocate) { @@ -334,7 +334,7 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p result.last_position = buffer_pos + 1; } -void StringValueResult::HandleUnicodeError(idx_t col_idx, bool force_error) { +void StringValueResult::HandleUnicodeError(idx_t col_idx, LinePosition &error_position) { bool first_nl; auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); // sanitize borked line @@ -344,8 +344,8 @@ void StringValueResult::HandleUnicodeError(idx_t col_idx, bool force_error) { borked_line = {char_array.begin(), char_array.end() - 1}; LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); - error_handler.Error(csv_error, force_error); + error_position.GetGlobalPosition(requested_size, first_nl)); + error_handler.Error(csv_error, true); } bool StringValueResult::HandleError() { @@ -381,7 +381,7 @@ bool StringValueResult::HandleError() { case CSVErrorType::CAST_ERROR: csv_error = CSVError::CastError(state_machine.options, names[cur_error.col_idx], cur_error.error_message, cur_error.col_idx, borked_line, lines_per_batch, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); + line_pos.GetGlobalPosition(requested_size, first_nl)); break; default: throw InvalidInputException("CSV Error not allowed when inserting row"); @@ -498,9 +498,10 @@ bool StringValueResult::AddRowInternal() { bool first_nl; auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); + LinePosition 
error_position {iterator.pos.buffer_idx, last_position, buffer_size}; auto csv_error = CSVError::IncorrectColumnAmountError( state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); + error_position.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); // If we are here we ignore_errors, so we delete this line number_of_rows--; @@ -545,7 +546,8 @@ void StringValueResult::InvalidState(StringValueResult &result) { bool force_error = !result.state_machine.options.ignore_errors.GetValue() && result.sniffing; // Invalid unicode, we must error if (force_error) { - result.HandleUnicodeError(result.cur_col_id, force_error); + LinePosition error_position {result.iterator.pos.buffer_idx, result.last_position, result.buffer_size}; + result.HandleUnicodeError(result.cur_col_id, error_position); } result.current_errors.push_back({CSVErrorType::INVALID_UNICODE, result.cur_col_id, @@ -722,6 +724,8 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { bool first_nl; auto borked_line = result.line_positions_per_row[line_error].ReconstructCurrentLine(first_nl, result.buffer_handles); + // TODO: We can't really nicely get the position where this error happened, this should be solved by + // TODO: adding more types to implicit casting instead of relying on this flush. auto csv_error = CSVError::CastError( state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, borked_line, lines_per_batch, @@ -744,6 +748,8 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { bool first_nl; auto borked_line = result.line_positions_per_row[line_error].ReconstructCurrentLine( first_nl, result.buffer_handles); + // TODO: We can't really nicely get the position where this error happened, this should be solved by + // TODO: adding more types to implicit casting instead of relying on this flush. auto csv_error = CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, borked_line, lines_per_batch, diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 4a54bc7dcccc..dfbe1f581bd5 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -153,7 +153,8 @@ class StringValueResult : public ScannerResult { //! Handles EmptyLine states static inline bool EmptyLine(StringValueResult &result, const idx_t buffer_pos); inline bool AddRowInternal(); - void HandleUnicodeError(idx_t col_idx, bool force_error = false); + //! Force the throw of a unicode error + void HandleUnicodeError(idx_t col_idx, LinePosition &error_position); //! Certain errors should only be handled when adding the line, to ensure proper error propagation. 
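// (Sketch under stated assumptions; the real LinePosition lives in
// string_value_scanner.hpp.) Every queued error now carries a
// {buffer_idx, buffer_pos, buffer_size} triple. Conceptually, the global byte
// offset reported to the rejects table is "all buffers before this one, plus
// the offset inside it"; the exact formula below is an assumption made for
// illustration only.
#include <cstddef>

struct LinePositionSketch {
	std::size_t buffer_idx;  // index of the buffer within the file
	std::size_t buffer_pos;  // byte offset inside that buffer
	std::size_t buffer_size; // actual size of that buffer

	std::size_t GetGlobalPosition(std::size_t requested_size, bool first_char_is_nl) const {
		// Assumes every earlier buffer was filled to the requested size.
		return buffer_idx * requested_size + buffer_pos - (first_char_is_nl ? 1 : 0);
	}
};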
bool HandleError(); From dcdb16a1f276a5babf33aeee1adbd43726f3fed2 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 13 Mar 2024 16:01:24 +0100 Subject: [PATCH 045/147] Add a new return option for start of the line as a byte position --- .../scanner/string_value_scanner.cpp | 41 +++++++++++-------- .../table_function/global_csv_state.cpp | 22 ++++++---- .../operator/csv_scanner/util/csv_error.cpp | 38 +++++++++-------- .../operator/persistent/csv_rejects_table.cpp | 14 ++++--- .../operator/csv_scanner/csv_error.hpp | 19 +++++---- 5 files changed, 80 insertions(+), 54 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 144e8ea9d679..103ff6341a65 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -247,7 +247,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size error << "Could not convert string \"" << std::string(value_ptr, size) << "\" to \'" << LogicalTypeIdToString(parse_types[cur_col_id].first) << "\'"; current_errors.push_back( - {CSVErrorType::INVALID_UNICODE, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}}); + {CSVErrorType::CAST_ERROR, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}}); current_errors.back().error_message = error.str(); } cur_col_id++; @@ -344,6 +344,7 @@ void StringValueResult::HandleUnicodeError(idx_t col_idx, LinePosition &error_po borked_line = {char_array.begin(), char_array.end() - 1}; LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), error_position.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error, true); } @@ -360,9 +361,10 @@ bool StringValueResult::HandleError() { switch (cur_error.type) { case CSVErrorType::TOO_MANY_COLUMNS: - csv_error = - CSVError::IncorrectColumnAmountError(state_machine.options, col_idx, lines_per_batch, borked_line, - line_pos.GetGlobalPosition(requested_size, first_nl)); + csv_error = CSVError::IncorrectColumnAmountError( + state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size, first_nl)); break; case CSVErrorType::INVALID_UNICODE: { // We have to sanitize the CSV line @@ -371,16 +373,20 @@ bool StringValueResult::HandleError() { Utf8Proc::MakeValid(&char_array[0], char_array.size()); borked_line = {char_array.begin(), char_array.end() - 1}; csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), line_pos.GetGlobalPosition(requested_size, first_nl)); break; } case CSVErrorType::UNTERMINATED_QUOTES: - csv_error = CSVError::UnterminatedQuotesError(state_machine.options, col_idx, lines_per_batch, borked_line, - line_pos.GetGlobalPosition(requested_size, first_nl)); + csv_error = CSVError::UnterminatedQuotesError( + state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size, first_nl)); break; case CSVErrorType::CAST_ERROR: csv_error = 
CSVError::CastError(state_machine.options, names[cur_error.col_idx], cur_error.error_message, cur_error.col_idx, borked_line, lines_per_batch, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), line_pos.GetGlobalPosition(requested_size, first_nl)); break; default: @@ -404,7 +410,7 @@ void StringValueResult::QuotedNewLine(StringValueResult &result) { void StringValueResult::NullPaddingQuotedNewlineCheck() { // We do some checks for null_padding correctness if (state_machine.options.null_padding && iterator.IsBoundarySet() && quoted_new_line && iterator.done) { - // If we have null_padding set, we found a quoted new line, we are scanning the file in parallel and it's the + // If we have null_padding set, we found a quoted new line, we are scanning the file in parallel, and it's the // last row of this thread. LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch); @@ -501,6 +507,7 @@ bool StringValueResult::AddRowInternal() { LinePosition error_position {iterator.pos.buffer_idx, last_position, buffer_size}; auto csv_error = CSVError::IncorrectColumnAmountError( state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), error_position.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); // If we are here we ignore_errors, so we delete this line @@ -549,7 +556,7 @@ void StringValueResult::InvalidState(StringValueResult &result) { LinePosition error_position {result.iterator.pos.buffer_idx, result.last_position, result.buffer_size}; result.HandleUnicodeError(result.cur_col_id, error_position); } - result.current_errors.push_back({CSVErrorType::INVALID_UNICODE, + result.current_errors.push_back({CSVErrorType::UNTERMINATED_QUOTES, result.cur_col_id, {result.iterator.pos.buffer_idx, result.last_position, result.buffer_size}}); } @@ -724,12 +731,11 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { bool first_nl; auto borked_line = result.line_positions_per_row[line_error].ReconstructCurrentLine(first_nl, result.buffer_handles); - // TODO: We can't really nicely get the position where this error happened, this should be solved by - // TODO: adding more types to implicit casting instead of relying on this flush. auto csv_error = CSVError::CastError( state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, borked_line, lines_per_batch, - result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl)); + result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl), + -1); error_handler->Error(csv_error); } borked_lines.insert(line_error++); @@ -748,13 +754,11 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { bool first_nl; auto borked_line = result.line_positions_per_row[line_error].ReconstructCurrentLine( first_nl, result.buffer_handles); - // TODO: We can't really nicely get the position where this error happened, this should be solved by - // TODO: adding more types to implicit casting instead of relying on this flush. 
- auto csv_error = - CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], error_message, - col_idx, borked_line, lines_per_batch, - result.line_positions_per_row[line_error].begin.GetGlobalPosition( - result.result_size, first_nl)); + auto csv_error = CSVError::CastError( + state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, borked_line, + lines_per_batch, + result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl), + -1); error_handler->Error(csv_error); } @@ -1249,6 +1253,7 @@ void StringValueScanner::FinalizeChunkProcess() { } else { result.HandleError(); } + iterator.done = FinishedFile(); } else { // 2) If a boundary is not set // We read until the chunk is complete, or we have nothing else to read. diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index ae2fddf9df0b..3f819bbf170c 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -268,11 +268,19 @@ void CSVGlobalState::FillRejectsTable() { errors_appender.Append(file_idx); // 3. Row Line errors_appender.Append(row_line); - // 4. Byte Position where error occurred - errors_appender.Append(error.byte_position); - // 5. Column Index + // 4. Byte Position of the row error + errors_appender.Append(error.row_byte_position); + // 5. Byte Position where error occurred + if (error.byte_position == -1) { + // This means this error comes from a flush, and we don't support this yet, so we give it + // a null + errors_appender.Append(Value()); + } else { + errors_appender.Append(error.byte_position); + } + // 6. Column Index errors_appender.Append(col_idx + 1); - // 6. Column Name (If Applicable) + // 7. Column Name (If Applicable) switch (error.type) { case CSVErrorType::TOO_MANY_COLUMNS: errors_appender.Append(Value()); @@ -284,11 +292,11 @@ void CSVGlobalState::FillRejectsTable() { default: errors_appender.Append(string_t(bind_data.return_names[col_idx])); } - // 7. Error Type + // 8. Error Type errors_appender.Append(string_t(CSVErrorTypeToEnum(error.type))); - // 8. Original CSV Line + // 9. Original CSV Line errors_appender.Append(string_t(error.csv_row)); - // 9. Full Error Message + // 10. 
Full Error Message errors_appender.Append(string_t(error.error_message)); errors_appender.EndRow(); } diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index 7a8349288c07..1f93d945fd91 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -82,9 +82,10 @@ CSVError::CSVError(string error_message_p, CSVErrorType type_p, LinesPerBoundary } CSVError::CSVError(string error_message_p, CSVErrorType type_p, idx_t column_idx_p, string csv_row_p, - LinesPerBoundary error_info_p, idx_t byte_position_p, const CSVReaderOptions &reader_options) + LinesPerBoundary error_info_p, idx_t row_byte_position, int64_t byte_position_p, + const CSVReaderOptions &reader_options) : error_message(std::move(error_message_p)), type(type_p), column_idx(column_idx_p), csv_row(std::move(csv_row_p)), - error_info(error_info_p), byte_position(byte_position_p) { + error_info(error_info_p), row_byte_position(row_byte_position), byte_position(byte_position_p) { // What were the options std::ostringstream error; error << error_message << std::endl; @@ -114,13 +115,15 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_ } CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, - string &csv_row, LinesPerBoundary error_info, idx_t byte_position) { + string &csv_row, LinesPerBoundary error_info, idx_t row_byte_position, + int64_t byte_position) { std::ostringstream error; // Which column error << "Error when converting column \"" << column_name << "\". "; // What was the cast error error << cast_error; - return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, byte_position, options); + return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, row_byte_position, + byte_position, options); } CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info, @@ -128,7 +131,8 @@ CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_s std::ostringstream error; error << "Maximum line size of " << options.maximum_line_size << " bytes exceeded. 
"; error << "Actual Size:" << actual_size << " bytes."; - return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position, options); + return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position, byte_position, + options); } CSVError CSVError::SniffingError(string &file_path) { @@ -150,34 +154,36 @@ CSVError CSVError::NullPaddingFail(const CSVReaderOptions &options, LinesPerBoun } CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_t current_column, - LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { + LinesPerBoundary error_info, string &csv_row, idx_t row_byte_position, + int64_t byte_position) { std::ostringstream error; error << "Value with unterminated quote found."; - return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, byte_position, - options); + return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, + row_byte_position, byte_position, options); } CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns, - LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { + LinesPerBoundary error_info, string &csv_row, idx_t row_byte_position, + int64_t byte_position) { std::ostringstream error; // How many columns were expected and how many were found error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1; if (actual_columns >= options.dialect_options.num_cols) { - return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, byte_position, - options); + return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, + row_byte_position, byte_position, options); } else { - return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, byte_position, - options); + return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, + row_byte_position, byte_position, options); } } CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info, - string &csv_row, idx_t byte_position) { + string &csv_row, idx_t row_byte_position, int64_t byte_position) { std::ostringstream error; // How many columns were expected and how many were found error << "Invalid unicode (byte sequence mismatch) detected."; - return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, byte_position, - options); + return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, row_byte_position, + byte_position, options); } bool CSVErrorHandler::PrintLineNumber(CSVError &error) { diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 429d385553e2..31f63d0279b8 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -104,17 +104,19 @@ void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData info->columns.AddColumn(ColumnDefinition("file_id", LogicalType::UBIGINT)); // 2. Row Line info->columns.AddColumn(ColumnDefinition("line", LogicalType::UBIGINT)); - // 3. Byte Position where error occurred + // 3. 
Byte Position of the start of the line + info->columns.AddColumn(ColumnDefinition("line_byte_position", LogicalType::UBIGINT)); + // 4. Byte Position where error occurred info->columns.AddColumn(ColumnDefinition("byte_position", LogicalType::UBIGINT)); - // 4. Column Index (If Applicable) + // 5. Column Index (If Applicable) info->columns.AddColumn(ColumnDefinition("column_idx", LogicalType::UBIGINT)); - // 5. Column Name (If Applicable) + // 6. Column Name (If Applicable) info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR)); - // 6. Error Type + // 7. Error Type info->columns.AddColumn(ColumnDefinition("error_type", enum_type)); - // 7. Original CSV Line + // 8. Original CSV Line info->columns.AddColumn(ColumnDefinition("csv_line", LogicalType::VARCHAR)); - // 8. Full Error Message + // 9. Full Error Message info->columns.AddColumn(ColumnDefinition("error_message", LogicalType::VARCHAR)); catalog.CreateTable(context, std::move(info)); } diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index 98f460127d83..d6a6ce7ca657 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -52,13 +52,14 @@ class CSVError { public: CSVError() {}; CSVError(string error_message, CSVErrorType type, idx_t column_idx, string csv_row, LinesPerBoundary error_info, - idx_t byte_position, const CSVReaderOptions &reader_options); + idx_t row_byte_position, int64_t byte_position, const CSVReaderOptions &reader_options); CSVError(string error_message, CSVErrorType type, LinesPerBoundary error_info); //! Produces error messages for column name -> type mismatch. static CSVError ColumnTypesError(case_insensitive_map_t sql_types_per_column, const vector &names); //! Produces error messages for casting errors static CSVError CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, - idx_t column_idx, string &csv_row, LinesPerBoundary error_info, idx_t byte_position); + idx_t column_idx, string &csv_row, LinesPerBoundary error_info, idx_t row_byte_position, + int64_t byte_position); //! Produces error for when the line size exceeds the maximum line size option static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info, string &csv_row, idx_t byte_position); @@ -66,14 +67,16 @@ class CSVError { static CSVError SniffingError(string &file_path); //! Produces error messages for unterminated quoted values static CSVError UnterminatedQuotesError(const CSVReaderOptions &options, idx_t current_column, - LinesPerBoundary error_info, string &csv_row, idx_t byte_position); + LinesPerBoundary error_info, string &csv_row, idx_t row_byte_position, + int64_t byte_position); //! Produces error messages for null_padding option is set and we have quoted new values in parallel static CSVError NullPaddingFail(const CSVReaderOptions &options, LinesPerBoundary error_info); //! 
Produces error for incorrect (e.g., smaller and lower than the predefined) number of columns in a CSV Line static CSVError IncorrectColumnAmountError(const CSVReaderOptions &state_machine, idx_t actual_columns, - LinesPerBoundary error_info, string &csv_row, idx_t byte_position); + LinesPerBoundary error_info, string &csv_row, idx_t row_byte_position, + int64_t byte_position); static CSVError InvalidUTF8(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info, - string &csv_row, idx_t byte_position); + string &csv_row, idx_t row_byte_position, int64_t byte_position); idx_t GetBoundaryIndex() { return error_info.boundary_idx; @@ -91,8 +94,10 @@ class CSVError { string csv_row; //! Line information regarding this error LinesPerBoundary error_info; - //! Global Byte Position where error occurred. - idx_t byte_position; + //! Byte position of where the row starts + idx_t row_byte_position; + //! Byte Position where error occurred. + int64_t byte_position; }; class CSVErrorHandler { From 1f4270c335e98c8106de30b5af3b062ba5143ca0 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 13 Mar 2024 16:27:48 +0100 Subject: [PATCH 046/147] More on the byte per row and per value --- data/csv/rejects/unquoted/unquoted_last_value.csv | 2 +- .../operator/csv_scanner/scanner/string_value_scanner.cpp | 4 ++++ .../operator/csv_scanner/table_function/global_csv_state.cpp | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/data/csv/rejects/unquoted/unquoted_last_value.csv b/data/csv/rejects/unquoted/unquoted_last_value.csv index 0d714083e9c8..68dec7d40d9c 100644 --- a/data/csv/rejects/unquoted/unquoted_last_value.csv +++ b/data/csv/rejects/unquoted/unquoted_last_value.csv @@ -2,4 +2,4 @@ "bla" "bla" "bla" -"bla +"bla \ No newline at end of file diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 103ff6341a65..0b300ee30d00 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -1030,6 +1030,10 @@ bool StringValueScanner::MoveToNextBuffer() { lines_read++; } else if (states.IsQuotedCurrent()) { // Unterminated quote + LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, + result.buffer_size}; + result.current_line_position.begin = result.current_line_position.end; + result.current_line_position.end = current_line_start; result.InvalidState(result); } else { result.AddRow(result, previous_buffer_handle->actual_size); diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 3f819bbf170c..94951168ddd2 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -269,7 +269,7 @@ void CSVGlobalState::FillRejectsTable() { // 3. Row Line errors_appender.Append(row_line); // 4. Byte Position of the row error - errors_appender.Append(error.row_byte_position); + errors_appender.Append(error.row_byte_position + 1); // 5. 
Byte Position where error occurred
 		if (error.byte_position == -1) {
 			// This means this error comes from a flush, and we don't support this yet, so we give it

From ab2e9b13fb8177869916dd48112fba555bb4c8b4 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 14 Mar 2024 13:42:33 +0100
Subject: [PATCH 047/147] Change last_position to be a LinePosition

---
 .../scanner/string_value_scanner.cpp          | 72 +++++++++----------
 .../csv_scanner/string_value_scanner.hpp      |  2 +-
 2 files changed, 33 insertions(+), 41 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 0b300ee30d00..2b3eaf252a9f 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -28,7 +28,7 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
 	// Buffer Information
 	buffer_ptr = buffer_handle->Ptr();
 	buffer_size = buffer_handle->actual_size;
-	last_position = buffer_position;
+	last_position = {buffer_handle->buffer_idx, buffer_position, buffer_size};
 	requested_size = buffer_handle->requested_size;
 
 	// Current Result information
@@ -119,8 +119,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 		}
 		if (error) {
 			// We error pointing to the current value error.
-			current_errors.push_back(
-			    {CSVErrorType::TOO_MANY_COLUMNS, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}});
+			current_errors.push_back({CSVErrorType::TOO_MANY_COLUMNS, cur_col_id, last_position});
 		}
 		return;
 	}
@@ -141,9 +140,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 		if (empty) {
 			if (parse_types[chunk_col_id].first != LogicalTypeId::VARCHAR) {
 				// If it is not a varchar, empty values are not accepted, we must error.
-				current_errors.push_back({CSVErrorType::CAST_ERROR,
-				                          cur_col_id,
-				                          {iterator.pos.buffer_idx, last_position, buffer_size}});
+				current_errors.push_back({CSVErrorType::CAST_ERROR, cur_col_id, last_position});
 			}
 			static_cast(vector_ptr[chunk_col_id])[number_of_rows] = string_t();
 		} else {
@@ -220,13 +217,12 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 	// We only evaluate if a string is utf8 valid, if it's actually a varchar
 	if (parse_types[chunk_col_id].second && !Utf8Proc::IsValid(value_ptr, UnsafeNumericCast(size))) {
 		bool force_error = !state_machine.options.ignore_errors.GetValue() && sniffing;
-		LinePosition error_position {iterator.pos.buffer_idx, last_position, buffer_size};
 		// Invalid unicode, we must error
 		if (force_error) {
-			HandleUnicodeError(cur_col_id, error_position);
+			HandleUnicodeError(cur_col_id, last_position);
 		}
 		// If we got here, we are ignoring errors, hence we must ignore this line.
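		// Clarification (editor's sketch, not a hunk of this patch): with
		// last_position now a full LinePosition {buffer_idx, buffer_pos,
		// buffer_size}, the error records below can reuse it directly instead
		// of assembling an ad-hoc error_position from the iterator state.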
- current_errors.push_back({CSVErrorType::INVALID_UNICODE, cur_col_id, error_position}); + current_errors.push_back({CSVErrorType::INVALID_UNICODE, cur_col_id, last_position}); break; } if (allocate) { @@ -246,8 +242,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size // Casting Error Message error << "Could not convert string \"" << std::string(value_ptr, size) << "\" to \'" << LogicalTypeIdToString(parse_types[cur_col_id].first) << "\'"; - current_errors.push_back( - {CSVErrorType::CAST_ERROR, cur_col_id, {iterator.pos.buffer_idx, last_position, buffer_size}}); + current_errors.push_back({CSVErrorType::CAST_ERROR, cur_col_id, last_position}); current_errors.back().error_message = error.str(); } cur_col_id++; @@ -309,7 +304,7 @@ void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t bu result.parse_chunk.data[result.chunk_col_id]); result.AddValueToVector(value.GetData(), value.GetSize()); } else { - if (buffer_pos < result.last_position + 2) { + if (buffer_pos < result.last_position.buffer_pos + 2) { // empty value auto value = string_t(); result.AddValueToVector(value.GetData(), value.GetSize()); @@ -323,15 +318,16 @@ void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t bu } void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_pos) { - if (result.last_position > buffer_pos) { + if (result.last_position.buffer_pos > buffer_pos) { return; } if (result.quoted) { StringValueResult::AddQuotedValue(result, buffer_pos); } else { - result.AddValueToVector(result.buffer_ptr + result.last_position, buffer_pos - result.last_position); + result.AddValueToVector(result.buffer_ptr + result.last_position.buffer_pos, + buffer_pos - result.last_position.buffer_pos); } - result.last_position = buffer_pos + 1; + result.last_position.buffer_pos = buffer_pos + 1; } void StringValueResult::HandleUnicodeError(idx_t col_idx, LinePosition &error_position) { @@ -504,11 +500,10 @@ bool StringValueResult::AddRowInternal() { bool first_nl; auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); - LinePosition error_position {iterator.pos.buffer_idx, last_position, buffer_size}; auto csv_error = CSVError::IncorrectColumnAmountError( state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, current_line_position.begin.GetGlobalPosition(requested_size, first_nl), - error_position.GetGlobalPosition(requested_size, first_nl)); + last_position.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); // If we are here we ignore_errors, so we delete this line number_of_rows--; @@ -526,22 +521,23 @@ bool StringValueResult::AddRowInternal() { } bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos) { - if (result.last_position <= buffer_pos) { + if (result.last_position.buffer_pos <= buffer_pos) { // We add the value if (result.quoted) { StringValueResult::AddQuotedValue(result, buffer_pos); } else { - result.AddValueToVector(result.buffer_ptr + result.last_position, buffer_pos - result.last_position); + result.AddValueToVector(result.buffer_ptr + result.last_position.buffer_pos, + buffer_pos - result.last_position.buffer_pos); } if (result.state_machine.dialect_options.state_machine_options.new_line == NewLineIdentifier::CARRY_ON) { if (result.states.states[1] == CSVState::RECORD_SEPARATOR) { // Even though this is marked as a carry on, this is a hippie 
mixie - result.last_position = buffer_pos + 1; + result.last_position.buffer_pos = buffer_pos + 1; } else { - result.last_position = buffer_pos + 2; + result.last_position.buffer_pos = buffer_pos + 2; } } else { - result.last_position = buffer_pos + 1; + result.last_position.buffer_pos = buffer_pos + 1; } } @@ -553,20 +549,17 @@ void StringValueResult::InvalidState(StringValueResult &result) { bool force_error = !result.state_machine.options.ignore_errors.GetValue() && result.sniffing; // Invalid unicode, we must error if (force_error) { - LinePosition error_position {result.iterator.pos.buffer_idx, result.last_position, result.buffer_size}; - result.HandleUnicodeError(result.cur_col_id, error_position); + result.HandleUnicodeError(result.cur_col_id, result.last_position); } - result.current_errors.push_back({CSVErrorType::UNTERMINATED_QUOTES, - result.cur_col_id, - {result.iterator.pos.buffer_idx, result.last_position, result.buffer_size}}); + result.current_errors.push_back({CSVErrorType::UNTERMINATED_QUOTES, result.cur_col_id, result.last_position}); } bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_pos) { // We care about empty lines if this is a single column csv file - result.last_position = buffer_pos + 1; + result.last_position = {result.iterator.pos.buffer_idx, result.iterator.pos.buffer_pos + 1, result.buffer_size}; if (result.states.IsCarriageReturn() && result.state_machine.dialect_options.state_machine_options.new_line == NewLineIdentifier::CARRY_ON) { - result.last_position++; + result.last_position.buffer_pos++; } if (result.number_of_columns == 1) { if (result.null_str_size == 0) { @@ -786,9 +779,8 @@ void StringValueScanner::Initialize() { !state_machine->options.dialect_options.skip_rows.IsSetByUser())) { SetStart(); } - result.last_position = iterator.pos.buffer_pos; - result.current_line_position.begin = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, - cur_buffer_handle->actual_size}; + result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, cur_buffer_handle->actual_size}; + result.current_line_position.begin = result.last_position; result.current_line_position.end = result.current_line_position.begin; } @@ -906,12 +898,12 @@ void StringValueScanner::ProcessOverbufferValue() { states.Initialize(); string overbuffer_string; auto previous_buffer = previous_buffer_handle->Ptr(); - if (result.last_position == previous_buffer_handle->actual_size) { - state_machine->Transition(states, previous_buffer[result.last_position - 1]); + if (result.last_position.buffer_pos == previous_buffer_handle->actual_size) { + state_machine->Transition(states, previous_buffer[result.last_position.buffer_pos - 1]); } idx_t j = 0; result.quoted = false; - for (idx_t i = result.last_position; i < previous_buffer_handle->actual_size; i++) { + for (idx_t i = result.last_position.buffer_pos; i < previous_buffer_handle->actual_size; i++) { state_machine->Transition(states, previous_buffer[i]); if (states.EmptyLine() || states.IsCurrentNewRow()) { continue; @@ -995,9 +987,9 @@ void StringValueScanner::ProcessOverbufferValue() { } if (states.IsCarriageReturn() && state_machine->dialect_options.state_machine_options.new_line == NewLineIdentifier::CARRY_ON) { - result.last_position = ++iterator.pos.buffer_pos + 1; + result.last_position = {iterator.pos.buffer_idx, ++iterator.pos.buffer_pos + 1, result.buffer_size}; } else { - result.last_position = ++iterator.pos.buffer_pos; + result.last_position = {iterator.pos.buffer_idx, ++iterator.pos.buffer_pos, 
result.buffer_size};
 	}
 	// Be sure to reset the quoted and escaped variables
 	result.quoted = false;
@@ -1180,7 +1172,7 @@ void StringValueScanner::SetStart() {
 		    scan_finder->previous_buffer_handle->is_last_buffer) {
 			iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx;
 			iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos;
-			result.last_position = iterator.pos.buffer_pos;
+			result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size};
 			iterator.done = scan_finder->iterator.done;
 			return;
 		}
@@ -1199,7 +1191,7 @@ void StringValueScanner::SetStart() {
 			// If things go terribly wrong, we never loop indefinitely.
 			iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx;
 			iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos;
-			result.last_position = iterator.pos.buffer_pos;
+			result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size};
 			iterator.done = scan_finder->iterator.done;
 			return;
 		}
@@ -1216,7 +1208,7 @@ void StringValueScanner::SetStart() {
 	}
 	iterator.pos.buffer_idx = scan_finder->result.current_line_position.begin.buffer_idx;
 	iterator.pos.buffer_pos = scan_finder->result.current_line_position.begin.buffer_pos;
-	result.last_position = iterator.pos.buffer_pos;
+	result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size};
 }

 void StringValueScanner::FinalizeChunkProcess() {
diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
index dfbe1f581bd5..58b312c75c53 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
@@ -92,7 +92,7 @@ class StringValueResult : public ScannerResult {
 	unsafe_vector validity_mask;

 	//! Variables to iterate over the CSV buffers
-	idx_t last_position;
+	LinePosition last_position;
 	char *buffer_ptr;
 	idx_t buffer_size;

From b0f804b88d6590309272b7677892430eb18f3b2a Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 14 Mar 2024 14:50:09 +0100
Subject: [PATCH 048/147] More adjustments

---
 .../scanner/string_value_scanner.cpp          |  7 +-
 .../csv/rejects/csv_unquoted_rejects.test     | 66 +++++++++++--------
 2 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 2b3eaf252a9f..f2339a8f1cb5 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -1249,7 +1249,12 @@ void StringValueScanner::FinalizeChunkProcess() {
 		} else {
 			result.HandleError();
 		}
-		iterator.done = FinishedFile();
+		if (!iterator.done) {
+			if (iterator.pos.buffer_pos >= iterator.GetEndPos() || iterator.pos.buffer_idx > iterator.GetBufferIdx() ||
+			    FinishedFile()) {
+				iterator.done = true;
+			}
+		}
 	} else {
 		// 2) If a boundary is not set
 		// We read until the chunk is complete, or we have nothing else to read.
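Editor's clarification for the last_position rework (PATCH 046-048): these
hunks hinge on last_position carrying a full buffer coordinate instead of a
bare offset. Below is a minimal sketch of the LinePosition shape the diffs
rely on; the member names and the GetGlobalPosition call match the hunks, but
the arithmetic shown is an illustrative assumption, not the verbatim DuckDB
implementation:

	struct LinePosition {
		idx_t buffer_idx = 0;  // which CSV buffer holds this position
		idx_t buffer_pos = 0;  // byte offset inside that buffer
		idx_t buffer_size = 0; // actual size of that buffer
		// Global file offset, assuming every buffer before this one was
		// filled to requested_size bytes; first_nl discounts a leading
		// newline when a line is reconstructed across buffers.
		idx_t GetGlobalPosition(idx_t requested_size, bool first_nl = false) const {
			return requested_size * buffer_idx + buffer_pos - first_nl;
		}
	};

Carrying buffer_idx along is what lets the scanner report both
line_byte_position (start of the row) and byte_position (offset of the faulty
value) to the rejects table, even when a row straddles two buffers.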
diff --git a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test index 13c13b8b9fa7..1ce59c7c826e 100644 --- a/test/sql/copy/csv/rejects/csv_unquoted_rejects.test +++ b/test/sql/copy/csv/rejects/csv_unquoted_rejects.test @@ -11,8 +11,7 @@ query II SELECT * FROM read_csv( 'data/csv/rejects/unquoted/basic.csv', columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"'); + store_rejects=true, auto_detect=false, header = 1, quote = '"', escape = '"'); ---- bla 1 bla 2 @@ -21,21 +20,23 @@ bla 1 bla 2 bla 3 -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIII rowsort +SELECT regexp_replace(file_path, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line,line_byte_position, byte_position +FROM reject_scans inner join reject_errors on (reject_scans.scan_id = reject_errors.scan_id and reject_scans.file_id = reject_errors.file_id); ---- -data/csv/rejects/unquoted/basic.csv 5 1 "a" UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 28 +data/csv/rejects/unquoted/basic.csv 5 1 a UNQUOTED VALUE "blaaaaaaaaaaaaaa"bla,4 29 29 + +statement ok +DROP TABLE reject_scans; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; query II SELECT * FROM read_csv( 'data/csv/rejects/unquoted/unquoted_new_line.csv', columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"'); + store_rejects=true, auto_detect=false, header = 1, quote = '"', escape = '"'); ---- bla 1 bla 2 @@ -44,35 +45,40 @@ bla 1 bla 2 bla 3 -query IIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, byte_position -FROM csv_rejects_table; +query IIIIIII rowsort +SELECT regexp_replace(file_path, '\\', '/', 'g'), line, column_idx, column_name, error_type, line_byte_position,byte_position +FROM reject_scans inner join reject_errors on (reject_scans.scan_id = reject_errors.scan_id and reject_scans.file_id = reject_errors.file_id); ---- -data/csv/rejects/unquoted/unquoted_new_line.csv 5 1 "a" UNQUOTED VALUE 28 +data/csv/rejects/unquoted/unquoted_new_line.csv 5 1 a UNQUOTED VALUE 29 29 + +statement ok +DROP TABLE reject_scans; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; query I SELECT * FROM read_csv( 'data/csv/rejects/unquoted/unquoted_last_value.csv', columns = {'a': 'VARCHAR'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 0, quote = '"', escape = '"'); + store_rejects=true, auto_detect=false, header = 0, quote = '"', escape = '"'); ---- blaaaaaaaaaaaaaa bla bla bla -query IIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, byte_position -FROM csv_rejects_table; +query IIIIIIII rowsort +SELECT regexp_replace(file_path, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line,line_byte_position, byte_position +FROM reject_scans inner join reject_errors on (reject_scans.scan_id = reject_errors.scan_id and reject_scans.file_id = reject_errors.file_id); ---- -data/csv/rejects/unquoted/unquoted_last_value.csv 5 1 "a" UNQUOTED VALUE 31 +data/csv/rejects/unquoted/unquoted_last_value.csv 5 1 a UNQUOTED VALUE "bla 38 38 statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; + 
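# Clarification (editor's note, not part of the diff): store_rejects=true is
# assumed to write rejects into two linked tables rather than one, with
# reject_scans describing each scan (scan_id, file_id, file_path, ...) and
# reject_errors holding one row per faulty line; hence the join on
# (scan_id, file_id) in the queries above and the two DROPs here.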
+statement ok
+DROP TABLE reject_errors;

 loop buffer_size 35 40

 query II
 SELECT * FROM read_csv(
     'data/csv/rejects/unquoted/basic.csv',
     columns = {'a': 'VARCHAR', 'b': 'INTEGER'},
-    rejects_table='csv_rejects_table', buffer_size=${buffer_size},
-    ignore_errors=true, auto_detect=false, header = 1, quote = '"', escape = '"', buffer_size=35);
+    buffer_size=${buffer_size},
+    store_rejects=true, auto_detect=false, header = 1, quote = '"', escape = '"');
 ----
 bla	1
 bla	2
@@ -90,13 +96,17 @@ bla	1
 bla	2
 bla	3

+
 query IIIIIIII rowsort
-SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
-FROM csv_rejects_table;
+SELECT regexp_replace(file_path, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line,line_byte_position, byte_position
+FROM reject_scans inner join reject_errors on (reject_scans.scan_id = reject_errors.scan_id and reject_scans.file_id = reject_errors.file_id);
 ----
-data/csv/rejects/unquoted/basic.csv	5	1	"a"	UNQUOTED VALUE	"blaaaaaaaaaaaaaa"bla,4	28
+data/csv/rejects/unquoted/basic.csv	5	1	a	UNQUOTED VALUE	"blaaaaaaaaaaaaaa"bla,4	29	29
+
+statement ok
+DROP TABLE reject_scans;

 statement ok
-DROP TABLE csv_rejects_table;
+DROP TABLE reject_errors;

 endloop
\ No newline at end of file

From a2a9982e7e619661f9fa7abc9e10fe87cb5dcc74 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 20 Mar 2024 11:26:53 +0100
Subject: [PATCH 049/147] Fix progress over multiple very large files

---
 .../table_function/global_csv_state.cpp       | 19 ++++++++++---------
 test/sql/copy/csv/test_gzipped.test           |  0
 2 files changed, 10 insertions(+), 9 deletions(-)
 create mode 100644 test/sql/copy/csv/test_gzipped.test

diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index 863b937f2186..72707df66324 100644
--- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -43,16 +43,16 @@ double CSVGlobalState::GetProgress(const ReadCSVData &bind_data_p) const {
 	lock_guard parallel_lock(main_mutex);
 	idx_t total_files = bind_data.files.size();
 	// get the progress WITHIN the current file
-	double progress;
+	double percentage = 0;
 	if (file_scans.back()->file_size == 0) {
-		progress = 1.0;
+		percentage = 1.0;
 	} else {
 		// for compressed files, the bytes read may be greater than the file size.
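		// Clarification (editor's note, not a hunk of this patch): the
		// replacement below averages progress over every file opened so far,
		//   percentage = (1 / total_files) * sum_i min(1.0, bytes_read_i / file_size_i)
		// where min() clamps compressed files, whose bytes_read counts
		// decompressed bytes and can therefore exceed the on-disk file_size.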
- progress = std::min(1.0, double(file_scans.back()->bytes_read) / double(file_scans.back()->file_size)); + for (auto &file : file_scans) { + percentage += + (double(1) / double(total_files)) * std::min(1.0, double(file->bytes_read) / double(file->file_size)); + } } - // now get the total percentage of files read - double percentage = double(current_boundary.GetFileIdx()) / total_files; - percentage += (double(1) / double(total_files)) * progress; return percentage * 100; } @@ -66,8 +66,9 @@ unique_ptr CSVGlobalState::Next() { if (cur_idx == 0) { current_file = file_scans.back(); } else { - current_file = make_shared(context, bind_data.files[cur_idx], bind_data.options, cur_idx, - bind_data, column_ids, file_schema); + file_scans.emplace_back(make_shared(context, bind_data.files[cur_idx], bind_data.options, + cur_idx, bind_data, column_ids, file_schema)); + current_file = file_scans.back(); } auto csv_scanner = make_uniq(scanner_idx++, current_file->buffer_manager, current_file->state_machine, @@ -98,7 +99,7 @@ unique_ptr CSVGlobalState::Next() { // If we have a next file we have to construct the file scan for that file_scans.emplace_back(make_shared(context, bind_data.files[current_file_idx], bind_data.options, current_file_idx, bind_data, column_ids, - file_schema)); + file_schema, single_threaded)); // And re-start the boundary-iterator auto buffer_size = file_scans.back()->buffer_manager->GetBuffer(0)->actual_size; current_boundary = CSVIterator(current_file_idx, 0, 0, 0, buffer_size); diff --git a/test/sql/copy/csv/test_gzipped.test b/test/sql/copy/csv/test_gzipped.test new file mode 100644 index 000000000000..e69de29bb2d1 From 3ab61710c8dbc6889c4299e61e45ed42368af097 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 20 Mar 2024 12:52:58 +0100 Subject: [PATCH 050/147] Dont store buffers if doing single threaded scan over multiple files --- .../csv_scanner/buffer_manager/csv_buffer.cpp | 20 +++++++++---------- .../buffer_manager/csv_buffer_manager.cpp | 14 +++++++++---- .../scanner/string_value_scanner.cpp | 4 ++-- .../table_function/csv_file_scanner.cpp | 9 +++++---- .../table_function/global_csv_state.cpp | 7 +++++-- src/function/table/copy_csv.cpp | 2 +- src/function/table/read_csv.cpp | 2 +- src/function/table/sniff_csv.cpp | 2 +- .../operator/csv_scanner/csv_buffer.hpp | 9 +++++---- .../csv_scanner/csv_buffer_manager.hpp | 5 ++++- .../operator/csv_scanner/csv_file_scanner.hpp | 5 +++-- src/main/relation/read_csv_relation.cpp | 2 +- 12 files changed, 48 insertions(+), 33 deletions(-) diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index 8c29ae79fb43..6ac66783f041 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -4,9 +4,9 @@ namespace duckdb { CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, - idx_t &global_csv_current_position, idx_t file_number_p) + idx_t &global_csv_current_position, idx_t file_number_p, bool single_threaded) : context(context), file_number(file_number_p), can_seek(file_handle.CanSeek()) { - AllocateBuffer(buffer_size_p); + AllocateBuffer(buffer_size_p, can_seek || single_threaded); auto buffer = Ptr(); actual_buffer_size = file_handle.Read(buffer, buffer_size_p); while (actual_buffer_size < buffer_size_p && !file_handle.FinishedReading()) { @@ -18,10 +18,10 @@ CSVBuffer::CSVBuffer(ClientContext &context, idx_t 
buffer_size_p, CSVFileHandle } CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, - idx_t global_csv_current_position, idx_t file_number_p, idx_t buffer_idx_p) + idx_t global_csv_current_position, idx_t file_number_p, idx_t buffer_idx_p, bool single_threaded) : context(context), global_csv_start(global_csv_current_position), file_number(file_number_p), can_seek(file_handle.CanSeek()), buffer_idx(buffer_idx_p) { - AllocateBuffer(buffer_size); + AllocateBuffer(buffer_size, single_threaded || can_seek); auto buffer = handle.Ptr(); actual_buffer_size = file_handle.Read(handle.Ptr(), buffer_size); while (actual_buffer_size < buffer_size && !file_handle.FinishedReading()) { @@ -32,15 +32,16 @@ CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t b } shared_ptr CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number_p, - bool &has_seaked) { + bool &has_seaked, bool single_threaded) { if (has_seaked) { // This means that at some point a reload was done, and we are currently on the incorrect position in our file // handle file_handle.Seek(global_csv_start + actual_buffer_size); has_seaked = false; } - auto next_csv_buffer = make_shared(file_handle, context, buffer_size, - global_csv_start + actual_buffer_size, file_number_p, buffer_idx + 1); + auto next_csv_buffer = + make_shared(file_handle, context, buffer_size, global_csv_start + actual_buffer_size, file_number_p, + buffer_idx + 1, single_threaded); if (next_csv_buffer->GetBufferSize() == 0) { // We are done reading return nullptr; @@ -48,9 +49,8 @@ shared_ptr CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_s return next_csv_buffer; } -void CSVBuffer::AllocateBuffer(idx_t buffer_size) { +void CSVBuffer::AllocateBuffer(idx_t buffer_size, bool can_destroy) { auto &buffer_manager = BufferManager::GetBufferManager(context); - bool can_destroy = can_seek; handle = buffer_manager.Allocate(MemoryTag::CSV_READER, MaxValue(Storage::BLOCK_SIZE, buffer_size), can_destroy, &block); } @@ -60,7 +60,7 @@ idx_t CSVBuffer::GetBufferSize() { } void CSVBuffer::Reload(CSVFileHandle &file_handle) { - AllocateBuffer(actual_buffer_size); + AllocateBuffer(actual_buffer_size, false); file_handle.Seek(global_csv_start); file_handle.Read(handle.Ptr(), actual_buffer_size); } diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp index 2a13158b6081..568343cafad7 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp @@ -4,8 +4,9 @@ namespace duckdb { CSVBufferManager::CSVBufferManager(ClientContext &context_p, const CSVReaderOptions &options, const string &file_path_p, - const idx_t file_idx_p) - : context(context_p), file_idx(file_idx_p), file_path(file_path_p), buffer_size(CSVBuffer::CSV_BUFFER_SIZE) { + const idx_t file_idx_p, bool single_threaded_p) + : context(context_p), file_idx(file_idx_p), file_path(file_path_p), buffer_size(CSVBuffer::CSV_BUFFER_SIZE), + single_threaded(single_threaded_p) { D_ASSERT(!file_path.empty()); file_handle = ReadCSV::OpenCSV(file_path, options.compression, context); skip_rows = options.dialect_options.skip_rows.GetValue(); @@ -28,7 +29,7 @@ void CSVBufferManager::UnpinBuffer(const idx_t cache_idx) { void CSVBufferManager::Initialize() { if (cached_buffers.empty()) { cached_buffers.emplace_back( - make_shared(context, 
buffer_size, *file_handle, global_csv_pos, file_idx)); + make_shared(context, buffer_size, *file_handle, global_csv_pos, file_idx, single_threaded)); last_buffer = cached_buffers.front(); } } @@ -47,7 +48,8 @@ bool CSVBufferManager::ReadNextAndCacheIt() { last_buffer->last_buffer = true; return false; } - auto maybe_last_buffer = last_buffer->Next(*file_handle, cur_buffer_size, file_idx, has_seeked); + auto maybe_last_buffer = + last_buffer->Next(*file_handle, cur_buffer_size, file_idx, has_seeked, single_threaded); if (!maybe_last_buffer) { last_buffer->last_buffer = true; return false; @@ -126,4 +128,8 @@ string CSVBufferManager::GetFilePath() { return file_path; } +void CSVBufferManager::SetSingleThreaded() { + single_threaded = true; +} + } // namespace duckdb diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 9582e1c1af2f..a0376cc94947 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -530,9 +530,9 @@ unique_ptr StringValueScanner::GetCSVScanner(ClientContext & state_machine->dialect_options.num_cols = options.dialect_options.num_cols; state_machine->dialect_options.header = options.dialect_options.header; - auto buffer_manager = make_shared(context, options, options.file_path, 0); + auto buffer_manager = make_shared(context, options, options.file_path, 0, false); auto scanner = make_uniq(buffer_manager, state_machine, make_shared()); - scanner->csv_file_scan = make_shared(context, options.file_path, options); + scanner->csv_file_scan = make_shared(context, options.file_path, options, false); scanner->csv_file_scan->InitializeProjection(); return scanner; } diff --git a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp index 0532fc678a41..8013a2da10ef 100644 --- a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +++ b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp @@ -41,7 +41,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, shared_ptr bu CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, const CSVReaderOptions &options_p, const idx_t file_idx_p, const ReadCSVData &bind_data, const vector &column_ids, - const vector &file_schema) + const vector &file_schema, bool single_threaded) : file_path(file_path_p), file_idx(file_idx_p), error_handler(make_shared(options_p.ignore_errors)), options(options_p) { if (file_idx < bind_data.union_readers.size()) { @@ -73,7 +73,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, cons } // Initialize Buffer Manager - buffer_manager = make_shared(context, options, file_path, file_idx); + buffer_manager = make_shared(context, options, file_path, file_idx, single_threaded); // Initialize On Disk and Size of file on_disk_file = buffer_manager->file_handle->OnDiskFile(); file_size = buffer_manager->file_handle->FileSize(); @@ -128,10 +128,11 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, cons InitializeFileNamesTypes(); } -CSVFileScan::CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options_p) +CSVFileScan::CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options_p, + bool single_threaded) : file_path(file_name), file_idx(0), 
error_handler(make_shared(options_p.ignore_errors)), options(options_p) { - buffer_manager = make_shared(context, options, file_path, file_idx); + buffer_manager = make_shared(context, options, file_path, file_idx, single_threaded); // Initialize On Disk and Size of file on_disk_file = buffer_manager->file_handle->OnDiskFile(); file_size = buffer_manager->file_handle->FileSize(); diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 72707df66324..f0ec98fbbdfb 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -22,7 +22,7 @@ CSVGlobalState::CSVGlobalState(ClientContext &context_p, const shared_ptr(context, files[0], options, 0, bind_data, column_ids, file_schema)); + make_uniq(context, files[0], options, 0, bind_data, column_ids, file_schema, single_threaded)); }; //! There are situations where we only support single threaded scanning bool many_csv_files = files.size() > 1 && files.size() > system_threads * 2; @@ -65,9 +65,12 @@ unique_ptr CSVGlobalState::Next() { shared_ptr current_file; if (cur_idx == 0) { current_file = file_scans.back(); + current_file->buffer_manager->SetSingleThreaded(); } else { + lock_guard parallel_lock(main_mutex); file_scans.emplace_back(make_shared(context, bind_data.files[cur_idx], bind_data.options, - cur_idx, bind_data, column_ids, file_schema)); + cur_idx, bind_data, column_ids, file_schema, + single_threaded)); current_file = file_scans.back(); } auto csv_scanner = diff --git a/src/function/table/copy_csv.cpp b/src/function/table/copy_csv.cpp index e2f9a2403c08..67e8041f4da6 100644 --- a/src/function/table/copy_csv.cpp +++ b/src/function/table/copy_csv.cpp @@ -156,7 +156,7 @@ static unique_ptr ReadCSVBind(ClientContext &context, CopyInfo &in } if (options.auto_detect) { - auto buffer_manager = make_shared(context, options, bind_data->files[0], 0); + auto buffer_manager = make_shared(context, options, bind_data->files[0], 0, false); CSVSniffer sniffer(options, buffer_manager, CSVStateMachineCache::Get(context), {&expected_types, &expected_names}); sniffer.SniffCSV(); diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 8d2e1be0d780..0963f35c0478 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -98,7 +98,7 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio } if (options.auto_detect && !options.file_options.union_by_name) { options.file_path = result->files[0]; - result->buffer_manager = make_shared(context, options, result->files[0], 0); + result->buffer_manager = make_shared(context, options, result->files[0], 0, false); CSVSniffer sniffer(options, result->buffer_manager, CSVStateMachineCache::Get(context), {&return_types, &names}); auto sniffer_result = sniffer.SniffCSV(); diff --git a/src/function/table/sniff_csv.cpp b/src/function/table/sniff_csv.cpp index f135b15c615d..b776288d6f27 100644 --- a/src/function/table/sniff_csv.cpp +++ b/src/function/table/sniff_csv.cpp @@ -120,7 +120,7 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p, auto sniffer_options = data.options; sniffer_options.file_path = data.path; - auto buffer_manager = make_shared(context, sniffer_options, sniffer_options.file_path, 0); + auto buffer_manager = make_shared(context, sniffer_options, sniffer_options.file_path, 0, false); CSVSniffer 
sniffer(sniffer_options, buffer_manager, CSVStateMachineCache::Get(context)); auto sniffer_result = sniffer.SniffCSV(true); string str_opt; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp index 72665ae2de54..8200da88e32a 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp @@ -44,14 +44,15 @@ class CSVBuffer { public: //! Constructor for Initial Buffer CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, - idx_t &global_csv_current_position, idx_t file_number); + idx_t &global_csv_current_position, idx_t file_number, bool single_threaded); //! Constructor for `Next()` Buffers CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, idx_t global_csv_current_position, - idx_t file_number_p, idx_t buffer_idx); + idx_t file_number_p, idx_t buffer_idx, bool single_threaded); //! Creates a new buffer with the next part of the CSV File - shared_ptr Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number, bool &has_seaked); + shared_ptr Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number, bool &has_seaked, + bool single_threaded); //! Gets the buffer actual size idx_t GetBufferSize(); @@ -60,7 +61,7 @@ class CSVBuffer { bool IsCSVFileLastBuffer(); //! Allocates internal buffer, sets 'block' and 'handle' variables. - void AllocateBuffer(idx_t buffer_size); + void AllocateBuffer(idx_t buffer_size, bool can_destroy); void Reload(CSVFileHandle &file_handle); //! Wrapper for the Pin Function, if it can seek, it means that the buffer might have been destroyed, hence we must diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp index b9b4bb92d372..a1127882a718 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp @@ -22,7 +22,7 @@ class CSVStateMachine; class CSVBufferManager { public: CSVBufferManager(ClientContext &context, const CSVReaderOptions &options, const string &file_path, - const idx_t file_idx); + const idx_t file_idx, bool single_threaded); //! Returns a buffer from a buffer id (starting from 0). If it's in the auto-detection then we cache new buffers //! Otherwise we remove them from the cache if they are already there, or just return them bypassing the cache. shared_ptr GetBuffer(const idx_t buffer_idx); @@ -44,6 +44,8 @@ class CSVBufferManager { string GetFilePath(); + void SetSingleThreaded(); + ClientContext &context; idx_t skip_rows = 0; @@ -69,6 +71,7 @@ class CSVBufferManager { //! If the file_handle used seek bool has_seeked = false; unordered_set reset_when_possible; + bool single_threaded; }; } // namespace duckdb diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp index ce9fc08ce0bd..ed859238d9ef 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp @@ -27,9 +27,10 @@ class CSVFileScan { //! 
Path to this file CSVFileScan(ClientContext &context, const string &file_path, const CSVReaderOptions &options, const idx_t file_idx, const ReadCSVData &bind_data, const vector &column_ids, - const vector &file_schema); + const vector &file_schema, bool single_threaded); - CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options); + CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options, + bool single_threaded = false); const string &GetFileName(); const vector &GetNames(); diff --git a/src/main/relation/read_csv_relation.cpp b/src/main/relation/read_csv_relation.cpp index 1500720e0069..5d0b52e5c96d 100644 --- a/src/main/relation/read_csv_relation.cpp +++ b/src/main/relation/read_csv_relation.cpp @@ -56,7 +56,7 @@ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr &context, shared_ptr buffer_manager; context->RunFunctionInTransaction([&]() { - buffer_manager = make_shared(*context, csv_options, files[0], 0); + buffer_manager = make_shared(*context, csv_options, files[0], 0, false); CSVSniffer sniffer(csv_options, buffer_manager, CSVStateMachineCache::Get(*context)); auto sniffer_result = sniffer.SniffCSV(); auto &types = sniffer_result.return_types; From 8db82b0d018f0694e13cfd4ed5a2b5d6ce3edde9 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 20 Mar 2024 15:44:24 +0100 Subject: [PATCH 051/147] bad file --- test/sql/copy/csv/test_gzipped.test | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test/sql/copy/csv/test_gzipped.test diff --git a/test/sql/copy/csv/test_gzipped.test b/test/sql/copy/csv/test_gzipped.test deleted file mode 100644 index e69de29bb2d1..000000000000 From d996c085f2f5da8b296137d9e74d8cd2233f9d5b Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 25 Mar 2024 12:34:39 +0100 Subject: [PATCH 052/147] Restore old bm --- .../csv_scanner/buffer_manager/csv_buffer.cpp | 27 ++++++++++--------- .../buffer_manager/csv_buffer_manager.cpp | 14 +++------- .../buffer_manager/csv_file_handle.cpp | 12 ++++++++- .../scanner/string_value_scanner.cpp | 2 +- .../table_function/csv_file_scanner.cpp | 4 +-- .../table_function/global_csv_state.cpp | 1 - src/function/table/copy_csv.cpp | 2 +- src/function/table/read_csv.cpp | 2 +- src/function/table/sniff_csv.cpp | 2 +- .../operator/csv_scanner/csv_buffer.hpp | 12 ++++----- .../csv_scanner/csv_buffer_manager.hpp | 5 +--- .../operator/csv_scanner/csv_file_handle.hpp | 3 +++ src/main/relation/read_csv_relation.cpp | 2 +- 13 files changed, 46 insertions(+), 42 deletions(-) diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index 6ac66783f041..aaafd21331c6 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -4,9 +4,9 @@ namespace duckdb { CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, - idx_t &global_csv_current_position, idx_t file_number_p, bool single_threaded) - : context(context), file_number(file_number_p), can_seek(file_handle.CanSeek()) { - AllocateBuffer(buffer_size_p, can_seek || single_threaded); + idx_t &global_csv_current_position, idx_t file_number_p) + : context(context), file_number(file_number_p), can_seek(file_handle.CanSeek()), is_pipe(file_handle.OnDiskFile()) { + AllocateBuffer(buffer_size_p); auto buffer = Ptr(); actual_buffer_size = file_handle.Read(buffer, buffer_size_p); while 
(actual_buffer_size < buffer_size_p && !file_handle.FinishedReading()) { @@ -18,10 +18,10 @@ CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle } CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, - idx_t global_csv_current_position, idx_t file_number_p, idx_t buffer_idx_p, bool single_threaded) + idx_t global_csv_current_position, idx_t file_number_p, idx_t buffer_idx_p) : context(context), global_csv_start(global_csv_current_position), file_number(file_number_p), - can_seek(file_handle.CanSeek()), buffer_idx(buffer_idx_p) { - AllocateBuffer(buffer_size, single_threaded || can_seek); + can_seek(file_handle.CanSeek()), is_pipe(file_handle.OnDiskFile()), buffer_idx(buffer_idx_p) { + AllocateBuffer(buffer_size); auto buffer = handle.Ptr(); actual_buffer_size = file_handle.Read(handle.Ptr(), buffer_size); while (actual_buffer_size < buffer_size && !file_handle.FinishedReading()) { @@ -32,16 +32,15 @@ CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t b } shared_ptr CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number_p, - bool &has_seaked, bool single_threaded) { + bool &has_seaked) { if (has_seaked) { // This means that at some point a reload was done, and we are currently on the incorrect position in our file // handle file_handle.Seek(global_csv_start + actual_buffer_size); has_seaked = false; } - auto next_csv_buffer = - make_shared(file_handle, context, buffer_size, global_csv_start + actual_buffer_size, file_number_p, - buffer_idx + 1, single_threaded); + auto next_csv_buffer = make_shared(file_handle, context, buffer_size, + global_csv_start + actual_buffer_size, file_number_p, buffer_idx + 1); if (next_csv_buffer->GetBufferSize() == 0) { // We are done reading return nullptr; @@ -49,8 +48,9 @@ shared_ptr CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_s return next_csv_buffer; } -void CSVBuffer::AllocateBuffer(idx_t buffer_size, bool can_destroy) { +void CSVBuffer::AllocateBuffer(idx_t buffer_size) { auto &buffer_manager = BufferManager::GetBufferManager(context); + bool can_destroy = !is_pipe; handle = buffer_manager.Allocate(MemoryTag::CSV_READER, MaxValue(Storage::BLOCK_SIZE, buffer_size), can_destroy, &block); } @@ -60,14 +60,15 @@ idx_t CSVBuffer::GetBufferSize() { } void CSVBuffer::Reload(CSVFileHandle &file_handle) { - AllocateBuffer(actual_buffer_size, false); + AllocateBuffer(actual_buffer_size); + // If we can seek, we seek and return the correct pointers file_handle.Seek(global_csv_start); file_handle.Read(handle.Ptr(), actual_buffer_size); } shared_ptr CSVBuffer::Pin(CSVFileHandle &file_handle, bool &has_seeked) { auto &buffer_manager = BufferManager::GetBufferManager(context); - if (can_seek && block->IsUnloaded()) { + if (is_pipe && block->IsUnloaded()) { // We have to reload it from disk block = nullptr; Reload(file_handle); diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp index 568343cafad7..2a13158b6081 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp @@ -4,9 +4,8 @@ namespace duckdb { CSVBufferManager::CSVBufferManager(ClientContext &context_p, const CSVReaderOptions &options, const string &file_path_p, - const idx_t file_idx_p, bool single_threaded_p) - : context(context_p), file_idx(file_idx_p), 
file_path(file_path_p), buffer_size(CSVBuffer::CSV_BUFFER_SIZE), - single_threaded(single_threaded_p) { + const idx_t file_idx_p) + : context(context_p), file_idx(file_idx_p), file_path(file_path_p), buffer_size(CSVBuffer::CSV_BUFFER_SIZE) { D_ASSERT(!file_path.empty()); file_handle = ReadCSV::OpenCSV(file_path, options.compression, context); skip_rows = options.dialect_options.skip_rows.GetValue(); @@ -29,7 +28,7 @@ void CSVBufferManager::UnpinBuffer(const idx_t cache_idx) { void CSVBufferManager::Initialize() { if (cached_buffers.empty()) { cached_buffers.emplace_back( - make_shared(context, buffer_size, *file_handle, global_csv_pos, file_idx, single_threaded)); + make_shared(context, buffer_size, *file_handle, global_csv_pos, file_idx)); last_buffer = cached_buffers.front(); } } @@ -48,8 +47,7 @@ bool CSVBufferManager::ReadNextAndCacheIt() { last_buffer->last_buffer = true; return false; } - auto maybe_last_buffer = - last_buffer->Next(*file_handle, cur_buffer_size, file_idx, has_seeked, single_threaded); + auto maybe_last_buffer = last_buffer->Next(*file_handle, cur_buffer_size, file_idx, has_seeked); if (!maybe_last_buffer) { last_buffer->last_buffer = true; return false; @@ -128,8 +126,4 @@ string CSVBufferManager::GetFilePath() { return file_path; } -void CSVBufferManager::SetSingleThreaded() { - single_threaded = true; -} - } // namespace duckdb diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp index cbb1c1cd86e7..cf4bf9fafdf2 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp @@ -9,6 +9,7 @@ CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptrCanSeek(); on_disk_file = file_handle->OnDiskFile(); file_size = file_handle->GetFileSize(); + is_pipe = file_handle->IsPipe(); uncompressed = compression == FileCompressionType::UNCOMPRESSED; } @@ -33,7 +34,12 @@ bool CSVFileHandle::CanSeek() { void CSVFileHandle::Seek(idx_t position) { if (!can_seek) { - throw InternalException("Cannot seek in this file"); + if (is_pipe) { + throw InternalException("Can't reconstruct the buffer from a on disk file."); + } + //! If we can't seek in this file, we reset it and re-read up to the necessary point. 
+ file_handle->Reset(); + // file_handle->Read(); } file_handle->Seek(position); } @@ -42,6 +48,10 @@ bool CSVFileHandle::OnDiskFile() { return on_disk_file; } +bool CSVFileHandle::IsPipe() { + return is_pipe; +} + idx_t CSVFileHandle::FileSize() { return file_size; } diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index a0376cc94947..b1b2afa39c98 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -530,7 +530,7 @@ unique_ptr StringValueScanner::GetCSVScanner(ClientContext & state_machine->dialect_options.num_cols = options.dialect_options.num_cols; state_machine->dialect_options.header = options.dialect_options.header; - auto buffer_manager = make_shared(context, options, options.file_path, 0, false); + auto buffer_manager = make_shared(context, options, options.file_path, 0); auto scanner = make_uniq(buffer_manager, state_machine, make_shared()); scanner->csv_file_scan = make_shared(context, options.file_path, options, false); scanner->csv_file_scan->InitializeProjection(); diff --git a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp index 8013a2da10ef..7d975221c3ac 100644 --- a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +++ b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp @@ -73,7 +73,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, cons } // Initialize Buffer Manager - buffer_manager = make_shared(context, options, file_path, file_idx, single_threaded); + buffer_manager = make_shared(context, options, file_path, file_idx); // Initialize On Disk and Size of file on_disk_file = buffer_manager->file_handle->OnDiskFile(); file_size = buffer_manager->file_handle->FileSize(); @@ -132,7 +132,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_name, CSVRea bool single_threaded) : file_path(file_name), file_idx(0), error_handler(make_shared(options_p.ignore_errors)), options(options_p) { - buffer_manager = make_shared(context, options, file_path, file_idx, single_threaded); + buffer_manager = make_shared(context, options, file_path, file_idx); // Initialize On Disk and Size of file on_disk_file = buffer_manager->file_handle->OnDiskFile(); file_size = buffer_manager->file_handle->FileSize(); diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index f0ec98fbbdfb..524482e0de55 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -65,7 +65,6 @@ unique_ptr CSVGlobalState::Next() { shared_ptr current_file; if (cur_idx == 0) { current_file = file_scans.back(); - current_file->buffer_manager->SetSingleThreaded(); } else { lock_guard parallel_lock(main_mutex); file_scans.emplace_back(make_shared(context, bind_data.files[cur_idx], bind_data.options, diff --git a/src/function/table/copy_csv.cpp b/src/function/table/copy_csv.cpp index 67e8041f4da6..e2f9a2403c08 100644 --- a/src/function/table/copy_csv.cpp +++ b/src/function/table/copy_csv.cpp @@ -156,7 +156,7 @@ static unique_ptr ReadCSVBind(ClientContext &context, CopyInfo &in } if (options.auto_detect) { - auto buffer_manager = 
make_shared(context, options, bind_data->files[0], 0, false); + auto buffer_manager = make_shared(context, options, bind_data->files[0], 0); CSVSniffer sniffer(options, buffer_manager, CSVStateMachineCache::Get(context), {&expected_types, &expected_names}); sniffer.SniffCSV(); diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 0963f35c0478..8d2e1be0d780 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -98,7 +98,7 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio } if (options.auto_detect && !options.file_options.union_by_name) { options.file_path = result->files[0]; - result->buffer_manager = make_shared(context, options, result->files[0], 0, false); + result->buffer_manager = make_shared(context, options, result->files[0], 0); CSVSniffer sniffer(options, result->buffer_manager, CSVStateMachineCache::Get(context), {&return_types, &names}); auto sniffer_result = sniffer.SniffCSV(); diff --git a/src/function/table/sniff_csv.cpp b/src/function/table/sniff_csv.cpp index d27817f818eb..3e859a65afe3 100644 --- a/src/function/table/sniff_csv.cpp +++ b/src/function/table/sniff_csv.cpp @@ -120,7 +120,7 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p, auto sniffer_options = data.options; sniffer_options.file_path = data.path; - auto buffer_manager = make_shared(context, sniffer_options, sniffer_options.file_path, 0, false); + auto buffer_manager = make_shared(context, sniffer_options, sniffer_options.file_path, 0); if (sniffer_options.name_list.empty()) { sniffer_options.name_list = data.names_csv; } diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp index 8200da88e32a..a5a90d763e06 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp @@ -44,15 +44,14 @@ class CSVBuffer { public: //! Constructor for Initial Buffer CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, - idx_t &global_csv_current_position, idx_t file_number, bool single_threaded); + idx_t &global_csv_current_position, idx_t file_number); //! Constructor for `Next()` Buffers CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, idx_t global_csv_current_position, - idx_t file_number_p, idx_t buffer_idx, bool single_threaded); + idx_t file_number_p, idx_t buffer_idx); //! Creates a new buffer with the next part of the CSV File - shared_ptr Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number, bool &has_seaked, - bool single_threaded); + shared_ptr Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number, bool &has_seaked); //! Gets the buffer actual size idx_t GetBufferSize(); @@ -61,7 +60,7 @@ class CSVBuffer { bool IsCSVFileLastBuffer(); //! Allocates internal buffer, sets 'block' and 'handle' variables. - void AllocateBuffer(idx_t buffer_size, bool can_destroy); + void AllocateBuffer(idx_t buffer_size); void Reload(CSVFileHandle &file_handle); //! Wrapper for the Pin Function, if it can seek, it means that the buffer might have been destroyed, hence we must @@ -92,8 +91,9 @@ class CSVBuffer { //! Number of the file that is in this buffer idx_t file_number = 0; //! If we can seek in the file or not. - //! If we can't seek, this means we can't destroy the buffers bool can_seek; + //! If this file is being fed by a pipe. + bool is_pipe; //! 
Buffer Index, used as a batch index for insertion-order preservation idx_t buffer_idx = 0; //! -------- Allocated Block ---------// diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp index a1127882a718..b9b4bb92d372 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp @@ -22,7 +22,7 @@ class CSVStateMachine; class CSVBufferManager { public: CSVBufferManager(ClientContext &context, const CSVReaderOptions &options, const string &file_path, - const idx_t file_idx, bool single_threaded); + const idx_t file_idx); //! Returns a buffer from a buffer id (starting from 0). If it's in the auto-detection then we cache new buffers //! Otherwise we remove them from the cache if they are already there, or just return them bypassing the cache. shared_ptr GetBuffer(const idx_t buffer_idx); @@ -44,8 +44,6 @@ class CSVBufferManager { string GetFilePath(); - void SetSingleThreaded(); - ClientContext &context; idx_t skip_rows = 0; @@ -71,7 +69,6 @@ class CSVBufferManager { //! If the file_handle used seek bool has_seeked = false; unordered_set reset_when_possible; - bool single_threaded; }; } // namespace duckdb diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp index c7e70b008ae7..95a3cc6dafc9 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp @@ -28,6 +28,7 @@ struct CSVFileHandle { bool CanSeek(); void Seek(idx_t position); bool OnDiskFile(); + bool IsPipe(); idx_t FileSize(); @@ -50,6 +51,8 @@ struct CSVFileHandle { string path; bool can_seek = false; bool on_disk_file = false; + bool is_pipe = false; + idx_t file_size = 0; idx_t requested_bytes = 0; diff --git a/src/main/relation/read_csv_relation.cpp b/src/main/relation/read_csv_relation.cpp index 5d0b52e5c96d..1500720e0069 100644 --- a/src/main/relation/read_csv_relation.cpp +++ b/src/main/relation/read_csv_relation.cpp @@ -56,7 +56,7 @@ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr &context, shared_ptr buffer_manager; context->RunFunctionInTransaction([&]() { - buffer_manager = make_shared(*context, csv_options, files[0], 0, false); + buffer_manager = make_shared(*context, csv_options, files[0], 0); CSVSniffer sniffer(csv_options, buffer_manager, CSVStateMachineCache::Get(*context)); auto sniffer_result = sniffer.SniffCSV(); auto &types = sniffer_result.return_types; From 03cb16a6ca02a84bebea5448f904ff75ffe39312 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 25 Mar 2024 16:41:31 +0100 Subject: [PATCH 053/147] Not store buffers from gzipped files, reset buffer manager after sniffing, implement brute force seeking for gzipped files --- .../csv_scanner/buffer_manager/csv_buffer.cpp | 4 ++-- .../buffer_manager/csv_buffer_manager.cpp | 13 +++++++++++++ .../buffer_manager/csv_file_handle.cpp | 17 ++++++++++++++--- .../csv_scanner/sniffer/csv_sniffer.cpp | 2 ++ .../operator/csv_scanner/csv_buffer_manager.hpp | 1 + .../operator/csv_scanner/csv_file_handle.hpp | 4 +++- 6 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index aaafd21331c6..2c85e0ee2924 100644 --- 
a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -36,7 +36,7 @@ shared_ptr CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_s if (has_seaked) { // This means that at some point a reload was done, and we are currently on the incorrect position in our file // handle - file_handle.Seek(global_csv_start + actual_buffer_size); + file_handle.Seek(handle.Ptr(), actual_buffer_size, global_csv_start + actual_buffer_size); has_seaked = false; } auto next_csv_buffer = make_shared(file_handle, context, buffer_size, @@ -62,7 +62,7 @@ idx_t CSVBuffer::GetBufferSize() { void CSVBuffer::Reload(CSVFileHandle &file_handle) { AllocateBuffer(actual_buffer_size); // If we can seek, we seek and return the correct pointers - file_handle.Seek(global_csv_start); + file_handle.Seek(handle.Ptr(), actual_buffer_size, global_csv_start); file_handle.Read(handle.Ptr(), actual_buffer_size); } diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp index 2a13158b6081..66a6e6ab3cec 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp @@ -122,6 +122,19 @@ bool CSVBufferManager::Done() { return done; } +void CSVBufferManager::ResetBufferManager() { + if (!file_handle->IsPipe()) { + // If this is not a pipe we reset the buffer manager and restart it when doing the actual scan + cached_buffers.clear(); + reset_when_possible.clear(); + file_handle->Reset(); + last_buffer = nullptr; + done = false; + global_csv_pos = 0; + Initialize(); + } +} + string CSVBufferManager::GetFilePath() { return file_path; } diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp index cf4bf9fafdf2..528246d8119d 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp @@ -32,14 +32,19 @@ bool CSVFileHandle::CanSeek() { return can_seek; } -void CSVFileHandle::Seek(idx_t position) { +void CSVFileHandle::Seek(void *buffer, idx_t nr_bytes, idx_t position) { if (!can_seek) { if (is_pipe) { throw InternalException("Can't reconstruct the buffer from a on disk file."); } - //! If we can't seek in this file, we reset it and re-read up to the necessary point. + // If we can't seek in this file, we reset it and re-read up to the necessary point. + // This should only happen on extreme cases of memory pressure file_handle->Reset(); - // file_handle->Read(); + D_ASSERT(position % nr_bytes == 0); + for (idx_t i = 0; i < position / nr_bytes; i++) { + file_handle->Read(buffer, nr_bytes); + } + return; } file_handle->Seek(position); } @@ -48,6 +53,12 @@ bool CSVFileHandle::OnDiskFile() { return on_disk_file; } +void CSVFileHandle::Reset() { + file_handle->Reset(); + finished = false; + requested_bytes = 0; +} + bool CSVFileHandle::IsPipe() { return is_pipe; } diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index 3b60f247aa27..a62aed3ca3d7 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -93,6 +93,8 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) { DetectHeader(); // 5. 
Type Replacement ReplaceTypes(); + buffer_manager->ResetBufferManager(); + if (!best_candidate->error_handler->errors.empty() && !options.ignore_errors) { for (auto &error_vector : best_candidate->error_handler->errors) { for (auto &error : error_vector.second) { diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp index b9b4bb92d372..f8c6f246c3e6 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp @@ -42,6 +42,7 @@ class CSVBufferManager { //! once. bool Done(); + void ResetBufferManager(); string GetFilePath(); ClientContext &context; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp index 95a3cc6dafc9..7d4b55e424fd 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp @@ -26,10 +26,12 @@ struct CSVFileHandle { public: bool CanSeek(); - void Seek(idx_t position); + void Seek(void *buffer, idx_t nr_bytes, idx_t position); bool OnDiskFile(); bool IsPipe(); + void Reset(); + idx_t FileSize(); bool FinishedReading(); From 4eba1ab60d5259db6a8badf8d8cedcdbdff72884 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 26 Mar 2024 13:55:10 +0100 Subject: [PATCH 054/147] fix old parameter --- .../operator/csv_scanner/scanner/string_value_scanner.cpp | 2 +- .../csv_scanner/table_function/csv_file_scanner.cpp | 5 ++--- .../csv_scanner/table_function/global_csv_state.cpp | 7 +++---- .../execution/operator/csv_scanner/csv_file_scanner.hpp | 5 ++--- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index b1b2afa39c98..9582e1c1af2f 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -532,7 +532,7 @@ unique_ptr StringValueScanner::GetCSVScanner(ClientContext & state_machine->dialect_options.header = options.dialect_options.header; auto buffer_manager = make_shared(context, options, options.file_path, 0); auto scanner = make_uniq(buffer_manager, state_machine, make_shared()); - scanner->csv_file_scan = make_shared(context, options.file_path, options, false); + scanner->csv_file_scan = make_shared(context, options.file_path, options); scanner->csv_file_scan->InitializeProjection(); return scanner; } diff --git a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp index 7d975221c3ac..0532fc678a41 100644 --- a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +++ b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp @@ -41,7 +41,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, shared_ptr bu CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, const CSVReaderOptions &options_p, const idx_t file_idx_p, const ReadCSVData &bind_data, const vector &column_ids, - const vector &file_schema, bool single_threaded) + const vector &file_schema) : file_path(file_path_p), file_idx(file_idx_p), error_handler(make_shared(options_p.ignore_errors)), options(options_p) { if (file_idx < 
bind_data.union_readers.size()) { @@ -128,8 +128,7 @@ CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, cons InitializeFileNamesTypes(); } -CSVFileScan::CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options_p, - bool single_threaded) +CSVFileScan::CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options_p) : file_path(file_name), file_idx(0), error_handler(make_shared(options_p.ignore_errors)), options(options_p) { buffer_manager = make_shared(context, options, file_path, file_idx); diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 524482e0de55..e92ed51273ef 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -22,7 +22,7 @@ CSVGlobalState::CSVGlobalState(ClientContext &context_p, const shared_ptr(context, files[0], options, 0, bind_data, column_ids, file_schema, single_threaded)); + make_uniq(context, files[0], options, 0, bind_data, column_ids, file_schema)); }; //! There are situations where we only support single threaded scanning bool many_csv_files = files.size() > 1 && files.size() > system_threads * 2; @@ -68,8 +68,7 @@ unique_ptr CSVGlobalState::Next() { } else { lock_guard parallel_lock(main_mutex); file_scans.emplace_back(make_shared(context, bind_data.files[cur_idx], bind_data.options, - cur_idx, bind_data, column_ids, file_schema, - single_threaded)); + cur_idx, bind_data, column_ids, file_schema)); current_file = file_scans.back(); } auto csv_scanner = @@ -101,7 +100,7 @@ unique_ptr CSVGlobalState::Next() { // If we have a next file we have to construct the file scan for that file_scans.emplace_back(make_shared(context, bind_data.files[current_file_idx], bind_data.options, current_file_idx, bind_data, column_ids, - file_schema, single_threaded)); + file_schema)); // And re-start the boundary-iterator auto buffer_size = file_scans.back()->buffer_manager->GetBuffer(0)->actual_size; current_boundary = CSVIterator(current_file_idx, 0, 0, 0, buffer_size); diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp index ed859238d9ef..ce9fc08ce0bd 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp @@ -27,10 +27,9 @@ class CSVFileScan { //! 
Path to this file CSVFileScan(ClientContext &context, const string &file_path, const CSVReaderOptions &options, const idx_t file_idx, const ReadCSVData &bind_data, const vector &column_ids, - const vector &file_schema, bool single_threaded); + const vector &file_schema); - CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options, - bool single_threaded = false); + CSVFileScan(ClientContext &context, const string &file_name, CSVReaderOptions &options); const string &GetFileName(); const vector &GetNames(); From ce9507077bbc837779c4ea60b711a271dbd113d5 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 26 Mar 2024 15:34:47 +0100 Subject: [PATCH 055/147] Cleanup buffer managers --- .../table_function/csv_file_scanner.cpp | 5 +++++ .../table_function/global_csv_state.cpp | 17 +++++++++++++++-- src/function/table/read_csv.cpp | 4 ++-- .../operator/csv_scanner/csv_file_scanner.hpp | 1 + .../operator/csv_scanner/global_csv_state.hpp | 4 +++- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp index 0532fc678a41..f2ae71dbb8f8 100644 --- a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +++ b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp @@ -222,4 +222,9 @@ void CSVFileScan::InitializeProjection() { reader_data.column_mapping.push_back(i); } } + +void CSVFileScan::Finish() { + buffer_manager.reset(); +} + } // namespace duckdb diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 00a9052e7e48..9e8afd52a404 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -56,7 +56,7 @@ double CSVGlobalState::GetProgress(const ReadCSVData &bind_data_p) const { return percentage * 100; } -unique_ptr CSVGlobalState::Next() { +unique_ptr CSVGlobalState::Next(StringValueScanner *previous_scanner) { if (single_threaded) { idx_t cur_idx = last_file_idx++; if (cur_idx >= bind_data.files.size()) { @@ -71,6 +71,12 @@ unique_ptr CSVGlobalState::Next() { cur_idx, bind_data, column_ids, file_schema)); current_file = file_scans.back(); } + if (previous_scanner) { + lock_guard parallel_lock(main_mutex); + previous_scanner->buffer_tracker.reset(); + current_buffer_in_use.reset(); + previous_scanner->csv_file_scan->Finish(); + } auto csv_scanner = make_uniq(scanner_idx++, current_file->buffer_manager, current_file->state_machine, current_file->error_handler, current_file, false, current_boundary); @@ -89,7 +95,14 @@ unique_ptr CSVGlobalState::Next() { auto csv_scanner = make_uniq(scanner_idx++, current_file.buffer_manager, current_file.state_machine, current_file.error_handler, file_scans.back(), false, current_boundary); - + threads_per_file[csv_scanner->csv_file_scan->file_idx]++; + if (previous_scanner) { + threads_per_file[previous_scanner->csv_file_scan->file_idx]--; + if (threads_per_file[previous_scanner->csv_file_scan->file_idx] == 0) { + previous_scanner->buffer_tracker.reset(); + previous_scanner->csv_file_scan->Finish(); + } + } csv_scanner->buffer_tracker = current_buffer_in_use; // We then produce the next boundary diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 8d2e1be0d780..71e6f2255a7c 100644 --- 
a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -185,7 +185,7 @@ unique_ptr ReadCSVInitLocal(ExecutionContext &context, return nullptr; } auto &global_state = global_state_p->Cast(); - auto csv_scanner = global_state.Next(); + auto csv_scanner = global_state.Next(nullptr); if (!csv_scanner) { global_state.DecrementThread(); } @@ -211,7 +211,7 @@ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, break; } if (csv_local_state.csv_reader->FinishedIterator()) { - csv_local_state.csv_reader = csv_global_state.Next(); + csv_local_state.csv_reader = csv_global_state.Next(csv_local_state.csv_reader.get()); if (!csv_local_state.csv_reader) { csv_global_state.DecrementThread(); break; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp index ce9fc08ce0bd..0ba7c0e02dcd 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp @@ -35,6 +35,7 @@ class CSVFileScan { const vector &GetNames(); const vector &GetTypes(); void InitializeProjection(); + void Finish(); //! Initialize the actual names and types to be scanned from the file void InitializeFileNamesTypes(); diff --git a/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp b/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp index bbeb2bfee094..4d123480f45c 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp @@ -30,7 +30,7 @@ struct CSVGlobalState : public GlobalTableFunctionState { //! Generates a CSV Scanner, with information regarding the piece of buffer it should be read. //! In case it returns a nullptr it means we are done reading these files. 
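[Editor's note, not part of the patch series — the header hunk continues right after this aside. The buffer cleanup that PATCH 055 threads through Next(), which the hunk just below changes to receive the previous scanner, boils down to reference counting scanners per file: the last scanner to finish a file drops that file's buffers. A minimal sketch of that idea under stated assumptions — ScanScheduler and FileBuffers are made-up names, not DuckDB API:

#include <cstddef>
#include <iostream>
#include <unordered_map>

// Hypothetical stand-in for the per-file buffers that should be dropped
// once the last scanner of that file hands in its results.
struct FileBuffers {
	void Release(std::size_t file_idx) {
		std::cout << "releasing buffers of file " << file_idx << "\n";
	}
};

class ScanScheduler {
public:
	// A scanner was handed a piece of file `file_idx`.
	void ScannerStarted(std::size_t file_idx) {
		++threads_per_file[file_idx];
	}
	// A scanner finished; the last one out of a file releases its buffers,
	// mirroring the threads_per_file bookkeeping the patch adds to Next().
	void ScannerFinished(std::size_t file_idx, FileBuffers &buffers) {
		if (--threads_per_file[file_idx] == 0) {
			buffers.Release(file_idx);
		}
	}

private:
	std::unordered_map<std::size_t, std::size_t> threads_per_file;
};

int main() {
	ScanScheduler scheduler;
	FileBuffers buffers;
	scheduler.ScannerStarted(0);
	scheduler.ScannerStarted(0);
	scheduler.ScannerFinished(0, buffers); // one scanner still reading file 0
	scheduler.ScannerFinished(0, buffers); // last one out: buffers released
	return 0;
}

In the patch itself the counter lives in CSVGlobalState::threads_per_file, and "release" means resetting buffer_tracker and calling CSVFileScan::Finish(). End of editorial aside.]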
- unique_ptr Next(); + unique_ptr Next(StringValueScanner *previous_scanner); void FillRejectsTable(); @@ -75,6 +75,8 @@ struct CSVGlobalState : public GlobalTableFunctionState { atomic last_file_idx; shared_ptr current_buffer_in_use; + + unordered_map threads_per_file; }; } // namespace duckdb From a2ee8fc86e956b5cf9592c24f5be4e75b4b0f038 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 26 Mar 2024 15:58:54 +0100 Subject: [PATCH 056/147] IsPipe should check pipe --- .../operator/csv_scanner/buffer_manager/csv_buffer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index 2c85e0ee2924..0886a58f1a35 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -5,7 +5,7 @@ namespace duckdb { CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, idx_t &global_csv_current_position, idx_t file_number_p) - : context(context), file_number(file_number_p), can_seek(file_handle.CanSeek()), is_pipe(file_handle.OnDiskFile()) { + : context(context), file_number(file_number_p), can_seek(file_handle.CanSeek()), is_pipe(file_handle.IsPipe()) { AllocateBuffer(buffer_size_p); auto buffer = Ptr(); actual_buffer_size = file_handle.Read(buffer, buffer_size_p); @@ -20,7 +20,7 @@ CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, idx_t global_csv_current_position, idx_t file_number_p, idx_t buffer_idx_p) : context(context), global_csv_start(global_csv_current_position), file_number(file_number_p), - can_seek(file_handle.CanSeek()), is_pipe(file_handle.OnDiskFile()), buffer_idx(buffer_idx_p) { + can_seek(file_handle.CanSeek()), is_pipe(file_handle.IsPipe()), buffer_idx(buffer_idx_p) { AllocateBuffer(buffer_size); auto buffer = handle.Ptr(); actual_buffer_size = file_handle.Read(handle.Ptr(), buffer_size); From 3fdf469f191372f6932f41cb070532154ae32162 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 26 Mar 2024 18:12:42 +0100 Subject: [PATCH 057/147] fix small pipe bug --- .../operator/csv_scanner/buffer_manager/csv_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index 0886a58f1a35..7d2913e22e81 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -68,7 +68,7 @@ void CSVBuffer::Reload(CSVFileHandle &file_handle) { shared_ptr CSVBuffer::Pin(CSVFileHandle &file_handle, bool &has_seeked) { auto &buffer_manager = BufferManager::GetBufferManager(context); - if (is_pipe && block->IsUnloaded()) { + if (!is_pipe && block->IsUnloaded()) { // We have to reload it from disk block = nullptr; Reload(file_handle); From 111e9eefef6d7f253258a6fd1dca867975c5b8ef Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 2 Apr 2024 12:28:26 +0200 Subject: [PATCH 058/147] PR requests and adding/running big tests --- .../csv_scanner/buffer_manager/csv_buffer.cpp | 4 +- .../buffer_manager/csv_file_handle.cpp | 13 ++--- .../csv_scanner/sniffer/csv_sniffer.cpp | 8 ++- .../table_function/global_csv_state.cpp | 4 +- .../operator/csv_scanner/csv_file_handle.hpp | 2 +- 
.../operator/csv_scanner/global_csv_state.hpp | 2 +- .../copy/csv/test_big_compressed.test_slow | 32 +++++++++++ ...est_multiple_big_compressed_csvs.test_slow | 53 +++++++++++++++++++ 8 files changed, 101 insertions(+), 17 deletions(-) create mode 100644 test/sql/copy/csv/test_big_compressed.test_slow create mode 100644 test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index 7d2913e22e81..79a1e8fd762b 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -36,7 +36,7 @@ shared_ptr CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_s if (has_seaked) { // This means that at some point a reload was done, and we are currently on the incorrect position in our file // handle - file_handle.Seek(handle.Ptr(), actual_buffer_size, global_csv_start + actual_buffer_size); + file_handle.Seek(global_csv_start + actual_buffer_size); has_seaked = false; } auto next_csv_buffer = make_shared(file_handle, context, buffer_size, @@ -62,7 +62,7 @@ idx_t CSVBuffer::GetBufferSize() { void CSVBuffer::Reload(CSVFileHandle &file_handle) { AllocateBuffer(actual_buffer_size); // If we can seek, we seek and return the correct pointers - file_handle.Seek(handle.Ptr(), actual_buffer_size, global_csv_start); + file_handle.Seek(global_csv_start); file_handle.Read(handle.Ptr(), actual_buffer_size); } diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp index 9db0131db294..d37e38be14ec 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp @@ -32,19 +32,12 @@ bool CSVFileHandle::CanSeek() { return can_seek; } -void CSVFileHandle::Seek(void *buffer, idx_t nr_bytes, idx_t position) { +void CSVFileHandle::Seek(idx_t position) { if (!can_seek) { if (is_pipe) { - throw InternalException("Can't reconstruct the buffer from a on disk file."); + throw InternalException("Trying to seek a piped CSV File."); } - // If we can't seek in this file, we reset it and re-read up to the necessary point. - // This should only happen on extreme cases of memory pressure - file_handle->Reset(); - D_ASSERT(position % nr_bytes == 0); - for (idx_t i = 0; i < position / nr_bytes; i++) { - file_handle->Read(buffer, nr_bytes); - } - return; + throw InternalException("Trying to seek a compressed CSV File."); } file_handle->Seek(position); } diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index a62aed3ca3d7..057120591132 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -93,7 +93,13 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) { DetectHeader(); // 5. 
Type Replacement
 	ReplaceTypes();
-	buffer_manager->ResetBufferManager();
+
+	// We reset the buffer for compressed files.
+	// This is done because we can't easily seek on compressed files; if a buffer goes out of scope, we must read from
+	// the start.
+	if (!buffer_manager->file_handle->uncompressed) {
+		buffer_manager->ResetBufferManager();
+	}
 
 	if (!best_candidate->error_handler->errors.empty() && !options.ignore_errors) {
 		for (auto &error_vector : best_candidate->error_handler->errors) {
 			for (auto &error : error_vector.second) {
diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index 9e8afd52a404..a3cca5a55ea2 100644
--- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -24,7 +24,7 @@ CSVGlobalState::CSVGlobalState(ClientContext &context_p, const shared_ptr
 		    make_uniq<CSVFileScan>(context, files[0], options, 0, bind_data, column_ids, file_schema));
 	};
-	//! There are situations where we only support single threaded scanning
+	// There are situations where we only support single threaded scanning
 	bool many_csv_files = files.size() > 1 && files.size() > system_threads * 2;
 	single_threaded = many_csv_files || !options.parallel;
 	last_file_idx = 0;
@@ -56,7 +56,7 @@ double CSVGlobalState::GetProgress(const ReadCSVData &bind_data_p) const {
 	return percentage * 100;
 }
 
-unique_ptr<StringValueScanner> CSVGlobalState::Next(StringValueScanner *previous_scanner) {
+unique_ptr<StringValueScanner> CSVGlobalState::Next(optional_ptr<StringValueScanner> previous_scanner) {
 	if (single_threaded) {
 		idx_t cur_idx = last_file_idx++;
 		if (cur_idx >= bind_data.files.size()) {
diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp
index 7d4b55e424fd..43e4e275583e 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp
@@ -26,7 +26,7 @@ struct CSVFileHandle {
 public:
 	bool CanSeek();
-	void Seek(void *buffer, idx_t nr_bytes, idx_t position);
+	void Seek(idx_t position);
 	bool OnDiskFile();
 	bool IsPipe();
diff --git a/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp b/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp
index 4d123480f45c..648948c19e37 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp
@@ -30,7 +30,7 @@ struct CSVGlobalState : public GlobalTableFunctionState {
 	//! Generates a CSV Scanner, with information regarding the piece of buffer it should be read.
 	//! In case it returns a nullptr it means we are done reading these files.
-	unique_ptr<StringValueScanner> Next(StringValueScanner *previous_scanner);
+	unique_ptr<StringValueScanner> Next(optional_ptr<StringValueScanner> previous_scanner);
 
 	void FillRejectsTable();
diff --git a/test/sql/copy/csv/test_big_compressed.test_slow b/test/sql/copy/csv/test_big_compressed.test_slow
new file mode 100644
index 000000000000..eaa766ac2fa6
--- /dev/null
+++ b/test/sql/copy/csv/test_big_compressed.test_slow
@@ -0,0 +1,32 @@
+# name: test/sql/copy/csv/test_big_compressed.test_slow
+# description: Test scan over a single big compressed csv file
+# group: [csv]
+
+# This test is way too slow to run on CI; generating a SF100 TPC-H lineitem file takes a LOT of time.
+# Still useful to check for problems locally.
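[Editor's note, not part of the patch series — the new test listing continues right after this aside. PATCH 053 emulated Seek() on gzipped input by resetting the stream and re-reading whole buffers up to the target position, and PATCH 058 above drops that in favour of throwing, resetting the buffer manager after sniffing instead. A minimal sketch of the dropped brute-force approach, assuming a hypothetical ForwardOnlyStream rather than the real DuckDB file handle:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a non-seekable source (a pipe or a gzip
// stream): it can only be reset to the start and read forward.
struct ForwardOnlyStream {
	std::vector<char> data;
	std::size_t pos = 0;

	void Reset() {
		pos = 0;
	}
	std::size_t Read(char *out, std::size_t nr_bytes) {
		std::size_t to_read = std::min(nr_bytes, data.size() - pos);
		std::copy_n(data.data() + pos, to_read, out);
		pos += to_read;
		return to_read;
	}
};

// "Seek" by resetting and re-reading whole buffers from the start; the
// target must be buffer-aligned, hence the assert (as in the D_ASSERT of
// PATCH 053).
void BruteForceSeek(ForwardOnlyStream &stream, char *buffer, std::size_t nr_bytes, std::size_t position) {
	assert(position % nr_bytes == 0);
	stream.Reset();
	for (std::size_t i = 0; i < position / nr_bytes; i++) {
		stream.Read(buffer, nr_bytes);
	}
}

int main() {
	ForwardOnlyStream stream;
	stream.data.assign(64, 'x');
	char buffer[16];
	BruteForceSeek(stream, buffer, sizeof(buffer), 32); // re-reads two buffers
	return 0;
}

The cost is linear in the seek target, which is why PATCH 058 removes it: seeking a pipe or a compressed file now raises an InternalException, and the sniffer instead resets the buffer manager so the real scan re-reads the file from the start. End of editorial aside.]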
+mode skip + +require tpch + +statement ok +CALL dbgen(sf=100); + +statement ok +copy lineitem to '__TEST_DIR__/lineitem_100.csv.gz'; + +statement ok +SET temp_directory='' + +# load the DB from disk (Avoids OOM when generating ze table) +load __TEST_DIR__/lineitem_100_compressed.db + +statement ok +CREATE TABLE lineitem_2(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity DECIMAL(15,2) NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL); + +statement ok +INSERT INTO lineitem_2 FROM '__TEST_DIR__/lineitem_100.csv.gz'; + +query I +select count(*) from lineitem_2 +---- +600037902 \ No newline at end of file diff --git a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow new file mode 100644 index 000000000000..f4183fb24401 --- /dev/null +++ b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow @@ -0,0 +1,53 @@ +# name: test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow +# description: Test scan over multiple compressed big csv files +# group: [csv] + +require tpch + +statement ok +CALL dbgen(sf=10); + +statement ok +copy lineitem to ' __TEST_DIR__/lineitem.csv.gz'; + +statement ok +SET temp_directory='' + +# load the DB from disk (Avoids OOM when generating ze table) +load __TEST_DIR__/lineitem_compressed.db + +statement ok +CREATE TABLE lineitem_2(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity DECIMAL(15,2) NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL); + +statement ok +INSERT INTO lineitem_2 FROM read_csv([ + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', + ' __TEST_DIR__/lineitem.csv.gz', +]); + +query I +select count(*) from lineitem_2 +---- +1439665248 \ No newline at end of file From 55112d11ec4612eea3db506b162e9d69a4f98ddd Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 2 Apr 2024 13:22:59 +0200 Subject: [PATCH 059/147] Woopsie on path --- ...est_multiple_big_compressed_csvs.test_slow | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git 
a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow index f4183fb24401..07b99f83c601 100644 --- a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow +++ b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow @@ -8,7 +8,7 @@ statement ok CALL dbgen(sf=10); statement ok -copy lineitem to ' __TEST_DIR__/lineitem.csv.gz'; +copy lineitem to '__TEST_DIR__/lineitem.csv.gz'; statement ok SET temp_directory='' @@ -21,30 +21,30 @@ CREATE TABLE lineitem_2(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, statement ok INSERT INTO lineitem_2 FROM read_csv([ - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', - ' __TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', + '__TEST_DIR__/lineitem.csv.gz', ]); query I From 30beaa82080cd95bb7ea52516460f120f94bc199 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 2 Apr 2024 18:11:00 +0200 Subject: [PATCH 060/147] More on merge --- .github/regression/micro_extended.csv | 1 + .../scanner/string_value_scanner.cpp | 21 +++---- .../csv_scanner/sniffer/dialect_detection.cpp | 4 +- .../operator/csv_scanner/util/csv_error.cpp | 58 +++++++++---------- .../operator/csv_scanner/csv_error.hpp | 3 +- src/storage/serialization/serialize_nodes.cpp | 56 +++++++++--------- 6 files changed, 67 insertions(+), 76 deletions(-) diff --git a/.github/regression/micro_extended.csv b/.github/regression/micro_extended.csv index 6973785b4c98..a9517ef309b4 100644 --- a/.github/regression/micro_extended.csv +++ b/.github/regression/micro_extended.csv @@ -78,6 +78,7 @@ benchmark/micro/copy/to_parquet_partition_by_few.benchmark benchmark/micro/copy/to_parquet_partition_by_many.benchmark benchmark/micro/csv/16_byte_values.benchmark benchmark/micro/csv/1_byte_values.benchmark +benchmark/micro/csv/1brl.benchmark benchmark/micro/csv/multiple_read.benchmark benchmark/micro/csv/multiple_small_read_csv.benchmark benchmark/micro/csv/null_padding.benchmark diff --git 
a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index f8018e5fcdde..448dc8382c4d 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -380,10 +380,10 @@ bool StringValueResult::HandleError() { line_pos.GetGlobalPosition(requested_size, first_nl)); break; case CSVErrorType::CAST_ERROR: - csv_error = CSVError::CastError(state_machine.options, names[cur_error.col_idx], cur_error.error_message, - cur_error.col_idx, borked_line, lines_per_batch, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl), - line_pos.GetGlobalPosition(requested_size, first_nl)); + csv_error = CSVError::CastError( + state_machine.options, names[cur_error.col_idx], cur_error.error_message, cur_error.col_idx, + borked_line, lines_per_batch, current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size, first_nl), parse_types[cur_error.col_idx].first); break; default: throw InvalidInputException("CSV Error not allowed when inserting row"); @@ -730,11 +730,8 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { auto csv_error = CSVError::CastError( state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, borked_line, lines_per_batch, - result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl), - -1); - auto csv_error = - CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, - row, lines_per_batch, result_vector.GetType().id()); + result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl), -1, + result_vector.GetType().id()); error_handler->Error(csv_error); } @@ -758,11 +755,7 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) { state_machine->options, csv_file_scan->names[col_idx], error_message, col_idx, borked_line, lines_per_batch, result.line_positions_per_row[line_error].begin.GetGlobalPosition(result.result_size, first_nl), - -1); - auto csv_error = - CSVError::CastError(state_machine->options, csv_file_scan->names[col_idx], error_message, - col_idx, row, lines_per_batch, result_vector.GetType().id()); - + -1, result_vector.GetType().id()); error_handler->Error(csv_error); } } diff --git a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp index 03173547fa98..e6ffae7e30e7 100644 --- a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +++ b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp @@ -185,7 +185,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr scanner, best_consistent_rows = consistent_rows; max_columns_found = num_cols; prev_padding_count = padding_count; - if (!options.null_padding && !options.ignore_errors) { + if (!options.null_padding && !options.ignore_errors.GetValue()) { sniffing_state_machine.dialect_options.skip_rows = start_row; } else { sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue(); @@ -210,7 +210,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr scanner, } } if (!same_quote_is_candidate) { - if (!options.null_padding && !options.ignore_errors) { + if (!options.null_padding && !options.ignore_errors.GetValue()) { sniffing_state_machine.dialect_options.skip_rows = start_row; } else { 
sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue(); diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index b6236553fbf0..7c7260c5d2e4 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -114,9 +114,8 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_ } CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, - vector &row, LinesPerBoundary error_info, LogicalTypeId type) { string &csv_row, LinesPerBoundary error_info, idx_t row_byte_position, - int64_t byte_position) { + int64_t byte_position, LogicalTypeId type) { std::ostringstream error; // Which column error << "Error when converting column \"" << column_name << "\"." << '\n'; @@ -139,16 +138,16 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_nam << '\n'; } -return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, row_byte_position, + return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, row_byte_position, byte_position, options); - } +} CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info, string &csv_row, idx_t byte_position) { std::ostringstream error; error << "Maximum line size of " << options.maximum_line_size << " bytes exceeded. "; error << "Actual Size:" << actual_size << " bytes." << '\n'; -return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position, byte_position, + return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position, byte_position, options); } @@ -176,45 +175,44 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_ std::ostringstream error; error << "Value with unterminated quote found." 
<< '\n'; error << '\n'; -return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, - row_byte_position, byte_position, options);} + return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info, + row_byte_position, byte_position, options); +} CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns, LinesPerBoundary error_info, string &csv_row, idx_t row_byte_position, int64_t byte_position) { std::ostringstream error; - // How many columns were expected and how many were found -// error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1; -// if (actual_columns >= options.dialect_options.num_cols) { -// return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, -// row_byte_position, byte_position, options); -// } else { -// return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, -// row_byte_position, byte_position, options); -// } - - // How many columns were expected and how many were found - error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns << '\n'; - error << '\n' << "Possible fixes:" << '\n'; - if (!options.null_padding) { - error << "* Enable null padding (null_padding=true) to replace missing values with NULL" << '\n'; - } - if (!options.ignore_errors) { - error << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n'; + error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1; + if (actual_columns >= options.dialect_options.num_cols) { + return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info, + row_byte_position, byte_position, options); + } else { + return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info, + row_byte_position, byte_position, options); } - error << '\n'; - // What were the options - error << options.ToString(); - return CSVError(error.str(), CSVErrorType::INCORRECT_COLUMN_AMOUNT, error_info); + + // // How many columns were expected and how many were found + // error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns << + //'\n'; error << '\n' << "Possible fixes:" << '\n'; if (!options.null_padding) { error << "* Enable null padding + //(null_padding=true) to replace missing values with NULL" << '\n'; + // } + // if (!options.ignore_errors) { + // error << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n'; + // } + // error << '\n'; + // // What were the options + // error << options.ToString(); + // return CSVError(error.str(), CSVErrorType::INCORRECT_COLUMN_AMOUNT, error_info); } CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info, string &csv_row, idx_t row_byte_position, int64_t byte_position) { std::ostringstream error; // How many columns were expected and how many were found - error << "Invalid unicode (byte sequence mismatch) detected."<< '\n'; + error << "Invalid unicode (byte sequence mismatch) detected." 
<< '\n'; return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, row_byte_position, byte_position, options); } diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index ea44c54462ff..1ffc21423344 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -59,8 +59,7 @@ class CSVError { //! Produces error messages for casting errors static CSVError CastError(const CSVReaderOptions &options, string &column_name, string &cast_error, idx_t column_idx, string &csv_row, LinesPerBoundary error_info, idx_t row_byte_position, - int64_t byte_position); - idx_t column_idx, vector &row, LinesPerBoundary error_info, LogicalTypeId type); + int64_t byte_position, LogicalTypeId type); //! Produces error for when the line size exceeds the maximum line size option static CSVError LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info, string &csv_row, idx_t byte_position); diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index 44531988a2cd..2359d127ef24 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -118,7 +118,7 @@ CSVOption CSVOption::Deserialize(Deserializer &deserializer) { } void CSVReaderOptions::Serialize(Serializer &serializer) const { - serializer.WritePropertyWithDefault(100, "ignore_errors", ignore_errors); + serializer.WriteProperty>(100, "ignore_errors", ignore_errors); serializer.WritePropertyWithDefault(101, "buffer_sample_size", buffer_sample_size); serializer.WritePropertyWithDefault(102, "null_str", null_str); serializer.WriteProperty(103, "compression", compression); @@ -135,26 +135,26 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(114, "buffer_size", buffer_size); serializer.WriteProperty(115, "file_options", file_options); serializer.WritePropertyWithDefault>(116, "force_quote", force_quote); - serializer.WritePropertyWithDefault(117, "rejects_table_name", rejects_table_name); + serializer.WriteProperty>(117, "store_rejects", store_rejects); serializer.WritePropertyWithDefault(118, "rejects_limit", rejects_limit); - serializer.WritePropertyWithDefault>(119, "rejects_recovery_columns", rejects_recovery_columns); - serializer.WritePropertyWithDefault>(120, "rejects_recovery_column_ids", rejects_recovery_column_ids); - serializer.WriteProperty>(121, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter); - serializer.WriteProperty>(122, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); - serializer.WriteProperty>(123, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape); - serializer.WriteProperty>(124, "dialect_options.header", dialect_options.header); - serializer.WritePropertyWithDefault(125, "dialect_options.num_cols", dialect_options.num_cols); - serializer.WriteProperty>(126, "dialect_options.state_machine_options.new_line", dialect_options.state_machine_options.new_line); - serializer.WriteProperty>(127, "dialect_options.skip_rows", dialect_options.skip_rows); - serializer.WriteProperty>>(128, "dialect_options.date_format", dialect_options.date_format); - serializer.WritePropertyWithDefault(129, 
"sniffer_user_mismatch_error", sniffer_user_mismatch_error); - serializer.WritePropertyWithDefault(130, "parallel", parallel); + serializer.WriteProperty>(119, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter); + serializer.WriteProperty>(120, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); + serializer.WriteProperty>(121, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape); + serializer.WriteProperty>(122, "dialect_options.header", dialect_options.header); + serializer.WritePropertyWithDefault(123, "dialect_options.num_cols", dialect_options.num_cols); + serializer.WriteProperty>(124, "dialect_options.state_machine_options.new_line", dialect_options.state_machine_options.new_line); + serializer.WriteProperty>(125, "dialect_options.skip_rows", dialect_options.skip_rows); + serializer.WriteProperty>>(126, "dialect_options.date_format", dialect_options.date_format); + serializer.WritePropertyWithDefault(127, "sniffer_user_mismatch_error", sniffer_user_mismatch_error); + serializer.WritePropertyWithDefault(128, "parallel", parallel); + serializer.WriteProperty>(129, "rejects_table_name", rejects_table_name); + serializer.WriteProperty>(130, "rejects_scan_name", rejects_scan_name); serializer.WritePropertyWithDefault>(131, "was_type_manually_set", was_type_manually_set); } CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { CSVReaderOptions result; - deserializer.ReadPropertyWithDefault(100, "ignore_errors", result.ignore_errors); + deserializer.ReadProperty>(100, "ignore_errors", result.ignore_errors); deserializer.ReadPropertyWithDefault(101, "buffer_sample_size", result.buffer_sample_size); deserializer.ReadPropertyWithDefault(102, "null_str", result.null_str); deserializer.ReadProperty(103, "compression", result.compression); @@ -171,20 +171,20 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { deserializer.ReadPropertyWithDefault(114, "buffer_size", result.buffer_size); deserializer.ReadProperty(115, "file_options", result.file_options); deserializer.ReadPropertyWithDefault>(116, "force_quote", result.force_quote); - deserializer.ReadPropertyWithDefault(117, "rejects_table_name", result.rejects_table_name); + deserializer.ReadProperty>(117, "store_rejects", result.store_rejects); deserializer.ReadPropertyWithDefault(118, "rejects_limit", result.rejects_limit); - deserializer.ReadPropertyWithDefault>(119, "rejects_recovery_columns", result.rejects_recovery_columns); - deserializer.ReadPropertyWithDefault>(120, "rejects_recovery_column_ids", result.rejects_recovery_column_ids); - deserializer.ReadProperty>(121, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter); - deserializer.ReadProperty>(122, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote); - deserializer.ReadProperty>(123, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape); - deserializer.ReadProperty>(124, "dialect_options.header", result.dialect_options.header); - deserializer.ReadPropertyWithDefault(125, "dialect_options.num_cols", result.dialect_options.num_cols); - deserializer.ReadProperty>(126, "dialect_options.state_machine_options.new_line", result.dialect_options.state_machine_options.new_line); - deserializer.ReadProperty>(127, "dialect_options.skip_rows", 
result.dialect_options.skip_rows);
-	deserializer.ReadProperty<map<LogicalTypeId, CSVOption<StrpTimeFormat>>>(128, "dialect_options.date_format", result.dialect_options.date_format);
-	deserializer.ReadPropertyWithDefault<string>(129, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error);
-	deserializer.ReadPropertyWithDefault<bool>(130, "parallel", result.parallel);
+	deserializer.ReadProperty<CSVOption<char>>(119, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter);
+	deserializer.ReadProperty<CSVOption<char>>(120, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote);
+	deserializer.ReadProperty<CSVOption<char>>(121, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape);
+	deserializer.ReadProperty<CSVOption<bool>>(122, "dialect_options.header", result.dialect_options.header);
+	deserializer.ReadPropertyWithDefault<idx_t>(123, "dialect_options.num_cols", result.dialect_options.num_cols);
+	deserializer.ReadProperty<CSVOption<NewLineIdentifier>>(124, "dialect_options.state_machine_options.new_line", result.dialect_options.state_machine_options.new_line);
+	deserializer.ReadProperty<CSVOption<idx_t>>(125, "dialect_options.skip_rows", result.dialect_options.skip_rows);
+	deserializer.ReadProperty<map<LogicalTypeId, CSVOption<StrpTimeFormat>>>(126, "dialect_options.date_format", result.dialect_options.date_format);
+	deserializer.ReadPropertyWithDefault<string>(127, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error);
+	deserializer.ReadPropertyWithDefault<bool>(128, "parallel", result.parallel);
+	deserializer.ReadProperty<CSVOption<string>>(129, "rejects_table_name", result.rejects_table_name);
+	deserializer.ReadProperty<CSVOption<string>>(130, "rejects_scan_name", result.rejects_scan_name);
 	deserializer.ReadPropertyWithDefault<vector<bool>>(131, "was_type_manually_set", result.was_type_manually_set);
 	return result;
 }

From 23680a061fde7a20ec711bc6645996e6c149d73e Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Tue, 2 Apr 2024 18:53:06 +0200
Subject: [PATCH 061/147] Force the fix message to be set on all CSV errors

---
 .../operator/csv_scanner/util/csv_error.cpp  | 87 ++++++++++---------
 .../operator/csv_scanner/csv_error.hpp       | 10 ++-
 2 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp
index 7c7260c5d2e4..dbe43899e6e2 100644
--- a/src/execution/operator/csv_scanner/util/csv_error.cpp
+++ b/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -18,10 +18,10 @@ void CSVErrorHandler::ThrowError(CSVError csv_error) {
 	if (PrintLineNumber(csv_error)) {
 		error << "CSV Error on Line: " << GetLine(csv_error.error_info) << '\n';
 	}
-	if (csv_error.error_message_with_options.empty()) {
+	if (csv_error.full_error_message.empty()) {
 		error << csv_error.error_message;
 	} else {
-		error << csv_error.error_message_with_options;
+		error << csv_error.full_error_message;
 	}
 	switch (csv_error.type) {
 	case CSVErrorType::CAST_ERROR:
@@ -82,15 +82,16 @@ CSVError::CSVError(string error_message_p, CSVErrorType type_p, LinesPerBoundary
 CSVError::CSVError(string error_message_p, CSVErrorType type_p, idx_t column_idx_p, string csv_row_p,
                    LinesPerBoundary error_info_p, idx_t row_byte_position, int64_t byte_position_p,
-                   const CSVReaderOptions &reader_options)
+                   const CSVReaderOptions &reader_options, const string &fixes)
     : error_message(std::move(error_message_p)), type(type_p), column_idx(column_idx_p), csv_row(std::move(csv_row_p)),
       error_info(error_info_p), row_byte_position(row_byte_position), byte_position(byte_position_p) {
 	// What were the options
 	std::ostringstream error;
-	error << error_message << std::endl;
+	error << error_message << '\n';
+	error << fixes << '\n';
 	error << reader_options.ToString();
-	error << std::endl;
-	error_message_with_options = error.str();
+	error << '\n';
+	full_error_message = error.str();
 }

 CSVError CSVError::ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_column, const vector<string> &names) {
@@ -118,28 +119,30 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_nam
                              int64_t byte_position, LogicalTypeId type) {
 	std::ostringstream error;
 	// Which column
-	error << "Error when converting column \"" << column_name << "\"." << '\n';
+	error << "Error when converting column \"" << column_name << "\". ";
 	// What was the cast error
 	error << cast_error << '\n';
-
-	error << "Column " << column_name << " is being converted as type " << LogicalTypeIdToString(type) << '\n';
+	std::ostringstream how_to_fix_it;
+	how_to_fix_it << "Column " << column_name << " is being converted as type " << LogicalTypeIdToString(type) << '\n';
 	if (!options.WasTypeManuallySet(column_idx)) {
-		error << "This type was auto-detected from the CSV file." << '\n';
-		error << "Possible solutions:" << '\n';
-		error << "* Override the type for this column manually by setting the type explicitly, e.g. types={'"
-		      << column_name << "': 'VARCHAR'}" << '\n';
-		error << "* Set the sample size to a larger value to enable the auto-detection to scan more values, e.g. "
-		         "sample_size=-1"
-		      << '\n';
-		error << "* Use a COPY statement to automatically derive types from an existing table." << '\n';
+		how_to_fix_it << "This type was auto-detected from the CSV file." << '\n';
+		how_to_fix_it << "Possible solutions:" << '\n';
+		how_to_fix_it << "* Override the type for this column manually by setting the type explicitly, e.g. types={'"
+		              << column_name << "': 'VARCHAR'}" << '\n';
+		how_to_fix_it
+		    << "* Set the sample size to a larger value to enable the auto-detection to scan more values, e.g. "
+		       "sample_size=-1"
+		    << '\n';
+		how_to_fix_it << "* Use a COPY statement to automatically derive types from an existing table." << '\n';
 	} else {
-		error << "This type was either manually set or derived from an existing table. Select a different type to "
-		         "correctly parse this column."
-		      << '\n';
+		how_to_fix_it
+		    << "This type was either manually set or derived from an existing table. Select a different type to "
+		       "correctly parse this column."
+		    << '\n';
 	}
 	return CSVError(error.str(), CSVErrorType::CAST_ERROR, column_idx, csv_row, error_info, row_byte_position,
-	                byte_position, options);
+	                byte_position, options, how_to_fix_it.str());
 }

 CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_size, LinesPerBoundary error_info,
@@ -147,8 +150,13 @@ CSVError CSVError::LineSizeError(const CSVReaderOptions &options, idx_t actual_s
 	std::ostringstream error;
 	error << "Maximum line size of " << options.maximum_line_size << " bytes exceeded. ";
 	error << "Actual Size:" << actual_size << " bytes." << '\n';
+
+	std::ostringstream how_to_fix_it;
+	how_to_fix_it << "Possible Solution: Change the maximum length size, e.g., max_line_size=" << actual_size + 1
+	              << "\n";
+
 	return CSVError(error.str(), CSVErrorType::MAXIMUM_LINE_SIZE, 0, csv_row, error_info, byte_position, byte_position,
-	                options);
+	                options, how_to_fix_it.str());
 }

 CSVError CSVError::SniffingError(string &file_path) {
@@ -175,37 +183,34 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_
 	std::ostringstream error;
 	error << "Value with unterminated quote found." << '\n';
 	error << '\n';
+	std::ostringstream how_to_fix_it;
+	how_to_fix_it << "Possible Solution: Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
 	return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info,
-	                row_byte_position, byte_position, options);
+	                row_byte_position, byte_position, options, how_to_fix_it.str());
 }

 CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, idx_t actual_columns,
                                               LinesPerBoundary error_info, string &csv_row, idx_t row_byte_position,
                                               int64_t byte_position) {
 	std::ostringstream error;
-
+	// Build the list of possible fixes for this error
+	std::ostringstream how_to_fix_it;
+	how_to_fix_it << "Possible fixes:" << '\n';
+	if (!options.null_padding) {
+		how_to_fix_it << "* Enable null padding (null_padding=true) to replace missing values with NULL" << '\n';
+	}
+	if (!options.ignore_errors.GetValue()) {
+		how_to_fix_it << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
+	}
 	// How many columns were expected and how many were found
 	error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1;
 	if (actual_columns >= options.dialect_options.num_cols) {
 		return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info,
-		                row_byte_position, byte_position, options);
+		                row_byte_position, byte_position, options, how_to_fix_it.str());
 	} else {
 		return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info,
-		                row_byte_position, byte_position, options);
+		                row_byte_position, byte_position, options, how_to_fix_it.str());
 	}
-
-	// // How many columns were expected and how many were found
-	// error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns <<
-	//'\n';
-	// error << '\n' << "Possible fixes:" << '\n';
-	// if (!options.null_padding) {
-	// 	error << "* Enable null padding (null_padding=true) to replace missing values with NULL" << '\n';
-	// }
-	// if (!options.ignore_errors) {
-	// 	error << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
-	// }
-	// error << '\n';
-	// // What were the options
-	// error << options.ToString();
-	// return CSVError(error.str(), CSVErrorType::INCORRECT_COLUMN_AMOUNT, error_info);
 }

 CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, idx_t current_column, LinesPerBoundary error_info,
@@ -213,8 +218,10 @@ CSVError CSVError::InvalidUTF8(const CSVReaderOptions &options, idx_t current_co
 	std::ostringstream error;
 	// How many columns were expected and how many were found
 	error << "Invalid unicode (byte sequence mismatch) detected." << '\n';
+	std::ostringstream how_to_fix_it;
+	how_to_fix_it << "Possible Solution: Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
 	return CSVError(error.str(), CSVErrorType::INVALID_UNICODE, current_column, csv_row, error_info, row_byte_position,
-	                byte_position, options);
+	                byte_position, options, how_to_fix_it.str());
 }

 bool CSVErrorHandler::PrintLineNumber(CSVError &error) {
diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
index 1ffc21423344..c0f556ffba32 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp
@@ -52,7 +52,8 @@ class CSVError {
 public:
 	CSVError() {};
 	CSVError(string error_message, CSVErrorType type, idx_t column_idx, string csv_row, LinesPerBoundary error_info,
-	         idx_t row_byte_position, int64_t byte_position, const CSVReaderOptions &reader_options);
+	         idx_t row_byte_position, int64_t byte_position, const CSVReaderOptions &reader_options,
+	         const string &fixes);
 	CSVError(string error_message, CSVErrorType type, LinesPerBoundary error_info);
 	//! Produces error messages for column name -> type mismatch.
 	static CSVError ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_column, const vector<string> &names);
@@ -84,8 +85,11 @@ class CSVError {
 	//! Actual error message
 	string error_message;
-	//! Actual error message
-	string error_message_with_options;
+	//! Full error message used in throws:
+	//! 1. The actual error
+	//! 2. How to fix it
+	//! 3. The options that generated the error
+	string full_error_message;
 	//! Error Type
 	CSVErrorType type;
 	//! Column Index where error happened
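The constructor above now assembles full_error_message from three parts. As a standalone sketch of that layout, with simplified plain-string parameters rather than the exact DuckDB signatures:

    #include <sstream>
    #include <string>

    // Sketch: full message = the actual error, then the suggested fixes,
    // then a dump of the reader options that produced the error.
    std::string BuildFullErrorMessage(const std::string &error_message, const std::string &fixes,
                                      const std::string &options_dump) {
        std::ostringstream full;
        full << error_message << '\n'; // 1. what went wrong
        full << fixes << '\n';         // 2. how to fix it
        full << options_dump << '\n';  // 3. options that generated the error
        return full.str();
    }

From 0c7008b3c0d74817b0bda3c2354ed082a546202f Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 3 Apr 2024 11:30:36 +0200
Subject: [PATCH 062/147] Fix small issue with newline null padding in parallel

---
 .../csv_scanner/scanner/string_value_scanner.cpp      | 11 ++++++-----
 src/execution/operator/csv_scanner/util/csv_error.cpp |  1 -
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 448dc8382c4d..07bf849e4a9c 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -405,9 +405,8 @@ void StringValueResult::QuotedNewLine(StringValueResult &result) {

 void StringValueResult::NullPaddingQuotedNewlineCheck() {
 	// We do some checks for null_padding correctness
-	if (state_machine.options.null_padding && iterator.IsBoundarySet() && quoted_new_line && iterator.done) {
-		// If we have null_padding set, we found a quoted new line, we are scanning the file in parallel, and it's the
-		// last row of this thread.
+	if (state_machine.options.null_padding && iterator.IsBoundarySet() && quoted_new_line) {
+		// If we have null_padding set, we found a quoted new line, and we are scanning the file in parallel; we error.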
 		LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
 		auto csv_error = CSVError::NullPaddingFail(state_machine.options, lines_per_batch);
 		error_handler.Error(csv_error);
@@ -1035,14 +1034,16 @@ bool StringValueScanner::MoveToNextBuffer() {
 			// And an extra empty value to represent what comes after the delimiter
 			result.AddRow(result, previous_buffer_handle->actual_size);
 			lines_read++;
-		} else if (states.IsQuotedCurrent()) {
+		}
+		else if (states.IsQuotedCurrent()) {
 			// Unterminated quote
 			LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size};
 			result.current_line_position.begin = result.current_line_position.end;
 			result.current_line_position.end = current_line_start;
 			result.InvalidState(result);
-		} else {
+		}
+		else {
 			result.AddRow(result, previous_buffer_handle->actual_size);
 			lines_read++;
 		}

diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp
index dbe43899e6e2..d8bafa91e72d 100644
--- a/src/execution/operator/csv_scanner/util/csv_error.cpp
+++ b/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -182,7 +182,6 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_
                                            int64_t byte_position) {
 	std::ostringstream error;
 	error << "Value with unterminated quote found." << '\n';
-	error << '\n';
 	std::ostringstream how_to_fix_it;
 	how_to_fix_it << "Possible Solution: Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
 	return CSVError(error.str(), CSVErrorType::UNTERMINATED_QUOTES, current_column, csv_row, error_info,
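Dropping the iterator.done term above means the check now fires for any parallel boundary scan that sees a quoted newline under null_padding, not only on a thread's last row. A reduced sketch of the condition, with hypothetical flat fields standing in for the scanner state:

    #include <stdexcept>

    struct ScanState {
        bool null_padding = false;    // the null_padding option is enabled
        bool boundary_set = false;    // the file is scanned in parallel chunks
        bool quoted_new_line = false; // a newline was seen inside quotes
    };

    // Sketch: quoted newlines cannot be split safely across parallel chunk
    // boundaries when null_padding is on, so the scan errors out.
    void NullPaddingQuotedNewlineCheck(const ScanState &s) {
        if (s.null_padding && s.boundary_set && s.quoted_new_line) {
            throw std::runtime_error("quoted new lines are not supported with null_padding in parallel scans");
        }
    }

From 8f15bb7a5ac3e21ebeca91eb12aafe4717f945a8 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 3 Apr 2024 13:42:33 +0200
Subject: [PATCH 063/147] Simplify error message stored

---
 .../operator/csv_scanner/util/csv_error.cpp   |  9 ++++++
 .../operator/csv_scanner/csv_error.hpp        |  3 ++
 .../csv/rejects/csv_rejects_flush_cast.test   | 30 +++++--------------
 3 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp
index d8bafa91e72d..d9119a4dd9d3 100644
--- a/src/execution/operator/csv_scanner/util/csv_error.cpp
+++ b/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -1,5 +1,7 @@
 #include "duckdb/execution/operator/csv_scanner/csv_error.hpp"
 #include "duckdb/common/exception/conversion_exception.hpp"
+#include "duckdb/common/string_util.hpp"
+
 #include <sstream>

 namespace duckdb {
@@ -87,6 +89,9 @@ CSVError::CSVError(string error_message_p, CSVErrorType type_p, idx_t column_idx
       error_info(error_info_p), row_byte_position(row_byte_position), byte_position(byte_position_p) {
 	// What were the options
 	std::ostringstream error;
+	if (reader_options.ignore_errors.GetValue()){
+		RemoveNewLine(error_message);
+	}
 	error << error_message << '\n';
 	error << fixes << '\n';
 	error << reader_options.ToString();
@@ -114,6 +119,10 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_
 	return CSVError(exception, CSVErrorType::COLUMN_NAME_TYPE_MISMATCH, {});
 }

+void CSVError::RemoveNewLine(string &error){
+	error = StringUtil::Split(error, "\n")[0];
+}
+
 CSVError CSVError::CastError(const CSVReaderOptions &options, string &column_name, string &cast_error,
                              idx_t column_idx, string &csv_row, LinesPerBoundary error_info, idx_t row_byte_position,
                              int64_t byte_position, LogicalTypeId type) {

diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp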
b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp index c0f556ffba32..340c42cd1c40 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp @@ -83,6 +83,9 @@ class CSVError { return error_info.boundary_idx; } + //! We might want to remove newline in errors if we are doing them for the rejects tables + void RemoveNewLine(string &error); + //! Actual error message string error_message; //! Full error message used in throws diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test index 69530026555e..7af3c29f373d 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -11,32 +11,16 @@ query III SELECT typeof(first(a)), typeof(first(b)), COUNT(*) FROM read_csv( 'data/csv/error/flush_cast.csv', columns = {'a': 'DATE', 'b': 'VARCHAR'}, - rejects_table='csv_rejects_table', + store_rejects = true, delim = ',', - dateformat = '%d-%m-%Y', - ignore_errors=true); + dateformat = '%d-%m-%Y'); ---- DATE VARCHAR 2811 - -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table order by all; ----- -data/csv/error/flush_cast.csv 2813 1 "a" CAST c, bla 44971 -data/csv/error/flush_cast.csv 439 1 "a" CAST B, bla 6996 - -query I -SELECT error_message -FROM csv_rejects_table where byte_position = 6996; ----- -:.*Could not parse string "B" according to format specifier "%d-%m-%Y".* - -query I -SELECT error_message -FROM csv_rejects_table where byte_position = 44971; +query IIIIIIIIII +SELECT * +FROM reject_errors order by all; ---- -:.*Could not parse string "c" according to format specifier "%d-%m-%Y".* +3 0 439 6997 NULL 1 a CAST B, bla Error when converting column "a". Could not parse string "B" according to format specifier "%d-%m-%Y" +3 0 2813 44972 NULL 1 a CAST c, bla Error when converting column "a". 
Could not parse string "c" according to format specifier "%d-%m-%Y"

-statement ok
-DROP TABLE csv_rejects_table;
\ No newline at end of file
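Commit 063 above trims a stored message to its first line when errors are collected rather than thrown; the same idea in a dependency-free sketch (the actual diff uses StringUtil::Split on the newline and keeps element 0):

    #include <string>

    // Sketch: keep only the first line of a multi-line error message,
    // e.g. before storing it in the rejects table.
    std::string FirstLine(const std::string &error) {
        const auto pos = error.find('\n');
        return pos == std::string::npos ? error : error.substr(0, pos);
    }

From 00efd83289e8d51a14f171fd07b3815c1f116aa0 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 3 Apr 2024 15:06:04 +0200
Subject: [PATCH 064/147] Doing some extra checks to give the correct
 byte-position where errors happen

---
 .../scanner/string_value_scanner.cpp          | 102 +++++++++++++-----
 .../table_function/global_csv_state.cpp       |   2 +-
 .../operator/csv_scanner/util/csv_error.cpp   |   4 +-
 .../csv_scanner/string_value_scanner.hpp      |   7 +-
 .../csv/rejects/csv_buffer_size_rejects.test  |  46 +++-----
 5 files changed, 98 insertions(+), 63 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 07bf849e4a9c..4a00f957e300 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -339,10 +339,17 @@ void StringValueResult::HandleUnicodeError(idx_t col_idx, LinePosition &error_po
 	Utf8Proc::MakeValid(&char_array[0], char_array.size());
 	borked_line = {char_array.begin(), char_array.end() - 1};
 	LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
-	auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
-	                                       current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
-	                                       error_position.GetGlobalPosition(requested_size, first_nl));
-	error_handler.Error(csv_error, true);
+	if (current_line_position.begin == error_position) {
+		auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
+		                                       current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
+		                                       error_position.GetGlobalPosition(requested_size, first_nl));
+		error_handler.Error(csv_error, true);
+	} else {
+		auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
+		                                       current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
+		                                       error_position.GetGlobalPosition(requested_size));
+		error_handler.Error(csv_error, true);
+	}
 }

 bool StringValueResult::HandleError() {
@@ -357,10 +364,17 @@ bool StringValueResult::HandleError() {

 		switch (cur_error.type) {
 		case CSVErrorType::TOO_MANY_COLUMNS:
-			csv_error = CSVError::IncorrectColumnAmountError(
-			    state_machine.options, col_idx, lines_per_batch, borked_line,
-			    current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
-			    line_pos.GetGlobalPosition(requested_size, first_nl));
+			if (current_line_position.begin == line_pos) {
+				csv_error = CSVError::IncorrectColumnAmountError(
+				    state_machine.options, col_idx, lines_per_batch, borked_line,
+				    current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
+				    line_pos.GetGlobalPosition(requested_size, first_nl));
+			} else {
+				csv_error = CSVError::IncorrectColumnAmountError(
+				    state_machine.options, col_idx, lines_per_batch, borked_line,
+				    current_line_position.begin.GetGlobalPosition(requested_size, first_nl),
+				    line_pos.GetGlobalPosition(requested_size));
+			}
 			break;
 		case CSVErrorType::INVALID_UNICODE: {
 			// We have to sanitize the CSV line
 			char_array.push_back('\0'); // Null-terminate the character array
 			Utf8Proc::MakeValid(&char_array[0], char_array.size());
 			borked_line = {char_array.begin(), char_array.end() - 1};
-			csv_error =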
CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl), - line_pos.GetGlobalPosition(requested_size, first_nl)); + if (current_line_position.begin == line_pos) { + csv_error = + CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size, first_nl)); + } else { + csv_error = + CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size)); + } break; } case CSVErrorType::UNTERMINATED_QUOTES: - csv_error = CSVError::UnterminatedQuotesError( - state_machine.options, col_idx, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl), - line_pos.GetGlobalPosition(requested_size, first_nl)); + if (current_line_position.begin == line_pos) { + csv_error = CSVError::UnterminatedQuotesError( + state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size, first_nl)); + } else { + csv_error = CSVError::UnterminatedQuotesError( + state_machine.options, col_idx, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size)); + } break; case CSVErrorType::CAST_ERROR: - csv_error = CSVError::CastError( - state_machine.options, names[cur_error.col_idx], cur_error.error_message, cur_error.col_idx, - borked_line, lines_per_batch, current_line_position.begin.GetGlobalPosition(requested_size, first_nl), - line_pos.GetGlobalPosition(requested_size, first_nl), parse_types[cur_error.col_idx].first); + if (current_line_position.begin == line_pos) { + csv_error = CSVError::CastError( + state_machine.options, names[cur_error.col_idx], cur_error.error_message, cur_error.col_idx, + borked_line, lines_per_batch, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size, first_nl), parse_types[cur_error.col_idx].first); + } else { + csv_error = CSVError::CastError( + state_machine.options, names[cur_error.col_idx], cur_error.error_message, cur_error.col_idx, + borked_line, lines_per_batch, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + line_pos.GetGlobalPosition(requested_size), parse_types[cur_error.col_idx].first); + } + break; default: throw InvalidInputException("CSV Error not allowed when inserting row"); @@ -499,11 +538,20 @@ bool StringValueResult::AddRowInternal() { bool first_nl; auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); - auto csv_error = CSVError::IncorrectColumnAmountError( - state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl), - last_position.GetGlobalPosition(requested_size, first_nl)); - error_handler.Error(csv_error); + if (current_line_position.begin == last_position) { + auto csv_error = CSVError::IncorrectColumnAmountError( + state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + 
last_position.GetGlobalPosition(requested_size, first_nl)); + error_handler.Error(csv_error); + } else { + auto csv_error = CSVError::IncorrectColumnAmountError( + state_machine.options, cur_col_id - 1, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl), + last_position.GetGlobalPosition(requested_size)); + error_handler.Error(csv_error); + } + // If we are here we ignore_errors, so we delete this line number_of_rows--; } @@ -1034,16 +1082,14 @@ bool StringValueScanner::MoveToNextBuffer() { // And an extra empty value to represent what comes after the delimiter result.AddRow(result, previous_buffer_handle->actual_size); lines_read++; - } - else if (states.IsQuotedCurrent()) { + } else if (states.IsQuotedCurrent()) { // Unterminated quote LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size}; result.current_line_position.begin = result.current_line_position.end; result.current_line_position.end = current_line_start; result.InvalidState(result); - } - else { + } else { result.AddRow(result, previous_buffer_handle->actual_size); lines_read++; } diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 3ad83d0954f7..b16c2d48df79 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -279,7 +279,7 @@ void CSVGlobalState::FillRejectsTable() { // a null errors_appender.Append(Value()); } else { - errors_appender.Append(error.byte_position); + errors_appender.Append(error.byte_position + 1); } // 6. Column Index errors_appender.Append(col_idx + 1); diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index d9119a4dd9d3..18a37fa3f2ff 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -89,7 +89,7 @@ CSVError::CSVError(string error_message_p, CSVErrorType type_p, idx_t column_idx error_info(error_info_p), row_byte_position(row_byte_position), byte_position(byte_position_p) { // What were the options std::ostringstream error; - if (reader_options.ignore_errors.GetValue()){ + if (reader_options.ignore_errors.GetValue()) { RemoveNewLine(error_message); } error << error_message << '\n'; @@ -119,7 +119,7 @@ CSVError CSVError::ColumnTypesError(case_insensitive_map_t sql_types_per_ return CSVError(exception, CSVErrorType::COLUMN_NAME_TYPE_MISMATCH, {}); } -void CSVError::RemoveNewLine(string &error){ +void CSVError::RemoveNewLine(string &error) { error = StringUtil::Split(error, "\n")[0]; } diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 6332906f03ca..0039f9ade5b0 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -42,7 +42,12 @@ class LinePosition { } return other.buffer_size - other.buffer_pos + buffer_pos; } - idx_t GetGlobalPosition(idx_t requested_buffer_size, bool first_char_nl) { + + bool operator==(const LinePosition &other) const { + return buffer_pos == other.buffer_pos && buffer_idx == other.buffer_idx && buffer_size == other.buffer_size; + } + + idx_t GetGlobalPosition(idx_t requested_buffer_size, 
bool first_char_nl = false) { return requested_buffer_size * buffer_idx + buffer_pos + first_char_nl; } idx_t buffer_pos = 0; diff --git a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test index 76b95cfbe731..35f44da755a0 100644 --- a/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test +++ b/test/sql/copy/csv/rejects/csv_buffer_size_rejects.test @@ -7,7 +7,7 @@ require skip_reload # Test will fail on windows because byte_position is slightly different due to \r\n instead of \n require notwindows -loop buffer_size 5 10 +loop buffer_size 5 8 # Ensure that we can get the schema if we reduce the sample size and ignore errors query IIIII @@ -15,45 +15,29 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', sample_size=1, buffer_size=${buffer_size}, - rejects_table='csv_rejects_table', - ignore_errors=true); + store_rejects = true); ---- BIGINT VARCHAR 11044 11044 2 -query IIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIIII rowsort +SELECT * EXCLUDE (scan_id, user_arguments) FROM reject_scans order by all; ---- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 1 "column0" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 1 "column0" CAST C, A 20875 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 1 "column0" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 1 "column0" CAST C, A 28395 - -query I -SELECT error_message -FROM csv_rejects_table where byte_position = 10875; ----- -:.*Could not convert string "B" to 'BIGINT'.* +0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL +1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL -query I -SELECT error_message -FROM csv_rejects_table where byte_position = 20875; ----- -:.*Could not convert string "C" to 'BIGINT'.* -query I -SELECT error_message -FROM csv_rejects_table where byte_position = 18395; +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -:.*Could not convert string "B" to 'BIGINT'.* +0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT'

statement ok
DROP TABLE reject_errors;

statement ok
DROP TABLE reject_scans;

endloop
\ No newline at end of file
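Commit 064 above keys the byte-position reporting off LinePosition equality and a now-defaulted first_char_nl flag. The position arithmetic, lifted from the header diff into a self-contained sketch (field names follow the diff):

    #include <cstdint>

    using idx_t = uint64_t;

    // Sketch of LinePosition: a position inside one scan buffer,
    // convertible to a global byte offset in the file.
    struct LinePosition {
        idx_t buffer_pos = 0;
        idx_t buffer_idx = 0;
        idx_t buffer_size = 0;

        bool operator==(const LinePosition &other) const {
            return buffer_pos == other.buffer_pos && buffer_idx == other.buffer_idx &&
                   buffer_size == other.buffer_size;
        }

        // Buffers are requested at a fixed size, so the global offset is
        // buffer index * requested size + offset within the buffer, plus one
        // if the line starts with the newline of the previous line.
        idx_t GetGlobalPosition(idx_t requested_buffer_size, bool first_char_nl = false) const {
            return requested_buffer_size * buffer_idx + buffer_pos + first_char_nl;
        }
    };

From eca3caf1404d6aa75d8e3beff9ed212594373646 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 3 Apr 2024 15:42:30 +0200
Subject: [PATCH 065/147] Incorrect Column Amount

---
 .../scanner/string_value_scanner.cpp          |   1 +
 .../operator/csv_scanner/util/csv_error.cpp   |   4 +-
 .../csv_incorrect_columns_amount_rejects.test | 125 ++++++++++--------
 3 files changed, 71 insertions(+), 59 deletions(-)

diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index 4a00f957e300..d3e980a57392 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -121,6 +121,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
 			// We error pointing to the current value error.
 			current_errors.push_back({CSVErrorType::TOO_MANY_COLUMNS, cur_col_id, last_position});
 		}
+		cur_col_id++;
 		return;
 	}

diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp
index 18a37fa3f2ff..da9f5d5e2435 100644
--- a/src/execution/operator/csv_scanner/util/csv_error.cpp
+++ b/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -214,10 +214,10 @@ CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, i
 	error << "Expected Number of Columns: " << options.dialect_options.num_cols << " Found: " << actual_columns + 1;
 	if (actual_columns >= options.dialect_options.num_cols) {
 		return CSVError(error.str(), CSVErrorType::TOO_MANY_COLUMNS, actual_columns, csv_row, error_info,
-		                row_byte_position, byte_position, options, how_to_fix_it.str());
+		                row_byte_position, byte_position - 1, options, how_to_fix_it.str());
 	} else {
 		return CSVError(error.str(), CSVErrorType::TOO_FEW_COLUMNS, actual_columns, csv_row, error_info,
-		                row_byte_position, byte_position, options, how_to_fix_it.str());
+		                row_byte_position, byte_position - 1, options, how_to_fix_it.str());
 	}
 }

diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
index 070b413a8497..2b59e17547d3 100644
--- a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
+++ b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test
@@ -11,104 +11,115 @@ statement ok
 SELECT * FROM read_csv(
     'data/csv/rejects/incorrect_columns/few_columns.csv',
     columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'},
-    rejects_table='csv_rejects_table',
-    ignore_errors=true, auto_detect=false, header = 1);
+    store_rejects=true, auto_detect=false, header = 1);

-query IIIIIII rowsort
-SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
-FROM csv_rejects_table;
+query IIIIIIIIII rowsort
+FROM reject_errors order by all;
 ----
-data/csv/rejects/incorrect_columns/few_columns.csv 1814 3 "d" MISSING COLUMNS 1,2,3 14504
-data/csv/rejects/incorrect_columns/few_columns.csv 1823 1 "b" MISSING COLUMNS 1 14574
-data/csv/rejects/incorrect_columns/few_columns.csv 2378 1 "b" MISSING COLUMNS 1 19008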
-data/csv/rejects/incorrect_columns/few_columns.csv 2762 2 "c" MISSING COLUMNS 1,2 22074 +3 0 1814 14505 14510 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 +3 0 1823 14575 14576 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +3 0 2378 19009 19010 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +3 0 2762 22075 22078 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; statement ok SELECT * FROM read_csv( 'data/csv/rejects/incorrect_columns/many_columns.csv', columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1); + store_rejects=true, auto_detect=false, header = 1); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII rowsort +FROM reject_errors order by all; ---- -data/csv/rejects/incorrect_columns/many_columns.csv 1096 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 8760 -data/csv/rejects/incorrect_columns/many_columns.csv 1159 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 9268 -data/csv/rejects/incorrect_columns/many_columns.csv 1206 5 NULL TOO MANY COLUMNS 1,2,3,4,5 9648 -data/csv/rejects/incorrect_columns/many_columns.csv 2769 5 NULL TOO MANY COLUMNS 1,2,3,4,5 22154 +7 0 1096 8761 8768 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +7 0 1096 8761 8770 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +7 0 1159 9269 9276 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +7 0 1159 9269 9278 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +7 0 1206 9649 9656 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +7 0 2769 22155 22162 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; statement ok SELECT * FROM read_csv( 'data/csv/rejects/incorrect_columns/mix_columns.csv', columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1); + store_rejects=true, auto_detect=false, header = 1); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII rowsort +FROM reject_errors order by all; ---- -data/csv/rejects/incorrect_columns/mix_columns.csv 1604 1 "b" MISSING COLUMNS 1 12824 -data/csv/rejects/incorrect_columns/mix_columns.csv 1671 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 13354 -data/csv/rejects/incorrect_columns/mix_columns.csv 2751 2 "c" MISSING COLUMNS 1,2 21998 -data/csv/rejects/incorrect_columns/mix_columns.csv 2768 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 22130 +11 0 1604 12825 12826 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +11 0 1671 13355 13362 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +11 0 1671 13355 13364 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +11 0 2751 21999 22002 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 +11 0 2768 22131 22138 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +11 0 2768 22131 22140 6 
NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 + # Different Buffer Sizes loop buffer_size 10 15 statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; statement ok SELECT * FROM read_csv( 'data/csv/rejects/incorrect_columns/small_mix.csv', columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1); + store_rejects=true, auto_detect=false, header = 1); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all ---- -data/csv/rejects/incorrect_columns/small_mix.csv 3 5 NULL TOO MANY COLUMNS 1,2,3,4,5 16 -data/csv/rejects/incorrect_columns/small_mix.csv 4 3 "d" MISSING COLUMNS 1,2,3 26 +0 3 17 24 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +0 4 27 32 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 endloop # All files statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; statement ok SELECT * FROM read_csv( 'data/csv/rejects/incorrect_columns/*.csv', columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1); + store_rejects=true, auto_detect=false, header = 1); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII rowsort +FROM reject_errors order by all ---- -data/csv/rejects/incorrect_columns/few_columns.csv 1814 3 "d" MISSING COLUMNS 1,2,3 14504 -data/csv/rejects/incorrect_columns/few_columns.csv 1823 1 "b" MISSING COLUMNS 1 14574 -data/csv/rejects/incorrect_columns/few_columns.csv 2378 1 "b" MISSING COLUMNS 1 19008 -data/csv/rejects/incorrect_columns/few_columns.csv 2762 2 "c" MISSING COLUMNS 1,2 22074 -data/csv/rejects/incorrect_columns/many_columns.csv 1096 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 8760 -data/csv/rejects/incorrect_columns/many_columns.csv 1159 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 9268 -data/csv/rejects/incorrect_columns/many_columns.csv 1206 5 NULL TOO MANY COLUMNS 1,2,3,4,5 9648 -data/csv/rejects/incorrect_columns/many_columns.csv 2769 5 NULL TOO MANY COLUMNS 1,2,3,4,5 22154 -data/csv/rejects/incorrect_columns/mix_columns.csv 1604 1 "b" MISSING COLUMNS 1 12824 -data/csv/rejects/incorrect_columns/mix_columns.csv 1671 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 13354 -data/csv/rejects/incorrect_columns/mix_columns.csv 2751 2 "c" MISSING COLUMNS 1,2 21998 -data/csv/rejects/incorrect_columns/mix_columns.csv 2768 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 22130 -data/csv/rejects/incorrect_columns/small_mix.csv 3 5 NULL TOO MANY COLUMNS 1,2,3,4,5 16 -data/csv/rejects/incorrect_columns/small_mix.csv 4 3 "d" MISSING COLUMNS 1,2,3 26 +35 0 1814 14505 14510 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 +35 0 1823 14575 14576 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +35 0 2378 19009 19010 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +35 0 2762 22075 22078 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 +35 1 1096 8761 8768 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +35 1 1096 8761 8770 6 NULL TOO 
MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +35 1 1159 9269 9276 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +35 1 1159 9269 9278 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +35 1 1206 9649 9656 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +35 1 2769 22155 22162 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +35 2 1604 12825 12826 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +35 2 1671 13355 13362 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +35 2 1671 13355 13364 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +35 2 2751 21999 22002 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 +35 2 2768 22131 22138 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +35 2 2768 22131 22140 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +35 3 3 17 24 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +35 3 4 27 32 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 From 50d658c8cde7b5208a1b56feedd0ff3d9a4759dd Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 3 Apr 2024 16:43:43 +0200 Subject: [PATCH 066/147] Don't really care about copy from yet --- .../copy/csv/rejects/csv_rejects_auto.test | 123 +++--------------- 1 file changed, 15 insertions(+), 108 deletions(-) diff --git a/test/sql/copy/csv/rejects/csv_rejects_auto.test b/test/sql/copy/csv/rejects/csv_rejects_auto.test index bfa8073a6567..e673e9917287 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_auto.test +++ b/test/sql/copy/csv/rejects/csv_rejects_auto.test @@ -11,89 +11,44 @@ query IIIII SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', sample_size=1, - rejects_table='csv_rejects_table', - ignore_errors=true); + store_rejects=true); ---- BIGINT VARCHAR 11044 11044 2 -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII rowsort +FROM reject_errors order by all; ---- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 1 "column0" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 1 "column0" CAST C, A 20875 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 1 "column0" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 1 "column0" CAST C, A 28395 +3 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +3 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' -query I -SELECT error_message -FROM csv_rejects_table where line=2176 and column_idx=1; ----- -:.*Could not convert string "B" to 'BIGINT'.* - -query I -SELECT error_message -FROM csv_rejects_table where line=4176 and column_idx=1; ----- -:.*Could not convert string "C" to 'BIGINT'.* - -query I -SELECT error_message -FROM csv_rejects_table where line=3680 and column_idx=1; ----- -:.*Could not convert string "B" to 'BIGINT'.* - -query I -SELECT error_message -FROM csv_rejects_table where line=5680 and column_idx=1; ----- -:.*Could not convert string "C" to 'BIGINT'.* +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; # Test with lots of errors query I SELECT SUM(num) FROM read_csv_auto( 'test/sql/copy/csv/data/error/mismatch/half1.csv', header=true, - ignore_errors=true, sample_size=1, - rejects_table='csv_rejects_table') ----- -2464 - -query I -SELECT COUNT(*) FROM csv_rejects_table; ----- -1024 - -statement ok -DROP TABLE csv_rejects_table; - -# Test same with COPY -statement ok -CREATE TABLE tbl1 (col1 BIGINT, col2 VARCHAR); - -statement ok -COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half1.csv' -WITH (HEADER, IGNORE_ERRORS TRUE, SAMPLE_SIZE 1000, REJECTS_TABLE 'csv_rejects_table'); - -query I -SELECT SUM(col1) FROM tbl1; + store_rejects=true) ---- 2464 query I -SELECT COUNT(*) FROM csv_rejects_table; +SELECT COUNT(*) FROM reject_errors; ---- 1024 statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; statement ok -DROP TABLE tbl1; +DROP TABLE reject_scans; # Test with more errors than STANDARD_VECTOR_SIZE query I @@ -112,52 +67,4 @@ SELECT COUNT(*) FROM csv_rejects_table; 3072 statement ok -DROP TABLE csv_rejects_table; - -statement ok -CREATE TABLE tbl1 (col1 BIGINT, col2 VARCHAR); - -statement ok -COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half2.csv' -WITH (HEADER, IGNORE_ERRORS TRUE, SAMPLE_SIZE 1000, REJECTS_TABLE 'csv_rejects_table'); - -query I -SELECT SUM(col1) FROM tbl1; ----- -2542 - -query I -SELECT COUNT(*) FROM csv_rejects_table; ----- -3072 - -statement ok -DROP TABLE csv_rejects_table; - -statement ok -DROP TABLE tbl1; - -# Test with more errors than STANDARD_VECTOR_SIZE and limit -statement ok -CREATE TABLE tbl1 (col1 BIGINT, col2 VARCHAR); - -statement ok -COPY tbl1 FROM 'test/sql/copy/csv/data/error/mismatch/half2.csv' -WITH (HEADER, IGNORE_ERRORS TRUE, SAMPLE_SIZE 1000, REJECTS_TABLE 'csv_rejects_table', REJECTS_LIMIT 1337); - -query I -SELECT SUM(col1) FROM tbl1; ----- -2542 - -query I -SELECT COUNT(*) FROM csv_rejects_table; ----- -1337 - -statement ok -DROP TABLE csv_rejects_table; - -statement ok -DROP TABLE tbl1; - +DROP TABLE csv_rejects_table; \ No newline at end of file From 8b97a738ab147e76ea5d927f583b46d684019ddc Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 3 Apr 2024 16:55:51 +0200 Subject: [PATCH 067/147] More test fixes --- .../csv/rejects/csv_rejects_flush_cast.test | 1 - .../csv/rejects/csv_rejects_maximum_line.test | 71 ++++++++++--------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test index 7af3c29f373d..e6459aa5cd77 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -18,7 +18,6 @@ SELECT typeof(first(a)), typeof(first(b)), COUNT(*) FROM read_csv( DATE VARCHAR 2811 query IIIIIIIIII -SELECT * FROM 
reject_errors order by all; ---- 3 0 439 6997 NULL 1 a CAST B, bla Error when converting column "a". Could not parse string "B" according to format specifier "%d-%m-%Y" diff --git a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test index f6214aab0906..1095a90d70f8 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test +++ b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test @@ -11,17 +11,18 @@ statement ok SELECT * FROM read_csv( 'data/csv/rejects/maximum_line/max_10.csv', columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1, max_line_size=10); + store_rejects=true, auto_detect=false, header = 1, max_line_size=10); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII +FROM reject_errors order by all; ---- -data/csv/rejects/maximum_line/max_10.csv 5 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 +3 0 5 23 23 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; # Test with buffer sizes @@ -31,17 +32,18 @@ statement ok SELECT * FROM read_csv( 'data/csv/rejects/maximum_line/max_10.csv', columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1, max_line_size=10, buffer_size=${buffer_size}); + store_rejects = true, auto_detect=false, header = 1, max_line_size=10, buffer_size=${buffer_size}); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -data/csv/rejects/maximum_line/max_10.csv 5 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 +0 5 23 23 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; endloop @@ -50,37 +52,38 @@ statement ok SELECT * FROM read_csv( 'data/csv/rejects/maximum_line/over_vector.csv', columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1, max_line_size=20); + store_rejects = true, auto_detect=false, header = 1, max_line_size=20); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII +FROM reject_errors order by all; ---- -data/csv/rejects/maximum_line/over_vector.csv 2282 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 -data/csv/rejects/maximum_line/over_vector.csv 2591 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 -data/csv/rejects/maximum_line/over_vector.csv 2923 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 +27 0 2282 13685 13685 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. +27 0 2591 15558 15558 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. 
+27 0 2923 17569 17569 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; -# Read Multiple Files +statement ok +DROP TABLE reject_scans; +# Read Multiple Files statement ok SELECT * FROM read_csv( 'data/csv/rejects/maximum_line/*.csv', columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1, max_line_size=10); + store_rejects = true, auto_detect=false, header = 1, max_line_size=10); -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII +FROM reject_errors order by all; ---- -data/csv/rejects/maximum_line/max_10.csv 5 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 22 -data/csv/rejects/maximum_line/over_vector.csv 2282 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 13684 -data/csv/rejects/maximum_line/over_vector.csv 2591 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 15557 -data/csv/rejects/maximum_line/over_vector.csv 2923 1 "a" LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 17568 +31 0 5 23 23 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. +31 1 2282 13685 13685 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +31 1 2591 15558 15558 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +31 1 2923 17569 17569 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. + +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; \ No newline at end of file +DROP TABLE reject_scans; \ No newline at end of file From 3db603c364c44af68fcdcfe97f60d1544072ea4c Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 12:08:05 +0200 Subject: [PATCH 068/147] Fixing rejects_read to new model, properly increment scan_id --- .../scanner/string_value_scanner.cpp | 3 +- .../table_function/global_csv_state.cpp | 3 +- .../transaction/transaction_context.hpp | 2 + src/transaction/transaction_context.cpp | 6 + .../copy/csv/rejects/csv_rejects_read.test | 268 ++++++------------ 5 files changed, 89 insertions(+), 193 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index d3e980a57392..dd7c4364fb9d 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -120,8 +120,8 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size if (error) { // We error pointing to the current value error. 
 			current_errors.push_back({CSVErrorType::TOO_MANY_COLUMNS, cur_col_id, last_position});
+			cur_col_id++;
 		}
-		cur_col_id++;
 		return;
 	}
@@ -552,7 +552,6 @@ bool StringValueResult::AddRowInternal() {
 			    last_position.GetGlobalPosition(requested_size));
 			error_handler.Error(csv_error);
 		}
-
 		// If we are here we ignore_errors, so we delete this line
 		number_of_rows--;
 	}

diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
index b16c2d48df79..6646f4ab98b8 100644
--- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
+++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
@@ -245,8 +245,8 @@ void CSVGlobalState::FillRejectsTable() {
 	InternalAppender errors_appender(context, errors_table);
 	InternalAppender scans_appender(context, scans_table);
 	idx_t scan_idx = context.transaction.GetActiveQuery();
-	idx_t file_idx = 0;
 	for (auto &file : file_scans) {
+		idx_t file_idx = context.transaction.GetIncrementalIndex();
 		auto file_name = file->file_path;
 		auto &errors = file->error_handler->errors;
 		// We first insert the file into the file scans table
@@ -309,7 +309,6 @@ void CSVGlobalState::FillRejectsTable() {
 			rejects->count = 0;
 			FillScanErrorTable(scans_appender, scan_idx, file_idx, *file);
 		}
-		file_idx++;
 	}
 	errors_appender.Close();
 	scans_appender.Close();

diff --git a/src/include/duckdb/transaction/transaction_context.hpp b/src/include/duckdb/transaction/transaction_context.hpp
index b0a50103bb46..b265c0131498 100644
--- a/src/include/duckdb/transaction/transaction_context.hpp
+++ b/src/include/duckdb/transaction/transaction_context.hpp
@@ -48,6 +48,7 @@ class TransactionContext {
 	}

 	idx_t GetActiveQuery();
+	idx_t GetIncrementalIndex();
 	void ResetActiveQuery();
 	void SetActiveQuery(transaction_t query_number);

@@ -56,6 +57,7 @@
 	bool auto_commit;
 	unique_ptr<MetaTransaction> current_transaction;
+	idx_t incremental_index = 0;

 	TransactionContext(const TransactionContext &) = delete;
 };

diff --git a/src/transaction/transaction_context.cpp b/src/transaction/transaction_context.cpp
index 7185a263894b..82d1fa43094f 100644
--- a/src/transaction/transaction_context.cpp
+++ b/src/transaction/transaction_context.cpp
@@ -89,13 +89,19 @@ idx_t TransactionContext::GetActiveQuery() {
 	return current_transaction->GetActiveQuery();
 }

+idx_t TransactionContext::GetIncrementalIndex() {
+	return incremental_index++;
+}
+
 void TransactionContext::ResetActiveQuery() {
+	incremental_index = 0;
 	if (current_transaction) {
 		SetActiveQuery(MAXIMUM_QUERY_ID);
 	}
 }

 void TransactionContext::SetActiveQuery(transaction_t query_number) {
+	incremental_index = 0;
 	if (!current_transaction) {
 		throw InternalException("SetActiveQuery called without active transaction");
 	}
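The transaction change above replaces a manually bumped file_idx with a per-query counter that resets on every new query, so the files of concurrent scans within one query get stable indexes 0, 1, 2, ... A reduced sketch of that counter, with the surrounding TransactionContext plumbing elided:

    #include <cstdint>

    using idx_t = uint64_t;

    // Sketch: per-query incremental index handed out to CSV file scans so
    // that the reject tables can label files within a single query.
    class IncrementalIndex {
    public:
        // Mirrors GetIncrementalIndex() in the diff: post-increment hand-out.
        idx_t Next() {
            return incremental_index++;
        }
        // Mirrors the resets in ResetActiveQuery()/SetActiveQuery().
        void Reset() {
            incremental_index = 0;
        }

    private:
        idx_t incremental_index = 0;
    };

diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test
index 9917965558ba..b537833fd7dd 100644
--- a/test/sql/copy/csv/rejects/csv_rejects_read.test
+++ b/test/sql/copy/csv/rejects/csv_rejects_read.test
@@ -10,101 +10,67 @@ query III rowsort
 SELECT * FROM read_csv(
     'test/sql/copy/csv/data/error/mismatch/bad.csv',
     columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'},
-    rejects_table='csv_rejects_table',
-    ignore_errors=true, auto_detect=false);
+    store_rejects = true, auto_detect=true);
 ----
 1 2 AAA
 6 7 CCC

-query IIIIIII rowsort
-SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position
-FROM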
csv_rejects_table; +query IIIIIIIIII +FROM reject_errors; ---- -test/sql/copy/csv/data/error/mismatch/bad.csv 2 2 "col1" CAST 4,BBB,9, 9 +3 0 2 10 12 2 col1 CAST 4,BBB,9, Error when converting column "col1". Could not convert string "BBB" to 'INTEGER' -query I -SELECT error_message -FROM csv_rejects_table; ----- -:.*Could not convert string "BBB" to 'INTEGER'.* +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; # Test with multiple columns on the same row query III rowsort SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/bad2.csv', columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'INTEGER'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false); + store_rejects = true, auto_detect=false); ---- 4 5 9 -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; ----- -test/sql/copy/csv/data/error/mismatch/bad2.csv 1 3 "col2" CAST 1,2,DDD, 0 -test/sql/copy/csv/data/error/mismatch/bad2.csv 3 1 "col0" CAST EEE,7,FFF, 16 -test/sql/copy/csv/data/error/mismatch/bad2.csv 3 3 "col2" CAST EEE,7,FFF, 16 - -query I -SELECT error_message -FROM csv_rejects_table where line=1 and column_idx=3; +query IIIIIIIIII +FROM reject_errors ORDER BY ALL; ---- -:.*Could not convert string "DDD" to 'INTEGER'.* +7 0 1 1 5 3 col2 CAST 1,2,DDD, Error when converting column "col2". Could not convert string "DDD" to 'INTEGER' +7 0 3 17 17 1 col0 CAST EEE,7,FFF, Error when converting column "col0". Could not convert string "EEE" to 'INTEGER' +7 0 3 17 23 3 col2 CAST EEE,7,FFF, Error when converting column "col2". Could not convert string "FFF" to 'INTEGER' -query I -SELECT error_message -FROM csv_rejects_table where line=3 and column_idx=1; ----- -:.*Could not convert string "EEE" to 'INTEGER'.* - -query I -SELECT error_message -FROM csv_rejects_table where line=3 and column_idx=3; ----- -:.*Could not convert string "FFF" to 'INTEGER'.* +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; # Test with multiple files query III rowsort SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/bad*.csv', columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false); + store_rejects = true, auto_detect=false); ---- 1 2 AAA 1 2 DDD 4 5 9 6 7 CCC - -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII +FROM reject_errors ORDER BY ALL; ---- -test/sql/copy/csv/data/error/mismatch/bad.csv 2 2 "col1" CAST 4,BBB,9, 9 -test/sql/copy/csv/data/error/mismatch/bad2.csv 3 1 "col0" CAST EEE,7,FFF, 16 +11 0 2 10 12 2 col1 CAST 4,BBB,9, Error when converting column "col1". Could not convert string "BBB" to 'INTEGER' +11 1 3 17 17 1 col0 CAST EEE,7,FFF, Error when converting column "col0". 
Could not convert string "EEE" to 'INTEGER' -query I -SELECT error_message -FROM csv_rejects_table where line=2 and column_idx=2; ----- -:.*Could not convert string "BBB" to 'INTEGER'.* - -query I -SELECT error_message -FROM csv_rejects_table where line=3 and column_idx=1; ----- -:.*Could not convert string "EEE" to 'INTEGER'.* +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; # Set limit @@ -112,9 +78,7 @@ query III rowsort SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/bad*.csv', columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, - rejects_table='csv_rejects_table', - rejects_limit=2, - ignore_errors=true, auto_detect=false); + store_rejects = true,rejects_limit=2, ignore_errors=true, auto_detect=false); ---- 1 2 AAA 1 2 DDD @@ -123,121 +87,80 @@ SELECT * FROM read_csv( # We should now only have two errors logged query I -SELECT COUNT(*) FROM csv_rejects_table +SELECT COUNT(*) FROM reject_errors ---- 2 statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; # Try with bigger files query I SELECT SUM(num) FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/big_bad.csv', columns = {'num': 'INTEGER', 'str': 'VARCHAR'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false); + store_rejects = true, auto_detect=false); ---- 4270 -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; ----- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 1 "num" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 1 "num" CAST C, A 20875 - -query I -SELECT error_message -FROM csv_rejects_table where line=2176 and column_idx=1; +query IIIIIIIIII +FROM reject_errors ORDER BY ALL; ---- -:.*Could not convert string "B" to 'INTEGER'.* +19 0 2176 10876 10876 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +19 0 4176 20876 20876 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' -query I -SELECT error_message -FROM csv_rejects_table where line=4176 and column_idx=1; ----- -:.*Could not convert string "C" to 'INTEGER'.* +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; query I SELECT SUM(num) FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/big_bad2.csv', columns = {'num': 'INTEGER', 'str': 'VARCHAR'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false) + store_rejects = true, auto_detect=false) ---- 6774 -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII +FROM reject_errors ORDER BY ALL; ---- -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 1 "num" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 1 "num" CAST C, A 28395 +23 0 3680 18396 18396 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +23 0 5680 28396 28396 1 num CAST C, A Error when converting column "num". 
Could not convert string "C" to 'INTEGER' -query I -SELECT error_message -FROM csv_rejects_table where line=3680 and column_idx=1; ----- -:.*Could not convert string "B" to 'INTEGER'.* -query I -SELECT error_message -FROM csv_rejects_table where line=5680 and column_idx=1; ----- -:.*Could not convert string "C" to 'INTEGER'.* +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; # Test with multiple big files query I SELECT SUM(num) FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/big_*.csv', columns = {'num': 'INTEGER', 'str': 'VARCHAR'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false); + store_rejects = true, auto_detect=false); ---- 11044 -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; ----- -test/sql/copy/csv/data/error/mismatch/big_bad.csv 2176 1 "num" CAST B, A 10875 -test/sql/copy/csv/data/error/mismatch/big_bad.csv 4176 1 "num" CAST C, A 20875 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 3680 1 "num" CAST B, A 18395 -test/sql/copy/csv/data/error/mismatch/big_bad2.csv 5680 1 "num" CAST C, A 28395 - -query I -SELECT error_message -FROM csv_rejects_table where line=3680 and column_idx=1; ----- -:.*Could not convert string "B" to 'INTEGER'.* - -query I -SELECT error_message -FROM csv_rejects_table where line=5680 and column_idx=1; +query IIIIIIIIII +FROM reject_errors ORDER BY ALL; ---- -:.*Could not convert string "C" to 'INTEGER'.* +27 0 2176 10876 10876 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +27 0 4176 20876 20876 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' +27 1 3680 18396 18396 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +27 1 5680 28396 28396 1 num CAST C, A Error when converting column "num". 
Could not convert string "C" to 'INTEGER' -query I -SELECT error_message -FROM csv_rejects_table where line=2176 and column_idx=1; ----- -:.*Could not convert string "B" to 'INTEGER'.* - -query I -SELECT error_message -FROM csv_rejects_table where line=4176 and column_idx=1; ----- -:.*Could not convert string "C" to 'INTEGER'.* +statement ok +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table; +DROP TABLE reject_scans; # Test with multiple rejects table in the same query query IIII rowsort @@ -245,61 +168,36 @@ SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/small1.csv', columns = {'num': 'INTEGER', 'str': 'VARCHAR'}, - rejects_table='csv_rejects_table_left', - ignore_errors=true) as L + store_rejects = true) as L JOIN read_csv( 'test/sql/copy/csv/data/error/mismatch/small2.csv', columns = {'num': 'INTEGER', 'str': 'VARCHAR'}, - rejects_table='csv_rejects_table_right', - ignore_errors=true) as R + store_rejects = true) as R ON L.num = R.num; ---- 1 A 1 A 3 C 3 C -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table_left; ----- -test/sql/copy/csv/data/error/mismatch/small1.csv 3 1 "num" CAST X,Y 14 -test/sql/copy/csv/data/error/mismatch/small1.csv 6 1 "num" CAST X,Y 26 - -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table_right; ----- -test/sql/copy/csv/data/error/mismatch/small2.csv 3 1 "num" CAST X,Y 14 -test/sql/copy/csv/data/error/mismatch/small2.csv 5 1 "num" CAST X,Y 22 - -query I -SELECT error_message -FROM csv_rejects_table_left where line=3 and column_idx=1; ----- -:.*Could not convert string "X" to 'INTEGER'.* - -query I -SELECT error_message -FROM csv_rejects_table_left where line=6 and column_idx=1; +query IIIIIIIIIIIII +FROM reject_scans ORDER BY ALL; ---- -:.*Could not convert string "X" to 'INTEGER'.* +31 0 test/sql/copy/csv/data/error/mismatch/small1.csv , " " \n 0 true {'num': 'INTEGER','str': 'VARCHAR'} NULL NULL store_rejects=true +31 1 test/sql/copy/csv/data/error/mismatch/small2.csv , " " \n 0 true {'num': 'INTEGER','str': 'VARCHAR'} NULL NULL store_rejects=true -query I -SELECT error_message -FROM csv_rejects_table_right where line=3 and column_idx=1; ----- -:.*Could not convert string "X" to 'INTEGER'.* -query I -SELECT error_message -FROM csv_rejects_table_right where line=5 and column_idx=1; +query IIIIIIIIII +FROM reject_errors ORDER BY ALL; ---- -:.*Could not convert string "X" to 'INTEGER'.* +31 0 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +31 0 6 27 27 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +31 1 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +31 1 5 23 23 1 num CAST X,Y Error when converting column "num". 
Could not convert string "X" to 'INTEGER' statement ok -DROP TABLE csv_rejects_table_left; +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table_right; +DROP TABLE reject_scans; # Test with multiple rejects table in the same query, with different limits # (only one reject should be logged in right table) @@ -308,36 +206,28 @@ SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/small1.csv', columns = {'num': 'INTEGER', 'str': 'VARCHAR'}, - rejects_table='csv_rejects_table_left', - ignore_errors=true) as L + store_rejects = true) as L JOIN read_csv( 'test/sql/copy/csv/data/error/mismatch/small2.csv', columns = {'num': 'INTEGER', 'str': 'VARCHAR'}, - rejects_table='csv_rejects_table_right', - rejects_limit=1, - ignore_errors=true) as R + store_rejects = true, rejects_limit=1) as R ON L.num = R.num; ---- 1 A 1 A 3 C 3 C -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table_left; +query IIIIIIIIII +FROM reject_errors ORDER BY ALL; ---- -test/sql/copy/csv/data/error/mismatch/small1.csv 3 1 "num" CAST X,Y 14 -test/sql/copy/csv/data/error/mismatch/small1.csv 6 1 "num" CAST X,Y 26 +36 0 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +36 0 6 27 27 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +36 1 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' -query I -SELECT COUNT(*) -FROM csv_rejects_table_right; ----- -1 statement ok -DROP TABLE csv_rejects_table_left; +DROP TABLE reject_errors; statement ok -DROP TABLE csv_rejects_table_right; +DROP TABLE reject_scans; From 836f84d4ec456474baa34a1ea616a019529b9aab Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 13:11:57 +0200 Subject: [PATCH 069/147] Adjust test --- .../csv/rejects/csv_rejects_two_tables.test | 48 ++++++++----------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test index e9ad454f6052..f50128989810 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test +++ b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test @@ -17,20 +17,18 @@ BIGINT VARCHAR 11044 11044 2 query IIIIIIIIIIIII -SELECT * FROM reject_scans order by all; ---- 3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1 -query IIIIIIIII -SELECT * +query IIIIIIIIII FROM reject_errors order by all; ---- -3 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -3 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +3 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' +3 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +3 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' # Test giving the name of errors table statement error @@ -54,20 +52,18 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M BIGINT VARCHAR 11044 11044 2 query IIIIIIIIIIIII -SELECT * FROM reject_scans order by all; ---- 8 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 8 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 -query IIIIIIIII -SELECT * +query IIIIIIIIII FROM rejects_errors_2 order by all; ---- -8 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -8 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -8 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -8 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +8 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +8 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +8 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +8 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' statement ok drop table reject_errors; @@ -82,21 +78,18 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M BIGINT VARCHAR 11044 11044 2 query IIIIIIIIIIIII -SELECT * FROM rejects_scan_2 order by all; ---- 12 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 12 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 -query IIIIIIIII -SELECT * +query IIIIIIIIII FROM reject_errors order by all; ---- -12 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -12 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -12 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -12 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' - +12 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +12 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +12 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +12 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' # Test giving the name of both tables query IIIII @@ -116,14 +109,13 @@ FROM rejects_scan_3 order by all; 15 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_3', rejects_scan='rejects_scan_3', sample_size=1 15 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_3', rejects_scan='rejects_scan_3', sample_size=1 -query IIIIIIIII -SELECT * +query IIIIIIIIII FROM rejects_errors_3 order by all; ---- -15 0 2176 10875 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -15 0 4176 20875 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -15 1 3680 18395 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -15 1 5680 28395 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +15 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +15 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +15 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +15 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' statement ok drop table reject_errors; From a6924b638a0851cbf4ef9909106500c8769840d4 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 13:13:24 +0200 Subject: [PATCH 070/147] Small message adjustment --- test/sql/copy/csv/rejects/test_invalid_parameters.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/sql/copy/csv/rejects/test_invalid_parameters.test b/test/sql/copy/csv/rejects/test_invalid_parameters.test index 9325f3780f24..950337a90ebb 100644 --- a/test/sql/copy/csv/rejects/test_invalid_parameters.test +++ b/test/sql/copy/csv/rejects/test_invalid_parameters.test @@ -12,10 +12,10 @@ SELECT * FROM read_csv( 'test/sql/copy/csv/data/error/mismatch/bad.csv', columns = {'col0': 'INTEGER', 'col1': 'INTEGER', 'col2': 'VARCHAR'}, ignore_errors=false, - rejects_table='csv_rejects_table' + store_rejects=true ) ---- -only supported when IGNORE_ERRORS is set to true +STORE_REJECTS option is only supported when IGNORE_ERRORS is not manually set to false statement error SELECT * FROM read_csv( @@ -63,7 +63,7 @@ SELECT * FROM read_csv_auto( rejects_table='csv_rejects_table' ) ---- -only supported when IGNORE_ERRORS is set to true +option is only supported when IGNORE_ERRORS is not manually set to false statement error SELECT * FROM read_csv_auto( From 5f2883ff4e8d576d9698344c8e9309be2c754d66 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 13:15:48 +0200 Subject: [PATCH 071/147] Adjustment to utf rejects --- .../csv/rejects/test_invalid_utf_rejects.test | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test index 94c56cc71562..e579648f8794 100644 --- a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test +++ b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test @@ -9,13 +9,12 @@ require notwindows statement ok from 
read_csv('test/sql/copy/csv/data/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, - auto_detect=false, rejects_table='csv_rejects_table', header = 0, delim = ',', ignore_errors=true) + auto_detect=false, header = 0, delim = ',', store_rejects=true) -query IIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position -FROM csv_rejects_table; +query IIIIIIIIII rowsort +FROM reject_errors ORDER BY ALL; ---- -test/sql/copy/csv/data/test/invalid_utf_big.csv 3001 2 "col2" INVALID UNICODE valid,invalid_??_part,valid 54000 -test/sql/copy/csv/data/test/invalid_utf_big.csv 3012 3 "col3" INVALID UNICODE valid,valid,invalid_??_part 54208 -test/sql/copy/csv/data/test/invalid_utf_big.csv 3023 2 "col2" INVALID UNICODE valid,invalid_??_part,valid 54416 -test/sql/copy/csv/data/test/invalid_utf_big.csv 3034 3 "col3" INVALID UNICODE valid,valid,invalid_??_part 54624 \ No newline at end of file +3 0 3001 54001 54007 2 col2 INVALID UNICODE valid,invalid_??_part,valid Invalid unicode (byte sequence mismatch) detected. +3 0 3012 54209 54221 3 col3 INVALID UNICODE valid,valid,invalid_??_part Invalid unicode (byte sequence mismatch) detected. +3 0 3023 54417 54423 2 col2 INVALID UNICODE valid,invalid_??_part,valid Invalid unicode (byte sequence mismatch) detected. +3 0 3034 54625 54637 3 col3 INVALID UNICODE valid,valid,invalid_??_part Invalid unicode (byte sequence mismatch) detected. \ No newline at end of file From 1ae2d3c66dd6a847bcec18a5f10a9126924b8e4a Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 13:27:13 +0200 Subject: [PATCH 072/147] More adjustments --- test/sql/copy/csv/rejects/test_mixed.test | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/test/sql/copy/csv/rejects/test_mixed.test b/test/sql/copy/csv/rejects/test_mixed.test index 45001a5e4b05..e5ced0ea3ae8 100644 --- a/test/sql/copy/csv/rejects/test_mixed.test +++ b/test/sql/copy/csv/rejects/test_mixed.test @@ -11,8 +11,7 @@ query III SELECT * FROM read_csv( 'data/csv/rejects/frankstein/nightmare.csv', columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'VARCHAR'}, - rejects_table='csv_rejects_table', - ignore_errors=true, auto_detect=false, header = 1, max_line_size=20); + store_rejects = true, auto_detect=false, header = 1, max_line_size=20); ---- 1 2 pedro 1 2 pedro @@ -56,13 +55,12 @@ SELECT * FROM read_csv( 1 2 pedro 1 2 pedro -query IIIIIIII rowsort -SELECT regexp_replace(file, '\\', '/', 'g'), line, column_idx, column_name, error_type, csv_line, byte_position, error_message -FROM csv_rejects_table; +query IIIIIIIIII rowsort +FROM reject_errors ORDER BY ALL; ---- -data/csv/rejects/frankstein/nightmare.csv 10 2 "c" MISSING COLUMNS 1,2 102 Expected Number of Columns: 3 Found: 2 -data/csv/rejects/frankstein/nightmare.csv 14 4 NULL TOO MANY COLUMNS 1,2,"pedro",5 142 Expected Number of Columns: 3 Found: 4 -data/csv/rejects/frankstein/nightmare.csv 19 2 "b" CAST 1,bla,"pedro" 204 Error when converting column "b". Could not convert string "bla" to 'INTEGER' -data/csv/rejects/frankstein/nightmare.csv 22 3 "c" UNQUOTED VALUE 1,2,"pedro"bla 242 Value with unterminated quote found. -data/csv/rejects/frankstein/nightmare.csv 32 1 "a" LINE SIZE OVER MAXIMUM 1,2,"pedro thiago timbo holanda" 365 Maximum line size of 20 bytes exceeded. Actual Size:33 bytes. -data/csv/rejects/frankstein/nightmare.csv 38 3 "c" INVALID UNICODE 1,2,"pedro??" 458 Invalid unicode (byte sequence mismatch) detected. 
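
A minimal follow-up sketch, not part of this patch: the single ten-column scan above replaces the old per-error regex probes, so summaries now come from plain SQL over reject_errors. The column names used here (error_type, column_name) are assumed to carry over from the old csv_rejects_table schema queried earlier in this file:

    -- group rejects by error class; column_name is NULL when no single
    -- column can be blamed (e.g. TOO MANY COLUMNS)
    SELECT error_type, count(*) AS errors,
           count(column_name) AS with_column
    FROM reject_errors
    GROUP BY error_type
    ORDER BY ALL;
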
\ No newline at end of file +3 0 10 103 106 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 3 Found: 2 +3 0 14 143 154 4 NULL TOO MANY COLUMNS 1,2,"pedro",5 Expected Number of Columns: 3 Found: 4 +3 0 19 205 207 2 b CAST 1,bla,"pedro" Error when converting column "b". Could not convert string "bla" to 'INTEGER' +3 0 22 243 247 3 c UNQUOTED VALUE 1,2,"pedro"bla Value with unterminated quote found. +3 0 32 366 366 1 a LINE SIZE OVER MAXIMUM 1,2,"pedro thiago timbo holanda" Maximum line size of 20 bytes exceeded. Actual Size:33 bytes. +3 0 38 459 463 3 c INVALID UNICODE 1,2,"pedro??" Invalid unicode (byte sequence mismatch) detected. \ No newline at end of file From e58c90b1013e97653d75bbbbff2c7f219dca60aa Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 15:36:48 +0200 Subject: [PATCH 073/147] Bunch of tests for multiple error return on borked lines --- data/csv/rejects/multiple_errors.csv | 8 ++ .../multiple_errors/cast_and_less_col.csv | 5 + .../multiple_errors/cast_and_more_col.csv | 5 + .../multiple_cast_implicit.csv | 4 + .../multiple_errors/multiple_casts_flush.csv | 4 + .../multiple_errors/multiple_casts_mixed.csv | 4 + .../scanner/string_value_scanner.cpp | 15 +- .../test_multiple_errors_same_line.test | 131 ++++++++++++++++++ 8 files changed, 174 insertions(+), 2 deletions(-) create mode 100644 data/csv/rejects/multiple_errors.csv create mode 100644 data/csv/rejects/multiple_errors/cast_and_less_col.csv create mode 100644 data/csv/rejects/multiple_errors/cast_and_more_col.csv create mode 100644 data/csv/rejects/multiple_errors/multiple_cast_implicit.csv create mode 100644 data/csv/rejects/multiple_errors/multiple_casts_flush.csv create mode 100644 data/csv/rejects/multiple_errors/multiple_casts_mixed.csv create mode 100644 test/sql/copy/csv/rejects/test_multiple_errors_same_line.test diff --git a/data/csv/rejects/multiple_errors.csv b/data/csv/rejects/multiple_errors.csv new file mode 100644 index 000000000000..6d10e51c3cf5 --- /dev/null +++ b/data/csv/rejects/multiple_errors.csv @@ -0,0 +1,8 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boogie,3, 2023-01-02, 5 +oogie boogie,3, 2023-01-03, bla, 7 +oogie boogie,3, bla, bla, 7 +oogie boogie,3, 2023-01-04, 8 +oogie boogie,3, bla +oogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogie,3, bla diff --git a/data/csv/rejects/multiple_errors/cast_and_less_col.csv b/data/csv/rejects/multiple_errors/cast_and_less_col.csv new file mode 100644 index 000000000000..25f4dfe159ed --- /dev/null +++ b/data/csv/rejects/multiple_errors/cast_and_less_col.csv @@ -0,0 +1,5 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boogie,3, 2023-01-02, 5 +oogie boogie,bla, 2023-01-03 +oogie boogie,bla diff --git a/data/csv/rejects/multiple_errors/cast_and_more_col.csv b/data/csv/rejects/multiple_errors/cast_and_more_col.csv new file mode 100644 index 000000000000..4e5a7d2321d3 --- /dev/null +++ b/data/csv/rejects/multiple_errors/cast_and_more_col.csv @@ -0,0 +1,5 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boogie,3, 2023-01-02, 5 +oogie boogie,3, 2023-01-03, bla, 7 +oogie boogie,3, 2023-01-03, bla, 7, 8 diff --git a/data/csv/rejects/multiple_errors/multiple_cast_implicit.csv b/data/csv/rejects/multiple_errors/multiple_cast_implicit.csv new file mode 100644 index 000000000000..26ad443f6fdc --- /dev/null +++ b/data/csv/rejects/multiple_errors/multiple_cast_implicit.csv @@ -0,0 +1,4 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 
+oogie boogie,3, 2023-01-02, 5 +oogie boogie,bla_2, 2023-01-02, bla_1 \ No newline at end of file diff --git a/data/csv/rejects/multiple_errors/multiple_casts_flush.csv b/data/csv/rejects/multiple_errors/multiple_casts_flush.csv new file mode 100644 index 000000000000..21d7a7b58b54 --- /dev/null +++ b/data/csv/rejects/multiple_errors/multiple_casts_flush.csv @@ -0,0 +1,4 @@ +name,age,current_day, tomorrow +oogie boogie,3, 2023-01-01, 2023-01-02 +oogie boogie,3, 2023-01-02, 2023-01-03 +oogie boogie,3, bla_2, bla_1 \ No newline at end of file diff --git a/data/csv/rejects/multiple_errors/multiple_casts_mixed.csv b/data/csv/rejects/multiple_errors/multiple_casts_mixed.csv new file mode 100644 index 000000000000..3931dbb1821b --- /dev/null +++ b/data/csv/rejects/multiple_errors/multiple_casts_mixed.csv @@ -0,0 +1,4 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boogie,3, 2023-01-02, 5 +oogie boogie,3, bla_2, bla_1 \ No newline at end of file diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index dd7c4364fb9d..0a0b286b4671 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -365,6 +365,7 @@ bool StringValueResult::HandleError() { switch (cur_error.type) { case CSVErrorType::TOO_MANY_COLUMNS: + case CSVErrorType::TOO_FEW_COLUMNS: if (current_line_position.begin == line_pos) { csv_error = CSVError::IncorrectColumnAmountError( state_machine.options, col_idx, lines_per_batch, borked_line, @@ -503,7 +504,15 @@ bool StringValueResult::AddRowInternal() { CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch, borked_line, current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); error_handler.Error(csv_error); - number_of_rows--; + if (number_of_rows > 0) { + number_of_rows--; + } + } + if (!current_errors.empty()) { + // We need to add a few columns error + for (idx_t col_idx = cur_col_id; col_idx < number_of_columns; col_idx++) { + current_errors.push_back({CSVErrorType::TOO_FEW_COLUMNS, col_idx - 1, last_position}); + } } if (HandleError()) { return false; @@ -553,7 +562,9 @@ bool StringValueResult::AddRowInternal() { error_handler.Error(csv_error); } // If we are here we ignore_errors, so we delete this line - number_of_rows--; + if (number_of_rows > 0) { + number_of_rows--; + } } } line_positions_per_row[number_of_rows] = current_line_position; diff --git a/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test new file mode 100644 index 000000000000..22ed108fee20 --- /dev/null +++ b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test @@ -0,0 +1,131 @@ +# name: test/sql/copy/csv/rejects/test_multiple_errors_same_line.test +# description: Tests a mix of multiple errors and validate they get hit +# group: [rejects] + +require skip_reload + +# Test will fail on windows because byte_position is slightly different due to \r\n instead of \n +require notwindows + +#query IIII +#FROM read_csv('data/csv/rejects/multiple_errors/cast_and_more_col.csv', +# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, +# store_rejects = true, auto_detect=false, header = 1); +#---- +#oogie boogie 3 2023-01-01 2 +#oogie boogie 3 2023-01-02 5 +# +#query IIIIIIIII rowsort +#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; 
+#---- +#0 4 89 116 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER' +#0 4 89 120 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7 Expected Number of Columns: 4 Found: 5 +#0 5 124 151 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7, 8 Error when converting column "barks". Could not convert string " bla" to 'INTEGER' +#0 5 124 155 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7, 8 Expected Number of Columns: 4 Found: 5 +#0 5 124 158 6 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7, 8 Expected Number of Columns: 4 Found: 6 +# +#statement ok +#DROP TABLE reject_errors; +# +#statement ok +#DROP TABLE reject_scans; +# +#query IIII +#FROM read_csv('data/csv/rejects/multiple_errors/multiple_cast_implicit.csv', +# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, +# store_rejects = true, auto_detect=false, header = 1); +#---- +#oogie boogie 3 2023-01-01 2 +#oogie boogie 3 2023-01-02 5 +# +#query IIIIIIIII rowsort +#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +#---- +#0 4 89 102 2 age CAST oogie boogie,bla_2, 2023-01-02, bla_1 Error when converting column "age". Could not convert string "bla_2" to 'INTEGER' +#0 4 89 120 4 barks CAST oogie boogie,bla_2, 2023-01-02, bla_1 Error when converting column "barks". Could not convert string " bla_1" to 'INTEGER' +# +#statement ok +#DROP TABLE reject_errors; +# +#statement ok +#DROP TABLE reject_scans; +# +#query IIII +#FROM read_csv('data/csv/rejects/multiple_errors/multiple_casts_flush.csv', +# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'tomorrow': 'DATE'}, +# store_rejects = true, auto_detect=false, header = 1); +#---- +#oogie boogie 3 2023-01-01 2023-01-02 +#oogie boogie 3 2023-01-02 2023-01-03 +# +#query IIIIIIIII rowsort +#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +#---- +#0 4 110 NULL 3 current_day CAST oogie boogie,3, bla_2, bla_1 Error when converting column "current_day". date field value out of range: " bla_2", expected format is (YYYY-MM-DD) +#0 4 110 NULL 4 tomorrow CAST oogie boogie,3, bla_2, bla_1 Error when converting column "tomorrow". date field value out of range: " bla_1", expected format is (YYYY-MM-DD) +# +#statement ok +#DROP TABLE reject_errors; +# +#statement ok +#DROP TABLE reject_scans; +# +#query IIII +#FROM read_csv('data/csv/rejects/multiple_errors/multiple_casts_mixed.csv', +# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, +# store_rejects = true, auto_detect=false, header = 1); +#---- +#oogie boogie 3 2023-01-01 2 +#oogie boogie 3 2023-01-02 5 +# +## FIXME: This will not present the both cast errors :'(, should be alleviated the more types we add to implicit casting +#query IIIIIIIII rowsort +#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +#---- +#0 4 89 111 4 barks CAST oogie boogie,3, bla_2, bla_1 Error when converting column "barks". 
Could not convert string " bla_1" to 'INTEGER' +# +#statement ok +#DROP TABLE reject_errors; +# +#statement ok +#DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/cast_and_less_col.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 5 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +---- +0 4 89 102 2 age CAST oogie boogie,bla, 2023-01-03 Error when converting column "age". Could not convert string "bla" to 'INTEGER' +0 4 89 117 3 barks MISSING COLUMNS oogie boogie,bla, 2023-01-03 Expected Number of Columns: 4 Found: 3 +0 5 118 131 2 age CAST oogie boogie,bla Error when converting column "age". Could not convert string "bla" to 'INTEGER' +0 5 118 134 2 current_day MISSING COLUMNS oogie boogie,bla Expected Number of Columns: 4 Found: 2 +0 5 118 134 3 barks MISSING COLUMNS oogie boogie,bla Expected Number of Columns: 4 Found: 3 + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +#query IIII +#FROM read_csv('data/csv/rejects/multiple_errors.csv', +# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, +# store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +#---- +#oogie boogie 3 2023-01-01 2 +#oogie boogie 3 2023-01-02 5 +# +#query IIIIIIIIII rowsort +#FROM reject_errors ORDER BY ALL; +#---- +#3 0 4 89 116 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER' +#3 0 4 89 120 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7 Expected Number of Columns: 4 Found: 5 +#3 0 5 124 144 4 barks CAST oogie boogie,3, bla, bla, 7 Error when converting column "barks". 
Could not convert string " bla" to 'INTEGER' +#3 0 5 124 148 5 NULL TOO MANY COLUMNS oogie boogie,3, bla, bla, 7 Expected Number of Columns: 4 Found: 5 +#3 0 6 152 171 3 barks MISSING COLUMNS oogie boogie,3, bla Expected Number of Columns: 4 Found: 3 \ No newline at end of file From d8575d835221e2375b5f7f5bcaf2d9d188b0d9d7 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 16:33:46 +0200 Subject: [PATCH 074/147] More tests on maxlinesize and some fixes --- .../multiple_errors/cast_and_maxline.csv | 4 + .../multiple_errors/less_col_and_max_line.csv | 4 + .../multiple_errors/more_col_and_max_line.csv | 4 + .../scanner/string_value_scanner.cpp | 22 +- .../table_function/global_csv_state.cpp | 7 +- .../csv_scanner/string_value_scanner.hpp | 1 + .../csv/rejects/csv_rejects_maximum_line.test | 18 +- test/sql/copy/csv/rejects/test_mixed.test | 2 +- .../test_multiple_errors_same_line.test | 226 +++++++++++------- 9 files changed, 181 insertions(+), 107 deletions(-) create mode 100644 data/csv/rejects/multiple_errors/cast_and_maxline.csv create mode 100644 data/csv/rejects/multiple_errors/less_col_and_max_line.csv create mode 100644 data/csv/rejects/multiple_errors/more_col_and_max_line.csv diff --git a/data/csv/rejects/multiple_errors/cast_and_maxline.csv b/data/csv/rejects/multiple_errors/cast_and_maxline.csv new file mode 100644 index 000000000000..e4e871e59462 --- /dev/null +++ b/data/csv/rejects/multiple_errors/cast_and_maxline.csv @@ -0,0 +1,4 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boogie,3, 2023-01-02, 5 +oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03, 4 diff --git a/data/csv/rejects/multiple_errors/less_col_and_max_line.csv b/data/csv/rejects/multiple_errors/less_col_and_max_line.csv new file mode 100644 index 000000000000..bb3dbe4dfc74 --- /dev/null +++ b/data/csv/rejects/multiple_errors/less_col_and_max_line.csv @@ -0,0 +1,4 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boogie,3, 2023-01-02, 5 +oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03 diff --git a/data/csv/rejects/multiple_errors/more_col_and_max_line.csv b/data/csv/rejects/multiple_errors/more_col_and_max_line.csv new file mode 100644 index 000000000000..27366cd56e5c --- /dev/null +++ b/data/csv/rejects/multiple_errors/more_col_and_max_line.csv @@ -0,0 +1,4 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boogie,3, 2023-01-02, 5 +oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03,4, bla diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 0a0b286b4671..acade767b83e 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -424,7 +424,11 @@ bool StringValueResult::HandleError() { current_line_position.begin.GetGlobalPosition(requested_size, first_nl), line_pos.GetGlobalPosition(requested_size), parse_types[cur_error.col_idx].first); } - + break; + case CSVErrorType::MAXIMUM_LINE_SIZE: + csv_error = CSVError::LineSizeError( + state_machine.options, cur_error.current_line_size, lines_per_batch, borked_line, + current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); break; default: throw InvalidInputException("CSV Error not allowed when inserting row"); @@ -497,16 +501,8 @@ bool StringValueResult::AddRowInternal() { current_line_position.begin = current_line_position.end; 
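	// Annotation, not part of this patch: the hunk below stops reporting an
	// oversized line immediately. AddRowInternal() now queues a
	// CSVErrorType::MAXIMUM_LINE_SIZE entry in current_errors (with the line
	// size stashed on the entry), and the HandleError() case added earlier in
	// this diff builds the CSVError::LineSizeError from it, so a too-long row
	// is flushed together with any other errors found on that same row.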
current_line_position.end = current_line_start; if (current_line_size > state_machine.options.maximum_line_size) { - bool first_nl; - auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles); - LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read); - auto csv_error = - CSVError::LineSizeError(state_machine.options, current_line_size, lines_per_batch, borked_line, - current_line_position.begin.GetGlobalPosition(requested_size, first_nl)); - error_handler.Error(csv_error); - if (number_of_rows > 0) { - number_of_rows--; - } + current_errors.push_back({CSVErrorType::MAXIMUM_LINE_SIZE, 1, last_position}); + current_errors.back().current_line_size = current_line_size; } if (!current_errors.empty()) { // We need to add a few columns error @@ -562,9 +558,7 @@ bool StringValueResult::AddRowInternal() { error_handler.Error(csv_error); } // If we are here we ignore_errors, so we delete this line - if (number_of_rows > 0) { - number_of_rows--; - } + number_of_rows--; } } line_positions_per_row[number_of_rows] = current_line_position; diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 6646f4ab98b8..b156f0af8b95 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -282,10 +282,15 @@ void CSVGlobalState::FillRejectsTable() { errors_appender.Append(error.byte_position + 1); } // 6. Column Index - errors_appender.Append(col_idx + 1); + if (error.type == CSVErrorType::MAXIMUM_LINE_SIZE) { + errors_appender.Append(Value()); + } else { + errors_appender.Append(col_idx + 1); + } // 7. Column Name (If Applicable) switch (error.type) { case CSVErrorType::TOO_MANY_COLUMNS: + case CSVErrorType::MAXIMUM_LINE_SIZE: errors_appender.Append(Value()); break; case CSVErrorType::TOO_FEW_COLUMNS: diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 0039f9ade5b0..7f4e6d8f017f 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -74,6 +74,7 @@ class CurrentError { CSVErrorType type; idx_t col_idx; + idx_t current_line_size; string error_message; //! Exact Position where the error happened LinePosition error_position; diff --git a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test index 1095a90d70f8..99cedc820614 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test +++ b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test @@ -16,7 +16,7 @@ SELECT * FROM read_csv( query IIIIIIIIII FROM reject_errors order by all; ---- -3 0 5 23 23 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. +3 0 5 23 23 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. statement ok DROP TABLE reject_errors; @@ -37,7 +37,7 @@ SELECT * FROM read_csv( query IIIIIIIII SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -0 5 23 23 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. +0 5 23 23 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. 
Actual Size:19 bytes. statement ok DROP TABLE reject_errors; @@ -57,9 +57,9 @@ SELECT * FROM read_csv( query IIIIIIIIII FROM reject_errors order by all; ---- -27 0 2282 13685 13685 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. -27 0 2591 15558 15558 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. -27 0 2923 17569 17569 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. +27 0 2282 13685 13685 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. +27 0 2591 15558 15558 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. +27 0 2923 17569 17569 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. statement ok DROP TABLE reject_errors; @@ -77,10 +77,10 @@ SELECT * FROM read_csv( query IIIIIIIIII FROM reject_errors order by all; ---- -31 0 5 23 23 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. -31 1 2282 13685 13685 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. -31 1 2591 15558 15558 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. -31 1 2923 17569 17569 1 a LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +31 0 5 23 23 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. +31 1 2282 13685 13685 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +31 1 2591 15558 15558 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +31 1 2923 17569 17569 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. statement ok DROP TABLE reject_errors; diff --git a/test/sql/copy/csv/rejects/test_mixed.test b/test/sql/copy/csv/rejects/test_mixed.test index e5ced0ea3ae8..d1f9b1decedc 100644 --- a/test/sql/copy/csv/rejects/test_mixed.test +++ b/test/sql/copy/csv/rejects/test_mixed.test @@ -62,5 +62,5 @@ FROM reject_errors ORDER BY ALL; 3 0 14 143 154 4 NULL TOO MANY COLUMNS 1,2,"pedro",5 Expected Number of Columns: 3 Found: 4 3 0 19 205 207 2 b CAST 1,bla,"pedro" Error when converting column "b". Could not convert string "bla" to 'INTEGER' 3 0 22 243 247 3 c UNQUOTED VALUE 1,2,"pedro"bla Value with unterminated quote found. -3 0 32 366 366 1 a LINE SIZE OVER MAXIMUM 1,2,"pedro thiago timbo holanda" Maximum line size of 20 bytes exceeded. Actual Size:33 bytes. +3 0 32 366 366 NULL NULL LINE SIZE OVER MAXIMUM 1,2,"pedro thiago timbo holanda" Maximum line size of 20 bytes exceeded. Actual Size:33 bytes. 3 0 38 459 463 3 c INVALID UNICODE 1,2,"pedro??" Invalid unicode (byte sequence mismatch) detected. 
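
One behavioral change in the expectations above is easy to miss: a LINE SIZE OVER MAXIMUM reject is no longer attributed to column 1 but now carries NULL for both the column index and the column name. A short sketch of filtering on that, assuming only table and column names already used in these tests:

    -- line-size rejects have no per-column attribution
    SELECT line, csv_line, error_message
    FROM reject_errors
    WHERE error_type = 'LINE SIZE OVER MAXIMUM'
      AND column_idx IS NULL;
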
\ No newline at end of file diff --git a/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test index 22ed108fee20..9c2243435322 100644 --- a/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test +++ b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test @@ -7,88 +7,88 @@ require skip_reload # Test will fail on windows because byte_position is slightly different due to \r\n instead of \n require notwindows -#query IIII -#FROM read_csv('data/csv/rejects/multiple_errors/cast_and_more_col.csv', -# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, -# store_rejects = true, auto_detect=false, header = 1); -#---- -#oogie boogie 3 2023-01-01 2 -#oogie boogie 3 2023-01-02 5 -# -#query IIIIIIIII rowsort -#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; -#---- -#0 4 89 116 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER' -#0 4 89 120 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7 Expected Number of Columns: 4 Found: 5 -#0 5 124 151 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7, 8 Error when converting column "barks". Could not convert string " bla" to 'INTEGER' -#0 5 124 155 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7, 8 Expected Number of Columns: 4 Found: 5 -#0 5 124 158 6 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7, 8 Expected Number of Columns: 4 Found: 6 -# -#statement ok -#DROP TABLE reject_errors; -# -#statement ok -#DROP TABLE reject_scans; -# -#query IIII -#FROM read_csv('data/csv/rejects/multiple_errors/multiple_cast_implicit.csv', -# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, -# store_rejects = true, auto_detect=false, header = 1); -#---- -#oogie boogie 3 2023-01-01 2 -#oogie boogie 3 2023-01-02 5 -# -#query IIIIIIIII rowsort -#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; -#---- -#0 4 89 102 2 age CAST oogie boogie,bla_2, 2023-01-02, bla_1 Error when converting column "age". Could not convert string "bla_2" to 'INTEGER' -#0 4 89 120 4 barks CAST oogie boogie,bla_2, 2023-01-02, bla_1 Error when converting column "barks". Could not convert string " bla_1" to 'INTEGER' -# -#statement ok -#DROP TABLE reject_errors; -# -#statement ok -#DROP TABLE reject_scans; -# -#query IIII -#FROM read_csv('data/csv/rejects/multiple_errors/multiple_casts_flush.csv', -# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'tomorrow': 'DATE'}, -# store_rejects = true, auto_detect=false, header = 1); -#---- -#oogie boogie 3 2023-01-01 2023-01-02 -#oogie boogie 3 2023-01-02 2023-01-03 -# -#query IIIIIIIII rowsort -#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; -#---- -#0 4 110 NULL 3 current_day CAST oogie boogie,3, bla_2, bla_1 Error when converting column "current_day". date field value out of range: " bla_2", expected format is (YYYY-MM-DD) -#0 4 110 NULL 4 tomorrow CAST oogie boogie,3, bla_2, bla_1 Error when converting column "tomorrow". 
date field value out of range: " bla_1", expected format is (YYYY-MM-DD) -# -#statement ok -#DROP TABLE reject_errors; -# -#statement ok -#DROP TABLE reject_scans; -# -#query IIII -#FROM read_csv('data/csv/rejects/multiple_errors/multiple_casts_mixed.csv', -# columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, -# store_rejects = true, auto_detect=false, header = 1); -#---- -#oogie boogie 3 2023-01-01 2 -#oogie boogie 3 2023-01-02 5 -# -## FIXME: This will not present the both cast errors :'(, should be alleviated the more types we add to implicit casting -#query IIIIIIIII rowsort -#SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; -#---- -#0 4 89 111 4 barks CAST oogie boogie,3, bla_2, bla_1 Error when converting column "barks". Could not convert string " bla_1" to 'INTEGER' -# -#statement ok -#DROP TABLE reject_errors; -# -#statement ok -#DROP TABLE reject_scans; +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/cast_and_more_col.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 5 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +---- +0 4 89 116 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER' +0 4 89 120 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7 Expected Number of Columns: 4 Found: 5 +0 5 124 151 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7, 8 Error when converting column "barks". Could not convert string " bla" to 'INTEGER' +0 5 124 155 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7, 8 Expected Number of Columns: 4 Found: 5 +0 5 124 158 6 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7, 8 Expected Number of Columns: 4 Found: 6 + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/multiple_cast_implicit.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 5 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +---- +0 4 89 102 2 age CAST oogie boogie,bla_2, 2023-01-02, bla_1 Error when converting column "age". Could not convert string "bla_2" to 'INTEGER' +0 4 89 120 4 barks CAST oogie boogie,bla_2, 2023-01-02, bla_1 Error when converting column "barks". Could not convert string " bla_1" to 'INTEGER' + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/multiple_casts_flush.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'tomorrow': 'DATE'}, + store_rejects = true, auto_detect=false, header = 1); +---- +oogie boogie 3 2023-01-01 2023-01-02 +oogie boogie 3 2023-01-02 2023-01-03 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +---- +0 4 110 NULL 3 current_day CAST oogie boogie,3, bla_2, bla_1 Error when converting column "current_day". date field value out of range: " bla_2", expected format is (YYYY-MM-DD) +0 4 110 NULL 4 tomorrow CAST oogie boogie,3, bla_2, bla_1 Error when converting column "tomorrow". 
date field value out of range: " bla_1", expected format is (YYYY-MM-DD) + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/multiple_casts_mixed.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 5 + +# FIXME: This will not present the both cast errors :'(, should be alleviated the more types we add to implicit casting +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +---- +0 4 89 111 4 barks CAST oogie boogie,3, bla_2, bla_1 Error when converting column "barks". Could not convert string " bla_1" to 'INTEGER' + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; query IIII FROM read_csv('data/csv/rejects/multiple_errors/cast_and_less_col.csv', @@ -113,6 +113,68 @@ DROP TABLE reject_errors; statement ok DROP TABLE reject_scans; +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/cast_and_maxline.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 5 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; +---- +0 4 89 138 2 age CAST oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03, 4 Error when converting column "age". Could not convert string "bla" to 'INTEGER' +0 4 89 89 NULL NULL LINE SIZE OVER MAXIMUM oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03, 4 Maximum line size of 40 bytes exceeded. Actual Size:68 bytes. + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/less_col_and_max_line.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 5 + +query IIIIIIIII +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position; +---- +0 4 89 89 NULL NULL LINE SIZE OVER MAXIMUM oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03 Maximum line size of 40 bytes exceeded. Actual Size:65 bytes. +0 4 89 138 2 age CAST oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03 Error when converting column "age". Could not convert string "bla" to 'INTEGER' +0 4 89 153 3 barks MISSING COLUMNS oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03 Expected Number of Columns: 4 Found: 3 + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/more_col_and_max_line.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 5 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position; +---- +0 4 89 138 2 age CAST oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03,4, bla Error when converting column "age". 
Could not convert string "bla" to 'INTEGER' +0 4 89 155 5 NULL TOO MANY COLUMNS oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03,4, bla Expected Number of Columns: 4 Found: 5 +0 4 89 89 NULL NULL LINE SIZE OVER MAXIMUM oogie boogieoogie boogieoogie boogieoogie boogie,bla, 2023-01-03,4, bla Maximum line size of 40 bytes exceeded. Actual Size:72 bytes. + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + #query IIII #FROM read_csv('data/csv/rejects/multiple_errors.csv', # columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, From 0cfb19acc9e040fa3293b9ad63852ffcbeeb2059 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 16:46:46 +0200 Subject: [PATCH 075/147] Adding unquoted mix tests --- .../rejects/multiple_errors/unquoted_cast.csv | 5 ++ .../rejects/multiple_errors/unquoted_less.csv | 5 ++ .../multiple_errors/unquoted_maxline.csv | 5 ++ .../rejects/multiple_errors/unquoted_more.csv | 5 ++ .../test_multiple_errors_same_line.test | 82 +++++++++++++++++++ 5 files changed, 102 insertions(+) create mode 100644 data/csv/rejects/multiple_errors/unquoted_cast.csv create mode 100644 data/csv/rejects/multiple_errors/unquoted_less.csv create mode 100644 data/csv/rejects/multiple_errors/unquoted_maxline.csv create mode 100644 data/csv/rejects/multiple_errors/unquoted_more.csv diff --git a/data/csv/rejects/multiple_errors/unquoted_cast.csv b/data/csv/rejects/multiple_errors/unquoted_cast.csv new file mode 100644 index 000000000000..9cb8bf160c8c --- /dev/null +++ b/data/csv/rejects/multiple_errors/unquoted_cast.csv @@ -0,0 +1,5 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +"oogie boogie"bla,bla, 2023-01-02, 5 +oogie boogie,3, 2023-01-02, 7 + diff --git a/data/csv/rejects/multiple_errors/unquoted_less.csv b/data/csv/rejects/multiple_errors/unquoted_less.csv new file mode 100644 index 000000000000..5cd602581222 --- /dev/null +++ b/data/csv/rejects/multiple_errors/unquoted_less.csv @@ -0,0 +1,5 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +"oogie boogie"bla,4, 2023-01-02 +oogie boogie,3, 2023-01-02, 7 + diff --git a/data/csv/rejects/multiple_errors/unquoted_maxline.csv b/data/csv/rejects/multiple_errors/unquoted_maxline.csv new file mode 100644 index 000000000000..1dc1f8f2d505 --- /dev/null +++ b/data/csv/rejects/multiple_errors/unquoted_maxline.csv @@ -0,0 +1,5 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +"oogie boogieoogie boogieoogie boogieoogie boogie"bla,4, 2023-01-02, 5 +oogie boogie,3, 2023-01-02, 7 + diff --git a/data/csv/rejects/multiple_errors/unquoted_more.csv b/data/csv/rejects/multiple_errors/unquoted_more.csv new file mode 100644 index 000000000000..051e8cc90b86 --- /dev/null +++ b/data/csv/rejects/multiple_errors/unquoted_more.csv @@ -0,0 +1,5 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +"oogie boogie"bla,4, 2023-01-02, 5, 8 +oogie boogie,3, 2023-01-02, 7 + diff --git a/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test index 9c2243435322..112e7e0e2575 100644 --- a/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test +++ b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test @@ -175,6 +175,88 @@ DROP TABLE reject_errors; statement ok DROP TABLE reject_scans; +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/unquoted_cast.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 
'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 7 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position; +---- +0 3 59 59 1 name UNQUOTED VALUE "oogie boogie"bla,bla, 2023-01-02, 5 Value with unterminated quote found. +0 3 59 77 2 age CAST "oogie boogie"bla,bla, 2023-01-02, 5 Error when converting column "age". Could not convert string "bla" to 'INTEGER' + + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/unquoted_less.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 7 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position; +---- +0 3 59 59 1 name UNQUOTED VALUE "oogie boogie"bla,4, 2023-01-02 Value with unterminated quote found. +0 3 59 90 3 barks MISSING COLUMNS "oogie boogie"bla,4, 2023-01-02 Expected Number of Columns: 4 Found: 3 + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/unquoted_maxline.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 7 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position; +---- +0 3 59 59 1 name UNQUOTED VALUE "oogie boogieoogie boogieoogie boogieoogie boogie"bla,4, 2023-01-02, 5 Value with unterminated quote found. +0 3 59 59 NULL NULL LINE SIZE OVER MAXIMUM "oogie boogieoogie boogieoogie boogieoogie boogie"bla,4, 2023-01-02, 5 Maximum line size of 40 bytes exceeded. Actual Size:71 bytes. + + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + +query IIII +FROM read_csv('data/csv/rejects/multiple_errors/unquoted_more.csv', + columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, + store_rejects = true, auto_detect=false, header = 1, max_line_size=40); +---- +oogie boogie 3 2023-01-01 2 +oogie boogie 3 2023-01-02 7 + +query IIIIIIIII rowsort +SElECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position; +---- +0 3 59 59 1 name UNQUOTED VALUE "oogie boogie"bla,4, 2023-01-02, 5, 8 Value with unterminated quote found. 
+0 3 59 93 5 NULL TOO MANY COLUMNS "oogie boogie"bla,4, 2023-01-02, 5, 8 Expected Number of Columns: 4 Found: 5 + +statement ok +DROP TABLE reject_errors; + +statement ok +DROP TABLE reject_scans; + #query IIII #FROM read_csv('data/csv/rejects/multiple_errors.csv', # columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'}, From 8c184d6248e84f9791ecfc9dc5a3180d565619ba Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 4 Apr 2024 17:05:11 +0200 Subject: [PATCH 076/147] More tests --- .../multiple_errors/invalid_utf_cast.csv | 3 + .../multiple_errors/invalid_utf_less.csv | 3 + .../multiple_errors/invalid_utf_max_line.csv | 3 + .../multiple_errors/invalid_utf_more.csv | 3 + .../multiple_errors/invalid_utf_unquoted.csv | 3 + .../{ => multiple_errors}/multiple_errors.csv | 2 +- .../scanner/string_value_scanner.cpp | 15 +- .../test_multiple_errors_same_line.test | 136 +++++++++++++++--- 8 files changed, 141 insertions(+), 27 deletions(-) create mode 100644 data/csv/rejects/multiple_errors/invalid_utf_cast.csv create mode 100644 data/csv/rejects/multiple_errors/invalid_utf_less.csv create mode 100644 data/csv/rejects/multiple_errors/invalid_utf_max_line.csv create mode 100644 data/csv/rejects/multiple_errors/invalid_utf_more.csv create mode 100644 data/csv/rejects/multiple_errors/invalid_utf_unquoted.csv rename data/csv/rejects/{ => multiple_errors}/multiple_errors.csv (89%) diff --git a/data/csv/rejects/multiple_errors/invalid_utf_cast.csv b/data/csv/rejects/multiple_errors/invalid_utf_cast.csv new file mode 100644 index 000000000000..a4b2844afd03 --- /dev/null +++ b/data/csv/rejects/multiple_errors/invalid_utf_cast.csv @@ -0,0 +1,3 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boÿÿgie,bla, 2023-01-01, 2 \ No newline at end of file diff --git a/data/csv/rejects/multiple_errors/invalid_utf_less.csv b/data/csv/rejects/multiple_errors/invalid_utf_less.csv new file mode 100644 index 000000000000..adf74fc1e5fd --- /dev/null +++ b/data/csv/rejects/multiple_errors/invalid_utf_less.csv @@ -0,0 +1,3 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boÿÿgie,3, 2023-01-01 \ No newline at end of file diff --git a/data/csv/rejects/multiple_errors/invalid_utf_max_line.csv b/data/csv/rejects/multiple_errors/invalid_utf_max_line.csv new file mode 100644 index 000000000000..1f017d2d8cf7 --- /dev/null +++ b/data/csv/rejects/multiple_errors/invalid_utf_max_line.csv @@ -0,0 +1,3 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boÿÿgieoogie boogieoogie boogieoogie boogie,3, 2023-01-01, 2, 5 \ No newline at end of file diff --git a/data/csv/rejects/multiple_errors/invalid_utf_more.csv b/data/csv/rejects/multiple_errors/invalid_utf_more.csv new file mode 100644 index 000000000000..17fa55e72875 --- /dev/null +++ b/data/csv/rejects/multiple_errors/invalid_utf_more.csv @@ -0,0 +1,3 @@ +name,age,current_day, barks +oogie boogie,3, 2023-01-01, 2 +oogie boÿÿgie,3, 2023-01-01, 2, 5 \ No newline at end of file diff --git a/data/csv/rejects/multiple_errors/invalid_utf_unquoted.csv b/data/csv/rejects/multiple_errors/invalid_utf_unquoted.csv new file mode 100644 index 000000000000..3aaedb256d05 --- /dev/null +++ b/data/csv/rejects/multiple_errors/invalid_utf_unquoted.csv @@ -0,0 +1,3 @@ +name,last_name, age,current_day, barks +oogie, boogie,3, 2023-01-01, 2 +"oogie"bla, boÿÿgie,3, 2023-01-01, 2 \ No newline at end of file diff --git a/data/csv/rejects/multiple_errors.csv b/data/csv/rejects/multiple_errors/multiple_errors.csv 
similarity index 89%
rename from data/csv/rejects/multiple_errors.csv
rename to data/csv/rejects/multiple_errors/multiple_errors.csv
index 6d10e51c3cf5..784fdd9d9faf 100644
--- a/data/csv/rejects/multiple_errors.csv
+++ b/data/csv/rejects/multiple_errors/multiple_errors.csv
@@ -3,6 +3,6 @@ oogie boogie,3, 2023-01-01, 2
oogie boogie,3, 2023-01-02, 5
oogie boogie,3, 2023-01-03, bla, 7
oogie boogie,3, bla, bla, 7
-oogie boogie,3, 2023-01-04, 8
+"oogie boogie"bla,3, 2023-01-04
oogie boogie,3, bla
oogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogie,3, bla
diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
index acade767b83e..9f2682deadce 100644
--- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
+++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp
@@ -334,11 +334,6 @@ void StringValueResult::AddValue(StringValueResult &result, const idx_t buffer_p
void StringValueResult::HandleUnicodeError(idx_t col_idx, LinePosition &error_position) {
    bool first_nl;
    auto borked_line = current_line_position.ReconstructCurrentLine(first_nl, buffer_handles);
-   // sanitize borked line
-   std::vector<char> char_array(borked_line.begin(), borked_line.end());
-   char_array.push_back('\0'); // Null-terminate the character array
-   Utf8Proc::MakeValid(&char_array[0], char_array.size());
-   borked_line = {char_array.begin(), char_array.end() - 1};
    LinesPerBoundary lines_per_batch(iterator.GetBoundaryIdx(), lines_read);
    if (current_line_position.begin == error_position) {
        auto csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
@@ -379,11 +374,6 @@ bool StringValueResult::HandleError() {
        }
        break;
    case CSVErrorType::INVALID_UNICODE: {
-       // We have to sanitize the CSV line
-       std::vector<char> char_array(borked_line.begin(), borked_line.end());
-       char_array.push_back('\0'); // Null-terminate the character array
-       Utf8Proc::MakeValid(&char_array[0], char_array.size());
-       borked_line = {char_array.begin(), char_array.end() - 1};
        if (current_line_position.begin == line_pos) {
            csv_error = CSVError::InvalidUTF8(state_machine.options, col_idx, lines_per_batch, borked_line,
@@ -489,6 +479,11 @@ string FullLinePosition::ReconstructCurrentLine(bool &first_char_nl,
            result += second_buffer[i];
        }
    }
+   // sanitize borked line
+   std::vector<char> char_array(result.begin(), result.end());
+   char_array.push_back('\0'); // Null-terminate the character array
+   Utf8Proc::MakeValid(&char_array[0], char_array.size());
+   result = {char_array.begin(), char_array.end() - 1};
    return result;
}
diff --git a/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test
index 112e7e0e2575..6d9f1fcb5baa 100644
--- a/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test
+++ b/test/sql/copy/csv/rejects/test_multiple_errors_same_line.test
@@ -257,19 +257,123 @@ DROP TABLE reject_errors;
statement ok
DROP TABLE reject_scans;

-#query IIII
-#FROM read_csv('data/csv/rejects/multiple_errors.csv',
-#    columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'},
-#    store_rejects = true, auto_detect=false, header = 1, max_line_size=40);
-#----
-#oogie boogie 3 2023-01-01 2
-#oogie boogie 3 2023-01-02 5
-#
-#query IIIIIIIIII rowsort
-#FROM reject_errors ORDER BY ALL;
-#----
-#3 0 4 89 116 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER'
-#3 0 4 89 120 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7 Expected Number of Columns: 4 Found: 5
-#3 0 5 124 144 4 barks CAST oogie boogie,3, bla, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER'
-#3 0 5 124 148 5 NULL TOO MANY COLUMNS oogie boogie,3, bla, bla, 7 Expected Number of Columns: 4 Found: 5
-#3 0 6 152 171 3 barks MISSING COLUMNS oogie boogie,3, bla Expected Number of Columns: 4 Found: 3
\ No newline at end of file
+query IIII
+FROM read_csv('data/csv/rejects/multiple_errors/invalid_utf_cast.csv',
+    columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'},
+    store_rejects = true, auto_detect=false, header = 1, max_line_size=40);
+----
+oogie boogie 3 2023-01-01 2
+
+query IIIIIIIII rowsort
+SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position;
+----
+0 3 59 59 1 name INVALID UNICODE oogie bo??gie,bla, 2023-01-01, 2 Invalid unicode (byte sequence mismatch) detected.
+0 3 59 73 2 age CAST oogie bo??gie,bla, 2023-01-01, 2 Error when converting column "age". Could not convert string "bla" to 'INTEGER'
+
+statement ok
+DROP TABLE reject_errors;
+
+statement ok
+DROP TABLE reject_scans;
+
+query IIII
+FROM read_csv('data/csv/rejects/multiple_errors/invalid_utf_less.csv',
+    columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'},
+    store_rejects = true, auto_detect=false, header = 1, max_line_size=40);
+----
+oogie boogie 3 2023-01-01 2
+
+
+query IIIIIIIII rowsort
+SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position;
+----
+0 3 59 59 1 name INVALID UNICODE oogie bo??gie,3, 2023-01-01 Invalid unicode (byte sequence mismatch) detected.
+0 3 59 86 3 barks MISSING COLUMNS oogie bo??gie,3, 2023-01-01 Expected Number of Columns: 4 Found: 3
+
+statement ok
+DROP TABLE reject_errors;
+
+statement ok
+DROP TABLE reject_scans;
+
+query IIII
+FROM read_csv('data/csv/rejects/multiple_errors/invalid_utf_max_line.csv',
+    columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'},
+    store_rejects = true, auto_detect=false, header = 1, max_line_size=40);
+----
+oogie boogie 3 2023-01-01 2
+
+
+query IIIIIIIII rowsort
+SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position, error_message;
+----
+0 3 59 125 5 NULL TOO MANY COLUMNS oogie bo??gieoogie boogieoogie boogieoogie boogie,3, 2023-01-01, 2, 5 Expected Number of Columns: 4 Found: 5
+0 3 59 59 1 name INVALID UNICODE oogie bo??gieoogie boogieoogie boogieoogie boogie,3, 2023-01-01, 2, 5 Invalid unicode (byte sequence mismatch) detected.
+0 3 59 59 NULL NULL LINE SIZE OVER MAXIMUM oogie bo??gieoogie boogieoogie boogieoogie boogie,3, 2023-01-01, 2, 5 Maximum line size of 40 bytes exceeded. Actual Size:70 bytes.
+
+statement ok
+DROP TABLE reject_errors;
+
+statement ok
+DROP TABLE reject_scans;
+
+query IIII
+FROM read_csv('data/csv/rejects/multiple_errors/invalid_utf_more.csv',
+    columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'},
+    store_rejects = true, auto_detect=false, header = 1, max_line_size=40);
+----
+oogie boogie 3 2023-01-01 2
+
+
+query IIIIIIIII rowsort
+SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position;
+----
+0 3 59 59 1 name INVALID UNICODE oogie bo??gie,3, 2023-01-01, 2, 5 Invalid unicode (byte sequence mismatch) detected.
+0 3 59 89 5 NULL TOO MANY COLUMNS oogie bo??gie,3, 2023-01-01, 2, 5 Expected Number of Columns: 4 Found: 5
+
+statement ok
+DROP TABLE reject_errors;
+
+statement ok
+DROP TABLE reject_scans;
+
+query IIIII
+FROM read_csv('data/csv/rejects/multiple_errors/invalid_utf_unquoted.csv',
+    columns = {'name': 'VARCHAR', 'last_name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'},
+    store_rejects = true, auto_detect=false, header = 1, max_line_size=40);
+----
+oogie boogie 3 2023-01-01 2
+
+
+query IIIIIIIII rowsort
+SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY byte_position;
+----
+0 3 71 71 1 name UNQUOTED VALUE "oogie"bla, bo??gie,3, 2023-01-01, 2 Value with unterminated quote found.
+0 3 71 82 2 last_name INVALID UNICODE "oogie"bla, bo??gie,3, 2023-01-01, 2 Invalid unicode (byte sequence mismatch) detected.
+
+statement ok
+DROP TABLE reject_errors;
+
+statement ok
+DROP TABLE reject_scans;
+
+query IIII
+FROM read_csv('data/csv/rejects/multiple_errors/multiple_errors.csv',
+    columns = {'name': 'VARCHAR', 'age': 'INTEGER', 'current_day': 'DATE', 'barks': 'INTEGER'},
+    store_rejects = true, auto_detect=false, header = 1, max_line_size=40);
+----
+oogie boogie 3 2023-01-01 2
+oogie boogie 3 2023-01-02 5
+
+query IIIIIIIII rowsort
+SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL;
+----
+0 4 89 116 4 barks CAST oogie boogie,3, 2023-01-03, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER'
+0 4 89 120 5 NULL TOO MANY COLUMNS oogie boogie,3, 2023-01-03, bla, 7 Expected Number of Columns: 4 Found: 5
+0 5 124 144 4 barks CAST oogie boogie,3, bla, bla, 7 Error when converting column "barks". Could not convert string " bla" to 'INTEGER'
+0 5 124 148 5 NULL TOO MANY COLUMNS oogie boogie,3, bla, bla, 7 Expected Number of Columns: 4 Found: 5
+0 6 152 152 1 name UNQUOTED VALUE "oogie boogie"bla,3, 2023-01-04 Value with unterminated quote found.
+0 6 152 183 3 barks MISSING COLUMNS "oogie boogie"bla,3, 2023-01-04 Expected Number of Columns: 4 Found: 3
+0 7 184 203 3 barks MISSING COLUMNS oogie boogie,3, bla Expected Number of Columns: 4 Found: 3
+0 8 204 204 NULL NULL LINE SIZE OVER MAXIMUM oogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogie,3, bla Maximum line size of 40 bytes exceeded. Actual Size:92 bytes.
+0 8 204 295 3 barks MISSING COLUMNS oogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogieoogie boogie,3, bla Expected Number of Columns: 4 Found: 3
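Patch 076 above also moves the UTF-8 sanitization of reconstructed error lines out of the two error handlers and into FullLinePosition::ReconstructCurrentLine, so every reconstructed line is sanitized exactly once. A minimal, self-contained sketch of that step follows; MakeValidToy is a simplified stand-in for duckdb's Utf8Proc::MakeValid (which repairs invalid byte sequences in place), not the real implementation:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Toy stand-in for Utf8Proc::MakeValid: bluntly replaces every non-ASCII
    // byte with '?'; the real helper only rewrites invalid UTF-8 sequences.
    static void MakeValidToy(char *data, std::size_t size) {
        for (std::size_t i = 0; i < size; i++) {
            if (static_cast<unsigned char>(data[i]) >= 0x80) {
                data[i] = '?';
            }
        }
    }

    // Sanitize a reconstructed CSV line once, mirroring the logic the patch
    // consolidates at the end of ReconstructCurrentLine.
    static std::string SanitizeLine(const std::string &borked_line) {
        std::vector<char> char_array(borked_line.begin(), borked_line.end());
        char_array.push_back('\0'); // null-terminate the character array
        MakeValidToy(&char_array[0], char_array.size());
        return std::string(char_array.begin(), char_array.end() - 1);
    }

Doing this inside ReconstructCurrentLine is why the "oogie boÿÿgie" rows in the tests above come back as "oogie bo??gie" for every error type, not just the INVALID UNICODE ones.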
From 8dd5df622cf116b41d847a9c33e8cc26718b47cd Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Thu, 4 Apr 2024 17:05:26 +0200
Subject: [PATCH 077/147] woopsie on gen files

---
 .github/regression/micro_extended.csv | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/regression/micro_extended.csv b/.github/regression/micro_extended.csv
index a9517ef309b4..6973785b4c98 100644
--- a/.github/regression/micro_extended.csv
+++ b/.github/regression/micro_extended.csv
@@ -78,7 +78,6 @@ benchmark/micro/copy/to_parquet_partition_by_few.benchmark
benchmark/micro/copy/to_parquet_partition_by_many.benchmark
benchmark/micro/csv/16_byte_values.benchmark
benchmark/micro/csv/1_byte_values.benchmark
-benchmark/micro/csv/1brl.benchmark
benchmark/micro/csv/multiple_read.benchmark
benchmark/micro/csv/multiple_small_read_csv.benchmark
benchmark/micro/csv/null_padding.benchmark

From 5efa63aaaca2f7bb6cf1c1e9e22c8058ecb3a2ea Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Fri, 5 Apr 2024 10:55:38 +0200
Subject: [PATCH 078/147] make tidy happy

---
 .../execution/operator/persistent/csv_rejects_table.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
index d00aede6687a..88dd86377dc7 100644
--- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
+++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
@@ -1,5 +1,7 @@
#pragma once

+#include
+
#include "duckdb/storage/object_cache.hpp"
#include "duckdb/common/mutex.hpp"
#include "duckdb/common/typedefs.hpp"
@@ -15,7 +17,7 @@ class ClientContext;
class CSVRejectsTable : public ObjectCacheEntry {
public:
    CSVRejectsTable(string rejects_scan, string rejects_error)
-       : count(0), scan_table(rejects_scan), errors_table(rejects_error) {
+       : count(0), scan_table(std::move(rejects_scan)), errors_table(std::move(rejects_error)) {
    }
    mutex write_lock;
    string name;

From 9c07c9318853a65ad36ca60f7184d9b3fc5ea332 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Fri, 5 Apr 2024 11:08:07 +0200
Subject: [PATCH 079/147] Make user_parameters ordered

---
 .../operator/csv_scanner/util/csv_reader_options.cpp     | 6 +++++-
 .../execution/operator/csv_scanner/csv_reader_options.hpp | 1 +
 test/sql/copy/csv/rejects/csv_rejects_two_tables.test     | 8 ++++----
 test/sql/copy/csv/test_sniff_csv_options.test             | 2 +-
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
index ebfe1ae306bd..6ecdc72989ae 100644
--- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
+++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
@@ -386,6 +386,7 @@ bool StoreUserDefinedParameter(string &option) {
}
void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientContext &context,
                                           vector &return_types, vector &names) {
+   map<string, string> ordered_user_defined_parameters;
    for (auto &kv : in) {
        if (MultiFileReader::ParseOption(kv.first, kv.second, file_options, context)) {
            continue;
        }
        auto loption = StringUtil::Lower(kv.first);
        // skip variables that are specific to auto detection
        if (StoreUserDefinedParameter(loption)) {
-           user_defined_parameters += loption + "=" + kv.second.ToSQLString() + ", ";
+           ordered_user_defined_parameters[loption] = kv.second.ToSQLString();
        }
        if (loption == "columns") {
            auto &child_type = kv.second.type();
@@ -499,6 +500,9 @@ void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientCont
            SetReadOption(loption, kv.second, names);
        }
    }
+   for (auto &udf_parameter : ordered_user_defined_parameters) {
+       user_defined_parameters += udf_parameter.first + "=" + udf_parameter.second + ", ";
+   }
    if (user_defined_parameters.size() >= 2) {
        user_defined_parameters.erase(user_defined_parameters.size() - 2);
    }
diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
index b937ccfc11d9..53a66da77838 100644
--- a/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
+++ b/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
@@ -107,6 +107,7 @@ struct CSVReaderOptions {
    //! User defined parameters for the csv function concatenated on a string
    string user_defined_parameters;
+
    //===--------------------------------------------------------------------===//
    // WriteCSVOptions
    //===--------------------------------------------------------------------===//
diff --git a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test
index f50128989810..70fef75c473f 100644
--- a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test
+++ b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test
@@ -19,8 +19,8 @@ BIGINT VARCHAR 11044 11044 2
query IIIIIIIIIIIII
FROM reject_scans order by all;
----
-3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1
-3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL store_rejects=true, sample_size=1
+3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL sample_size=1, store_rejects=true
+3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL sample_size=1, store_rejects=true

query IIIIIIIIII
FROM reject_errors order by all;
@@ -106,8 +106,8 @@ query IIIIIIIIIIIII
SELECT *
FROM rejects_scan_3
order by all;
----
-15 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_3', rejects_scan='rejects_scan_3', sample_size=1
-15 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_3', rejects_scan='rejects_scan_3', sample_size=1
+15 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_3', rejects_table='rejects_errors_3', sample_size=1
+15 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_3', rejects_table='rejects_errors_3', sample_size=1

query IIIIIIIIII
FROM rejects_errors_3 order by all;
diff --git a/test/sql/copy/csv/test_sniff_csv_options.test b/test/sql/copy/csv/test_sniff_csv_options.test
index 29144c0455bf..aa402aa58cb6 100644
--- a/test/sql/copy/csv/test_sniff_csv_options.test
+++ b/test/sql/copy/csv/test_sniff_csv_options.test
@@ -82,7 +82,7 @@ FROM sniff_csv('test/sql/copy/csv/data/auto/time_date_timestamp_yyyy.mm.dd.csv',
query IIIIIIIIIII
FROM sniff_csv('test/sql/copy/csv/data/auto/time_date_timestamp_yyyy.mm.dd.csv', dateformat='%Y.%m.%d', timestampformat='%Y.%m.%d %H:%M:%S')
----
-, " " \n 0 true {'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'} %Y.%m.%d %Y.%m.%d %H:%M:%S timestampformat='%Y.%m.%d %H:%M:%S', dateformat='%Y.%m.%d' FROM read_csv('test/sql/copy/csv/data/auto/time_date_timestamp_yyyy.mm.dd.csv', auto_detect=false, delim=',', quote='"', escape='"', new_line='\n', skip=0, header=true, columns={'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'}, timestampformat='%Y.%m.%d %H:%M:%S', dateformat='%Y.%m.%d');
+, " " \n 0 1 {'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'} %Y.%m.%d %Y.%m.%d %H:%M:%S dateformat='%Y.%m.%d', timestampformat='%Y.%m.%d %H:%M:%S' FROM read_csv('test/sql/copy/csv/data/auto/time_date_timestamp_yyyy.mm.dd.csv', auto_detect=false, delim=',', quote='"', escape='"', new_line='\n', skip=0, header=true, columns={'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'}, dateformat='%Y.%m.%d', timestampformat='%Y.%m.%d %H:%M:%S');

query IIIII
FROM read_csv('test/sql/copy/csv/data/auto/time_date_timestamp_yyyy.mm.dd.csv', auto_detect=false, delim=',', quote='"', escape='"', new_line='\n', skip=0, header=true, columns={'a': 'BIGINT', 'b': 'VARCHAR', 't': 'TIME', 'd': 'DATE', 'ts': 'TIMESTAMP'}, timestampformat='%Y.%m.%d %H:%M:%S', dateformat='%Y.%m.%d')
order by all
limit 1;
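Patch 079 fixes a source of nondeterminism: named parameters arrive in an unordered map, so concatenating them during iteration produced a user_defined_parameters string whose option order varied from run to run. Collecting them in a std::map first yields a stable, alphabetically ordered string, which is what the updated test expectations above encode. A small standalone sketch of the idea, with made-up option values:

    #include <iostream>
    #include <map>
    #include <string>

    // Collecting options in a std::map (ordered by key) makes the concatenated
    // parameter string deterministic, regardless of input order.
    int main() {
        std::map<std::string, std::string> ordered_user_defined_parameters;
        ordered_user_defined_parameters["store_rejects"] = "true";
        ordered_user_defined_parameters["sample_size"] = "1";

        std::string user_defined_parameters;
        for (auto &kv : ordered_user_defined_parameters) {
            user_defined_parameters += kv.first + "=" + kv.second + ", ";
        }
        if (user_defined_parameters.size() >= 2) {
            user_defined_parameters.erase(user_defined_parameters.size() - 2);
        }
        // always prints: sample_size=1, store_rejects=true
        std::cout << user_defined_parameters << std::endl;
    }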
Could not convert string "B" to 'BIGINT' +0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' statement ok DROP TABLE reject_errors; diff --git a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test index e6459aa5cd77..ba48d9fe2a99 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test +++ b/test/sql/copy/csv/rejects/csv_rejects_flush_cast.test @@ -17,9 +17,9 @@ SELECT typeof(first(a)), typeof(first(b)), COUNT(*) FROM read_csv( ---- DATE VARCHAR 2811 -query IIIIIIIIII -FROM reject_errors order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -3 0 439 6997 NULL 1 a CAST B, bla Error when converting column "a". Could not parse string "B" according to format specifier "%d-%m-%Y" -3 0 2813 44972 NULL 1 a CAST c, bla Error when converting column "a". Could not parse string "c" according to format specifier "%d-%m-%Y" +0 439 6997 NULL 1 a CAST B, bla Error when converting column "a". Could not parse string "B" according to format specifier "%d-%m-%Y" +0 2813 44972 NULL 1 a CAST c, bla Error when converting column "a". Could not parse string "c" according to format specifier "%d-%m-%Y" diff --git a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test index 99cedc820614..21ab80aacad5 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test +++ b/test/sql/copy/csv/rejects/csv_rejects_maximum_line.test @@ -13,10 +13,10 @@ SELECT * FROM read_csv( columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, store_rejects=true, auto_detect=false, header = 1, max_line_size=10); -query IIIIIIIIII -FROM reject_errors order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -3 0 5 23 23 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. +0 5 23 23 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. statement ok DROP TABLE reject_errors; @@ -54,12 +54,12 @@ SELECT * FROM read_csv( columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, store_rejects = true, auto_detect=false, header = 1, max_line_size=20); -query IIIIIIIIII -FROM reject_errors order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -27 0 2282 13685 13685 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. -27 0 2591 15558 15558 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. -27 0 2923 17569 17569 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. +0 2282 13685 13685 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. +0 2591 15558 15558 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. +0 2923 17569 17569 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 20 bytes exceeded. Actual Size:25 bytes. 
statement ok DROP TABLE reject_errors; @@ -74,13 +74,13 @@ SELECT * FROM read_csv( columns = {'a': 'VARCHAR', 'b': 'INTEGER'}, store_rejects = true, auto_detect=false, header = 1, max_line_size=10); -query IIIIIIIIII -FROM reject_errors order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -31 0 5 23 23 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. -31 1 2282 13685 13685 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. -31 1 2591 15558 15558 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. -31 1 2923 17569 17569 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +0 5 23 23 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaa,4 Maximum line size of 10 bytes exceeded. Actual Size:19 bytes. +1 2282 13685 13685 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +1 2591 15558 15558 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,1 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. +1 2923 17569 17569 NULL NULL LINE SIZE OVER MAXIMUM blaaaaaaaaaaaaaaaaaaaa,3 Maximum line size of 10 bytes exceeded. Actual Size:25 bytes. statement ok DROP TABLE reject_errors; diff --git a/test/sql/copy/csv/rejects/csv_rejects_read.test b/test/sql/copy/csv/rejects/csv_rejects_read.test index b537833fd7dd..ba090366ac6e 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_read.test +++ b/test/sql/copy/csv/rejects/csv_rejects_read.test @@ -15,10 +15,10 @@ SELECT * FROM read_csv( 1 2 AAA 6 7 CCC -query IIIIIIIIII -FROM reject_errors; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors; ---- -3 0 2 10 12 2 col1 CAST 4,BBB,9, Error when converting column "col1". Could not convert string "BBB" to 'INTEGER' +0 2 10 12 2 col1 CAST 4,BBB,9, Error when converting column "col1". Could not convert string "BBB" to 'INTEGER' statement ok DROP TABLE reject_errors; @@ -35,12 +35,12 @@ SELECT * FROM read_csv( ---- 4 5 9 -query IIIIIIIIII -FROM reject_errors ORDER BY ALL; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -7 0 1 1 5 3 col2 CAST 1,2,DDD, Error when converting column "col2". Could not convert string "DDD" to 'INTEGER' -7 0 3 17 17 1 col0 CAST EEE,7,FFF, Error when converting column "col0". Could not convert string "EEE" to 'INTEGER' -7 0 3 17 23 3 col2 CAST EEE,7,FFF, Error when converting column "col2". Could not convert string "FFF" to 'INTEGER' +0 1 1 5 3 col2 CAST 1,2,DDD, Error when converting column "col2". Could not convert string "DDD" to 'INTEGER' +0 3 17 17 1 col0 CAST EEE,7,FFF, Error when converting column "col0". Could not convert string "EEE" to 'INTEGER' +0 3 17 23 3 col2 CAST EEE,7,FFF, Error when converting column "col2". Could not convert string "FFF" to 'INTEGER' statement ok DROP TABLE reject_errors; @@ -60,11 +60,11 @@ SELECT * FROM read_csv( 4 5 9 6 7 CCC -query IIIIIIIIII -FROM reject_errors ORDER BY ALL; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -11 0 2 10 12 2 col1 CAST 4,BBB,9, Error when converting column "col1". Could not convert string "BBB" to 'INTEGER' -11 1 3 17 17 1 col0 CAST EEE,7,FFF, Error when converting column "col0". 
Could not convert string "EEE" to 'INTEGER' +0 2 10 12 2 col1 CAST 4,BBB,9, Error when converting column "col1". Could not convert string "BBB" to 'INTEGER' +1 3 17 17 1 col0 CAST EEE,7,FFF, Error when converting column "col0". Could not convert string "EEE" to 'INTEGER' statement ok DROP TABLE reject_errors; @@ -106,11 +106,11 @@ SELECT SUM(num) FROM read_csv( ---- 4270 -query IIIIIIIIII -FROM reject_errors ORDER BY ALL; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -19 0 2176 10876 10876 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' -19 0 4176 20876 20876 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' +0 2176 10876 10876 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +0 4176 20876 20876 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' statement ok DROP TABLE reject_errors; @@ -126,11 +126,11 @@ SELECT SUM(num) FROM read_csv( ---- 6774 -query IIIIIIIIII -FROM reject_errors ORDER BY ALL; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -23 0 3680 18396 18396 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' -23 0 5680 28396 28396 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' +0 3680 18396 18396 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +0 5680 28396 28396 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' statement ok @@ -148,13 +148,13 @@ SELECT SUM(num) FROM read_csv( ---- 11044 -query IIIIIIIIII -FROM reject_errors ORDER BY ALL; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -27 0 2176 10876 10876 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' -27 0 4176 20876 20876 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' -27 1 3680 18396 18396 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' -27 1 5680 28396 28396 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' +0 2176 10876 10876 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +0 4176 20876 20876 1 num CAST C, A Error when converting column "num". Could not convert string "C" to 'INTEGER' +1 3680 18396 18396 1 num CAST B, A Error when converting column "num". Could not convert string "B" to 'INTEGER' +1 5680 28396 28396 1 num CAST C, A Error when converting column "num". 
Could not convert string "C" to 'INTEGER' statement ok DROP TABLE reject_errors; @@ -178,20 +178,20 @@ ON L.num = R.num; 1 A 1 A 3 C 3 C -query IIIIIIIIIIIII -FROM reject_scans ORDER BY ALL; +query IIIIIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_scans ORDER BY ALL; ---- -31 0 test/sql/copy/csv/data/error/mismatch/small1.csv , " " \n 0 true {'num': 'INTEGER','str': 'VARCHAR'} NULL NULL store_rejects=true -31 1 test/sql/copy/csv/data/error/mismatch/small2.csv , " " \n 0 true {'num': 'INTEGER','str': 'VARCHAR'} NULL NULL store_rejects=true +0 test/sql/copy/csv/data/error/mismatch/small1.csv , " " \n 0 true {'num': 'INTEGER','str': 'VARCHAR'} NULL NULL store_rejects=true +1 test/sql/copy/csv/data/error/mismatch/small2.csv , " " \n 0 true {'num': 'INTEGER','str': 'VARCHAR'} NULL NULL store_rejects=true -query IIIIIIIIII -FROM reject_errors ORDER BY ALL; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -31 0 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' -31 0 6 27 27 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' -31 1 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' -31 1 5 23 23 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +0 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +0 6 27 27 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +1 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +1 5 23 23 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' statement ok DROP TABLE reject_errors; @@ -217,12 +217,12 @@ ON L.num = R.num; 3 C 3 C -query IIIIIIIIII -FROM reject_errors ORDER BY ALL; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -36 0 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' -36 0 6 27 27 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' -36 1 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +0 3 15 15 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +0 6 27 27 1 num CAST X,Y Error when converting column "num". Could not convert string "X" to 'INTEGER' +1 3 15 15 1 num CAST X,Y Error when converting column "num". 
Could not convert string "X" to 'INTEGER' statement ok diff --git a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test index 70fef75c473f..f856d929fa2e 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test +++ b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test @@ -16,19 +16,19 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M BIGINT VARCHAR 11044 11044 2 -query IIIIIIIIIIIII -FROM reject_scans order by all; +query IIIIIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_scans order by all; ---- -3 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL sample_size=1, store_rejects=true -3 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL sample_size=1, store_rejects=true +0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL sample_size=1, store_rejects=true +1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL sample_size=1, store_rejects=true -query IIIIIIIIII -FROM reject_errors order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -3 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -3 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -3 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' # Test giving the name of errors table statement error @@ -51,19 +51,19 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M ---- BIGINT VARCHAR 11044 11044 2 -query IIIIIIIIIIIII -FROM reject_scans order by all; +query IIIIIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_scans order by all; ---- -8 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 -8 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 +0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 +1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 false {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_table='rejects_errors_2', sample_size=1 -query IIIIIIIIII -FROM rejects_errors_2 order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM rejects_errors_2 order by all; ---- -8 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -8 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -8 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -8 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' statement ok drop table reject_errors; @@ -77,19 +77,19 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M ---- BIGINT VARCHAR 11044 11044 2 -query IIIIIIIIIIIII -FROM rejects_scan_2 order by all; +query IIIIIIIIIIII +SELECT * EXCLUDE (scan_id) FROM rejects_scan_2 order by all; ---- -12 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 -12 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 +0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 +1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_2', sample_size=1 -query IIIIIIIIII -FROM reject_errors order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -12 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -12 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' -12 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -12 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' # Test giving the name of both tables query IIIII @@ -102,20 +102,20 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M ---- BIGINT VARCHAR 11044 11044 2 -query IIIIIIIIIIIII -SELECT * +query IIIIIIIIIIII +SELECT * EXCLUDE (scan_id) FROM rejects_scan_3 order by all; ---- -15 0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_3', rejects_table='rejects_errors_3', sample_size=1 -15 1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_3', rejects_table='rejects_errors_3', sample_size=1 +0 test/sql/copy/csv/data/error/mismatch/big_bad.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_3', rejects_table='rejects_errors_3', sample_size=1 +1 test/sql/copy/csv/data/error/mismatch/big_bad2.csv , \0 \0 \n 0 0 {'column0': 'BIGINT','column1': 'VARCHAR'} NULL NULL rejects_scan='rejects_scan_3', rejects_table='rejects_errors_3', sample_size=1 -query IIIIIIIIII -FROM rejects_errors_3 order by all; +query IIIIIIIII +SELECT * EXCLUDE (scan_id) FROM rejects_errors_3 order by all; ---- -15 0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -15 0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' -15 1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' -15 1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +0 2176 10876 10876 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +0 4176 20876 20876 1 column0 CAST C, A Error when converting column "column0". Could not convert string "C" to 'BIGINT' +1 3680 18396 18396 1 column0 CAST B, A Error when converting column "column0". Could not convert string "B" to 'BIGINT' +1 5680 28396 28396 1 column0 CAST C, A Error when converting column "column0". 
Could not convert string "C" to 'BIGINT' statement ok drop table reject_errors; diff --git a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test index e579648f8794..f6b9840b2130 100644 --- a/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test +++ b/test/sql/copy/csv/rejects/test_invalid_utf_rejects.test @@ -11,10 +11,10 @@ statement ok from read_csv('test/sql/copy/csv/data/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', store_rejects=true) -query IIIIIIIIII rowsort -FROM reject_errors ORDER BY ALL; +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -3 0 3001 54001 54007 2 col2 INVALID UNICODE valid,invalid_??_part,valid Invalid unicode (byte sequence mismatch) detected. -3 0 3012 54209 54221 3 col3 INVALID UNICODE valid,valid,invalid_??_part Invalid unicode (byte sequence mismatch) detected. -3 0 3023 54417 54423 2 col2 INVALID UNICODE valid,invalid_??_part,valid Invalid unicode (byte sequence mismatch) detected. -3 0 3034 54625 54637 3 col3 INVALID UNICODE valid,valid,invalid_??_part Invalid unicode (byte sequence mismatch) detected. \ No newline at end of file +0 3001 54001 54007 2 col2 INVALID UNICODE valid,invalid_??_part,valid Invalid unicode (byte sequence mismatch) detected. +0 3012 54209 54221 3 col3 INVALID UNICODE valid,valid,invalid_??_part Invalid unicode (byte sequence mismatch) detected. +0 3023 54417 54423 2 col2 INVALID UNICODE valid,invalid_??_part,valid Invalid unicode (byte sequence mismatch) detected. +0 3034 54625 54637 3 col3 INVALID UNICODE valid,valid,invalid_??_part Invalid unicode (byte sequence mismatch) detected. \ No newline at end of file diff --git a/test/sql/copy/csv/rejects/test_mixed.test b/test/sql/copy/csv/rejects/test_mixed.test index d1f9b1decedc..54ef879556ab 100644 --- a/test/sql/copy/csv/rejects/test_mixed.test +++ b/test/sql/copy/csv/rejects/test_mixed.test @@ -55,12 +55,12 @@ SELECT * FROM read_csv( 1 2 pedro 1 2 pedro -query IIIIIIIIII rowsort -FROM reject_errors ORDER BY ALL; +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors ORDER BY ALL; ---- -3 0 10 103 106 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 3 Found: 2 -3 0 14 143 154 4 NULL TOO MANY COLUMNS 1,2,"pedro",5 Expected Number of Columns: 3 Found: 4 -3 0 19 205 207 2 b CAST 1,bla,"pedro" Error when converting column "b". Could not convert string "bla" to 'INTEGER' -3 0 22 243 247 3 c UNQUOTED VALUE 1,2,"pedro"bla Value with unterminated quote found. -3 0 32 366 366 NULL NULL LINE SIZE OVER MAXIMUM 1,2,"pedro thiago timbo holanda" Maximum line size of 20 bytes exceeded. Actual Size:33 bytes. -3 0 38 459 463 3 c INVALID UNICODE 1,2,"pedro??" Invalid unicode (byte sequence mismatch) detected. \ No newline at end of file +0 10 103 106 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 3 Found: 2 +0 14 143 154 4 NULL TOO MANY COLUMNS 1,2,"pedro",5 Expected Number of Columns: 3 Found: 4 +0 19 205 207 2 b CAST 1,bla,"pedro" Error when converting column "b". Could not convert string "bla" to 'INTEGER' +0 22 243 247 3 c UNQUOTED VALUE 1,2,"pedro"bla Value with unterminated quote found. +0 32 366 366 NULL NULL LINE SIZE OVER MAXIMUM 1,2,"pedro thiago timbo holanda" Maximum line size of 20 bytes exceeded. Actual Size:33 bytes. +0 38 459 463 3 c INVALID UNICODE 1,2,"pedro??" Invalid unicode (byte sequence mismatch) detected. 
\ No newline at end of file From 8e5ebeffbafcbb947ee973c96368f726bd21304b Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Fri, 5 Apr 2024 11:27:58 +0200 Subject: [PATCH 081/147] Letss see if our CI can take this --- test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow index 07b99f83c601..e53ef286a495 100644 --- a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow +++ b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow @@ -13,9 +13,6 @@ copy lineitem to '__TEST_DIR__/lineitem.csv.gz'; statement ok SET temp_directory='' -# load the DB from disk (Avoids OOM when generating ze table) -load __TEST_DIR__/lineitem_compressed.db - statement ok CREATE TABLE lineitem_2(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity DECIMAL(15,2) NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL); From fbdf2f86b5464871f8431221bff3efa47fd55e06 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Mon, 8 Apr 2024 10:16:15 +0200 Subject: [PATCH 082/147] initializes unknown indexes on catalog lookup --- .../catalog_entry/duck_schema_entry.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/catalog/catalog_entry/duck_schema_entry.cpp b/src/catalog/catalog_entry/duck_schema_entry.cpp index 105270d49b41..cbd78d374847 100644 --- a/src/catalog/catalog_entry/duck_schema_entry.cpp +++ b/src/catalog/catalog_entry/duck_schema_entry.cpp @@ -36,8 +36,8 @@ namespace duckdb { -void FindForeignKeyInformation(CatalogEntry &entry, AlterForeignKeyType alter_fk_type, - vector> &fk_arrays) { +static void FindForeignKeyInformation(CatalogEntry &entry, AlterForeignKeyType alter_fk_type, + vector> &fk_arrays) { if (entry.type != CatalogType::TABLE_ENTRY) { return; } @@ -63,6 +63,19 @@ void FindForeignKeyInformation(CatalogEntry &entry, AlterForeignKeyType alter_fk } } +static void LazyLoadIndexes(ClientContext &context, CatalogEntry &entry) { + if (entry.type == CatalogType::TABLE_ENTRY) { + auto &table_entry = entry.Cast(); + table_entry.GetStorage().info->InitializeIndexes(context); + } else if (entry.type == CatalogType::INDEX_ENTRY) { + auto &index_entry = entry.Cast(); + auto &table_entry = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, index_entry.catalog.GetName(), + index_entry.GetSchemaName(), index_entry.GetTableName()) + .Cast(); + table_entry.GetStorage().info->InitializeIndexes(context); + } +} + DuckSchemaEntry::DuckSchemaEntry(Catalog &catalog, CreateSchemaInfo &info) : SchemaCatalogEntry(catalog, info), tables(catalog, make_uniq(catalog, *this)), indexes(catalog), table_functions(catalog), copy_functions(catalog), pragma_functions(catalog), @@ -287,6 +300,9 @@ void DuckSchemaEntry::DropEntry(ClientContext &context, DropInfo &info) { CatalogTypeToString(existing_entry->type), CatalogTypeToString(info.type)); } + // if this is a index or table with indexes, initialize any unknown index instances + LazyLoadIndexes(context, *existing_entry); + // if there is a foreign key constraint, get that 
information vector> fk_arrays; FindForeignKeyInformation(*existing_entry, AlterForeignKeyType::AFT_DELETE, fk_arrays); From 57d12c7803fe2b81de8bc804f4bc38f16d54d2bd Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Mon, 8 Apr 2024 10:35:10 +0200 Subject: [PATCH 083/147] remove other initialization --- src/function/table/table_scan.cpp | 3 --- src/storage/data_table.cpp | 4 ---- src/storage/local_storage.cpp | 4 ---- 3 files changed, 11 deletions(-) diff --git a/src/function/table/table_scan.cpp b/src/function/table/table_scan.cpp index 097642361756..26a9843775e7 100644 --- a/src/function/table/table_scan.cpp +++ b/src/function/table/table_scan.cpp @@ -306,9 +306,6 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun return; } - // Lazily initialize any unknown indexes that might have been loaded by an extension - storage.info->InitializeIndexes(context); - // behold storage.info->indexes.Scan([&](Index &index) { // first rewrite the index expression so the ColumnBindings align with the column bindings of the current table diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index ac55613c9295..0a3e0ed034f3 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -89,8 +89,6 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co column_definitions.emplace_back(column_def.Copy()); } - // try to initialize unknown indexes - info->InitializeIndexes(context); // first check if there are any indexes that exist that point to the removed column info->indexes.Scan([&](Index &index) { for (auto &column_id : index.column_ids) { @@ -155,8 +153,6 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id for (auto &column_def : parent.column_definitions) { column_definitions.emplace_back(column_def.Copy()); } - // try to initialize unknown indexes - info->InitializeIndexes(context); // first check if there are any indexes that exist that point to the changed column info->indexes.Scan([&](Index &index) { diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 2c7fb0fe1d79..791847b102eb 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -470,9 +470,6 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) { storage.AppendToIndexes(transaction, append_state, append_count, true); } - // try to initialize any unknown indexes - table.info->InitializeIndexes(context); - // possibly vacuum any excess index data table.info->indexes.Scan([&](Index &index) { index.Vacuum(); @@ -575,7 +572,6 @@ TableIndexList &LocalStorage::GetIndexes(DataTable &table) { if (!storage) { throw InternalException("LocalStorage::GetIndexes - local storage not found"); } - table.info->InitializeIndexes(context); return storage->indexes; } From 3b561cb8a6821a4bc0797b4b9253ec248437e90e Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 8 Apr 2024 10:59:22 +0200 Subject: [PATCH 084/147] Remove extra catalog function --- src/catalog/catalog.cpp | 11 ----------- .../operator/persistent/csv_rejects_table.cpp | 4 ++-- src/include/duckdb/catalog/catalog.hpp | 2 -- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 7e865661f862..775af179588d 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -773,17 +773,6 @@ CatalogEntry &Catalog::GetEntry(ClientContext &context, const string &schema, co throw CatalogException("CatalogElement \"%s.%s\" does not exist!", 
schema, name); } -bool Catalog::EntryExists(ClientContext &context, const string &schema, const string &name) { - vector entry_types {CatalogType::TABLE_ENTRY, CatalogType::SEQUENCE_ENTRY}; - for (auto entry_type : entry_types) { - auto result = GetEntry(context, entry_type, schema, name, OnEntryNotFound::RETURN_NULL); - if (result) { - return true; - } - } - return false; -} - optional_ptr Catalog::GetEntry(ClientContext &context, CatalogType type, const string &schema_name, const string &name, OnEntryNotFound if_not_found, QueryErrorContext error_context) { diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 31f63d0279b8..4d3248bf3944 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -25,8 +25,8 @@ shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(rejects_scan) + "_" + StringUtil::Upper(rejects_error); auto &cache = ObjectCache::GetObjectCache(context); auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG); - bool rejects_scan_exist = catalog.EntryExists(context, DEFAULT_SCHEMA, rejects_scan); - bool rejects_error_exist = catalog.EntryExists(context, DEFAULT_SCHEMA, rejects_error); + auto rejects_scan_exist = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, DEFAULT_SCHEMA, rejects_scan, OnEntryNotFound::RETURN_NULL) != nullptr; + auto rejects_error_exist = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, DEFAULT_SCHEMA, rejects_error, OnEntryNotFound::RETURN_NULL) != nullptr; if ((rejects_scan_exist || rejects_error_exist) && !cache.Get(key)) { std::ostringstream error; if (rejects_scan_exist) { diff --git a/src/include/duckdb/catalog/catalog.hpp b/src/include/duckdb/catalog/catalog.hpp index 654fe4a3569c..244a5f362037 100644 --- a/src/include/duckdb/catalog/catalog.hpp +++ b/src/include/duckdb/catalog/catalog.hpp @@ -232,8 +232,6 @@ class Catalog { //! Gets the "schema.name" entry without a specified type, if entry does not exist an exception is thrown DUCKDB_API CatalogEntry &GetEntry(ClientContext &context, const string &schema, const string &name); - //! Returns true if the "schema.name" entry without a specified type exists - DUCKDB_API bool EntryExists(ClientContext &context, const string &schema, const string &name); //! 
Fetches a logical type from the catalog
 	DUCKDB_API LogicalType GetType(ClientContext &context, const string &schema, const string &names,
 	                               OnEntryNotFound if_not_found);
 
From df3d7ff3d0c8cba66f14bc67f15dd383b081d811 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Mon, 8 Apr 2024 11:24:29 +0200
Subject: [PATCH 085/147] more pr requests, adding tests and assertions

---
 .../operator/persistent/csv_rejects_table.cpp | 12 +++++++--
 src/include/duckdb/catalog/catalog.hpp        |  1 -
 .../csv/rejects/csv_rejects_two_tables.test   | 26 ++++++++++++++++---
 third_party/utf8proc/utf8proc_wrapper.cpp     |  4 ++-
 4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp
index 4d3248bf3944..1cd98d85fe81 100644
--- a/src/execution/operator/persistent/csv_rejects_table.cpp
+++ b/src/execution/operator/persistent/csv_rejects_table.cpp
@@ -21,12 +21,19 @@ TableCatalogEntry &CSVRejectsTable::GetScansTable(ClientContext &context) {
 
 shared_ptr<CSVRejectsTable> CSVRejectsTable::GetOrCreate(ClientContext &context, const string &rejects_scan,
                                                          const string &rejects_error) {
+	// Check that these names can't be the same
+	if (rejects_scan == rejects_error) {
+		throw BinderException("The names of the rejects scan and rejects error tables can't be the same. Use different "
+		                      "names for these tables.");
+	}
 	auto key =
 	    "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(rejects_scan) + "_" + StringUtil::Upper(rejects_error);
 	auto &cache = ObjectCache::GetObjectCache(context);
 	auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
-	auto rejects_scan_exist = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, DEFAULT_SCHEMA, rejects_scan, OnEntryNotFound::RETURN_NULL) != nullptr;
-	auto rejects_error_exist = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, DEFAULT_SCHEMA, rejects_error, OnEntryNotFound::RETURN_NULL) != nullptr;
+	auto rejects_scan_exist = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, DEFAULT_SCHEMA, rejects_scan,
+	                                           OnEntryNotFound::RETURN_NULL) != nullptr;
+	auto rejects_error_exist = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, DEFAULT_SCHEMA, rejects_error,
+	                                            OnEntryNotFound::RETURN_NULL) != nullptr;
 	if ((rejects_scan_exist || rejects_error_exist) && !cache.Get<CSVRejectsTable>(key)) {
 		std::ostringstream error;
 		if (rejects_scan_exist) {
@@ -38,6 +45,7 @@ shared_ptr<CSVRejectsTable> CSVRejectsTable::GetOrCreate(ClientContext &context, const string &r
 		error << "Either drop the used name(s), or give other name options in the CSV Reader function.\n";
 		throw BinderException(error.str());
 	}
+
 	return cache.GetOrCreate<CSVRejectsTable>(key, rejects_scan, rejects_error);
 }
 
diff --git a/src/include/duckdb/catalog/catalog.hpp b/src/include/duckdb/catalog/catalog.hpp
index 244a5f362037..871738a975de 100644
--- a/src/include/duckdb/catalog/catalog.hpp
+++ b/src/include/duckdb/catalog/catalog.hpp
@@ -232,7 +232,6 @@ class Catalog {
 	//! Gets the "schema.name" entry without a specified type, if entry does not exist an exception is thrown
 	DUCKDB_API CatalogEntry &GetEntry(ClientContext &context, const string &schema, const string &name);
-	//! 
Fetches a logical type from the catalog DUCKDB_API LogicalType GetType(ClientContext &context, const string &schema, const string &names, OnEntryNotFound if_not_found); diff --git a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test index f856d929fa2e..902031695f9f 100644 --- a/test/sql/copy/csv/rejects/csv_rejects_two_tables.test +++ b/test/sql/copy/csv/rejects/csv_rejects_two_tables.test @@ -156,8 +156,7 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M rejects_scan = 't' ); ---- -Reject Scan Table name "t" is already in use. Reject Error Table name "t" is already in use. Either drop the used name(s), or give other name options in the CSV Reader function. - +The names of the rejects scan and rejects error tables can't be the same. Use different names for these tables. # Test giving the name of the tables with store_rejects and/or ignore_errors set to false throws statement error @@ -230,4 +229,25 @@ SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), M store_rejects = false ); ---- -REJECTS_SCAN option is only supported when store_rejects is not manually set to false \ No newline at end of file +REJECTS_SCAN option is only supported when store_rejects is not manually set to false + +# Add a test where both tables have the same name (This should fail, because they both have the same name) +statement error +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_scan = 'same_name_because_why_not', + rejects_table = 'same_name_because_why_not', + store_rejects = true + ); +---- +The names of the rejects scan and rejects error tables can't be the same. Use different names for these tables. + +# This hopefully doesn't fail because the names don't get registered if they fail. 
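A note on the mechanism the tests above and below exercise: since patch 085, the reader validates the two rejects-table names and probes the temporary catalog before anything is registered in the object cache, so a failed call leaves no state behind. A minimal standalone sketch of that validate-then-get-or-create flow follows; `Catalog` and `RejectsCache` here are simplified stand-ins for DuckDB's temporary catalog and object cache, not the real interfaces:

```cpp
#include <map>
#include <memory>
#include <set>
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the temp catalog and the object cache.
using Catalog = std::set<std::string>;
struct RejectsTables {
	std::string scan_name;
	std::string error_name;
};
using RejectsCache = std::map<std::string, std::shared_ptr<RejectsTables>>;

std::shared_ptr<RejectsTables> GetOrCreate(Catalog &catalog, RejectsCache &cache,
                                           const std::string &scan, const std::string &error) {
	// Reject identical names up front, before anything is registered anywhere.
	if (scan == error) {
		throw std::invalid_argument("rejects scan and rejects error tables need distinct names");
	}
	auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + scan + "_" + error;
	// If either table already exists but no cache entry claims it, the name is taken.
	bool scan_exists = catalog.count(scan) != 0;
	bool error_exists = catalog.count(error) != 0;
	if ((scan_exists || error_exists) && cache.find(key) == cache.end()) {
		throw std::invalid_argument("table name already in use; drop it or pick another name");
	}
	auto &entry = cache[key];
	if (!entry) {
		// Only now does the pair of names get registered.
		entry = std::make_shared<RejectsTables>(RejectsTables {scan, error});
	}
	return entry;
}
```

Because both checks run before the cache entry is created, the final statement of the test below can reuse `same_name_because_why_not`: the failing queries above never got far enough to register it.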
+statement ok +SELECT typeof(first(column0)), typeof(first(column1)), COUNT(*), SUM(column0), MAX(len(column1)) FROM read_csv_auto( + 'test/sql/copy/csv/data/error/mismatch/big_bad*.csv', + sample_size=1, + rejects_scan = 'same_name_because_why_not', + rejects_table = 'same_name_because_why_not_2', + store_rejects = true); diff --git a/third_party/utf8proc/utf8proc_wrapper.cpp b/third_party/utf8proc/utf8proc_wrapper.cpp index 02f6c0efc5de..9ff3615e8a89 100644 --- a/third_party/utf8proc/utf8proc_wrapper.cpp +++ b/third_party/utf8proc/utf8proc_wrapper.cpp @@ -1,6 +1,6 @@ #include "utf8proc_wrapper.hpp" #include "utf8proc.hpp" - +#include "duckdb/common/assert.hpp" using namespace std; namespace duckdb { @@ -103,6 +103,7 @@ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *i } void Utf8Proc::MakeValid(char *s, size_t len, char special_flag){ + D_ASSERT(special_flag <=127); UnicodeType type = UnicodeType::ASCII; for (size_t i = 0; i < len; i++) { int c = (int) s[i]; @@ -133,6 +134,7 @@ void Utf8Proc::MakeValid(char *s, size_t len, char special_flag){ type = UnicodeType::ASCII; } } + D_ASSERT(Utf8Proc::IsValid(s,len)); } char* Utf8Proc::Normalize(const char *s, size_t len) { From 7e078316e8aba6465ec2e7382ea9ce428a28aa9e Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 8 Apr 2024 12:42:09 +0200 Subject: [PATCH 086/147] Also print error_line when throwing --- src/execution/operator/csv_scanner/util/csv_error.cpp | 4 ++++ test/sql/copy/csv/csv_error_message.test | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/execution/operator/csv_scanner/util/csv_error.cpp b/src/execution/operator/csv_scanner/util/csv_error.cpp index da9f5d5e2435..d22738f21415 100644 --- a/src/execution/operator/csv_scanner/util/csv_error.cpp +++ b/src/execution/operator/csv_scanner/util/csv_error.cpp @@ -19,12 +19,16 @@ void CSVErrorHandler::ThrowError(CSVError csv_error) { std::ostringstream error; if (PrintLineNumber(csv_error)) { error << "CSV Error on Line: " << GetLine(csv_error.error_info) << '\n'; + if (!csv_error.csv_row.empty()) { + error << "Original Line: " << csv_error.csv_row << '\n'; + } } if (csv_error.full_error_message.empty()) { error << csv_error.error_message; } else { error << csv_error.full_error_message; } + switch (csv_error.type) { case CSVErrorType::CAST_ERROR: throw ConversionException(error.str()); diff --git a/test/sql/copy/csv/csv_error_message.test b/test/sql/copy/csv/csv_error_message.test index b41d049fe65d..b51e98dd87e9 100644 --- a/test/sql/copy/csv/csv_error_message.test +++ b/test/sql/copy/csv/csv_error_message.test @@ -26,3 +26,7 @@ SELECT * FROM read_csv('__TEST_DIR__/int_parse_error.csv', columns={'i': 'INT'}, ---- Line: 104 +statement error +SELECT * FROM read_csv('__TEST_DIR__/int_parse_error.csv', columns={'i': 'INT'}, header=False, auto_detect=false) +---- +Original Line: hello \ No newline at end of file From 2f7be72713dfaf306a62abf3d7d9374149bf2bb1 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 8 Apr 2024 12:53:21 +0200 Subject: [PATCH 087/147] Current File Index --- .../csv_scanner/table_function/global_csv_state.cpp | 2 +- src/execution/operator/persistent/csv_rejects_table.cpp | 8 ++++++++ .../execution/operator/persistent/csv_rejects_table.hpp | 9 ++++++++- src/include/duckdb/transaction/transaction_context.hpp | 2 -- src/transaction/transaction_context.cpp | 6 ------ 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp 
b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index b156f0af8b95..1759450ef41a 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -246,7 +246,7 @@ void CSVGlobalState::FillRejectsTable() { InternalAppender scans_appender(context, scans_table); idx_t scan_idx = context.transaction.GetActiveQuery(); for (auto &file : file_scans) { - idx_t file_idx = context.transaction.GetIncrementalIndex(); + idx_t file_idx = rejects->GetCurrentFileIndex(scan_idx); auto file_name = file->file_path; auto &errors = file->error_handler->errors; // We first insert the file into the file scans table diff --git a/src/execution/operator/persistent/csv_rejects_table.cpp b/src/execution/operator/persistent/csv_rejects_table.cpp index 1cd98d85fe81..11e8c1b0edf0 100644 --- a/src/execution/operator/persistent/csv_rejects_table.cpp +++ b/src/execution/operator/persistent/csv_rejects_table.cpp @@ -19,6 +19,14 @@ TableCatalogEntry &CSVRejectsTable::GetScansTable(ClientContext &context) { return table_entry; } +idx_t CSVRejectsTable::GetCurrentFileIndex(idx_t query_id) { + if (current_query_id != query_id) { + current_query_id = query_id; + current_file_idx = 0; + } + return current_file_idx++; +} + shared_ptr CSVRejectsTable::GetOrCreate(ClientContext &context, const string &rejects_scan, const string &rejects_error) { // Check that these names can't be the same diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp index 88dd86377dc7..d1075a4fbffe 100644 --- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp @@ -32,7 +32,8 @@ class CSVRejectsTable : public ObjectCacheEntry { TableCatalogEntry &GetErrorsTable(ClientContext &context); TableCatalogEntry &GetScansTable(ClientContext &context); -public: + idx_t GetCurrentFileIndex(idx_t query_id); + static string ObjectType() { return "csv_rejects_table_cache"; } @@ -40,6 +41,12 @@ class CSVRejectsTable : public ObjectCacheEntry { string GetObjectType() override { return ObjectType(); } + +private: + //! Current File Index being used in the query + idx_t current_file_idx = 0; + //! 
Current Query ID being executed + idx_t current_query_id = 0; }; } // namespace duckdb diff --git a/src/include/duckdb/transaction/transaction_context.hpp b/src/include/duckdb/transaction/transaction_context.hpp index b265c0131498..b0a50103bb46 100644 --- a/src/include/duckdb/transaction/transaction_context.hpp +++ b/src/include/duckdb/transaction/transaction_context.hpp @@ -48,7 +48,6 @@ class TransactionContext { } idx_t GetActiveQuery(); - idx_t GetIncrementalIndex(); void ResetActiveQuery(); void SetActiveQuery(transaction_t query_number); @@ -57,7 +56,6 @@ class TransactionContext { bool auto_commit; unique_ptr current_transaction; - idx_t incremental_index = 0; TransactionContext(const TransactionContext &) = delete; }; diff --git a/src/transaction/transaction_context.cpp b/src/transaction/transaction_context.cpp index 82d1fa43094f..7185a263894b 100644 --- a/src/transaction/transaction_context.cpp +++ b/src/transaction/transaction_context.cpp @@ -89,19 +89,13 @@ idx_t TransactionContext::GetActiveQuery() { return current_transaction->GetActiveQuery(); } -idx_t TransactionContext::GetIncrementalIndex() { - return incremental_index++; -} - void TransactionContext::ResetActiveQuery() { - incremental_index = 0; if (current_transaction) { SetActiveQuery(MAXIMUM_QUERY_ID); } } void TransactionContext::SetActiveQuery(transaction_t query_number) { - incremental_index = 0; if (!current_transaction) { throw InternalException("SetActiveQuery called without active transaction"); } From 88454e4ce7511ed656da38406096002a36c360f1 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 8 Apr 2024 14:57:15 +0200 Subject: [PATCH 088/147] Maybe this is fine for the serializer? --- .../duckdb/storage/serialization/nodes.json | 49 +++++++++------- src/storage/serialization/serialize_nodes.cpp | 58 ++++++++++--------- 2 files changed, 59 insertions(+), 48 deletions(-) diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index acf730100b86..034db172d9db 100644 --- a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -537,7 +537,8 @@ "members": [ {"id": 100, "name": "ignore_errors", - "type": "CSVOption" + "type": "CSVOption", + "default": "false" }, {"id": 101, "name": "buffer_sample_size", @@ -604,64 +605,72 @@ "type": "vector" }, {"id": 117, - "name": "store_rejects", - "type": "CSVOption" + "name": "rejects_table_name", + "type": "CSVOption", + "default": "{\"reject_errors\"}" }, {"id": 118, "name": "rejects_limit", "type": "idx_t" }, {"id": 119, + "name": "rejects_recovery_columns", + "type": "vector", + "deleted": true + }, + {"id": 120, + "name": "rejects_recovery_column_ids", + "type": "vector", + "deleted": true + }, + {"id": 121, "name": "dialect_options.state_machine_options.delimiter", "type": "CSVOption" }, - {"id": 120, + {"id": 122, "name": "dialect_options.state_machine_options.quote", "type": "CSVOption" }, - {"id": 121, + {"id": 123, "name": "dialect_options.state_machine_options.escape", "type": "CSVOption" }, - {"id": 122, + {"id": 124, "name": "dialect_options.header", "type": "CSVOption" }, - {"id": 123, + {"id": 125, "name": "dialect_options.num_cols", "type": "idx_t" }, - {"id": 124, + {"id": 126, "name": "dialect_options.state_machine_options.new_line", "type": "CSVOption" }, - {"id": 125, + {"id": 127, "name": "dialect_options.skip_rows", "type": "CSVOption" }, - {"id": 126, + {"id": 128, "name": "dialect_options.date_format", "type": "map>" }, - {"id": 
127, + {"id": 129, "name": "sniffer_user_mismatch_error", "type": "string" }, - {"id": 128, + {"id": 130, "name": "parallel", "type": "bool" }, - {"id": 129, - "name": "rejects_table_name", - "type": "CSVOption" - }, - {"id": 130, - "name": "rejects_scan_name", - "type": "CSVOption" - }, {"id": 131, "name": "was_type_manually_set", "type": "vector" + }, + {"id": 132, + "name": "rejects_scan_name", + "type": "CSVOption", + "default": "{\"reject_scans\"}" } ], "pointer_type": "none" diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index 2359d127ef24..a0d459a938f0 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -118,7 +118,7 @@ CSVOption CSVOption::Deserialize(Deserializer &deserializer) { } void CSVReaderOptions::Serialize(Serializer &serializer) const { - serializer.WriteProperty>(100, "ignore_errors", ignore_errors); + serializer.WritePropertyWithDefault>(100, "ignore_errors", ignore_errors, false); serializer.WritePropertyWithDefault(101, "buffer_sample_size", buffer_sample_size); serializer.WritePropertyWithDefault(102, "null_str", null_str); serializer.WriteProperty(103, "compression", compression); @@ -135,26 +135,27 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(114, "buffer_size", buffer_size); serializer.WriteProperty(115, "file_options", file_options); serializer.WritePropertyWithDefault>(116, "force_quote", force_quote); - serializer.WriteProperty>(117, "store_rejects", store_rejects); + serializer.WritePropertyWithDefault>(117, "rejects_table_name", rejects_table_name, {"reject_errors"}); serializer.WritePropertyWithDefault(118, "rejects_limit", rejects_limit); - serializer.WriteProperty>(119, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter); - serializer.WriteProperty>(120, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); - serializer.WriteProperty>(121, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape); - serializer.WriteProperty>(122, "dialect_options.header", dialect_options.header); - serializer.WritePropertyWithDefault(123, "dialect_options.num_cols", dialect_options.num_cols); - serializer.WriteProperty>(124, "dialect_options.state_machine_options.new_line", dialect_options.state_machine_options.new_line); - serializer.WriteProperty>(125, "dialect_options.skip_rows", dialect_options.skip_rows); - serializer.WriteProperty>>(126, "dialect_options.date_format", dialect_options.date_format); - serializer.WritePropertyWithDefault(127, "sniffer_user_mismatch_error", sniffer_user_mismatch_error); - serializer.WritePropertyWithDefault(128, "parallel", parallel); - serializer.WriteProperty>(129, "rejects_table_name", rejects_table_name); - serializer.WriteProperty>(130, "rejects_scan_name", rejects_scan_name); + /* [Deleted] (vector) "rejects_recovery_columns" */ + /* [Deleted] (vector) "rejects_recovery_column_ids" */ + serializer.WriteProperty>(121, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter); + serializer.WriteProperty>(122, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote); + serializer.WriteProperty>(123, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape); + serializer.WriteProperty>(124, "dialect_options.header", 
dialect_options.header); + serializer.WritePropertyWithDefault(125, "dialect_options.num_cols", dialect_options.num_cols); + serializer.WriteProperty>(126, "dialect_options.state_machine_options.new_line", dialect_options.state_machine_options.new_line); + serializer.WriteProperty>(127, "dialect_options.skip_rows", dialect_options.skip_rows); + serializer.WriteProperty>>(128, "dialect_options.date_format", dialect_options.date_format); + serializer.WritePropertyWithDefault(129, "sniffer_user_mismatch_error", sniffer_user_mismatch_error); + serializer.WritePropertyWithDefault(130, "parallel", parallel); serializer.WritePropertyWithDefault>(131, "was_type_manually_set", was_type_manually_set); + serializer.WritePropertyWithDefault>(132, "rejects_scan_name", rejects_scan_name, {"reject_scans"}); } CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { CSVReaderOptions result; - deserializer.ReadProperty>(100, "ignore_errors", result.ignore_errors); + deserializer.ReadPropertyWithDefault>(100, "ignore_errors", result.ignore_errors, false); deserializer.ReadPropertyWithDefault(101, "buffer_sample_size", result.buffer_sample_size); deserializer.ReadPropertyWithDefault(102, "null_str", result.null_str); deserializer.ReadProperty(103, "compression", result.compression); @@ -171,21 +172,22 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { deserializer.ReadPropertyWithDefault(114, "buffer_size", result.buffer_size); deserializer.ReadProperty(115, "file_options", result.file_options); deserializer.ReadPropertyWithDefault>(116, "force_quote", result.force_quote); - deserializer.ReadProperty>(117, "store_rejects", result.store_rejects); + deserializer.ReadPropertyWithDefault>(117, "rejects_table_name", result.rejects_table_name, {"reject_errors"}); deserializer.ReadPropertyWithDefault(118, "rejects_limit", result.rejects_limit); - deserializer.ReadProperty>(119, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter); - deserializer.ReadProperty>(120, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote); - deserializer.ReadProperty>(121, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape); - deserializer.ReadProperty>(122, "dialect_options.header", result.dialect_options.header); - deserializer.ReadPropertyWithDefault(123, "dialect_options.num_cols", result.dialect_options.num_cols); - deserializer.ReadProperty>(124, "dialect_options.state_machine_options.new_line", result.dialect_options.state_machine_options.new_line); - deserializer.ReadProperty>(125, "dialect_options.skip_rows", result.dialect_options.skip_rows); - deserializer.ReadProperty>>(126, "dialect_options.date_format", result.dialect_options.date_format); - deserializer.ReadPropertyWithDefault(127, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error); - deserializer.ReadPropertyWithDefault(128, "parallel", result.parallel); - deserializer.ReadProperty>(129, "rejects_table_name", result.rejects_table_name); - deserializer.ReadProperty>(130, "rejects_scan_name", result.rejects_scan_name); + deserializer.ReadDeletedProperty>(119, "rejects_recovery_columns"); + deserializer.ReadDeletedProperty>(120, "rejects_recovery_column_ids"); + deserializer.ReadProperty>(121, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter); + deserializer.ReadProperty>(122, 
"dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote); + deserializer.ReadProperty>(123, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape); + deserializer.ReadProperty>(124, "dialect_options.header", result.dialect_options.header); + deserializer.ReadPropertyWithDefault(125, "dialect_options.num_cols", result.dialect_options.num_cols); + deserializer.ReadProperty>(126, "dialect_options.state_machine_options.new_line", result.dialect_options.state_machine_options.new_line); + deserializer.ReadProperty>(127, "dialect_options.skip_rows", result.dialect_options.skip_rows); + deserializer.ReadProperty>>(128, "dialect_options.date_format", result.dialect_options.date_format); + deserializer.ReadPropertyWithDefault(129, "sniffer_user_mismatch_error", result.sniffer_user_mismatch_error); + deserializer.ReadPropertyWithDefault(130, "parallel", result.parallel); deserializer.ReadPropertyWithDefault>(131, "was_type_manually_set", result.was_type_manually_set); + deserializer.ReadPropertyWithDefault>(132, "rejects_scan_name", result.rejects_scan_name, {"reject_scans"}); return result; } From 2fb9648b2f33226a4d1254fa4a6da326e9f76030 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 8 Apr 2024 15:03:23 +0200 Subject: [PATCH 089/147] Go away utility --- .../duckdb/execution/operator/persistent/csv_rejects_table.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp index d1075a4fbffe..7b88e0de80cb 100644 --- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp @@ -1,7 +1,5 @@ #pragma once -#include - #include "duckdb/storage/object_cache.hpp" #include "duckdb/common/mutex.hpp" #include "duckdb/common/typedefs.hpp" From feea4ce0fc4f7ed0ad3fd1d97dff415e1fe19650 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 8 Apr 2024 15:08:29 +0200 Subject: [PATCH 090/147] Update this test for smaller SF --- .../test_multiple_big_compressed_csvs.test_slow | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow index e53ef286a495..4c8116b7d3cd 100644 --- a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow +++ b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow @@ -5,19 +5,22 @@ require tpch statement ok -CALL dbgen(sf=10); +CALL dbgen(sf=1); + +statement ok +SET temp_directory='' statement ok copy lineitem to '__TEST_DIR__/lineitem.csv.gz'; statement ok -SET temp_directory='' +DROP TABLE lineitem; statement ok -CREATE TABLE lineitem_2(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity DECIMAL(15,2) NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL); +CREATE TABLE lineitem(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity DECIMAL(15,2) NOT NULL, l_extendedprice 
DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL); statement ok -INSERT INTO lineitem_2 FROM read_csv([ +INSERT INTO lineitem FROM read_csv([ '__TEST_DIR__/lineitem.csv.gz', '__TEST_DIR__/lineitem.csv.gz', '__TEST_DIR__/lineitem.csv.gz', @@ -45,6 +48,6 @@ INSERT INTO lineitem_2 FROM read_csv([ ]); query I -select count(*) from lineitem_2 +select count(*) from lineitem ---- -1439665248 \ No newline at end of file +144029160 \ No newline at end of file From 3870eda4a057b20a35e8addf19c0a2f70a407b3d Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 8 Apr 2024 16:51:46 +0200 Subject: [PATCH 091/147] some special code for options --- .../duckdb/common/serializer/deserializer.hpp | 12 ++++++++++++ .../duckdb/common/serializer/serializer.hpp | 16 ++++++++++++++++ .../duckdb/storage/serialization/nodes.json | 6 +++--- src/storage/serialization/serialize_nodes.cpp | 8 ++++---- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/include/duckdb/common/serializer/deserializer.hpp b/src/include/duckdb/common/serializer/deserializer.hpp index 000104c43c60..e4096878a810 100644 --- a/src/include/duckdb/common/serializer/deserializer.hpp +++ b/src/include/duckdb/common/serializer/deserializer.hpp @@ -114,6 +114,18 @@ class Deserializer { OnOptionalPropertyEnd(true); } + template + inline void ReadPropertyWithDefault(const field_id_t field_id, const char *tag, CSVOption &ret, + T &&default_value) { + if (!OnOptionalPropertyBegin(field_id, tag)) { + ret = std::forward(default_value); + OnOptionalPropertyEnd(false); + return; + } + ret = Read(); + OnOptionalPropertyEnd(true); + } + // Special case: // Read into an existing data_ptr_t inline void ReadProperty(const field_id_t field_id, const char *tag, data_ptr_t ret, idx_t count) { diff --git a/src/include/duckdb/common/serializer/serializer.hpp b/src/include/duckdb/common/serializer/serializer.hpp index 10b926d048e1..f791b4a892df 100644 --- a/src/include/duckdb/common/serializer/serializer.hpp +++ b/src/include/duckdb/common/serializer/serializer.hpp @@ -17,6 +17,7 @@ #include "duckdb/common/unordered_set.hpp" #include "duckdb/common/optional_idx.hpp" #include "duckdb/common/value_operations/value_operations.hpp" +#include "duckdb/execution/operator/csv_scanner/csv_option.hpp" namespace duckdb { @@ -86,6 +87,21 @@ class Serializer { OnOptionalPropertyEnd(true); } + // Specialization for Value (default Value comparison throws when comparing nulls) + template + void WritePropertyWithDefault(const field_id_t field_id, const char *tag, const CSVOption &value, + const T &&default_value) { + // If current value is default, don't write it + if (!serialize_default_values && (value == default_value)) { + OnOptionalPropertyBegin(field_id, tag, false); + OnOptionalPropertyEnd(false); + return; + } + OnOptionalPropertyBegin(field_id, tag, true); + WriteValue(value.GetValue()); + OnOptionalPropertyEnd(true); + } + // Special case: data_ptr_T void WriteProperty(const field_id_t field_id, const char *tag, const_data_ptr_t ptr, idx_t count) { OnPropertyBegin(field_id, tag); diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json index 034db172d9db..547be7329ad8 100644 --- 
a/src/include/duckdb/storage/serialization/nodes.json +++ b/src/include/duckdb/storage/serialization/nodes.json @@ -537,7 +537,7 @@ "members": [ {"id": 100, "name": "ignore_errors", - "type": "CSVOption", + "type": "bool", "default": "false" }, {"id": 101, @@ -606,8 +606,8 @@ }, {"id": 117, "name": "rejects_table_name", - "type": "CSVOption", - "default": "{\"reject_errors\"}" + "type": "string", + "default": "\"reject_errors\"" }, {"id": 118, "name": "rejects_limit", diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp index a0d459a938f0..39529e35e786 100644 --- a/src/storage/serialization/serialize_nodes.cpp +++ b/src/storage/serialization/serialize_nodes.cpp @@ -118,7 +118,7 @@ CSVOption CSVOption::Deserialize(Deserializer &deserializer) { } void CSVReaderOptions::Serialize(Serializer &serializer) const { - serializer.WritePropertyWithDefault>(100, "ignore_errors", ignore_errors, false); + serializer.WritePropertyWithDefault(100, "ignore_errors", ignore_errors, false); serializer.WritePropertyWithDefault(101, "buffer_sample_size", buffer_sample_size); serializer.WritePropertyWithDefault(102, "null_str", null_str); serializer.WriteProperty(103, "compression", compression); @@ -135,7 +135,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(114, "buffer_size", buffer_size); serializer.WriteProperty(115, "file_options", file_options); serializer.WritePropertyWithDefault>(116, "force_quote", force_quote); - serializer.WritePropertyWithDefault>(117, "rejects_table_name", rejects_table_name, {"reject_errors"}); + serializer.WritePropertyWithDefault(117, "rejects_table_name", rejects_table_name, "reject_errors"); serializer.WritePropertyWithDefault(118, "rejects_limit", rejects_limit); /* [Deleted] (vector) "rejects_recovery_columns" */ /* [Deleted] (vector) "rejects_recovery_column_ids" */ @@ -155,7 +155,7 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const { CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { CSVReaderOptions result; - deserializer.ReadPropertyWithDefault>(100, "ignore_errors", result.ignore_errors, false); + deserializer.ReadPropertyWithDefault(100, "ignore_errors", result.ignore_errors, false); deserializer.ReadPropertyWithDefault(101, "buffer_sample_size", result.buffer_sample_size); deserializer.ReadPropertyWithDefault(102, "null_str", result.null_str); deserializer.ReadProperty(103, "compression", result.compression); @@ -172,7 +172,7 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) { deserializer.ReadPropertyWithDefault(114, "buffer_size", result.buffer_size); deserializer.ReadProperty(115, "file_options", result.file_options); deserializer.ReadPropertyWithDefault>(116, "force_quote", result.force_quote); - deserializer.ReadPropertyWithDefault>(117, "rejects_table_name", result.rejects_table_name, {"reject_errors"}); + deserializer.ReadPropertyWithDefault(117, "rejects_table_name", result.rejects_table_name, "reject_errors"); deserializer.ReadPropertyWithDefault(118, "rejects_limit", result.rejects_limit); deserializer.ReadDeletedProperty>(119, "rejects_recovery_columns"); deserializer.ReadDeletedProperty>(120, "rejects_recovery_column_ids"); From 41cd77ae336fb61aae34947116862f17144d1c0d Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Tue, 9 Apr 2024 12:57:07 +0200 Subject: [PATCH 092/147] One more test still having scan_ids --- .../csv_incorrect_columns_amount_rejects.test | 84 
+++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test index 2b59e17547d3..f1b63112a4c4 100644 --- a/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test +++ b/test/sql/copy/csv/rejects/csv_incorrect_columns_amount_rejects.test @@ -13,13 +13,13 @@ SELECT * FROM read_csv( columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, store_rejects=true, auto_detect=false, header = 1); -query IIIIIIIIII rowsort -FROM reject_errors order by all; +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -3 0 1814 14505 14510 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 -3 0 1823 14575 14576 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 -3 0 2378 19009 19010 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 -3 0 2762 22075 22078 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 +0 1814 14505 14510 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 +0 1823 14575 14576 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +0 2378 19009 19010 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +0 2762 22075 22078 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 statement ok DROP TABLE reject_errors; @@ -33,15 +33,15 @@ SELECT * FROM read_csv( columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, store_rejects=true, auto_detect=false, header = 1); -query IIIIIIIIII rowsort -FROM reject_errors order by all; +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -7 0 1096 8761 8768 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -7 0 1096 8761 8770 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 -7 0 1159 9269 9276 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -7 0 1159 9269 9278 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 -7 0 1206 9649 9656 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 -7 0 2769 22155 22162 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +0 1096 8761 8768 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +0 1096 8761 8770 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +0 1159 9269 9276 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +0 1159 9269 9278 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +0 1206 9649 9656 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +0 2769 22155 22162 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 statement ok DROP TABLE reject_errors; @@ -55,15 +55,15 @@ SELECT * FROM read_csv( columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, store_rejects=true, auto_detect=false, header = 1); -query IIIIIIIIII rowsort -FROM reject_errors order by all; +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all; ---- -11 0 1604 12825 12826 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 -11 0 1671 13355 13362 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -11 0 1671 13355 13364 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 -11 0 2751 21999 22002 2 c 
MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 -11 0 2768 22131 22138 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -11 0 2768 22131 22140 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +0 1604 12825 12826 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +0 1671 13355 13362 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +0 1671 13355 13364 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +0 2751 21999 22002 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 +0 2768 22131 22138 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +0 2768 22131 22140 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 # Different Buffer Sizes @@ -102,24 +102,24 @@ SELECT * FROM read_csv( columns = {'a': 'INTEGER', 'b': 'INTEGER', 'c': 'INTEGER', 'd': 'INTEGER'}, store_rejects=true, auto_detect=false, header = 1); -query IIIIIIIIII rowsort -FROM reject_errors order by all +query IIIIIIIII rowsort +SELECT * EXCLUDE (scan_id) FROM reject_errors order by all ---- -35 0 1814 14505 14510 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 -35 0 1823 14575 14576 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 -35 0 2378 19009 19010 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 -35 0 2762 22075 22078 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 -35 1 1096 8761 8768 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -35 1 1096 8761 8770 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 -35 1 1159 9269 9276 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -35 1 1159 9269 9278 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 -35 1 1206 9649 9656 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 -35 1 2769 22155 22162 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 -35 2 1604 12825 12826 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 -35 2 1671 13355 13362 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -35 2 1671 13355 13364 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 -35 2 2751 21999 22002 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 -35 2 2768 22131 22138 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 -35 2 2768 22131 22140 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 -35 3 3 17 24 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 -35 3 4 27 32 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 +0 1814 14505 14510 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 +0 1823 14575 14576 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +0 2378 19009 19010 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +0 2762 22075 22078 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 +1 1096 8761 8768 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +1 1096 8761 8770 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +1 1159 9269 9276 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +1 1159 9269 9278 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +1 1206 9649 9656 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected 
Number of Columns: 4 Found: 5 +1 2769 22155 22162 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +2 1604 12825 12826 1 b MISSING COLUMNS 1 Expected Number of Columns: 4 Found: 1 +2 1671 13355 13362 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +2 1671 13355 13364 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +2 2751 21999 22002 2 c MISSING COLUMNS 1,2 Expected Number of Columns: 4 Found: 2 +2 2768 22131 22138 5 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 5 +2 2768 22131 22140 6 NULL TOO MANY COLUMNS 1,2,3,4,5,6 Expected Number of Columns: 4 Found: 6 +3 3 17 24 5 NULL TOO MANY COLUMNS 1,2,3,4,5 Expected Number of Columns: 4 Found: 5 +3 4 27 32 3 d MISSING COLUMNS 1,2,3 Expected Number of Columns: 4 Found: 3 From 437f8de28428234f864571d4db66b93d103a9ba9 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 9 Apr 2024 22:05:46 +0200 Subject: [PATCH 093/147] Respect read-only mode in dbgen and dsdgen --- extension/tpcds/tpcds_extension.cpp | 10 +++++-- extension/tpch/tpch_extension.cpp | 12 +++++--- .../duckdb/function/table_function.hpp | 5 ++-- .../binder/tableref/bind_table_function.cpp | 2 +- src/planner/operator/logical_get.cpp | 2 +- test/sql/tpcds/dsdgen_readonly.test | 30 +++++++++++++++++++ test/sql/tpch/dbgen_readonly.test | 30 +++++++++++++++++++ 7 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 test/sql/tpcds/dsdgen_readonly.test create mode 100644 test/sql/tpch/dbgen_readonly.test diff --git a/extension/tpcds/tpcds_extension.cpp b/extension/tpcds/tpcds_extension.cpp index c6157b67ae93..44c92d91333c 100644 --- a/extension/tpcds/tpcds_extension.cpp +++ b/extension/tpcds/tpcds_extension.cpp @@ -43,13 +43,17 @@ static duckdb::unique_ptr DsdgenBind(ClientContext &context, Table result->keys = kv.second.GetValue(); } } + if (input.binder) { + auto &catalog = Catalog::GetCatalog(context, result->catalog); + input.binder->properties.modified_databases.insert(catalog.GetName()); + } return_types.emplace_back(LogicalType::BOOLEAN); names.emplace_back("Success"); return std::move(result); } static void DsdgenFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = (DSDGenFunctionData &)*data_p.bind_data; + auto &data = data_p.bind_data->CastNoConst(); if (data.finished) { return; } @@ -82,7 +86,7 @@ static duckdb::unique_ptr TPCDSQueryBind(ClientContext &context, T } static void TPCDSQueryFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = (TPCDSData &)*data_p.global_state; + auto &data = data_p.global_state->Cast(); idx_t tpcds_queries = tpcds::DSDGenWrapper::QueriesCount(); if (data.offset >= tpcds_queries) { // finished returning values @@ -116,7 +120,7 @@ static duckdb::unique_ptr TPCDSQueryAnswerBind(ClientContext &cont } static void TPCDSQueryAnswerFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = (TPCDSData &)*data_p.global_state; + auto &data = data_p.global_state->Cast(); idx_t tpcds_queries = tpcds::DSDGenWrapper::QueriesCount(); vector scale_factors {1, 10}; idx_t total_answers = tpcds_queries * scale_factors.size(); diff --git a/extension/tpch/tpch_extension.cpp b/extension/tpch/tpch_extension.cpp index 68548438ab20..e1d6016b0681 100644 --- a/extension/tpch/tpch_extension.cpp +++ b/extension/tpch/tpch_extension.cpp @@ -8,6 +8,7 @@ #include "duckdb/parser/parser.hpp" #include 
"duckdb/parser/statement/select_statement.hpp" #include "duckdb/main/extension_util.hpp" +#include "duckdb/transaction/transaction.hpp" #endif #include "dbgen/dbgen.hpp" @@ -51,14 +52,17 @@ static duckdb::unique_ptr DbgenBind(ClientContext &context, TableF if (result->children != 1 && result->step == -1) { throw InvalidInputException("Step must be defined when children are defined"); } - + if (input.binder) { + auto &catalog = Catalog::GetCatalog(context, result->catalog); + input.binder->properties.modified_databases.insert(catalog.GetName()); + } return_types.emplace_back(LogicalType::BOOLEAN); names.emplace_back("Success"); return std::move(result); } static void DbgenFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = (DBGenFunctionData &)*data_p.bind_data; + auto &data = data_p.bind_data->CastNoConst(); if (data.finished) { return; } @@ -92,7 +96,7 @@ static duckdb::unique_ptr TPCHQueryBind(ClientContext &context, Ta } static void TPCHQueryFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = (TPCHData &)*data_p.global_state; + auto &data = data_p.global_state->Cast(); idx_t tpch_queries = 22; if (data.offset >= tpch_queries) { // finished returning values @@ -126,7 +130,7 @@ static duckdb::unique_ptr TPCHQueryAnswerBind(ClientContext &conte } static void TPCHQueryAnswerFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = (TPCHData &)*data_p.global_state; + auto &data = data_p.global_state->Cast(); idx_t tpch_queries = 22; vector scale_factors {0.01, 0.1, 1}; idx_t total_answers = tpch_queries * scale_factors.size(); diff --git a/src/include/duckdb/function/table_function.hpp b/src/include/duckdb/function/table_function.hpp index 30cc0d9f8df7..3321d274f902 100644 --- a/src/include/duckdb/function/table_function.hpp +++ b/src/include/duckdb/function/table_function.hpp @@ -83,9 +83,9 @@ struct LocalTableFunctionState { struct TableFunctionBindInput { TableFunctionBindInput(vector &inputs, named_parameter_map_t &named_parameters, vector &input_table_types, vector &input_table_names, - optional_ptr info) + optional_ptr info, optional_ptr binder) : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types), - input_table_names(input_table_names), info(info) { + input_table_names(input_table_names), info(info), binder(binder) { } vector &inputs; @@ -93,6 +93,7 @@ struct TableFunctionBindInput { vector &input_table_types; vector &input_table_names; optional_ptr info; + optional_ptr binder; }; struct TableFunctionInitInput { diff --git a/src/planner/binder/tableref/bind_table_function.cpp b/src/planner/binder/tableref/bind_table_function.cpp index 16a60ce1cd19..be9848683b54 100644 --- a/src/planner/binder/tableref/bind_table_function.cpp +++ b/src/planner/binder/tableref/bind_table_function.cpp @@ -140,7 +140,7 @@ Binder::BindTableFunctionInternal(TableFunction &table_function, const string &f vector return_names; if (table_function.bind || table_function.bind_replace) { TableFunctionBindInput bind_input(parameters, named_parameters, input_table_types, input_table_names, - table_function.function_info.get()); + table_function.function_info.get(), this); if (table_function.bind_replace) { auto new_plan = table_function.bind_replace(context, bind_input); if (new_plan != nullptr) { diff --git a/src/planner/operator/logical_get.cpp b/src/planner/operator/logical_get.cpp index 1ee9256189ed..077eb6cbbaa9 100644 --- 
a/src/planner/operator/logical_get.cpp +++ b/src/planner/operator/logical_get.cpp @@ -165,7 +165,7 @@ unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) deserializer.ReadProperty(208, "input_table_types", result->input_table_types); deserializer.ReadProperty(209, "input_table_names", result->input_table_names); TableFunctionBindInput input(result->parameters, result->named_parameters, result->input_table_types, - result->input_table_names, function.function_info.get()); + result->input_table_names, function.function_info.get(), nullptr); vector bind_return_types; vector bind_names; diff --git a/test/sql/tpcds/dsdgen_readonly.test b/test/sql/tpcds/dsdgen_readonly.test new file mode 100644 index 000000000000..a037cade63ec --- /dev/null +++ b/test/sql/tpcds/dsdgen_readonly.test @@ -0,0 +1,30 @@ +# name: test/sql/tpcds/dsdgen_readonly.test +# description: Test that dsdgen respects read-only mode +# group: [tpcds] + +require tpcds + +load __TEST_DIR__/test_dsdgen_ro.db + +statement ok +CREATE TABLE tbl (i INTEGER); + +load __TEST_DIR__/test_dsdgen_ro.db readonly + +statement error +CALL dsdgen(sf=0); +---- +read-only + +load + +statement ok +ATTACH '__TEST_DIR__/test_dsdgen_ro.db' AS dsdgentest (READ_ONLY) + +statement error +CALL dsdgen(sf=0, catalog='dsdgentest'); +---- +read-only + +statement ok +CALL dsdgen(sf=0); diff --git a/test/sql/tpch/dbgen_readonly.test b/test/sql/tpch/dbgen_readonly.test new file mode 100644 index 000000000000..de5ecc8d3377 --- /dev/null +++ b/test/sql/tpch/dbgen_readonly.test @@ -0,0 +1,30 @@ +# name: test/sql/tpch/dbgen_readonly.test +# description: Test that dbgen respects read-only mode +# group: [tpch] + +require tpch + +load __TEST_DIR__/test_dbgen_ro.db + +statement ok +CREATE TABLE tbl (i INTEGER); + +load __TEST_DIR__/test_dbgen_ro.db readonly + +statement error +CALL dbgen(sf=0); +---- +read-only + +load + +statement ok +ATTACH '__TEST_DIR__/test_dbgen_ro.db' AS dbgentest (READ_ONLY) + +statement error +CALL dbgen(sf=0, catalog='dbgentest'); +---- +read-only + +statement ok +CALL dbgen(sf=0); From c4a109f97e017e9ba6733aceafca7c803d31bd62 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 9 Apr 2024 22:22:43 +0200 Subject: [PATCH 094/147] Correctly handle quoted database names in USE statement --- src/catalog/catalog_search_path.cpp | 5 +++ .../transform/statement/transform_use.cpp | 5 ++- test/sql/attach/attach_dbname_quotes.test | 42 +++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 test/sql/attach/attach_dbname_quotes.test diff --git a/src/catalog/catalog_search_path.cpp b/src/catalog/catalog_search_path.cpp index 6be5f491c0f0..9368e300f4ad 100644 --- a/src/catalog/catalog_search_path.cpp +++ b/src/catalog/catalog_search_path.cpp @@ -67,6 +67,11 @@ CatalogSearchEntry CatalogSearchEntry::ParseInternal(const string &input, idx_t if (input[idx] == '"') { //! 
unquote idx++; + if (idx < input.size() && input[idx] == '"') { + // escaped quote + entry += input[idx]; + continue; + } goto normal; } entry += input[idx]; diff --git a/src/parser/transform/statement/transform_use.cpp b/src/parser/transform/statement/transform_use.cpp index a4d76843725f..4bb8a2b86414 100644 --- a/src/parser/transform/statement/transform_use.cpp +++ b/src/parser/transform/statement/transform_use.cpp @@ -11,9 +11,10 @@ unique_ptr Transformer::TransformUse(duckdb_libpgquery::PGUseStmt } string name; if (IsInvalidSchema(qualified_name.schema)) { - name = qualified_name.name; + name = KeywordHelper::WriteOptionallyQuoted(qualified_name.name, '"'); } else { - name = qualified_name.schema + "." + qualified_name.name; + name = KeywordHelper::WriteOptionallyQuoted(qualified_name.schema, '"') + "." + + KeywordHelper::WriteOptionallyQuoted(qualified_name.name, '"'); } auto name_expr = make_uniq(Value(name)); return make_uniq("schema", std::move(name_expr), SetScope::AUTOMATIC); diff --git a/test/sql/attach/attach_dbname_quotes.test b/test/sql/attach/attach_dbname_quotes.test new file mode 100644 index 000000000000..6c1069dd95c6 --- /dev/null +++ b/test/sql/attach/attach_dbname_quotes.test @@ -0,0 +1,42 @@ +# name: test/sql/attach/attach_dbname_quotes.test +# description: Test ATTACH with a quoted database name +# group: [attach] + +require noforcestorage + +statement ok +ATTACH ':memory:' as "my""db"; + +statement ok +CREATE TABLE "my""db".tbl(i int); + +statement ok +INSERT INTO "my""db".tbl VALUES (42) + +statement ok +USE "my""db"; + +query I +SELECT * FROM tbl +---- +42 + +statement ok +USE memory + +statement ok +CREATE SCHEMA "my""db"."my""schema" + +statement ok +CREATE TABLE "my""db"."my""schema".tbl(i int); + +statement ok +INSERT INTO "my""db"."my""schema".tbl VALUES (84) + +statement ok +USE "my""db"."my""schema" + +query I +SELECT * FROM tbl +---- +84 From 1ce844a7e7a9da99e43f7e925e261a576a02513b Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 9 Apr 2024 22:23:40 +0200 Subject: [PATCH 095/147] Only quotes --- test/sql/attach/attach_dbname_quotes.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/sql/attach/attach_dbname_quotes.test b/test/sql/attach/attach_dbname_quotes.test index 6c1069dd95c6..2c4157dcb1ac 100644 --- a/test/sql/attach/attach_dbname_quotes.test +++ b/test/sql/attach/attach_dbname_quotes.test @@ -40,3 +40,9 @@ query I SELECT * FROM tbl ---- 84 + +statement ok +CREATE SCHEMA """" + +statement ok +USE """" From eb760b8aae9eed69ef5087168d4085d86e991468 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 9 Apr 2024 23:54:16 +0200 Subject: [PATCH 096/147] Test fix --- test/sql/attach/attach_table_info.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/attach/attach_table_info.test b/test/sql/attach/attach_table_info.test index d8609aab98be..7f312f0d530c 100644 --- a/test/sql/attach/attach_table_info.test +++ b/test/sql/attach/attach_table_info.test @@ -38,7 +38,7 @@ SELECT current_database() memory statement ok -USE "new_database.new_schema" +USE new_database.new_schema query ITTTTT nosort table_info PRAGMA table_info('integers'); From 684c10bb92d0fc11c351ee19794edcb69c1babbf Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Tue, 9 Apr 2024 23:54:52 +0200 Subject: [PATCH 097/147] Fix for Python client bind --- tools/pythonpkg/src/python_udf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pythonpkg/src/python_udf.cpp b/tools/pythonpkg/src/python_udf.cpp index 
8850e8e0218d..45f1f40c6f16 100644 --- a/tools/pythonpkg/src/python_udf.cpp +++ b/tools/pythonpkg/src/python_udf.cpp @@ -65,7 +65,7 @@ static void ConvertPyArrowToDataChunk(const py::object &table, Vector &out, Clie vector input_types; vector input_names; - auto bind_input = TableFunctionBindInput(children, named_params, input_types, input_names, nullptr); + TableFunctionBindInput bind_input(children, named_params, input_types, input_names, nullptr, nullptr); vector return_types; vector return_names; From 797aa1e571d7a89ee094b958b4ad4f130913f914 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 10 Apr 2024 08:46:21 +0200 Subject: [PATCH 098/147] skip_reload because of ATTACH --- test/sql/tpcds/dsdgen_readonly.test | 2 ++ test/sql/tpch/dbgen_readonly.test | 2 ++ 2 files changed, 4 insertions(+) diff --git a/test/sql/tpcds/dsdgen_readonly.test b/test/sql/tpcds/dsdgen_readonly.test index a037cade63ec..ec0c448d8e2e 100644 --- a/test/sql/tpcds/dsdgen_readonly.test +++ b/test/sql/tpcds/dsdgen_readonly.test @@ -4,6 +4,8 @@ require tpcds +require skip_reload + load __TEST_DIR__/test_dsdgen_ro.db statement ok diff --git a/test/sql/tpch/dbgen_readonly.test b/test/sql/tpch/dbgen_readonly.test index de5ecc8d3377..33cdb551c609 100644 --- a/test/sql/tpch/dbgen_readonly.test +++ b/test/sql/tpch/dbgen_readonly.test @@ -4,6 +4,8 @@ require tpch +require skip_reload + load __TEST_DIR__/test_dbgen_ro.db statement ok From 9562296f692c5acba20b60b296e80585da803cc3 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 10 Apr 2024 09:35:22 +0200 Subject: [PATCH 099/147] [CI] Remove GITHUB_PAT variable from R-CMD-check --- .github/workflows/R_CMD_CHECK.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/R_CMD_CHECK.yml b/.github/workflows/R_CMD_CHECK.yml index a5fe7c191be1..d111e135547a 100644 --- a/.github/workflows/R_CMD_CHECK.yml +++ b/.github/workflows/R_CMD_CHECK.yml @@ -51,7 +51,6 @@ jobs: - {os: ubuntu-latest, r: 'release'} env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: From 1a1cd7b565b5b7f6f87f85f249cd2fa46f6f0965 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 10 Apr 2024 09:43:00 +0200 Subject: [PATCH 100/147] Remove quotes in tests --- test/sql/attach/attach_did_you_mean.test | 2 +- test/sql/attach/attach_nested_types.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sql/attach/attach_did_you_mean.test b/test/sql/attach/attach_did_you_mean.test index 26c418a4ba6f..967d2103e3a5 100644 --- a/test/sql/attach/attach_did_you_mean.test +++ b/test/sql/attach/attach_did_you_mean.test @@ -55,7 +55,7 @@ SELECT * FROM memory.hello # what if we switch default database AND default schema? 
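The test change below ties back to patches 094 and 095 earlier in this series: `USE db1.myschema` is now parsed as a dotted search-path entry, so quoting ordinary names is unnecessary, while names that genuinely contain quotes are written with doubled quotes (`"my""db"`). A rough standalone sketch of the doubled-quote rule, simplified from what `CatalogSearchEntry::ParseInternal` does (the real parser also handles `.` separators and unquoted entries):

```cpp
#include <cstddef>
#include <stdexcept>
#include <string>

// Parse a double-quoted identifier starting at input[idx] == '"'.
// A doubled quote ("") inside the identifier denotes a literal quote.
std::string ParseQuotedIdentifier(const std::string &input, size_t &idx) {
	if (idx >= input.size() || input[idx] != '"') {
		throw std::invalid_argument("expected opening quote");
	}
	idx++; // skip the opening quote
	std::string entry;
	while (idx < input.size()) {
		if (input[idx] == '"') {
			idx++;
			if (idx < input.size() && input[idx] == '"') {
				entry += '"'; // escaped quote
				idx++;
				continue;
			}
			return entry; // closing quote
		}
		entry += input[idx++];
	}
	throw std::invalid_argument("unterminated quoted identifier");
}
```

For example, parsing `"my""db"` with this routine yields `my"db`, which is the behaviour the new attach_dbname_quotes.test checks for `USE "my""db"`. The statements below simply drop quotes that were never needed.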
statement ok -USE "db1.myschema" +USE db1.myschema statement ok SELECT * FROM blablabla diff --git a/test/sql/attach/attach_nested_types.test b/test/sql/attach/attach_nested_types.test index 9b63c6e8c052..99369a286c99 100644 --- a/test/sql/attach/attach_nested_types.test +++ b/test/sql/attach/attach_nested_types.test @@ -49,7 +49,7 @@ SELECT "table" FROM database.schema.table {'col': {'field': 42}} statement ok -USE "database.schema" +USE database.schema query I SELECT "table" FROM "table" From 6f3e37475d983fece4b967ed7a438b3379b3bc4d Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 10 Apr 2024 10:17:32 +0200 Subject: [PATCH 101/147] Bump-back duckdb_azure to pre-lzma custom vcpkg-port moving bminor mirror --- .github/config/out_of_tree_extensions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 131d84e90812..c94a1e029af6 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -38,7 +38,7 @@ if (NOT MINGW) duckdb_extension_load(azure LOAD_TESTS GIT_URL https://github.com/duckdb/duckdb_azure - GIT_TAG 6620a32454c1eb2e455104d87262061d2464aad0 + GIT_TAG 506b1fa0f3f892000130feac7a0e1de346095e80 APPLY_PATCHES ) endif() From f41e41be8f49d6dc9f7bdd23dcc992aaefa098bc Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 10 Apr 2024 11:17:53 +0200 Subject: [PATCH 102/147] Bump-forward duckdb_azure to main, post-lzma custom vcpkg-port moving mirror --- .github/config/out_of_tree_extensions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index c94a1e029af6..5d1795ef1168 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -38,7 +38,7 @@ if (NOT MINGW) duckdb_extension_load(azure LOAD_TESTS GIT_URL https://github.com/duckdb/duckdb_azure - GIT_TAG 506b1fa0f3f892000130feac7a0e1de346095e80 + GIT_TAG 4512a652479016d40d712f990cab9b9aab43d341 APPLY_PATCHES ) endif() From bbd74ac95dc58000d7add23c2ecf099a5ff284a5 Mon Sep 17 00:00:00 2001 From: Christina Sioula Date: Fri, 5 Apr 2024 12:59:37 +0200 Subject: [PATCH 103/147] support reading gzipped files with the test runner --- .gitignore | 3 +- data/storage/index_0-9-1.db.gz | Bin 0 -> 3332 bytes data/storage/test.db.gz | Bin 0 -> 591 bytes test/sql/storage/unzip.test | 62 +++++++++++++++++++++++++++ test/sqlite/sqllogic_command.cpp | 36 ++++++++++++++++ test/sqlite/sqllogic_command.hpp | 16 +++++++ test/sqlite/sqllogic_parser.cpp | 3 ++ test/sqlite/sqllogic_parser.hpp | 3 +- test/sqlite/sqllogic_test_runner.cpp | 34 +++++++++++++-- 9 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 data/storage/index_0-9-1.db.gz create mode 100644 data/storage/test.db.gz create mode 100644 test/sql/storage/unzip.test diff --git a/.gitignore b/.gitignore index 67114a06f137..f754b6ec73c4 100644 --- a/.gitignore +++ b/.gitignore @@ -339,9 +339,10 @@ zig-cache/* # .db files *.db +*.db.gz # local cmake extension config extension/extension_config_local.cmake # extension_external dir -extension_external \ No newline at end of file +extension_external diff --git a/data/storage/index_0-9-1.db.gz b/data/storage/index_0-9-1.db.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4f963ac8cc47a3dc49c7d763924dfe3a9547da8 GIT binary patch literal 3332 zcmd^<{WlwC8^_bqO=}aIJ~}PUTJ^F;Lx*s@WkTyQDlq~m 
zjN79D2C^ytM9Ww!z;beoz7LX;n|r++o?q@~6DsiXJ$xj=&zeRxw8!kg#$8;Rn4ah= zE;#(XWm`Y(bKk?3XR})R{e!*(`FcTafBmyNaKcP6;$Ki+TyPdYxXaO5YJpGh_J?-W zcGU_4>t5)31Zwt1sdtTjxU^Cbm@T7htn)orf!;Zyrpli^V>uxA_1+V6O3DGsJs)V! z)ksZTmDNrF0O0wK1kwcuKr?YJZ~nni{cnZ0HIZEm#a}8tXNfY7dpYZLz7^uF z&TlYB__~HvAbIHLecA(M0#nJWGZpm>Zf@KvGCY1X#nxh(#(a5iH%Z{qdDb}DvHpOq zaTQw_9b+&&GcQfr#`ftZ6jm&U$_Z0;M%q>JiA#Im(AkjQNX!rC)c%-@$~J_U48eM==DmP?Aj(hnxQIl z$hG=LAT_+a=F!I0)6ea}7#DL%<=3xc5c6p}0Sh~a-+Sy>EMSM*wSg8^22(d0+QzSG zk4Ip`Z1J^#Ui_SOFI2E-eErEWp42e+$(IC^uUiRZ<#%xRkkFh6w0-v3-%R=ba-}3V z=2UD^M-ugw*S;!|&ap+Pe8?bX_^tU>>F4yb(&0NTrI+50Y^2T_Iv+o71+}NOhiG6a z5R%BvUp~GsR2)T99k_6vrnFLC*Ne$(F#dh9=Hxun(u233SS6ytb)9gX)FCOe z|4O&m`o_zwGL&ErwDGH!ba}Q7xESHo|@HIy#SZ8#1`E zQrIkm46d%@&)tQsg-DCtPRn1@aMxI_KIe*u??i6Ke8ZlvH=t#P)+zSg)$SQ2^`%@q zKU7kgHzH{S@+~$$g6(~By(K4Ac-+julJ~LyIS`MGM)Go;s(mT~lB4A_Kd{`YZ=FPA zJxVGYE9gCT)z2y#VbwP}a4iULq-R*o2VWJp?%Li8Z9$j*5KRH{slg!asOP5u24v6R zp-=2aXBIGwieY0+BRkRO-UDG+B9Zs{{{4aX?&wQ0C8Y1*H)Klg@75*i=X+gye1GiB zw5c)JV$XIS)5Co-ft0%P%SCd^}SSl0lPWh;_674i3@zfmJy~T6e zdAyiHd4hXG8U@y&m;%6MDfJ42w*n2j$&=(8E9XVz^+ccZO?6;4+i_?DiGKoaD=L-P zO@?3~uxhFV$^OE(`?G&@BvClF<_&?cE(2PIVE}DuLFY{lLAtq!l}BqA-cHA*i~6xN dReSViC_VLRqwQ>@E!d1^2z?0cX8Vr<(@# zoSi49g$K+#YW#ucp5x8Po#+1SY)drhQme7^J-hz%?*F@XzqkAzW1Ay>`S0Q7Q_nu# zy3}CK=Agoj5mI;bL&ct*dV1^5pFaP3<`Tbty=Z?9266_%U#Jqp?6TeEH z4Q2%L?!T-nd;G5A*{+Gt|9W|xR$SYnBDOWa;?7O$Td&V>O^Hrk8*eHy>+0pS_0_`1 zX6~G?1Y_UjO^?n0Z%{{Ot2x zzi$y-eL;@%tHg)fZ$Iknu3cGm@ZV(N>idF64o^B|aD7eLbBRZRa~~B|m1fu0UU|>? z!~dAuUj~K)*_A6!n5#eEs8qT6l*Prk<}C*}-t+w0x4(Y>|HZW+TSgu@Aa2W?k>_}a I!GMJU0N>E9%K!iX literal 0 HcmV?d00001 diff --git a/test/sql/storage/unzip.test b/test/sql/storage/unzip.test new file mode 100644 index 000000000000..6592fc6c672c --- /dev/null +++ b/test/sql/storage/unzip.test @@ -0,0 +1,62 @@ +# name: test/sql/storage/unzip.test +# description: Support gzipped files in the test runner +# group: [storage] + +# unzip to specific path +unzip data/storage/test.db.gz __TEST_DIR__/test.db + +load __TEST_DIR__/test.db readonly + +query I +SELECT a+1 FROM tbl; +---- +6 + +# unzip a 1.8M file to default extraction path -> __TEST_DIR__/ +unzip data/storage/index_0-9-1.db.gz + +load __TEST_DIR__/index_0-9-1.db readonly + +query II +SELECT table_name, index_count FROM duckdb_tables() ORDER BY table_name; +---- +fk_tbl 1 +idx_tbl 2 +pk_tbl 2 + +# unzip to default extraction path from NULL input +unzip data/storage/test.db.gz NULL + +load __TEST_DIR__/test.db readonly + +query I +SELECT a+2 FROM tbl; +---- +7 + +## test invalid use +# unzip + +## not gzipped database +# unzip data/storage/test.db + +## not gzipped database +# unzip data/storage/test.db + +## test NULL input paths +# unzip NULL +# unzip NULL NULL +# unzip NULL data/storage/test.db + +## invalid input path +# unzip path/to/nowhere data/storage/not_existed.db + +## invalid extraction path +# unzip data/storage/test.db.gz path/to/nowhere + +## already existed database file in the extraction - warning: this will overwrite existed wal_test_092.db +# unzip data/storage/test.db.gz data/storage/wal_test_092.db + +## extraction path to directory +# unzip data/storage/test.db.gz __TEST_DIR__/ +# unzip data/storage/test.db.gz __TEST_DIR__ diff --git a/test/sqlite/sqllogic_command.cpp b/test/sqlite/sqllogic_command.cpp index b9f64b5f407a..bd1b3e459729 100644 --- a/test/sqlite/sqllogic_command.cpp +++ b/test/sqlite/sqllogic_command.cpp @@ -144,6 +144,10 @@ SleepCommand::SleepCommand(SQLLogicTestRunner &runner, idx_t duration, SleepUnit : Command(runner), duration(duration), unit(unit) 
{
}

+UnzipCommand::UnzipCommand(SQLLogicTestRunner &runner, string &input, string &output)
+    : Command(runner), input_path(input), extraction_path(output) {
+}
+
 struct ParallelExecuteContext {
 	ParallelExecuteContext(SQLLogicTestRunner &runner, const vector<duckdb::unique_ptr<Command>> &loop_commands,
 	                       LoopDefinition definition)
@@ -387,4 +391,36 @@ void Statement::ExecuteInternal(ExecuteContext &context) const {
 	}
 }
 
+void UnzipCommand::ExecuteInternal(ExecuteContext &context) const {
+	VirtualFileSystem vfs;
+
+	// input
+	FileOpenFlags in_flags(FileFlags::FILE_FLAGS_READ);
+	in_flags.SetCompression(FileCompressionType::GZIP);
+	auto compressed_file_handle = vfs.OpenFile(input_path, in_flags);
+	if (compressed_file_handle == nullptr) {
+		throw CatalogException("Cannot open the file \"%s\"", input_path);
+	}
+
+	// read the compressed data from the file
+	int64_t file_size = vfs.GetFileSize(*compressed_file_handle);
+	std::unique_ptr<char[]> compressed_buffer(new char[BUFFER_SIZE]);
+	int64_t bytes_read = vfs.Read(*compressed_file_handle, compressed_buffer.get(), BUFFER_SIZE);
+	if (bytes_read < file_size) {
+		throw CatalogException("Cannot read the file \"%s\"", input_path);
+	}
+
+	// output
+	FileOpenFlags out_flags(FileOpenFlags::FILE_FLAGS_FILE_CREATE | FileOpenFlags::FILE_FLAGS_WRITE);
+	auto output_file = vfs.OpenFile(extraction_path, out_flags);
+	if (!output_file) {
+		throw CatalogException("Cannot open the file \"%s\"", extraction_path);
+	}
+
+	// write only the bytes that were actually decompressed
+	int64_t bytes_written = vfs.Write(*output_file, compressed_buffer.get(), bytes_read);
+	if (bytes_written < bytes_read) {
+		throw CatalogException("Cannot write the file \"%s\"", extraction_path);
+	}
+}
+
 } // namespace duckdb
diff --git a/test/sqlite/sqllogic_command.hpp b/test/sqlite/sqllogic_command.hpp
index 0f45d8aa2755..d6b2e6c9f71a 100644
--- a/test/sqlite/sqllogic_command.hpp
+++ b/test/sqlite/sqllogic_command.hpp
@@ -9,6 +9,7 @@
 #pragma once
 
 #include "duckdb.hpp"
+#include "duckdb/common/virtual_file_system.hpp"
 
 namespace duckdb {
 class SQLLogicTestRunner;
@@ -142,4 +143,19 @@ class SleepCommand : public Command {
 	SleepUnit unit;
 };
 
+class UnzipCommand : public Command {
+public:
+	// 8 MB
+	static constexpr const int64_t BUFFER_SIZE = 1u << 23;
+
+public:
+	UnzipCommand(SQLLogicTestRunner &runner, string &input, string &output);
+
+	void ExecuteInternal(ExecuteContext &context) const override;
+
+private:
+	string input_path;
+	string extraction_path;
+};
+
 } // namespace duckdb
diff --git a/test/sqlite/sqllogic_parser.cpp b/test/sqlite/sqllogic_parser.cpp
index 8cccc18ff64e..8fdb3f1bb85c 100644
--- a/test/sqlite/sqllogic_parser.cpp
+++ b/test/sqlite/sqllogic_parser.cpp
@@ -166,6 +166,7 @@ bool SQLLogicParser::IsSingleLineStatement(SQLLogicToken &token) {
 	case SQLLogicTokenType::SQLLOGIC_RESTART:
 	case SQLLogicTokenType::SQLLOGIC_RECONNECT:
 	case SQLLogicTokenType::SQLLOGIC_SLEEP:
+	case SQLLogicTokenType::SQLLOGIC_UNZIP:
 		return true;
 
 	case SQLLogicTokenType::SQLLOGIC_SKIP_IF:
@@ -219,6 +220,8 @@ SQLLogicTokenType SQLLogicParser::CommandToToken(const string &token) {
 		return SQLLogicTokenType::SQLLOGIC_RECONNECT;
 	} else if (token == "sleep") {
 		return SQLLogicTokenType::SQLLOGIC_SLEEP;
+	} else if (token == "unzip") {
+		return SQLLogicTokenType::SQLLOGIC_UNZIP;
 	}
 	Fail("Unrecognized parameter %s", token);
 	return SQLLogicTokenType::SQLLOGIC_INVALID;
diff --git a/test/sqlite/sqllogic_parser.hpp b/test/sqlite/sqllogic_parser.hpp
index af32e943c509..87e79be2b3a1 100644
--- a/test/sqlite/sqllogic_parser.hpp
+++ b/test/sqlite/sqllogic_parser.hpp
@@ -34,7 +34,8 @@ enum class
SQLLogicTokenType {
 	SQLLOGIC_LOAD,
 	SQLLOGIC_RESTART,
 	SQLLOGIC_RECONNECT,
-	SQLLOGIC_SLEEP
+	SQLLOGIC_SLEEP,
+	SQLLOGIC_UNZIP
 };
 
 class SQLLogicToken {
diff --git a/test/sqlite/sqllogic_test_runner.cpp b/test/sqlite/sqllogic_test_runner.cpp
index 7fbbc621445f..6c7f32d38859 100644
--- a/test/sqlite/sqllogic_test_runner.cpp
+++ b/test/sqlite/sqllogic_test_runner.cpp
@@ -1,12 +1,14 @@
 #include "catch.hpp"
-
-#include "sqllogic_test_runner.hpp"
 #include "test_helpers.hpp"
+#include "sqllogic_parser.hpp"
+#include "sqllogic_test_runner.hpp"
 
 #include "duckdb/main/extension_helper.hpp"
 #include "duckdb/main/extension/generated_extension_loader.hpp"
 #include "duckdb/main/extension_entries.hpp"
-#include "sqllogic_parser.hpp"
+#include "duckdb/common/virtual_file_system.hpp"
+#include "duckdb/common/file_open_flags.hpp"
+
 #ifdef DUCKDB_OUT_OF_TREE
 #include DUCKDB_EXTENSION_HEADER
 #endif
@@ -732,6 +734,32 @@ void SQLLogicTestRunner::ExecuteFile(string script) {
 			auto sleep_unit = SleepCommand::ParseUnit(token.parameters[1]);
 			auto command = make_uniq<SleepCommand>(*this, sleep_duration, sleep_unit);
 			ExecuteCommand(std::move(command));
+		} else if (token.type == SQLLogicTokenType::SQLLOGIC_UNZIP) {
+			if (token.parameters.size() != 1 && token.parameters.size() != 2) {
+				parser.Fail("unzip requires 1 argument: <path/to/file.db.gz> [optional: "
+				            "<path/to/extraction/folder>, default: __TEST_DIR__/]");
+			}
+
+			// set input path
+			auto input_path = ReplaceKeywords(token.parameters[0]);
+
+			// file name
+			idx_t filename_start_pos = input_path.find_last_of("/") + 1;
+			if (!StringUtil::EndsWith(input_path, ".gz")) {
+				parser.Fail("unzip: input does not have a GZIP extension");
+			}
+			string filename = input_path.substr(filename_start_pos, input_path.size() - filename_start_pos - 3);
+
+			// extraction path
+			string default_extraction_path = ReplaceKeywords("__TEST_DIR__/" + filename);
+			string extraction_path =
+			    (token.parameters.size() == 2) ? ReplaceKeywords(token.parameters[1]) : default_extraction_path;
+			if (extraction_path == "NULL") {
+				extraction_path = default_extraction_path;
+			}
+
+			auto command = make_uniq<UnzipCommand>(*this, input_path, extraction_path);
+			ExecuteCommand(std::move(command));
 		}
 	}
 	if (InLoop()) {

From 8097fdc0baa9fb0b36a5df08df93734dc507dbca Mon Sep 17 00:00:00 2001
From: Christina Sioula
Date: Wed, 10 Apr 2024 13:13:44 +0200
Subject: [PATCH 104/147] add README.md

---
 data/storage/README.md | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 data/storage/README.md

diff --git a/data/storage/README.md b/data/storage/README.md
new file mode 100644
index 000000000000..82b1534a5a62
--- /dev/null
+++ b/data/storage/README.md
@@ -0,0 +1,23 @@
+# DuckDB database files
+Starting with DuckDB v0.10, database storage files are backward-compatible: newer DuckDB versions can seamlessly read files generated by older versions.
+*Note: it is recommended to compress large database files.*
+
+For further details, please refer to: [Storage & Compatibility](https://duckdb.org/docs/internals/storage#compatibility)
+
+### How to gzip a database file
+```
+gzip <path/to/database.db>
+```
+
+### How to decompress and load a gzipped database file in the test runner
+```
+unzip <path/to/database.db.gz> <path/to/extraction/folder/database.db>
+
+load <path/to/extraction/folder/database.db> readonly
+```
+
+or by using the default extraction path `__TEST_DIR__/` (temporary space)
+```
+unzip <path/to/database.db.gz>
+
+load __TEST_DIR__/<database.db> readonly
+```

From 4fad019905a4482fde4aa9fb901dfa2266fce3df Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Wed, 10 Apr 2024 13:57:24 +0200
Subject: [PATCH 105/147] Add a useful comment

---
 test/sql/attach/attach_dbname_quotes.test | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/sql/attach/attach_dbname_quotes.test b/test/sql/attach/attach_dbname_quotes.test
index 2c4157dca918..c231f066682a 100644
--- a/test/sql/attach/attach_dbname_quotes.test
+++ b/test/sql/attach/attach_dbname_quotes.test
@@ -13,6 +13,7 @@ CREATE TABLE "my""db".tbl(i int);
 statement ok
 INSERT INTO "my""db".tbl VALUES (42)
 
+# use with a database name in quotes
 statement ok
 USE "my""db";

From 75aeb028914a9b58f3eb8c43691b035cfbceeaf2 Mon Sep 17 00:00:00 2001
From: Pedro Holanda
Date: Wed, 10 Apr 2024 13:58:31 +0200
Subject: [PATCH 106/147] table creation is too much for the CI, time to count

---
 .../copy/csv/test_multiple_big_compressed_csvs.test_slow | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow
index 4c8116b7d3cd..4b0fa97dc92c 100644
--- a/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow
+++ b/test/sql/copy/csv/test_multiple_big_compressed_csvs.test_slow
@@ -19,8 +19,8 @@ DROP TABLE lineitem;
 statement ok
 CREATE TABLE lineitem(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity DECIMAL(15,2) NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL);
 
-statement ok
-INSERT INTO lineitem FROM read_csv([
+query I
+select count(*) from read_csv([
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 '__TEST_DIR__/lineitem.csv.gz',
 ]);
-
-query I
-select count(*) from lineitem
 ----
 144029160
\ No newline at end of file

From 4bfe3f3fe3020e754d278ea39911eded230b66e0 Mon Sep 17 00:00:00 2001
From: Tom Ebergen
Date: Wed, 10 Apr 2024 14:06:22 +0200
Subject: [PATCH 107/147] add test

---
 src/optimizer/optimizer.cpp                   | 12 ++--
 test/optimizer/arithmetic_simplification.test | 66 +++++++------------
 test/optimizer/topn/complex_top_n.test        | 43 ++++++++++++
 test/optimizer/{ => topn}/topn_optimizer.test |  2 +-
 4 files changed, 75 insertions(+), 48 deletions(-)
 create mode 100644 test/optimizer/topn/complex_top_n.test
 rename test/optimizer/{ => topn}/topn_optimizer.test (94%)

diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp
index ce20ca7d8a5f..b4bbd37ce7d9 100644
--- a/src/optimizer/optimizer.cpp
+++ b/src/optimizer/optimizer.cpp
@@ -153,6 +153,12 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
cse_optimizer.VisitOperator(*plan); }); + // transform ORDER BY + LIMIT to TopN + RunOptimizer(OptimizerType::TOP_N, [&]() { + TopN topn; + plan = topn.Optimize(std::move(plan)); + }); + // creates projection maps so unused columns are projected out early RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { ColumnLifetimeAnalyzer column_lifetime(true); @@ -179,12 +185,6 @@ unique_ptr Optimizer::Optimize(unique_ptr plan column_lifetime.VisitOperator(*plan); }); - // transform ORDER BY + LIMIT to TopN - RunOptimizer(OptimizerType::TOP_N, [&]() { - TopN topn; - plan = topn.Optimize(std::move(plan)); - }); - // apply simple expression heuristics to get an initial reordering RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { ExpressionHeuristics expression_heuristics(*this); diff --git a/test/optimizer/arithmetic_simplification.test b/test/optimizer/arithmetic_simplification.test index f55e0031a6d1..f3bdd71f9d9c 100644 --- a/test/optimizer/arithmetic_simplification.test +++ b/test/optimizer/arithmetic_simplification.test @@ -1,48 +1,32 @@ # name: test/optimizer/arithmetic_simplification.test -# description: Arithmetic simplification test +# description: topN # group: [optimizer] statement ok -CREATE TABLE test(X INTEGER); +attach 'appian.duckdb' as appian; statement ok -PRAGMA explain_output = OPTIMIZED_ONLY; - -# verify that nop arithmetic is flattened -query I nosort xnorm -EXPLAIN SELECT X FROM test ----- - -query I nosort xnorm -EXPLAIN SELECT X+0 FROM test ----- - -query I nosort xnorm -EXPLAIN SELECT 0+X FROM test ----- - -query I nosort xnorm -EXPLAIN SELECT X-0 FROM test ----- - -query I nosort xnorm -EXPLAIN SELECT X*1 FROM test ----- - -query I nosort xnorm -EXPLAIN SELECT 1*X FROM test ----- - -query I nosort xnorm -EXPLAIN SELECT X//1 FROM test ----- - -# division by zero results in a NULL -query I nosort xnull -EXPLAIN SELECT NULL FROM test ----- - -query I nosort xnull -EXPLAIN SELECT X//0 FROM test ----- +use appian; +statement ok +WITH CTE AS ( + SELECT J1P, CUSTOMER_PRIORITY, CUSTOMER_ID FROM CUSTOMERVIEW + LEFT JOIN ( + SELECT ORDER_CUSTOMERID, SUM(ORDERITEMVIEW.ORDERITEM_QUANTITY) AS J1P FROM ORDERVIEW + LEFT JOIN ORDERITEMVIEW ON (ORDERVIEW.ORDER_ID = ORDERITEMVIEW.ORDERITEM_ORDERID) + WHERE (ORDERVIEW.ORDER_ISEXPEDITEDSHIPPED IS TRUE) + GROUP BY ORDERVIEW.ORDER_CUSTOMERID + ) AS J1J ON (J1J.ORDER_CUSTOMERID = CUSTOMERVIEW.CUSTOMER_ID) + ORDER BY CUSTOMER_PRIORITY ASC + LIMIT 50 OFFSET 50 +) SELECT J1P, Q2P, Q3P FROM CTE +LEFT JOIN ( + SELECT ORDER_CUSTOMERID FROM ORDERVIEW +) AS Q1J ON (Q1J.ORDER_CUSTOMERID = CTE.CUSTOMER_ID) +LEFT JOIN ( + SELECT CREDITCARD_CUSTOMERID AS Q2P FROM CREDITCARDVIEW +) AS Q2J ON (Q2J.Q2P = CTE.CUSTOMER_ID) +LEFT JOIN ( + SELECT ORDER_CUSTOMERID Q3P FROM ORDERVIEW + LEFT JOIN ORDERITEMVIEW ON ORDERVIEW.ORDER_ID = ORDERITEM_ORDERID +) AS Q3J ON (Q3J.Q3P = CTE.CUSTOMER_ID); diff --git a/test/optimizer/topn/complex_top_n.test b/test/optimizer/topn/complex_top_n.test new file mode 100644 index 000000000000..db3d7556ca69 --- /dev/null +++ b/test/optimizer/topn/complex_top_n.test @@ -0,0 +1,43 @@ +# name: test/optimizer/topn/complex_top_n.test +# description: topN +# group: [optimizer] + +statement ok +SELECT SETSEED(0.42); + +statement ok +create table CUSTOMERVIEW as select range customer_id, range*random()::INT % 3 as customer_priority from range(1000, 2000); + +statement ok +create table OrderView as select range order_id, ((random()*2::INT)%2)::BOOL order_isExpeditedShipped, range + (random() * 3)::INT order_customerId from range(1000, 2000); + 
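+# (aside, a minimal sketch of the shape this test guards, using hypothetical
+#  tables t and t2: an ORDER BY with LIMIT/OFFSET inside a CTE, e.g.
+#    WITH cte AS (SELECT i FROM t ORDER BY i LIMIT 50 OFFSET 50)
+#    SELECT * FROM cte LEFT JOIN t2 USING (i);
+#  must still collapse into a single TOP_N operator now that the Top-N
+#  transform runs before column-lifetime analysis)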
+statement ok +create table OrderItemView as select random()*25 orderItem_quantity, range orderItem_orderId from range(1000, 2000); + +statement ok +create table CREDITCARDVIEW as select range CREDITCARD_CUSTOMERID from range(1000, 2000); + +query III +WITH CTE AS ( + SELECT J1P, CUSTOMER_PRIORITY, CUSTOMER_ID FROM CUSTOMERVIEW + LEFT JOIN ( + SELECT ORDER_CUSTOMERID, SUM(ORDERITEMVIEW.ORDERITEM_QUANTITY) AS J1P FROM ORDERVIEW + LEFT JOIN ORDERITEMVIEW ON (ORDERVIEW.ORDER_ID = ORDERITEMVIEW.ORDERITEM_ORDERID) + WHERE (ORDERVIEW.ORDER_ISEXPEDITEDSHIPPED IS TRUE) + GROUP BY ORDERVIEW.ORDER_CUSTOMERID + ) AS J1J ON (J1J.ORDER_CUSTOMERID = CUSTOMERVIEW.CUSTOMER_ID) + ORDER BY CUSTOMER_PRIORITY ASC + LIMIT 50 OFFSET 50 +) SELECT J1P, Q2P, Q3P FROM CTE +LEFT JOIN ( + SELECT ORDER_CUSTOMERID FROM ORDERVIEW +) AS Q1J ON (Q1J.ORDER_CUSTOMERID = CTE.CUSTOMER_ID) +LEFT JOIN ( + SELECT CREDITCARD_CUSTOMERID AS Q2P FROM CREDITCARDVIEW +) AS Q2J ON (Q2J.Q2P = CTE.CUSTOMER_ID) +LEFT JOIN ( + SELECT ORDER_CUSTOMERID Q3P FROM ORDERVIEW + LEFT JOIN ORDERITEMVIEW ON ORDERVIEW.ORDER_ID = ORDERITEM_ORDERID +) AS Q3J ON (Q3J.Q3P = CTE.CUSTOMER_ID); +---- +432 values hashing to c51b3f7dd78a68c95de3f44866394cfb diff --git a/test/optimizer/topn_optimizer.test b/test/optimizer/topn/topn_optimizer.test similarity index 94% rename from test/optimizer/topn_optimizer.test rename to test/optimizer/topn/topn_optimizer.test index ceb042b6ef49..e899e1c7e974 100644 --- a/test/optimizer/topn_optimizer.test +++ b/test/optimizer/topn/topn_optimizer.test @@ -1,4 +1,4 @@ -# name: test/optimizer/topn_optimizer.test +# name: test/optimizer/topn/opn_optimizer.test # description: Test Top N optimization # group: [optimizer] From df62014929d7de833d6b3c2e815bb679d0fed04f Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 10 Apr 2024 14:20:59 +0200 Subject: [PATCH 108/147] make format-fix, and fix old test case --- test/optimizer/arithmetic_simplification.test | 66 ++++++++++++------- test/optimizer/topn/complex_top_n.test | 4 +- test/optimizer/topn/topn_optimizer.test | 4 +- 3 files changed, 45 insertions(+), 29 deletions(-) diff --git a/test/optimizer/arithmetic_simplification.test b/test/optimizer/arithmetic_simplification.test index f3bdd71f9d9c..f55e0031a6d1 100644 --- a/test/optimizer/arithmetic_simplification.test +++ b/test/optimizer/arithmetic_simplification.test @@ -1,32 +1,48 @@ # name: test/optimizer/arithmetic_simplification.test -# description: topN +# description: Arithmetic simplification test # group: [optimizer] statement ok -attach 'appian.duckdb' as appian; +CREATE TABLE test(X INTEGER); statement ok -use appian; +PRAGMA explain_output = OPTIMIZED_ONLY; + +# verify that nop arithmetic is flattened +query I nosort xnorm +EXPLAIN SELECT X FROM test +---- + +query I nosort xnorm +EXPLAIN SELECT X+0 FROM test +---- + +query I nosort xnorm +EXPLAIN SELECT 0+X FROM test +---- + +query I nosort xnorm +EXPLAIN SELECT X-0 FROM test +---- + +query I nosort xnorm +EXPLAIN SELECT X*1 FROM test +---- + +query I nosort xnorm +EXPLAIN SELECT 1*X FROM test +---- + +query I nosort xnorm +EXPLAIN SELECT X//1 FROM test +---- + +# division by zero results in a NULL +query I nosort xnull +EXPLAIN SELECT NULL FROM test +---- + +query I nosort xnull +EXPLAIN SELECT X//0 FROM test +---- -statement ok -WITH CTE AS ( - SELECT J1P, CUSTOMER_PRIORITY, CUSTOMER_ID FROM CUSTOMERVIEW - LEFT JOIN ( - SELECT ORDER_CUSTOMERID, SUM(ORDERITEMVIEW.ORDERITEM_QUANTITY) AS J1P FROM ORDERVIEW - LEFT JOIN ORDERITEMVIEW ON (ORDERVIEW.ORDER_ID = 
ORDERITEMVIEW.ORDERITEM_ORDERID) - WHERE (ORDERVIEW.ORDER_ISEXPEDITEDSHIPPED IS TRUE) - GROUP BY ORDERVIEW.ORDER_CUSTOMERID - ) AS J1J ON (J1J.ORDER_CUSTOMERID = CUSTOMERVIEW.CUSTOMER_ID) - ORDER BY CUSTOMER_PRIORITY ASC - LIMIT 50 OFFSET 50 -) SELECT J1P, Q2P, Q3P FROM CTE -LEFT JOIN ( - SELECT ORDER_CUSTOMERID FROM ORDERVIEW -) AS Q1J ON (Q1J.ORDER_CUSTOMERID = CTE.CUSTOMER_ID) -LEFT JOIN ( - SELECT CREDITCARD_CUSTOMERID AS Q2P FROM CREDITCARDVIEW -) AS Q2J ON (Q2J.Q2P = CTE.CUSTOMER_ID) -LEFT JOIN ( - SELECT ORDER_CUSTOMERID Q3P FROM ORDERVIEW - LEFT JOIN ORDERITEMVIEW ON ORDERVIEW.ORDER_ID = ORDERITEM_ORDERID -) AS Q3J ON (Q3J.Q3P = CTE.CUSTOMER_ID); diff --git a/test/optimizer/topn/complex_top_n.test b/test/optimizer/topn/complex_top_n.test index db3d7556ca69..a26f364ae06c 100644 --- a/test/optimizer/topn/complex_top_n.test +++ b/test/optimizer/topn/complex_top_n.test @@ -1,6 +1,6 @@ # name: test/optimizer/topn/complex_top_n.test -# description: topN -# group: [optimizer] +# description: topN +# group: [topn] statement ok SELECT SETSEED(0.42); diff --git a/test/optimizer/topn/topn_optimizer.test b/test/optimizer/topn/topn_optimizer.test index e899e1c7e974..96e0c9ec1c60 100644 --- a/test/optimizer/topn/topn_optimizer.test +++ b/test/optimizer/topn/topn_optimizer.test @@ -1,6 +1,6 @@ -# name: test/optimizer/topn/opn_optimizer.test +# name: test/optimizer/topn/topn_optimizer.test # description: Test Top N optimization -# group: [optimizer] +# group: [topn] statement ok CREATE TABLE integers(i INTEGER, j INTEGER) From 7a5678f9594420d8c1f30dc98824299840cf6652 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 10 Apr 2024 14:46:07 +0200 Subject: [PATCH 109/147] check for topn in test --- test/optimizer/topn/complex_top_n.test | 35 ++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/test/optimizer/topn/complex_top_n.test b/test/optimizer/topn/complex_top_n.test index a26f364ae06c..04d51fe117b3 100644 --- a/test/optimizer/topn/complex_top_n.test +++ b/test/optimizer/topn/complex_top_n.test @@ -6,16 +6,16 @@ statement ok SELECT SETSEED(0.42); statement ok -create table CUSTOMERVIEW as select range customer_id, range*random()::INT % 3 as customer_priority from range(1000, 2000); +create table CUSTOMERVIEW as select range customer_id, range*random()::INT % 3 as customer_priority from range(1000, 4000); statement ok -create table OrderView as select range order_id, ((random()*2::INT)%2)::BOOL order_isExpeditedShipped, range + (random() * 3)::INT order_customerId from range(1000, 2000); +create table OrderView as select range order_id, ((random()*2::INT)%2)::BOOL order_isExpeditedShipped, range + (random() * 3)::INT order_customerId from range(1000, 4000); statement ok -create table OrderItemView as select random()*25 orderItem_quantity, range orderItem_orderId from range(1000, 2000); +create table OrderItemView as select random()*25 orderItem_quantity, range orderItem_orderId from range(1000, 4000); statement ok -create table CREDITCARDVIEW as select range CREDITCARD_CUSTOMERID from range(1000, 2000); +create table CREDITCARDVIEW as select range CREDITCARD_CUSTOMERID from range(1000, 4000); query III WITH CTE AS ( @@ -40,4 +40,29 @@ LEFT JOIN ( LEFT JOIN ORDERITEMVIEW ON ORDERVIEW.ORDER_ID = ORDERITEM_ORDERID ) AS Q3J ON (Q3J.Q3P = CTE.CUSTOMER_ID); ---- -432 values hashing to c51b3f7dd78a68c95de3f44866394cfb +423 values hashing to 88bbd750b435b7616e6596774a8d5689 + +query II +explain WITH CTE AS ( + SELECT J1P, CUSTOMER_PRIORITY, CUSTOMER_ID FROM 
CUSTOMERVIEW + LEFT JOIN ( + SELECT ORDER_CUSTOMERID, SUM(ORDERITEMVIEW.ORDERITEM_QUANTITY) AS J1P FROM ORDERVIEW + LEFT JOIN ORDERITEMVIEW ON (ORDERVIEW.ORDER_ID = ORDERITEMVIEW.ORDERITEM_ORDERID) + WHERE (ORDERVIEW.ORDER_ISEXPEDITEDSHIPPED IS TRUE) + GROUP BY ORDERVIEW.ORDER_CUSTOMERID + ) AS J1J ON (J1J.ORDER_CUSTOMERID = CUSTOMERVIEW.CUSTOMER_ID) + ORDER BY CUSTOMER_PRIORITY ASC + LIMIT 50 OFFSET 50 +) SELECT J1P, Q2P, Q3P FROM CTE +LEFT JOIN ( + SELECT ORDER_CUSTOMERID FROM ORDERVIEW +) AS Q1J ON (Q1J.ORDER_CUSTOMERID = CTE.CUSTOMER_ID) +LEFT JOIN ( + SELECT CREDITCARD_CUSTOMERID AS Q2P FROM CREDITCARDVIEW +) AS Q2J ON (Q2J.Q2P = CTE.CUSTOMER_ID) +LEFT JOIN ( + SELECT ORDER_CUSTOMERID Q3P FROM ORDERVIEW + LEFT JOIN ORDERITEMVIEW ON ORDERVIEW.ORDER_ID = ORDERITEM_ORDERID +) AS Q3J ON (Q3J.Q3P = CTE.CUSTOMER_ID); +---- +physical_plan :.*TOP_N.* From e8cbb9f5f4e40ad461697c6e57853392665df483 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 10 Apr 2024 15:02:52 +0200 Subject: [PATCH 110/147] Bump postgres to latest main --- .github/config/out_of_tree_extensions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 5d1795ef1168..e766c56004f1 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -67,7 +67,7 @@ if (NOT MINGW) duckdb_extension_load(postgres_scanner DONT_LINK GIT_URL https://github.com/duckdb/postgres_scanner - GIT_TAG 375710fd22a35107b2c28e744f787e1a93a99998 + GIT_TAG 96206f41d5ca7015920a66b54e936c986fe0b0f8 ) endif() From 432b97983ef6eba4d60201cd469dbb5de385ddbf Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 10 Apr 2024 15:04:49 +0200 Subject: [PATCH 111/147] Enable arrow build also for windows (already enabled in nightly) --- .github/config/out_of_tree_extensions.cmake | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index e766c56004f1..0f919af9b1e8 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -16,13 +16,11 @@ # VCPKG_TARGET_TRIPLET=arm64-osx ################# ARROW -if (NOT WIN32) - duckdb_extension_load(arrow - LOAD_TESTS DONT_LINK - GIT_URL https://github.com/duckdb/arrow - GIT_TAG 9e10240da11f61ea7fbfe3fc9988ffe672ccd40f - ) -endif() +duckdb_extension_load(arrow + LOAD_TESTS DONT_LINK + GIT_URL https://github.com/duckdb/arrow + GIT_TAG 9e10240da11f61ea7fbfe3fc9988ffe672ccd40f + ) ################## AWS if (NOT MINGW) From 07028017e5153a003c0fa655822c81f44e0c7e82 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 10 Apr 2024 22:00:57 +0200 Subject: [PATCH 112/147] Add postgres's new extension_entries --- src/include/duckdb/main/extension_entries.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/include/duckdb/main/extension_entries.hpp b/src/include/duckdb/main/extension_entries.hpp index 3b84b1544e3c..df6741df05a6 100644 --- a/src/include/duckdb/main/extension_entries.hpp +++ b/src/include/duckdb/main/extension_entries.hpp @@ -88,6 +88,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"pg_clear_cache", "postgres_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, {"pg_timezone_names", "icu", CatalogType::TABLE_FUNCTION_ENTRY}, {"postgres_attach", "postgres_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, + {"postgres_execute", "postgres_scanner", 
CatalogType::TABLE_FUNCTION_ENTRY}, {"postgres_query", "postgres_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, {"postgres_scan", "postgres_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, {"postgres_scan_pushdown", "postgres_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, @@ -257,6 +258,7 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = { {"pg_experimental_filter_pushdown", "postgres_scanner"}, {"pg_pages_per_task", "postgres_scanner"}, {"pg_use_binary_copy", "postgres_scanner"}, + {"pg_use_ctid_scan", "postgres_scanner"}, {"s3_access_key_id", "httpfs"}, {"s3_endpoint", "httpfs"}, {"s3_region", "httpfs"}, From eb0ad4b8436e0074f3a54987e7e03b5ca4d9fa31 Mon Sep 17 00:00:00 2001 From: Guen Prawiroatmodjo Date: Wed, 10 Apr 2024 18:28:51 -0700 Subject: [PATCH 113/147] SQL_TYPE_TIMESTAMP should use SQL_TYPE_TIMESTAMP instead of SQL_DATETIME --- tools/odbc/src/api_info.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/odbc/src/api_info.cpp b/tools/odbc/src/api_info.cpp index 309479dfa229..3f2918caddd4 100644 --- a/tools/odbc/src/api_info.cpp +++ b/tools/odbc/src/api_info.cpp @@ -98,7 +98,7 @@ const vector ApiInfo::ODBC_SUPPORTED_SQL_TYPES = { { "'BIGINT'", SQL_BIGINT, 19, "NULL", "NULL", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, SQL_FALSE, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_BIGINT, -1, 2, -1}, { "'DATE'", SQL_TYPE_DATE, 10, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", -1, -1, SQL_DATETIME, SQL_CODE_DATE, -1, -1}, { "'TIME'", SQL_TYPE_TIME, 8, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_DATETIME, SQL_CODE_TIME, -1, -1}, -{ "'TIMESTAMP'", SQL_TYPE_TIMESTAMP, 26, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_DATETIME, SQL_CODE_TIMESTAMP, -1, -1}, +{ "'TIMESTAMP'", SQL_TYPE_TIMESTAMP, 26, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_TYPE_TIMESTAMP, SQL_CODE_TIMESTAMP, -1, -1}, { "'DECIMAL'", SQL_DECIMAL, 38, "''''", "''''", "'precision,scale'", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 38, SQL_DECIMAL, -1, 10, -1}, { "'NUMERIC'", SQL_NUMERIC, 38, "''''", "''''", "'precision,scale'", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 38, SQL_NUMERIC, -1, 10, -1}, { "'FLOAT'", SQL_FLOAT, 24, "NULL", "NULL", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, SQL_FALSE, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_FLOAT, -1, 2, -1}, From 16d451f312b45da66c40e605f1c6e421bcb39ca7 Mon Sep 17 00:00:00 2001 From: Guen Prawiroatmodjo Date: Wed, 10 Apr 2024 19:21:04 -0700 Subject: [PATCH 114/147] SQL_TYPE_DATE and TIME should use SQL_TYPE_DATE and TIME instead of SQL_DATETIME --- tools/odbc/src/api_info.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/odbc/src/api_info.cpp b/tools/odbc/src/api_info.cpp index 3f2918caddd4..2753b7928eaf 100644 --- a/tools/odbc/src/api_info.cpp +++ b/tools/odbc/src/api_info.cpp @@ -96,8 +96,8 @@ const vector ApiInfo::ODBC_SUPPORTED_SQL_TYPES = { { "'SMALLINT'", SQL_SMALLINT, 5, "NULL", "NULL", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, SQL_FALSE, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_SMALLINT, -1, 2, -1}, { "'INTEGER'", SQL_INTEGER, 10, "NULL", "NULL", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, SQL_FALSE, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_INTEGER, -1, 2, -1}, { "'BIGINT'", SQL_BIGINT, 19, 
"NULL", "NULL", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, SQL_FALSE, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_BIGINT, -1, 2, -1}, -{ "'DATE'", SQL_TYPE_DATE, 10, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", -1, -1, SQL_DATETIME, SQL_CODE_DATE, -1, -1}, -{ "'TIME'", SQL_TYPE_TIME, 8, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_DATETIME, SQL_CODE_TIME, -1, -1}, +{ "'DATE'", SQL_TYPE_DATE, 10, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", -1, -1, SQL_TYPE_DATE, SQL_CODE_DATE, -1, -1}, +{ "'TIME'", SQL_TYPE_TIME, 8, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_TYPE_TIME, SQL_CODE_TIME, -1, -1}, { "'TIMESTAMP'", SQL_TYPE_TIMESTAMP, 26, "''''", "''''", "NULL", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 0, SQL_TYPE_TIMESTAMP, SQL_CODE_TIMESTAMP, -1, -1}, { "'DECIMAL'", SQL_DECIMAL, 38, "''''", "''''", "'precision,scale'", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 38, SQL_DECIMAL, -1, 10, -1}, { "'NUMERIC'", SQL_NUMERIC, 38, "''''", "''''", "'precision,scale'", SQL_NULLABLE, SQL_FALSE, SQL_PRED_BASIC, -1, SQL_FALSE, SQL_FALSE, "NULL", 0, 38, SQL_NUMERIC, -1, 10, -1}, From 1c1636ab4877949724440d302a2c3bf32f5cf3d6 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 10:50:45 +0200 Subject: [PATCH 115/147] Pragma assignment with multiple parameters - ParserException instead of InternalException --- src/parser/transform/statement/transform_pragma.cpp | 2 +- ...sterfuzz-testcase-minimized-parse_fuzz_test-5041566491475968 | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5041566491475968 diff --git a/src/parser/transform/statement/transform_pragma.cpp b/src/parser/transform/statement/transform_pragma.cpp index d55d6a2bb8bc..63a5c35d4559 100644 --- a/src/parser/transform/statement/transform_pragma.cpp +++ b/src/parser/transform/statement/transform_pragma.cpp @@ -49,7 +49,7 @@ unique_ptr Transformer::TransformPragma(duckdb_libpgquery::PGPragm break; case duckdb_libpgquery::PG_PRAGMA_TYPE_ASSIGNMENT: if (info.parameters.size() != 1) { - throw InternalException("PRAGMA statement with assignment should contain exactly one parameter"); + throw ParserException("PRAGMA statement with assignment should contain exactly one parameter"); } if (!info.named_parameters.empty()) { throw InternalException("PRAGMA statement with assignment cannot have named parameters"); diff --git a/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5041566491475968 b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5041566491475968 new file mode 100644 index 000000000000..64f0ff9d3f63 --- /dev/null +++ b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5041566491475968 @@ -0,0 +1 @@ +pragma ÿ=2,a \ No newline at end of file From e92cf9550919961b9f81c5c595d3543b3cd6d6f2 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 10:54:35 +0200 Subject: [PATCH 116/147] Parameters in SET not supported (yet?) 
---
 src/planner/binder/statement/bind_set.cpp                      | 3 +++
 ...terfuzz-testcase-minimized-parse_fuzz_test-5103220313423872 | 1 +
 2 files changed, 4 insertions(+)
 create mode 100644 test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5103220313423872

diff --git a/src/planner/binder/statement/bind_set.cpp b/src/planner/binder/statement/bind_set.cpp
index 15f33d31faea..77c23e22ffd9 100644
--- a/src/planner/binder/statement/bind_set.cpp
+++ b/src/planner/binder/statement/bind_set.cpp
@@ -15,6 +15,9 @@ BoundStatement Binder::Bind(SetVariableStatement &stmt) {
 	// evaluate the scalar value
 	ConstantBinder default_binder(*this, context, "SET value");
 	auto bound_value = default_binder.Bind(stmt.value);
+	if (bound_value->HasParameter()) {
+		throw NotImplementedException("SET statements cannot have parameters");
+	}
 	auto value = ExpressionExecutor::EvaluateScalar(context, *bound_value, true);
 
 	result.plan = make_uniq<LogicalSet>(stmt.name, std::move(value), stmt.scope);
diff --git a/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5103220313423872 b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5103220313423872
new file mode 100644
index 000000000000..9949d4475074
--- /dev/null
+++ b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5103220313423872
@@ -0,0 +1 @@
+set ÿ=?=?
\ No newline at end of file

From 3862ec717912da20785e1fa4698a4f1945e9ec14 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Thu, 11 Apr 2024 11:06:28 +0200
Subject: [PATCH 117/147] Correctly check for overflows in hugeint sum/avg

---
 .../aggregate/algebraic/avg.cpp               |  4 +--
 .../aggregate/distributive/sum.cpp            |  4 +--
 .../core_functions/aggregate/sum_helpers.hpp  | 14 +++++++++-
 ...minimized-parse_fuzz_test-5145260887965696 |  1 +
 .../types/hugeint/hugeint_sum_overflow.test   | 26 +++++++++++++++++++
 5 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5145260887965696
 create mode 100644 test/sql/types/hugeint/hugeint_sum_overflow.test

diff --git a/src/core_functions/aggregate/algebraic/avg.cpp b/src/core_functions/aggregate/algebraic/avg.cpp
index 9cebfc4a0b02..d00e743ff6ca 100644
--- a/src/core_functions/aggregate/algebraic/avg.cpp
+++ b/src/core_functions/aggregate/algebraic/avg.cpp
@@ -93,7 +93,7 @@ struct IntegerAverageOperation : public BaseSumOperation<AverageSetOperation, RegularAdd> {
-struct IntegerAverageOperationHugeint : public BaseSumOperation<AverageSetOperation, HugeintAdd> {
+struct IntegerAverageOperationHugeint : public BaseSumOperation<AverageSetOperation, AddToHugeint> {
 	template <class T, class STATE>
 	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
 		if (state.count == 0) {
@@ -105,7 +105,7 @@ struct IntegerAverageOperationHugeint : public BaseSumOperation<AverageSetOperation, AddToHugeint> {
-struct HugeintAverageOperation : public BaseSumOperation<AverageSetOperation, RegularAdd> {
+struct HugeintAverageOperation : public BaseSumOperation<AverageSetOperation, HugeintAdd> {
 	template <class T, class STATE>
 	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
 		if (state.count == 0) {
diff --git a/src/core_functions/aggregate/distributive/sum.cpp b/src/core_functions/aggregate/distributive/sum.cpp
index 9f243869ad16..0d855297e91d 100644
--- a/src/core_functions/aggregate/distributive/sum.cpp
+++ b/src/core_functions/aggregate/distributive/sum.cpp
@@ -32,7 +32,7 @@ struct IntegerSumOperation : public BaseSumOperation<SumSetOperation, RegularAdd> {
-struct SumToHugeintOperation : public BaseSumOperation<SumSetOperation, HugeintAdd> {
+struct SumToHugeintOperation : public BaseSumOperation<SumSetOperation, AddToHugeint> {
 	template <class T, class STATE>
 	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
 		if (!state.isset) {
@@ -58,7 +58,7 @@ struct DoubleSumOperation : public BaseSumOperation<SumSetOperation, ADD_OPERATOR> {
 using KahanSumOperation = DoubleSumOperation<KahanAdd>;
 
-struct HugeintSumOperation : public BaseSumOperation<SumSetOperation, RegularAdd> {
+struct HugeintSumOperation : public BaseSumOperation<SumSetOperation, HugeintAdd> {
 	template <class T, class STATE>
 	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
 		if (!state.isset) {
diff --git a/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp b/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp
index 45f533a7f8c4..355701bdf25d 100644
--- a/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp
+++ b/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp
@@ -65,6 +65,18 @@ struct RegularAdd {
 	}
 };
 
+struct HugeintAdd {
+	template <class STATE, class T>
+	static void AddNumber(STATE &state, T input) {
+		state.value = Hugeint::Add(state.value, input);
+	}
+
+	template <class STATE, class T>
+	static void AddConstant(STATE &state, T input, idx_t count) {
+		AddNumber(state, Hugeint::Multiply(input, count));
+	}
+};
+
 struct KahanAdd {
 	template <class STATE, class T>
 	static void AddNumber(STATE &state, T input) {
@@ -77,7 +89,7 @@ struct KahanAdd {
 	}
 };
 
-struct HugeintAdd {
+struct AddToHugeint {
 	static void AddValue(hugeint_t &result, uint64_t value, int positive) {
 		// integer summation taken from Tim Gubner et al. - Efficient Query Processing
 		// with Optimistically Compressed Hash Tables & Strings in the USSR
diff --git a/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5145260887965696 b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5145260887965696
new file mode 100644
index 000000000000..f6e76e22cab4
--- /dev/null
+++ b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5145260887965696
@@ -0,0 +1 @@
+Summarize select-170141183460469231731687303715884105728
\ No newline at end of file
diff --git a/test/sql/types/hugeint/hugeint_sum_overflow.test b/test/sql/types/hugeint/hugeint_sum_overflow.test
new file mode 100644
index 000000000000..84a57e364474
--- /dev/null
+++ b/test/sql/types/hugeint/hugeint_sum_overflow.test
@@ -0,0 +1,26 @@
+# name: test/sql/types/hugeint/hugeint_sum_overflow.test
+# description: Test hugeint sum overflow
+# group: [hugeint]
+
+statement ok
+PRAGMA enable_verification
+
+statement error
+SELECT SUM(170141183460469231731687303715884105727) FROM range(10);
+----
+Overflow
+
+statement error
+SELECT SUM(x) FROM (VALUES (170141183460469231731687303715884105727), (170141183460469231731687303715884105727)) t(x)
+----
+Overflow
+
+statement error
+SELECT AVG(170141183460469231731687303715884105727) FROM range(10);
+----
+Overflow
+
+statement error
+SELECT AVG(x) FROM (VALUES (170141183460469231731687303715884105727), (170141183460469231731687303715884105727)) t(x)
+----
+Overflow

From ae79db86417afb7766e7df40ea04cfce0726ae01 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Thu, 11 Apr 2024 11:17:27 +0200
Subject: [PATCH 118/147] Push correct target scale in decimal rounding

---
 src/core_functions/scalar/math/numeric.cpp                      | 2 +-
 ...sterfuzz-testcase-minimized-parse_fuzz_test-4954980899422208 | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-4954980899422208

diff --git a/src/core_functions/scalar/math/numeric.cpp b/src/core_functions/scalar/math/numeric.cpp
index d47887f93e6a..2594b2f96b54 100644
--- a/src/core_functions/scalar/math/numeric.cpp
+++ b/src/core_functions/scalar/math/numeric.cpp
@@ -675,7 +675,7 @@ unique_ptr<FunctionData> BindDecimalRoundPrecision(ClientContext &context, Scala
 	}
 	bound_function.arguments[0] = decimal_type;
 	bound_function.return_type = LogicalType::DECIMAL(width, target_scale);
-	return make_uniq<RoundPrecisionFunctionData>(round_value);
+	return make_uniq<RoundPrecisionFunctionData>(target_scale);
 }
 
 ScalarFunctionSet RoundFun::GetFunctions() {
diff
--git a/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-4954980899422208 b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-4954980899422208 new file mode 100644 index 000000000000..5af47c2af954 --- /dev/null +++ b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-4954980899422208 @@ -0,0 +1 @@ +sElecT round(.3333333333333333,~2) \ No newline at end of file From a106c42632615d2ee30a62a4393a62f6a22aa2bb Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 11:18:00 +0200 Subject: [PATCH 119/147] Add all new ossfuzz cases --- ...usterfuzz-testcase-minimized-parse_fuzz_test-5177383552352256 | 1 + 1 file changed, 1 insertion(+) create mode 100644 test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5177383552352256 diff --git a/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5177383552352256 b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5177383552352256 new file mode 100644 index 000000000000..1eba605f0071 --- /dev/null +++ b/test/ossfuzz/cases/clusterfuzz-testcase-minimized-parse_fuzz_test-5177383552352256 @@ -0,0 +1 @@ +SElECT-+-170141183460469231731687303715884105728 \ No newline at end of file From 0db2708230604a637f01929f0d2bd7801dd4a651 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 11:47:17 +0200 Subject: [PATCH 120/147] Fix for decimal round - use early-out correctly --- src/core_functions/scalar/math/numeric.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core_functions/scalar/math/numeric.cpp b/src/core_functions/scalar/math/numeric.cpp index 2594b2f96b54..40a2f520d48a 100644 --- a/src/core_functions/scalar/math/numeric.cpp +++ b/src/core_functions/scalar/math/numeric.cpp @@ -575,7 +575,7 @@ static void DecimalRoundNegativePrecisionFunction(DataChunk &input, ExpressionSt auto &info = func_expr.bind_info->Cast(); auto source_scale = DecimalType::GetScale(func_expr.children[0]->return_type); auto width = DecimalType::GetWidth(func_expr.children[0]->return_type); - if (info.target_scale <= -int32_t(width)) { + if (info.target_scale <= -int32_t(width - source_scale)) { // scale too big for width result.SetVectorType(VectorType::CONSTANT_VECTOR); result.SetValue(0, Value::INTEGER(0)); @@ -675,7 +675,7 @@ unique_ptr BindDecimalRoundPrecision(ClientContext &context, Scala } bound_function.arguments[0] = decimal_type; bound_function.return_type = LogicalType::DECIMAL(width, target_scale); - return make_uniq(target_scale); + return make_uniq(round_value); } ScalarFunctionSet RoundFun::GetFunctions() { From f8238ab335c65e49d7e709c4e500589102cf1c72 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 11 Apr 2024 11:54:05 +0200 Subject: [PATCH 121/147] add require skip reload --- test/optimizer/topn/complex_top_n.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/optimizer/topn/complex_top_n.test b/test/optimizer/topn/complex_top_n.test index 04d51fe117b3..8cc825736293 100644 --- a/test/optimizer/topn/complex_top_n.test +++ b/test/optimizer/topn/complex_top_n.test @@ -2,6 +2,8 @@ # description: topN # group: [topn] +require skip_reload + statement ok SELECT SETSEED(0.42); From 7099f6646272022f170909f25db383999dd410a6 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Thu, 11 Apr 2024 12:09:00 +0200 Subject: [PATCH 122/147] DynamicCastCheck to be done on const T --- src/include/duckdb/common/helper.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/duckdb/common/helper.hpp 
b/src/include/duckdb/common/helper.hpp
index b19b85f6d851..d4c07cc47091 100644
--- a/src/include/duckdb/common/helper.hpp
+++ b/src/include/duckdb/common/helper.hpp
@@ -215,9 +215,9 @@ bool RefersToSameObject(const T &a, const T &b) {
 }
 
 template <class TARGET, class SRC>
-void DynamicCastCheck(SRC *source) {
+void DynamicCastCheck(const SRC *source) {
 #ifndef __APPLE__
-	D_ASSERT(dynamic_cast<TARGET *>(source));
+	D_ASSERT(dynamic_cast<const TARGET *>(source));
 #endif
 }

From d110d7992e054130787544773bd29cd99f59a4c6 Mon Sep 17 00:00:00 2001
From: Carlo Piovesan
Date: Thu, 11 Apr 2024 12:09:59 +0200
Subject: [PATCH 123/147] Use DynamicCastCheck both for const and non-const
 Casts

---
 src/include/duckdb/catalog/catalog.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/include/duckdb/catalog/catalog.hpp b/src/include/duckdb/catalog/catalog.hpp
index 871738a975de..31098ba10f6c 100644
--- a/src/include/duckdb/catalog/catalog.hpp
+++ b/src/include/duckdb/catalog/catalog.hpp
@@ -368,7 +368,7 @@ class Catalog {
 
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

From df14a380d307244e5e939451a02765c56f057840 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Thu, 11 Apr 2024 12:15:42 +0200
Subject: [PATCH 124/147] Out-of-range positional reference

---
 .../expression_binder/select_bind_state.cpp        |  2 +-
 .../duckfuzz/order_by_positional_reference.test    | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 test/fuzzer/duckfuzz/order_by_positional_reference.test

diff --git a/src/planner/expression_binder/select_bind_state.cpp b/src/planner/expression_binder/select_bind_state.cpp
index 4b0a19cd2e5b..23ada81d0451 100644
--- a/src/planner/expression_binder/select_bind_state.cpp
+++ b/src/planner/expression_binder/select_bind_state.cpp
@@ -43,7 +43,7 @@ void SelectBindState::AddRegularColumn() {
 }
 
 idx_t SelectBindState::GetFinalIndex(idx_t index) const {
-	if (expanded_column_indices.empty()) {
+	if (index >= expanded_column_indices.size()) {
 		return index;
 	}
 	return expanded_column_indices[index];
diff --git a/test/fuzzer/duckfuzz/order_by_positional_reference.test b/test/fuzzer/duckfuzz/order_by_positional_reference.test
new file mode 100644
index 000000000000..ec567b9a2536
--- /dev/null
+++ b/test/fuzzer/duckfuzz/order_by_positional_reference.test
@@ -0,0 +1,14 @@
+# name: test/fuzzer/duckfuzz/order_by_positional_reference.test
+# description: Mix GROUP BY ALL and positional references
+# group: [duckfuzz]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+create table integers(c1 int, c2 int);
+
+statement error
+SELECT c1, c2, NULL FROM integers ORDER BY #10
+----
+term out of range

From 22bb67a06e19ed95876b0161f8deff676348dd92 Mon Sep 17 00:00:00 2001
From: Tishj
Date: Thu, 11 Apr 2024 12:17:30 +0200
Subject: [PATCH 125/147] make the python linter happy

---
 scripts/generate_extensions_function.py | 32 +++++++++++++++----------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/scripts/generate_extensions_function.py b/scripts/generate_extensions_function.py
index dff1dfc9d640..c95fa95f4655 100644
--- a/scripts/generate_extensions_function.py
+++ b/scripts/generate_extensions_function.py
@@ -80,8 +80,9 @@ class ExtensionFunction(NamedTuple):
     name: str
     type: CatalogType
 
+    @staticmethod
     def create_map(input: List[Tuple[str, str, str]]) -> Dict[Function, "ExtensionFunction"]:
-        output: Dict[str, "ExtensionFunction"] = {}
+        output: Dict[Function, "ExtensionFunction"] = {}
         for x in input:
-            key =
Function(x[0], catalog_type_from_type(x[2])) output[key] = ExtensionFunction(x[1], key.name, key.type) @@ -92,6 +93,7 @@ class ExtensionSetting(NamedTuple): extension: str name: str + @staticmethod def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionSetting"]: output: Dict[str, "ExtensionSetting"] = {} for x in input: @@ -103,6 +105,7 @@ class ExtensionCopyFunction(NamedTuple): extension: str name: str + @staticmethod def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionCopyFunction"]: output: Dict[str, "ExtensionCopyFunction"] = {} for x in input: @@ -114,6 +117,7 @@ class ExtensionType(NamedTuple): extension: str name: str + @staticmethod def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionType"]: output: Dict[str, "ExtensionType"] = {} for x in input: @@ -145,7 +149,7 @@ def get_extension_names() -> List[str]: return extension_names -def get_query(sql_query, load_query): +def get_query(sql_query, load_query) -> list: # Optionally perform a LOAD of an extension # Then perform a SQL query, fetch the output query = f'{DUCKDB_PATH} -csv -unsigned -c "{load_query}{sql_query}" ' @@ -171,7 +175,7 @@ def get_functions(load="") -> Set[Function]: return functions -def get_settings(load=""): +def get_settings(load="") -> Set[str]: GET_SETTINGS_QUERY = """ select distinct name @@ -192,12 +196,12 @@ def __init__(self): self.stored_functions: Dict[str, List[Function]] = { 'substrait': [ - Function("from_substrait", "table"), - Function("get_substrait", "table"), - Function("get_substrait_json", "table"), - Function("from_substrait_json", "table"), + Function("from_substrait", CatalogType.TABLE), + Function("get_substrait", CatalogType.TABLE), + Function("get_substrait_json", CatalogType.TABLE), + Function("from_substrait_json", CatalogType.TABLE), ], - 'arrow': [Function("scan_arrow_ipc", "table"), Function("to_arrow_ipc", "table")], + 'arrow': [Function("scan_arrow_ipc", CatalogType.TABLE), Function("to_arrow_ipc", CatalogType.TABLE)], 'spatial': [], } self.stored_settings: Dict[str, List[str]] = {'substrait': [], 'arrow': [], 'spatial': []} @@ -214,8 +218,8 @@ def add_extension(self, extension_name: str): print(f"Load {extension_name} at {extension_path}") load = f"LOAD '{extension_path}';" - extension_functions = get_functions(load) - extension_settings = get_settings(load) + extension_functions = list(get_functions(load)) + extension_settings = list(get_settings(load)) self.add_settings(extension_name, extension_settings) self.add_functions(extension_name, extension_functions) @@ -237,7 +241,7 @@ def add_settings(self, extension_name: str, settings_list: List[str]): extension_name = extension_name.lower() added_settings: Set[str] = set(settings_list) - self.base_settings - settings_to_add: Dict[str, str] = {} + settings_to_add: Dict[str, ExtensionSetting] = {} for setting in added_settings: setting_name = setting.lower() settings_to_add[setting_name] = ExtensionSetting(extension_name, setting_name) @@ -315,7 +319,7 @@ def get_slice_of_file(var_name, file_str): # Parses the extension_entries.hpp file def parse_extension_entries(file_path): - def parse_contents(input) -> tuple: + def parse_contents(input) -> list: # Split the string by comma and remove any leading or trailing spaces elements = input.split(",") # Strip any leading or trailing spaces and surrounding double quotes from each element @@ -330,24 +334,28 @@ def parse_contents(input) -> tuple: ext_functions_file_blob = get_slice_of_file("EXTENSION_FUNCTIONS", file_blob) res = 
pattern.findall(ext_functions_file_blob) res = [parse_contents(x) for x in res] + res = [(x[0], x[1], x[2]) for x in res] cur_function_map = ExtensionFunction.create_map(res) # Get the extension settings ext_settings_file_blob = get_slice_of_file("EXTENSION_SETTINGS", file_blob) res = pattern.findall(ext_settings_file_blob) res = [parse_contents(x) for x in res] + res = [(x[0], x[1]) for x in res] cur_settings_map = ExtensionSetting.create_map(res) # Get the extension types ext_copy_functions_blob = get_slice_of_file("EXTENSION_COPY_FUNCTIONS", file_blob) res = pattern.findall(ext_copy_functions_blob) res = [parse_contents(x) for x in res] + res = [(x[0], x[1]) for x in res] cur_copy_functions_map = ExtensionCopyFunction.create_map(res) # Get the extension types ext_types_file_blob = get_slice_of_file("EXTENSION_TYPES", file_blob) res = pattern.findall(ext_types_file_blob) res = [parse_contents(x) for x in res] + res = [(x[0], x[1]) for x in res] cur_types_map = ExtensionType.create_map(res) return { From 04baa831475b777f978d34cdca80529e8d27a869 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 12:25:53 +0200 Subject: [PATCH 126/147] Correctly deal with impossible implicit casts from array -> list --- src/function/cast_rules.cpp | 6 +++++- test/sql/function/array/array_flatten.test | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 test/sql/function/array/array_flatten.test diff --git a/src/function/cast_rules.cpp b/src/function/cast_rules.cpp index 3dc1213c8c9b..98ca78b86655 100644 --- a/src/function/cast_rules.cpp +++ b/src/function/cast_rules.cpp @@ -386,8 +386,12 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to) } if (from.id() == LogicalTypeId::ARRAY && to.id() == LogicalTypeId::LIST) { // Arrays can be cast to lists for the cost of casting the child type + auto child_cost = ImplicitCast(ArrayType::GetChildType(from), ListType::GetChildType(to)); + if (child_cost < 0) { + return -1; + } // add 1 because we prefer ARRAY->ARRAY casts over ARRAY->LIST casts - return ImplicitCast(ArrayType::GetChildType(from), ListType::GetChildType(to)) + 1; + return child_cost + 1; } if (from.id() == LogicalTypeId::LIST && (to.id() == LogicalTypeId::ARRAY && !ArrayType::IsAnySize(to))) { // Lists can be cast to arrays for the cost of casting the child type, if the target size is known diff --git a/test/sql/function/array/array_flatten.test b/test/sql/function/array/array_flatten.test new file mode 100644 index 000000000000..406fab2db72d --- /dev/null +++ b/test/sql/function/array/array_flatten.test @@ -0,0 +1,16 @@ +# name: test/sql/function/array/array_flatten.test +# description: Test array flatten function +# group: [array] + +statement ok +PRAGMA enable_verification + +statement error +select flatten(['a', 'b', 'c']::varchar[3]); +---- +No function matches the given name and argument types + +query I +select flatten([['a'], ['b'], ['c']]::varchar[1][3]); +---- +[a, b, c] From 79c1c4b682e4e5d1d811c2a37c619984c0ac1776 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 12:33:55 +0200 Subject: [PATCH 127/147] Use ConstantVector::SetNull --- src/include/duckdb/core_functions/lambda_functions.hpp | 3 ++- test/sql/function/list/lambda_constant_null.test | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 test/sql/function/list/lambda_constant_null.test diff --git a/src/include/duckdb/core_functions/lambda_functions.hpp 
b/src/include/duckdb/core_functions/lambda_functions.hpp index f560bf4ba3c6..624e087dd46a 100644 --- a/src/include/duckdb/core_functions/lambda_functions.hpp +++ b/src/include/duckdb/core_functions/lambda_functions.hpp @@ -88,7 +88,8 @@ class LambdaFunctions { result_validity = &FlatVector::Validity(result); if (list_column.GetType().id() == LogicalTypeId::SQLNULL) { - result_validity->SetInvalid(0); + result.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(result, true); result_is_null = true; return; } diff --git a/test/sql/function/list/lambda_constant_null.test b/test/sql/function/list/lambda_constant_null.test new file mode 100644 index 000000000000..05b5885d44db --- /dev/null +++ b/test/sql/function/list/lambda_constant_null.test @@ -0,0 +1,8 @@ +# name: test/sql/function/list/lambda_constant_null.test +# description: Test constant NULL values in lambdas +# group: [list] + +statement error +select quantile(NULL, filter(NULL, (c103 -> 'babea54a-2261-4b0c-b14b-1d0e9b794e1a'))); +---- +QUANTILE parameter cannot be NULL From 5c9658872056865febe5690acd841f750f8e70b5 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 12:50:57 +0200 Subject: [PATCH 128/147] Shell fuzzer fixes - remove several problematic functions, and implement sqlite3_limit --- tools/shell/shell.c | 205 ------------------ .../sqlite3_api_wrapper.cpp | 25 ++- 2 files changed, 23 insertions(+), 207 deletions(-) diff --git a/tools/shell/shell.c b/tools/shell/shell.c index d228bee5ff98..90d975f8f2fc 100644 --- a/tools/shell/shell.c +++ b/tools/shell/shell.c @@ -1946,167 +1946,6 @@ static void sha3Func( sqlite3_result_blob(context, SHA3Final(&cx), iSize/8, SQLITE_TRANSIENT); } -/* Compute a string using sqlite3_vsnprintf() with a maximum length -** of 50 bytes and add it to the hash. -*/ -static void hash_step_vformat( - SHA3Context *p, /* Add content to this context */ - const char *zFormat, - ... -){ - va_list ap; - int n; - char zBuf[50]; - va_start(ap, zFormat); - sqlite3_vsnprintf(sizeof(zBuf),zBuf,zFormat,ap); - va_end(ap); - n = (int)strlen(zBuf); - SHA3Update(p, (unsigned char*)zBuf, n); -} - -/* -** Implementation of the sha3_query(SQL,SIZE) function. -** -** This function compiles and runs the SQL statement(s) given in the -** argument. The results are hashed using a SIZE-bit SHA3. The default -** size is 256. -** -** The format of the byte stream that is hashed is summarized as follows: -** -** S: -** R -** N -** I -** F -** B: -** T: -** -** is the original SQL text for each statement run and is -** the size of that text. The SQL text is UTF-8. A single R character -** occurs before the start of each row. N means a NULL value. -** I mean an 8-byte little-endian integer . F is a floating point -** number with an 8-byte little-endian IEEE floating point value . -** B means blobs of bytes. T means text rendered as -** bytes of UTF-8. The and values are expressed as an ASCII -** text integers. -** -** For each SQL statement in the X input, there is one S segment. Each -** S segment is followed by zero or more R segments, one for each row in the -** result set. After each R, there are one or more N, I, F, B, or T segments, -** one for each column in the result set. Segments are concatentated directly -** with no delimiters of any kind. 
-*/ -static void sha3QueryFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - sqlite3 *db = sqlite3_context_db_handle(context); - const char *zSql = (const char*)sqlite3_value_text(argv[0]); - sqlite3_stmt *pStmt = 0; - int nCol; /* Number of columns in the result set */ - int i; /* Loop counter */ - int rc; - int n; - const char *z; - SHA3Context cx; - int iSize; - - if( argc==1 ){ - iSize = 256; - }else{ - iSize = sqlite3_value_int(argv[1]); - if( iSize!=224 && iSize!=256 && iSize!=384 && iSize!=512 ){ - sqlite3_result_error(context, "SHA3 size should be one of: 224 256 " - "384 512", -1); - return; - } - } - if( zSql==0 ) return; - SHA3Init(&cx, iSize); - while( zSql[0] ){ - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, &zSql); - if( rc ){ - char *zMsg = sqlite3_mprintf("error SQL statement [%s]: %s", - zSql, sqlite3_errmsg(db)); - sqlite3_finalize(pStmt); - sqlite3_result_error(context, zMsg, -1); - sqlite3_free(zMsg); - return; - } - if( !sqlite3_stmt_readonly(pStmt) ){ - char *zMsg = sqlite3_mprintf("non-query: [%s]", sqlite3_sql(pStmt)); - sqlite3_finalize(pStmt); - sqlite3_result_error(context, zMsg, -1); - sqlite3_free(zMsg); - return; - } - nCol = sqlite3_column_count(pStmt); - z = sqlite3_sql(pStmt); - n = (int)strlen(z); - hash_step_vformat(&cx,"S%d:",n); - SHA3Update(&cx,(unsigned char*)z,n); - - /* Compute a hash over the result of the query */ - while( SQLITE_ROW==sqlite3_step(pStmt) ){ - SHA3Update(&cx,(const unsigned char*)"R",1); - for(i=0; i=1; j--){ - x[j] = u & 0xff; - u >>= 8; - } - x[0] = 'I'; - SHA3Update(&cx, x, 9); - break; - } - case SQLITE_FLOAT: { - sqlite3_uint64 u; - int j; - unsigned char x[9]; - double r = sqlite3_column_double(pStmt,i); - memcpy(&u, &r, 8); - for(j=8; j>=1; j--){ - x[j] = u & 0xff; - u >>= 8; - } - x[0] = 'F'; - SHA3Update(&cx,x,9); - break; - } - case SQLITE_TEXT: { - int n2 = sqlite3_column_bytes(pStmt, i); - const unsigned char *z2 = sqlite3_column_text(pStmt, i); - hash_step_vformat(&cx,"T%d:",n2); - SHA3Update(&cx, z2, n2); - break; - } - case SQLITE_BLOB: { - int n2 = sqlite3_column_bytes(pStmt, i); - const unsigned char *z2 = sqlite3_column_blob(pStmt, i); - hash_step_vformat(&cx,"B%d:",n2); - SHA3Update(&cx, z2, n2); - break; - } - } - } - } - sqlite3_finalize(pStmt); - } - sqlite3_result_blob(context, SHA3Final(&cx), iSize/8, SQLITE_TRANSIENT); -} - - #ifdef _WIN32 #endif @@ -2126,16 +1965,6 @@ int sqlite3_shathree_init( SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC, 0, sha3Func, 0, 0); } - if( rc==SQLITE_OK ){ - rc = sqlite3_create_function(db, "sha3_query", 1, - SQLITE_UTF8 | SQLITE_DIRECTONLY, - 0, sha3QueryFunc, 0, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3_create_function(db, "sha3_query", 2, - SQLITE_UTF8 | SQLITE_DIRECTONLY, - 0, sha3QueryFunc, 0, 0); - } return rc; } @@ -14081,36 +13910,6 @@ static unsigned char *readHexDb(ShellState *p, int *pnData){ } #endif /* SQLITE_ENABLE_DESERIALIZE */ -/* -** Scalar function "shell_int32". The first argument to this function -** must be a blob. The second a non-negative integer. This function -** reads and returns a 32-bit big-endian integer from byte -** offset (4*) of the blob. 
-*/ -static void shellInt32( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - const unsigned char *pBlob; - int nBlob; - int iInt; - - UNUSED_PARAMETER(argc); - nBlob = sqlite3_value_bytes(argv[0]); - pBlob = (const unsigned char*)sqlite3_value_blob(argv[0]); - iInt = sqlite3_value_int(argv[1]); - - if( iInt>=0 && (iInt+1)*4<=nBlob ){ - const unsigned char *a = &pBlob[iInt*4]; - sqlite3_int64 iVal = ((sqlite3_int64)a[0]<<24) - + ((sqlite3_int64)a[1]<<16) - + ((sqlite3_int64)a[2]<< 8) - + ((sqlite3_int64)a[3]<< 0); - sqlite3_result_int64(context, iVal); - } -} - /* ** Scalar function "shell_idquote(X)" returns string X quoted as an identifier, ** using "..." with internal double-quote characters doubled. @@ -14288,8 +14087,6 @@ static void open_db(ShellState *p, int openFlags){ sqlite3_fileio_init(p->db, 0, 0); sqlite3_shathree_init(p->db, 0, 0); sqlite3_completion_init(p->db, 0, 0); - sqlite3_uint_init(p->db, 0, 0); - sqlite3_decimal_init(p->db, 0, 0); #if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(SQLITE_ENABLE_DBPAGE_VTAB) sqlite3_dbdata_init(p->db, 0, 0); #endif @@ -14305,8 +14102,6 @@ static void open_db(ShellState *p, int openFlags){ shellPutsFunc, 0, 0); sqlite3_create_function(p->db, "shell_escape_crnl", 1, SQLITE_UTF8, 0, shellEscapeCrnl, 0, 0); - sqlite3_create_function(p->db, "shell_int32", 2, SQLITE_UTF8, 0, - shellInt32, 0, 0); sqlite3_create_function(p->db, "shell_idquote", 1, SQLITE_UTF8, 0, shellIdQuote, 0, 0); #ifndef SQLITE_NOHAVE_SYSTEM diff --git a/tools/sqlite3_api_wrapper/sqlite3_api_wrapper.cpp b/tools/sqlite3_api_wrapper/sqlite3_api_wrapper.cpp index ad2cac292e6d..4d9a008e2457 100644 --- a/tools/sqlite3_api_wrapper/sqlite3_api_wrapper.cpp +++ b/tools/sqlite3_api_wrapper/sqlite3_api_wrapper.cpp @@ -1076,8 +1076,29 @@ int sqlite3_get_autocommit(sqlite3 *db) { } int sqlite3_limit(sqlite3 *, int id, int newVal) { - fprintf(stderr, "sqlite3_limit: unsupported.\n"); - return -1; + if (newVal >= 0) { + // attempting to set limit value + return SQLITE_OK; + } + switch (id) { + case SQLITE_LIMIT_LENGTH: + case SQLITE_LIMIT_SQL_LENGTH: + case SQLITE_LIMIT_COLUMN: + case SQLITE_LIMIT_LIKE_PATTERN_LENGTH: + return std::numeric_limits::max(); + case SQLITE_LIMIT_EXPR_DEPTH: + return 1000; + case SQLITE_LIMIT_FUNCTION_ARG: + case SQLITE_LIMIT_VARIABLE_NUMBER: + return 256; + case SQLITE_LIMIT_ATTACHED: + return 1000; + case SQLITE_LIMIT_WORKER_THREADS: + case SQLITE_LIMIT_TRIGGER_DEPTH: + return 0; + default: + return SQLITE_ERROR; + } } int sqlite3_stmt_readonly(sqlite3_stmt *pStmt) { From ad79569204a1f2f20562656d0863a26ad791b175 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 9 Apr 2024 17:35:44 -0400 Subject: [PATCH 129/147] pyodide build --- .github/workflows/Pyodide.yml | 99 +++++++++++++++++++ .gitignore | 4 + tools/pythonpkg/pyodide.md | 57 +++++++++++ tools/pythonpkg/setup.py | 10 ++ tools/pythonpkg/src/native/python_objects.cpp | 11 ++- 5 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/Pyodide.yml create mode 100644 tools/pythonpkg/pyodide.md diff --git a/.github/workflows/Pyodide.yml b/.github/workflows/Pyodide.yml new file mode 100644 index 000000000000..290024c3c190 --- /dev/null +++ b/.github/workflows/Pyodide.yml @@ -0,0 +1,99 @@ +name: Pyodide +on: + workflow_call: + inputs: + override_git_describe: + type: string + git_ref: + type: string + skip_tests: + type: string + workflow_dispatch: + inputs: + override_git_describe: + type: string + git_ref: + type: 
string + skip_tests: + type: string + repository_dispatch: + push: + branches: + - "**" + - "!main" + - "!feature" + paths-ignore: + - "**" + - "!.github/workflows/Pyodide.yml" + + pull_request: + types: [opened, reopened, ready_for_review] + paths-ignore: + - "**" + - "!.github/workflows/Pyodide.yml" + +jobs: + build_pyodide: + name: Build pyodide wheel + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + version: + - python: "3.10" + pyodide-build: "0.22.1" + - python: "3.11" + pyodide-build: "0.25.1" + steps: + - uses: actions/checkout@v4 + with: + # fetch everything so that the version on the built wheel path is + # correct + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.version.python }} + + - run: pip install 'pyodide-build==${{ matrix.version.pyodide-build }}' 'pydantic<2' + + - name: get emscripten version + id: emscripten-version + run: | + echo "value=$(pyodide config get emscripten_version)" | tee -a "$GITHUB_OUTPUT" + + - uses: mymindstorm/setup-emsdk@v14 + with: + version: ${{ steps.emscripten-version.outputs.value }} + + - name: build wasm wheel + run: pyodide build --exports=whole_archive + working-directory: ./tools/pythonpkg + env: + DUCKDB_CUSTOM_PLATFORM: wasm_eh_pyodide + CFLAGS: "-fexceptions" + LDFLAGS: "-fexceptions" + + - name: smoke test duckdb on pyodide + run: | + pyodide venv .venv-pyodide + source .venv-pyodide/bin/activate + pip install ./tools/pythonpkg/dist/*.whl + + python -V + + python < + + + + + + + + +``` + +## Caveats + +Only Pythons 3.10 and 3.11 are supported right now, with 3.12 support on the way. + +Wheels are tied to a specific version of Pyodide. For example when using +Pyodide version 0.25.1, you must use the cp311-based wheel. + +Some functionality is known to not work, such as extension downloading. + +The default extensions (as well as the `httpfs` extension) that ship with +duckdb Python don't need to be `INSTALL`ed, but others, like `spatial`, won't +work because they cannot be downloaded in the pyodide runtime. 
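+
+As a quick sanity check (mirroring the CI smoke-test step above), the wheel can
+be exercised from inside the Pyodide virtual environment; the snippet below is
+illustrative only:
+
+```python
+import duckdb
+
+# any trivial query proves the wasm build loads and executes
+print(duckdb.sql("SELECT 42 AS answer").fetchall())  # [(42,)]
+```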
diff --git a/tools/pythonpkg/setup.py b/tools/pythonpkg/setup.py index 41ee53f32b92..76ba6a2d13d0 100644 --- a/tools/pythonpkg/setup.py +++ b/tools/pythonpkg/setup.py @@ -121,8 +121,10 @@ class build_ext(CompilerLauncherMixin, _build_ext): extensions = ['parquet', 'icu', 'fts', 'tpch', 'json'] is_android = hasattr(sys, 'getandroidapilevel') +is_pyodide = 'PYODIDE' in os.environ use_jemalloc = ( not is_android + and not is_pyodide and platform.system() == 'Linux' and platform.architecture()[0] == '64bit' and platform.machine() == 'x86_64' @@ -183,12 +185,20 @@ def open_utf8(fpath, flags): define_macros = [('DUCKDB_PYTHON_LIB_NAME', lib_name)] +custom_platform = os.environ.get('DUCKDB_CUSTOM_PLATFORM') +if custom_platform is not None: + define_macros.append(('DUCKDB_CUSTOM_PLATFORM', custom_platform)) + if platform.system() == 'Darwin': toolchain_args.extend(['-stdlib=libc++', '-mmacosx-version-min=10.7']) if platform.system() == 'Windows': define_macros.extend([('DUCKDB_BUILD_LIBRARY', None), ('WIN32', None)]) +if is_pyodide: + # show more useful error messages in the browser + define_macros.append(('PYBIND11_DETAILED_ERROR_MESSAGES', None)) + if 'BUILD_HTTPFS' in os.environ: libraries += ['crypto', 'ssl'] extensions += ['httpfs'] diff --git a/tools/pythonpkg/src/native/python_objects.cpp b/tools/pythonpkg/src/native/python_objects.cpp index 44d0acca4bf7..5c19f2ae13e2 100644 --- a/tools/pythonpkg/src/native/python_objects.cpp +++ b/tools/pythonpkg/src/native/python_objects.cpp @@ -575,7 +575,16 @@ py::object PythonObject::FromValue(const Value &val, const LogicalType &type, auto &array_values = ArrayValue::GetChildren(val); auto array_size = ArrayType::GetSize(type); auto &child_type = ArrayType::GetChildType(type); - py::tuple arr(array_size); + + // do not remove the static cast here, it's required for building + // duckdb-python with Emscripten. 
+ // + // without this cast, a static_assert fails in pybind11 + // because the return type of ArrayType::GetSize is idx_t, + // which is typedef'd to uint64_t and ssize_t is 4 bytes with Emscripten + // and pybind11 requires that the input be castable to ssize_t + py::tuple arr(static_cast(array_size)); + for (idx_t elem_idx = 0; elem_idx < array_size; elem_idx++) { arr[elem_idx] = FromValue(array_values[elem_idx], child_type, client_properties); } From 3e43b6bdfafce142e463c411e658fa7314d7bf77 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 11 Apr 2024 13:04:23 +0200 Subject: [PATCH 130/147] let the generation be modular by default, only replace the extensions that were not build --- scripts/generate_extensions_function.py | 129 +++++++++++++----------- 1 file changed, 72 insertions(+), 57 deletions(-) diff --git a/scripts/generate_extensions_function.py b/scripts/generate_extensions_function.py index c95fa95f4655..316c50617e46 100644 --- a/scripts/generate_extensions_function.py +++ b/scripts/generate_extensions_function.py @@ -32,7 +32,6 @@ from enum import Enum - class CatalogType(str, Enum): SCALAR = "CatalogType::SCALAR_FUNCTION_ENTRY" TABLE = "CatalogType::TABLE_FUNCTION_ENTRY" @@ -125,6 +124,60 @@ def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionType"]: return output +class ParsedEntries: + def __init__(self, file_path): + self.path = file_path + self.functions = {} + self.settings = {} + self.types = {} + self.copy_functions = {} + + def parse_contents(input) -> list: + # Split the string by comma and remove any leading or trailing spaces + elements = input.split(",") + # Strip any leading or trailing spaces and surrounding double quotes from each element + elements = [element.strip().strip('"') for element in elements] + return elements + + file = open(file_path, 'r') + pattern = re.compile("{(.*(?:, )?)}[,}\n]") + file_blob = file.read() + + # Get the extension functions + ext_functions_file_blob = get_slice_of_file("EXTENSION_FUNCTIONS", file_blob) + res = pattern.findall(ext_functions_file_blob) + res = [parse_contents(x) for x in res] + res = [(x[0], x[1], x[2]) for x in res] + self.functions = ExtensionFunction.create_map(res) + + # Get the extension settings + ext_settings_file_blob = get_slice_of_file("EXTENSION_SETTINGS", file_blob) + res = pattern.findall(ext_settings_file_blob) + res = [parse_contents(x) for x in res] + res = [(x[0], x[1]) for x in res] + self.settings = ExtensionSetting.create_map(res) + + # Get the extension types + ext_copy_functions_blob = get_slice_of_file("EXTENSION_COPY_FUNCTIONS", file_blob) + res = pattern.findall(ext_copy_functions_blob) + res = [parse_contents(x) for x in res] + res = [(x[0], x[1]) for x in res] + self.copy_functions = ExtensionCopyFunction.create_map(res) + + # Get the extension types + ext_types_file_blob = get_slice_of_file("EXTENSION_TYPES", file_blob) + res = pattern.findall(ext_types_file_blob) + res = [parse_contents(x) for x in res] + res = [(x[0], x[1]) for x in res] + self.types = ExtensionType.create_map(res) + + def filter_entries(self, extensions: List[str]): + self.functions = {k: v for k, v in self.functions.items() if v.extension not in extensions} + self.copy_functions = {k: v for k, v in self.copy_functions.items() if v.extension not in extensions} + self.settings = {k: v for k, v in self.settings.items() if v.extension not in extensions} + self.types = {k: v for k, v in self.types.items() if v.extension not in extensions} + + def check_prerequisites(): if not 
os.path.isfile(EXTENSIONS_PATH) or not os.path.isfile(DUCKDB_PATH): print( @@ -210,6 +263,10 @@ def set_base(self): self.base_functions: Set[Function] = get_functions() self.base_settings: Set[str] = get_settings() + def add_entries(self, entries: ParsedEntries): + self.function_map.update(entries.functions) + self.settings_map.update(entries.settings) + def add_extension(self, extension_name: str): if extension_name in self.extensions: # Perform a LOAD and add the added settings/functions @@ -259,19 +316,19 @@ def add_functions(self, extension_name: str, function_list: List[Function]): self.function_map.update(functions_to_add) def validate(self): - parsed_entries = parse_extension_entries(HEADER_PATH) - if self.function_map != parsed_entries['functions']: + parsed_entries = ParsedEntries(HEADER_PATH) + if self.function_map != parsed_entries.functions: print("Function map mismatches:") - print_map_diff(self.function_map, parsed_entries['functions']) + print_map_diff(self.function_map, parsed_entries.functions) exit(1) - if self.settings_map != parsed_entries['settings']: + if self.settings_map != parsed_entries.settings: print("Settings map mismatches:") - print_map_diff(self.settings_map, parsed_entries['settings']) + print_map_diff(self.settings_map, parsed_entries.settings) exit(1) print("All entries found: ") - print(" > functions: " + str(len(parsed_entries['functions']))) - print(" > settings: " + str(len(parsed_entries['settings']))) + print(" > functions: " + str(len(parsed_entries.functions))) + print(" > settings: " + str(len(parsed_entries.settings))) def verify_export(self): if len(self.function_map) == 0 or len(self.settings_map) == 0: @@ -317,55 +374,6 @@ def get_slice_of_file(var_name, file_str): return file_str[begin:end] -# Parses the extension_entries.hpp file -def parse_extension_entries(file_path): - def parse_contents(input) -> list: - # Split the string by comma and remove any leading or trailing spaces - elements = input.split(",") - # Strip any leading or trailing spaces and surrounding double quotes from each element - elements = [element.strip().strip('"') for element in elements] - return elements - - file = open(file_path, 'r') - pattern = re.compile("{(.*(?:, )?)}[,}\n]") - file_blob = file.read() - - # Get the extension functions - ext_functions_file_blob = get_slice_of_file("EXTENSION_FUNCTIONS", file_blob) - res = pattern.findall(ext_functions_file_blob) - res = [parse_contents(x) for x in res] - res = [(x[0], x[1], x[2]) for x in res] - cur_function_map = ExtensionFunction.create_map(res) - - # Get the extension settings - ext_settings_file_blob = get_slice_of_file("EXTENSION_SETTINGS", file_blob) - res = pattern.findall(ext_settings_file_blob) - res = [parse_contents(x) for x in res] - res = [(x[0], x[1]) for x in res] - cur_settings_map = ExtensionSetting.create_map(res) - - # Get the extension types - ext_copy_functions_blob = get_slice_of_file("EXTENSION_COPY_FUNCTIONS", file_blob) - res = pattern.findall(ext_copy_functions_blob) - res = [parse_contents(x) for x in res] - res = [(x[0], x[1]) for x in res] - cur_copy_functions_map = ExtensionCopyFunction.create_map(res) - - # Get the extension types - ext_types_file_blob = get_slice_of_file("EXTENSION_TYPES", file_blob) - res = pattern.findall(ext_types_file_blob) - res = [parse_contents(x) for x in res] - res = [(x[0], x[1]) for x in res] - cur_types_map = ExtensionType.create_map(res) - - return { - 'functions': cur_function_map, - 'settings': cur_settings_map, - 'types': cur_types_map, - 
'copy_functions': cur_copy_functions_map, - } - - def print_map_diff(d1, d2): s1 = sorted(set(d1.items())) s2 = sorted(set(d2.items())) @@ -551,11 +559,18 @@ def main(): # Collect the list of functions/settings without any extensions loaded extension_data.set_base() + # TODO: add 'purge' option to ignore existing entries ?? + parsed_entries = ParsedEntries(HEADER_PATH) + parsed_entries.filter_entries(extension_names) + for extension_name in extension_names: print(extension_name) # For every extension, add the functions/settings added by the extension extension_data.add_extension(extension_name) + # Add the entries we initially parsed from the HEADER_PATH + extension_data.add_entries(parsed_entries) + if args.validate: extension_data.validate() return From 4465345de395c12cfda6a5d831aa7c2e1c1186f0 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 11 Apr 2024 13:32:52 +0200 Subject: [PATCH 131/147] statically link mbedtls into parquet, as it's required to load parquet dynamically --- extension/parquet/CMakeLists.txt | 1 + scripts/generate_extensions_function.py | 1 + 2 files changed, 2 insertions(+) diff --git a/extension/parquet/CMakeLists.txt b/extension/parquet/CMakeLists.txt index 718bb3af91ae..f4d415dbeeb9 100644 --- a/extension/parquet/CMakeLists.txt +++ b/extension/parquet/CMakeLists.txt @@ -66,6 +66,7 @@ endif() build_static_extension(parquet ${PARQUET_EXTENSION_FILES}) set(PARAMETERS "-warnings") build_loadable_extension(parquet ${PARAMETERS} ${PARQUET_EXTENSION_FILES}) +target_link_libraries(parquet_loadable_extension duckdb_mbedtls) install( TARGETS parquet_extension diff --git a/scripts/generate_extensions_function.py b/scripts/generate_extensions_function.py index 316c50617e46..be00d406eda0 100644 --- a/scripts/generate_extensions_function.py +++ b/scripts/generate_extensions_function.py @@ -32,6 +32,7 @@ from enum import Enum + class CatalogType(str, Enum): SCALAR = "CatalogType::SCALAR_FUNCTION_ENTRY" TABLE = "CatalogType::TABLE_FUNCTION_ENTRY" From 453b32aaf518539c6a2b1449572d7dafb437b397 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 15:28:22 +0200 Subject: [PATCH 132/147] array_length should return NULL when the input value is NULL for consistency --- src/function/scalar/string/length.cpp | 25 +++++++++++++++++-- test/sql/function/array/array_length.test | 30 ++++++++++++++++++++++- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/function/scalar/string/length.cpp b/src/function/scalar/string/length.cpp index 218e4e84626d..dd88fd8d1441 100644 --- a/src/function/scalar/string/length.cpp +++ b/src/function/scalar/string/length.cpp @@ -70,7 +70,6 @@ static unique_ptr LengthPropagateStats(ClientContext &context, F //------------------------------------------------------------------ // ARRAY / LIST LENGTH //------------------------------------------------------------------ - static void ListLengthFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &input = args.data[0]; D_ASSERT(input.GetType().id() == LogicalTypeId::LIST); @@ -83,9 +82,31 @@ static void ListLengthFunction(DataChunk &args, ExpressionState &state, Vector & static void ArrayLengthFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &input = args.data[0]; - // If the input is an array, the length is constant + + UnifiedVectorFormat format; + args.data[0].ToUnifiedFormat(args.size(), format); + + // for arrays the length is constant result.SetVectorType(VectorType::CONSTANT_VECTOR); ConstantVector::GetData(result)[0] = 
static_cast(ArrayType::GetSize(input.GetType())); + + // but we do need to take null values into account + if (format.validity.AllValid()) { + // if there are no null values we can just return the constant + return; + } + // otherwise we flatten and inherit the null values of the parent + result.Flatten(args.size()); + auto &result_validity = FlatVector::Validity(result); + for (idx_t r = 0; r < args.size(); r++) { + auto idx = format.sel->get_index(r); + if (!format.validity.RowIsValid(idx)) { + result_validity.SetInvalid(r); + } + } + if (args.AllConstant()) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } } static unique_ptr ArrayOrListLengthBind(ClientContext &context, ScalarFunction &bound_function, diff --git a/test/sql/function/array/array_length.test b/test/sql/function/array/array_length.test index 4c82583007f0..b82ec6a05e41 100644 --- a/test/sql/function/array/array_length.test +++ b/test/sql/function/array/array_length.test @@ -10,6 +10,35 @@ SELECT length(array_value(1, 2, 3)); ---- 3 +# array length for NULL values +statement ok +create table arrays(a int[3]); + +statement ok +insert into arrays values ([1, 2, 3]), ([4, 5, 6]) + +query I +select length(a) from arrays; +---- +3 +3 + +query I +select length(NULL::int[3]) from arrays; +---- +NULL +NULL + +statement ok +insert into arrays values (NULL); + +query I +select length(a) from arrays; +---- +3 +3 +NULL + # Array length with dimension argument query I SELECT array_length(array_value(array_value(1, 2, 2), array_value(3, 4, 3)), 1); @@ -26,7 +55,6 @@ SELECT array_length(array_value(array_value(1, 2, 2), array_value(3, 4, 3)), 3); ---- Out of Range Error: array_length dimension '3' out of range (min: '1', max: '2') - statement error SELECT array_length(array_value(array_value(1, 2, 2), array_value(3, 4, 3)), 0); ---- From 13e70f797a4744b9f8e4146869c161a39376370a Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 15:38:03 +0200 Subject: [PATCH 133/147] Update fuzzer scripts --- scripts/fuzzer_helper.py | 89 ++++++++++++++++++++++------------------ scripts/reduce_sql.py | 21 +++------- scripts/run_fuzzer.py | 46 +++++++++------------ 3 files changed, 72 insertions(+), 84 deletions(-) diff --git a/scripts/fuzzer_helper.py b/scripts/fuzzer_helper.py index e36275e9b0c5..9d73f1985243 100644 --- a/scripts/fuzzer_helper.py +++ b/scripts/fuzzer_helper.py @@ -41,31 +41,24 @@ footer = ''' ```''' - -def get_github_hash(): - proc = subprocess.Popen(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE) - return proc.stdout.read().decode('utf8').strip() - - # github stuff def issue_url(): return 'https://api.github.com/repos/%s/%s/issues' % (REPO_OWNER, REPO_NAME) - def create_session(): # Create an authenticated session to create the issue session = requests.Session() session.headers.update({'Authorization': 'token %s' % (TOKEN,)}) return session - def make_github_issue(title, body): if len(title) > 240: # avoid title is too long error (maximum is 256 characters) title = title[:240] + '...' 
session = create_session() url = issue_url() - issue = {'title': title, 'body': body} + issue = {'title': title, + 'body': body} r = session.post(url, json.dumps(issue)) if r.status_code == 201: print('Successfully created Issue "%s"' % title) @@ -74,10 +67,9 @@ def make_github_issue(title, body): print('Response:', r.content.decode('utf8')) raise Exception("Failed to create issue") - -def get_github_issues(): +def get_github_issues(page): session = create_session() - url = issue_url() + url = issue_url()+'?per_page=100&page='+str(page) r = session.get(url) if r.status_code != 200: print('Failed to get list of issues') @@ -85,7 +77,6 @@ def get_github_issues(): raise Exception("Failed to get list of issues") return json.loads(r.content.decode('utf8')) - def close_github_issue(number): session = create_session() url = issue_url() + '/' + str(number) @@ -98,73 +89,89 @@ def close_github_issue(number): print('Response:', r.content.decode('utf8')) raise Exception("Failed to close issue") +def label_github_issue(number, label): + session = create_session() + url = issue_url() + '/' + str(number) + params = {'labels': [label]} + r = session.patch(url, json.dumps(params)) + if r.status_code == 200: + print(f'Successfully labeled Issue "{number}"') + else: + print(f'Could not label Issue "{number}" (status code {r.status_code})') + print('Response:', r.content.decode('utf8')) + raise Exception("Failed to label issue") def extract_issue(body, nr): try: splits = body.split(middle) sql = splits[0].split(header)[1] - error = splits[1][: -len(footer)] + error = splits[1][:-len(footer)] return (sql, error) except: print(f"Failed to extract SQL/error message from issue {nr}") print(body) return None - def run_shell_command_batch(shell, cmd): command = [shell, '--batch', '-init', '/dev/null'] - res = subprocess.run(command, input=bytearray(cmd, 'utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + try: + res = subprocess.run(command, input=bytearray(cmd, 'utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300) + except subprocess.TimeoutExpired: + print(f"TIMEOUT... 
{cmd}") + return ("", "", 0, True) stdout = res.stdout.decode('utf8').strip() stderr = res.stderr.decode('utf8').strip() - return (stdout, stderr, res.returncode) - + return (stdout, stderr, res.returncode, False) -def test_reproducibility(shell, issue, current_errors): +def test_reproducibility(shell, issue, current_errors, perform_check): extract = extract_issue(issue['body'], issue['number']) + labels = issue['labels'] + label_timeout = False + for label in labels: + if label['name'] == 'timeout': + label_timeout = True if extract is None: # failed extract: leave the issue as-is return True sql = extract[0] + ';' error = extract[1] - (stdout, stderr, returncode) = run_shell_command_batch(shell, sql) - if returncode == 0: - return False - if not fuzzer_helper.is_internal_error(stderr): - return False + if perform_check is True and label_timeout is False: + print(f"Checking issue {issue['number']}...") + (stdout, stderr, returncode, is_timeout) = run_shell_command_batch(shell, sql) + if is_timeout: + label_github_issue(issue['number'], 'timeout') + else: + if returncode == 0: + return False + if not fuzzer_helper.is_internal_error(stderr): + return False # issue is still reproducible current_errors[error] = issue return True - -def extract_github_issues(shell): +def extract_github_issues(shell, perform_check): current_errors = dict() - issues = get_github_issues() - for issue in issues: - # check if the github issue is still reproducible - if not test_reproducibility(shell, issue, current_errors): - # the issue appears to be fixed - close the issue - print(f"Failed to reproduce issue {issue['number']}, closing...") - close_github_issue(int(issue['number'])) + for p in range(1,10): + issues = get_github_issues(p) + for issue in issues: + # check if the github issue is still reproducible + if not test_reproducibility(shell, issue, current_errors, perform_check): + # the issue appears to be fixed - close the issue + print(f"Failed to reproduce issue {issue['number']}, closing...") + close_github_issue(int(issue['number'])) return current_errors - def file_issue(cmd, error_msg, fuzzer, seed, hash): # issue is new, file it print("Filing new issue to Github") title = error_msg - body = ( - fuzzer_desc.replace("${FUZZER}", fuzzer) - .replace("${FULL_HASH}", hash) - .replace("${SHORT_HASH}", hash[:5]) - .replace("${SEED}", str(seed)) - ) + body = fuzzer_desc.replace("${FUZZER}", fuzzer).replace("${FULL_HASH}", hash).replace("${SHORT_HASH}", hash[:5]).replace("${SEED}", str(seed)) body += header + cmd + middle + error_msg + footer print(title, body) make_github_issue(title, body) - def is_internal_error(error): if 'differs from original result' in error: return True diff --git a/scripts/reduce_sql.py b/scripts/reduce_sql.py index e266fd1d1aa0..f81669a73391 100644 --- a/scripts/reduce_sql.py +++ b/scripts/reduce_sql.py @@ -11,9 +11,8 @@ SELECT * FROM reduce_sql_statement('${QUERY}'); ''' - def sanitize_error(err): - err = re.sub(r'Error: near line \d+: ', '', err) + err = re.sub('Error: near line \d+: ', '', err) err = err.replace(os.getcwd() + '/', '') err = err.replace(os.getcwd(), '') if 'AddressSanitizer' in err: @@ -21,7 +20,6 @@ def sanitize_error(err): err = 'AddressSanitizer error ' + match return err - def run_shell_command(shell, cmd): command = [shell, '-csv', '--batch', '-init', '/dev/null'] @@ -30,7 +28,6 @@ def run_shell_command(shell, cmd): stderr = res.stderr.decode('utf8').strip() return (stdout, stderr, res.returncode) - def get_reduced_sql(shell, sql_query): reduce_query = 
get_reduced_query.replace('${QUERY}', sql_query.replace("'", "''")) (stdout, stderr, returncode) = run_shell_command(shell, reduce_query) @@ -43,7 +40,6 @@ def get_reduced_sql(shell, sql_query): reduce_candidates.append(line.strip('"').replace('""', '"')) return reduce_candidates[1:] - def reduce(sql_query, data_load, shell, error_msg, max_time_seconds=300): start = time.time() while True: @@ -70,22 +66,18 @@ def reduce(sql_query, data_load, shell, error_msg, max_time_seconds=300): break return sql_query - def is_ddl_query(query): query = query.lower() if 'create' in query or 'insert' in query or 'update' in query or 'delete' in query: return True return False - def initial_cleanup(query_log): query_log = query_log.replace('SELECT * FROM pragma_version()\n', '') return query_log - def run_queries_until_crash_mp(queries, result_file): import duckdb - con = duckdb.connect() sqlite_con = sqlite3.connect(result_file) sqlite_con.execute('CREATE TABLE queries(id INT, text VARCHAR)') @@ -110,7 +102,7 @@ def run_queries_until_crash_mp(queries, result_file): keep_query = True sqlite_con.execute('UPDATE result SET text=?', (exception_error,)) if not keep_query: - sqlite_con.execute('DELETE FROM queries WHERE id=?', (id,)) + sqlite_con.execute('DELETE FROM queries WHERE id=?', (id, )) if is_internal_error: # found internal error: no need to try further queries break @@ -121,7 +113,6 @@ def run_queries_until_crash_mp(queries, result_file): sqlite_con.commit() sqlite_con.close() - def run_queries_until_crash(queries): sqlite_file = 'cleaned_queries.db' if os.path.isfile(sqlite_file): @@ -149,10 +140,8 @@ def cleanup_irrelevant_queries(query_log): queries = [x for x in query_log.split(';\n') if len(x) > 0] return run_queries_until_crash(queries) - # def reduce_internal(start, sql_query, data_load, queries_final, shell, error_msg, max_time_seconds=300): - def reduce_query_log_query(start, shell, queries, query_index, max_time_seconds): new_query_list = queries[:] sql_query = queries[query_index] @@ -184,7 +173,6 @@ def reduce_query_log_query(start, shell, queries, query_index, max_time_seconds) break return sql_query - def reduce_query_log(queries, shell, max_time_seconds=300): start = time.time() current_index = 0 @@ -195,7 +183,7 @@ def reduce_query_log(queries, shell, max_time_seconds=300): if current_time - start > max_time_seconds: break # remove the query at "current_index" - new_queries = queries[:current_index] + queries[current_index + 1 :] + new_queries = queries[:current_index] + queries[current_index + 1:] # try to run the queries and check if we still get the same error (new_queries_x, current_error) = run_queries_until_crash(new_queries) if current_error is None: @@ -215,6 +203,7 @@ def reduce_query_log(queries, shell, max_time_seconds=300): return queries + # Example usage: # error_msg = 'INTERNAL Error: Assertion triggered in file "/Users/myth/Programs/duckdb-bugfix/src/common/types/data_chunk.cpp" on line 41: !types.empty()' # shell = 'build/debug/duckdb' @@ -271,4 +260,4 @@ def reduce_query_log(queries, shell, max_time_seconds=300): # limit 88 # ''' # -# print(reduce(sql_query, data_load, shell, error_msg)) +# print(reduce(sql_query, data_load, shell, error_msg)) \ No newline at end of file diff --git a/scripts/run_fuzzer.py b/scripts/run_fuzzer.py index 86bf65537495..57f7a677b3fd 100644 --- a/scripts/run_fuzzer.py +++ b/scripts/run_fuzzer.py @@ -12,15 +12,22 @@ fuzzer = None db = None shell = None +perform_checks = True for param in sys.argv: if param == '--sqlsmith': fuzzer = 
'sqlsmith' elif param == '--duckfuzz': fuzzer = 'duckfuzz' + elif param == '--duckfuzz_functions': + fuzzer = 'duckfuzz_functions' elif param == '--alltypes': db = 'alltypes' elif param == '--tpch': db = 'tpch' + elif param == '--emptyalltypes': + db = 'emptyalltypes' + elif param == '--no_checks': + perform_checks = False elif param.startswith('--shell='): shell = param.replace('--shell=', '') elif param.startswith('--seed='): @@ -31,7 +38,7 @@ exit(1) if db is None: - print("Unrecognized database to run on, expected either --tpch or --alltypes") + print("Unrecognized database to run on, expected either --tpch, --alltypes or --emptyalltypes") exit(1) if shell is None: @@ -41,18 +48,18 @@ if seed < 0: seed = random.randint(0, 2**30) -git_hash = fuzzer_helper.get_github_hash() - +git_hash = os.getenv('DUCKDB_HASH') def create_db_script(db): if db == 'alltypes': return 'create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types();' elif db == 'tpch': return 'call dbgen(sf=0.1);' + elif db == 'emptyalltypes': + return 'create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types() limit 0;' else: raise Exception("Unknown database creation script") - def run_fuzzer_script(fuzzer): if fuzzer == 'sqlsmith': return "call sqlsmith(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" @@ -63,7 +70,6 @@ def run_fuzzer_script(fuzzer): else: raise Exception("Unknown fuzzer type") - def get_fuzzer_name(fuzzer): if fuzzer == 'sqlsmith': return 'SQLSmith' @@ -74,7 +80,6 @@ def get_fuzzer_name(fuzzer): else: return 'Unknown' - def run_shell_command(cmd): command = [shell, '--batch', '-init', '/dev/null'] @@ -85,27 +90,19 @@ def run_shell_command(cmd): # first get a list of all github issues, and check if we can still reproduce them -current_errors = fuzzer_helper.extract_github_issues(shell) +current_errors = fuzzer_helper.extract_github_issues(shell, perform_checks) -max_queries = 1000 +max_queries = 2000 last_query_log_file = 'sqlsmith.log' complete_log_file = 'sqlsmith.complete.log' -print( - f'''========================================== +print(f'''========================================== RUNNING {fuzzer} on {db} -==========================================''' -) +==========================================''') load_script = create_db_script(db) fuzzer_name = get_fuzzer_name(fuzzer) -fuzzer = ( - run_fuzzer_script(fuzzer) - .replace('${MAX_QUERIES}', str(max_queries)) - .replace('${LAST_LOG_FILE}', last_query_log_file) - .replace('${COMPLETE_LOG_FILE}', complete_log_file) - .replace('${SEED}', str(seed)) -) +fuzzer = run_fuzzer_script(fuzzer).replace('${MAX_QUERIES}', str(max_queries)).replace('${LAST_LOG_FILE}', last_query_log_file).replace('${COMPLETE_LOG_FILE}', complete_log_file).replace('${SEED}', str(seed)) print(load_script) print(fuzzer) @@ -116,11 +113,9 @@ def run_shell_command(cmd): (stdout, stderr, returncode) = run_shell_command(cmd) -print( - f'''========================================== +print(f'''========================================== FINISHED RUNNING -==========================================''' -) +==========================================''') print("============== STDOUT ================") print(stdout) print("============== STDERR =================") @@ -165,10 +160,7 @@ def run_shell_command(cmd): # check if this is a duplicate issue if error_msg in current_errors: print("Skip filing duplicate issue") - print( - "Issue already 
exists: https://github.com/duckdb/duckdb-fuzzer/issues/" - + str(current_errors[error_msg]['number']) - ) + print("Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/" + str(current_errors[error_msg]['number'])) exit(0) print(last_query) From 22d120f941fe5b16f06b45ed8dea9ba41f4debcf Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 15:49:44 +0200 Subject: [PATCH 134/147] Make it possible to use reduce_sql from the command line --- scripts/fuzzer_helper.py | 28 ++++++++++++++------------- scripts/reduce_sql.py | 42 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/scripts/fuzzer_helper.py b/scripts/fuzzer_helper.py index 9d73f1985243..d457b3172d49 100644 --- a/scripts/fuzzer_helper.py +++ b/scripts/fuzzer_helper.py @@ -6,20 +6,8 @@ import reduce_sql import fuzzer_helper -if 'FUZZEROFDUCKSKEY' not in os.environ: - print("FUZZEROFDUCKSKEY not found in environment variables") - exit(1) USERNAME = 'fuzzerofducks' -TOKEN = os.environ['FUZZEROFDUCKSKEY'] - -if len(TOKEN) == 0: - print("FUZZEROFDUCKSKEY is set but is empty") - exit(1) - -if len(TOKEN) != 40: - print("Incorrect length for FUZZEROFDUCKSKEY") - exit(1) REPO_OWNER = 'duckdb' REPO_NAME = 'duckdb-fuzzer' @@ -45,10 +33,24 @@ def issue_url(): return 'https://api.github.com/repos/%s/%s/issues' % (REPO_OWNER, REPO_NAME) + +def get_token(): + if 'FUZZEROFDUCKSKEY' not in os.environ: + print("FUZZEROFDUCKSKEY not found in environment variables") + exit(1) + token = os.environ['FUZZEROFDUCKSKEY'] + if len(token) == 0: + print("FUZZEROFDUCKSKEY is set but is empty") + exit(1) + + if len(token) != 40: + print("Incorrect length for FUZZEROFDUCKSKEY") + exit(1) + return token def create_session(): # Create an authenticated session to create the issue session = requests.Session() - session.headers.update({'Authorization': 'token %s' % (TOKEN,)}) + session.headers.update({'Authorization': 'token %s' % (get_token(),)}) return session def make_github_issue(title, body): diff --git a/scripts/reduce_sql.py b/scripts/reduce_sql.py index f81669a73391..de30dea09872 100644 --- a/scripts/reduce_sql.py +++ b/scripts/reduce_sql.py @@ -6,13 +6,16 @@ import multiprocessing import sqlite3 -multiprocessing.set_start_method('fork') +try: + multiprocessing.set_start_method('fork') +except RuntimeError: + pass get_reduced_query = ''' SELECT * FROM reduce_sql_statement('${QUERY}'); ''' def sanitize_error(err): - err = re.sub('Error: near line \d+: ', '', err) + err = re.sub(r'Error: near line \d+: ', '', err) err = err.replace(os.getcwd() + '/', '') err = err.replace(os.getcwd(), '') if 'AddressSanitizer' in err: @@ -203,6 +206,41 @@ def reduce_query_log(queries, shell, max_time_seconds=300): return queries +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description='Reduce a problematic SQL query') + parser.add_argument('--shell', dest='shell', action='store', help='Path to the shell executable', default='build/debug/duckdb') + parser.add_argument('--load', dest='load', action='store', help='Path to the data load script', required=True) + parser.add_argument('--exec', dest='exec', action='store', help='Path to the executable script', required=True) + parser.add_argument('--inplace', dest='inplace', action='store_true', help='If true, overrides the exec script with the final query') + parser.add_argument('--max-time', dest='max_time', action='store', help='Maximum time in seconds to run the reducer', default=300) + + args = parser.parse_args() + 
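+    # example invocation (paths are illustrative):
+    #   python3 scripts/reduce_sql.py --shell build/debug/duckdb --load load.sql --exec exec.sql --inplace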
print("Starting reduce process") + + shell = args.shell + data_load = open(args.load).read() + sql_query = open(args.exec).read() + (stdout, stderr, returncode) = run_shell_command(shell, data_load + sql_query) + expected_error = sanitize_error(stderr) + + print("===================================================") + print("Found expected error") + print("===================================================") + print(expected_error) + print("===================================================") + + + final_query = reduce(sql_query, data_load, shell, expected_error, args.max_time) + print("Found final reduced query") + print("===================================================") + print(final_query) + print("===================================================") + if args.inplace: + print(f"Writing to file {args.exec}") + with open(args.exec, 'w+') as f: + f.write(final_query) + # Example usage: # error_msg = 'INTERNAL Error: Assertion triggered in file "/Users/myth/Programs/duckdb-bugfix/src/common/types/data_chunk.cpp" on line 41: !types.empty()' From 164a61d00d4911e4a388e4241515712d832e949f Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 16:41:15 +0200 Subject: [PATCH 135/147] Expand statement simplifier to recurse correctly in more situations allowing for more complex queries to be simplified more --- .../sqlsmith/include/statement_simplifier.hpp | 12 +- extension/sqlsmith/statement_simplifier.cpp | 119 ++++++++++++------ scripts/reduce_sql.py | 4 + 3 files changed, 87 insertions(+), 48 deletions(-) diff --git a/extension/sqlsmith/include/statement_simplifier.hpp b/extension/sqlsmith/include/statement_simplifier.hpp index cda89a829dd0..2bdf2715cd18 100644 --- a/extension/sqlsmith/include/statement_simplifier.hpp +++ b/extension/sqlsmith/include/statement_simplifier.hpp @@ -50,16 +50,10 @@ class StatementSimplifier { template void SimplifyReplace(T &element, T &other); - template - void SimplifyListReplace(T &element, vector &list); - - template - void SimplifyListReplaceNull(vector &list); - template void SimplifyOptional(duckdb::unique_ptr &opt); - void Simplify(TableRef &ref); + void Simplify(unique_ptr &ref); void Simplify(SelectNode &node); void Simplify(SetOperationNode &node); @@ -69,6 +63,10 @@ class StatementSimplifier { void Simplify(OrderModifier &modifier); void SimplifyExpression(duckdb::unique_ptr &expr); + void SimplifyOptionalExpression(duckdb::unique_ptr &expr); + void SimplifyChildExpression(duckdb::unique_ptr &expr, unique_ptr &child); + void SimplifyExpressionList(duckdb::unique_ptr &expr, vector> &expression_list); + void SimplifyExpressionList(vector> &expression_list, bool is_optional = true); void Simplify(CommonTableExpressionMap &cte_map); void Simplify(UpdateSetInfo &info); diff --git a/extension/sqlsmith/statement_simplifier.cpp b/extension/sqlsmith/statement_simplifier.cpp index 05fd4de43288..edfd3437ee70 100644 --- a/extension/sqlsmith/statement_simplifier.cpp +++ b/extension/sqlsmith/statement_simplifier.cpp @@ -44,21 +44,6 @@ void StatementSimplifier::SimplifyList(vector &list, bool is_optional) { } } -template -void StatementSimplifier::SimplifyListReplaceNull(vector &list) { - for (idx_t i = 0; i < list.size(); i++) { - duckdb::unique_ptr constant = make_uniq(Value()); - SimplifyReplace(list[i], constant); - } -} - -template -void StatementSimplifier::SimplifyListReplace(T &element, vector &list) { - for (idx_t i = 0; i < list.size(); i++) { - SimplifyReplace(element, list[i]); - } -} - template void 
StatementSimplifier::SimplifyOptional(duckdb::unique_ptr &opt) { if (!opt) { @@ -69,21 +54,24 @@ void StatementSimplifier::SimplifyOptional(duckdb::unique_ptr &opt) { opt = std::move(n); } -void StatementSimplifier::Simplify(TableRef &ref) { - switch (ref.type) { +void StatementSimplifier::Simplify(unique_ptr &ref) { + switch (ref->type) { case TableReferenceType::SUBQUERY: { - auto &subquery = ref.Cast(); + auto &subquery = ref->Cast(); Simplify(*subquery.subquery->node); break; } case TableReferenceType::JOIN: { - auto &cp = ref.Cast(); - Simplify(*cp.left); - Simplify(*cp.right); + auto &cp = ref->Cast(); + Simplify(cp.left); + Simplify(cp.right); + SimplifyOptional(cp.condition); + SimplifyReplace(ref, cp.left); + SimplifyReplace(ref, cp.right); break; } case TableReferenceType::EXPRESSION_LIST: { - auto &expr_list = ref.Cast(); + auto &expr_list = ref->Cast(); if (expr_list.values.size() == 1) { SimplifyList(expr_list.values[0]); } else if (expr_list.values.size() > 1) { @@ -98,18 +86,18 @@ void StatementSimplifier::Simplify(TableRef &ref) { void StatementSimplifier::Simplify(SelectNode &node) { // simplify projection list - SimplifyList(node.select_list, false); + SimplifyExpressionList(node.select_list, false); // from clause SimplifyOptional(node.from_table); // simplify groups SimplifyList(node.groups.grouping_sets); // simplify filters - SimplifyOptional(node.where_clause); - SimplifyOptional(node.having); - SimplifyOptional(node.qualify); + SimplifyOptionalExpression(node.where_clause); + SimplifyOptionalExpression(node.having); + SimplifyOptionalExpression(node.qualify); SimplifyOptional(node.sample); - Simplify(*node.from_table); + Simplify(node.from_table); } void StatementSimplifier::Simplify(SetOperationNode &node) { @@ -154,13 +142,41 @@ void StatementSimplifier::Simplify(QueryNode &node) { SimplifyList(node.modifiers); } +void StatementSimplifier::SimplifyExpressionList(duckdb::unique_ptr &expr, vector> &expression_list) { + for(auto &child : expression_list) { + SimplifyChildExpression(expr, child); + } +} + +void StatementSimplifier::SimplifyExpressionList(vector> &expression_list, bool is_optional) { + SimplifyList(expression_list, is_optional); + for(auto &child : expression_list) { + SimplifyExpression(child); + } +} + +void StatementSimplifier::SimplifyChildExpression(duckdb::unique_ptr &expr, unique_ptr &child) { + if (!child) { + return; + } + SimplifyReplace(expr, child); + SimplifyExpression(child); +} + +void StatementSimplifier::SimplifyOptionalExpression(duckdb::unique_ptr &expr) { + if (!expr) { + return; + } + SimplifyOptional(expr); + SimplifyExpression(expr); +} + void StatementSimplifier::SimplifyExpression(duckdb::unique_ptr &expr) { if (!expr) { return; } auto expr_class = expr->GetExpressionClass(); switch (expr_class) { - case ExpressionClass::COLUMN_REF: case ExpressionClass::CONSTANT: return; default: @@ -171,37 +187,60 @@ void StatementSimplifier::SimplifyExpression(duckdb::unique_ptrCast(); - SimplifyListReplace(expr, conj.children); + SimplifyExpressionList(expr, conj.children); break; } case ExpressionClass::FUNCTION: { auto &func = expr->Cast(); - SimplifyListReplace(expr, func.children); - SimplifyListReplaceNull(func.children); + SimplifyExpressionList(expr, func.children); break; } case ExpressionClass::OPERATOR: { auto &op = expr->Cast(); - SimplifyListReplace(expr, op.children); + SimplifyExpressionList(expr, op.children); break; } case ExpressionClass::CASE: { auto &op = expr->Cast(); - SimplifyReplace(expr, op.else_expr); + 
SimplifyChildExpression(expr, op.else_expr); for (auto &case_check : op.case_checks) { - SimplifyReplace(expr, case_check.then_expr); - SimplifyReplace(expr, case_check.when_expr); + SimplifyChildExpression(expr, case_check.then_expr); + SimplifyChildExpression(expr, case_check.when_expr); } break; } case ExpressionClass::CAST: { auto &cast = expr->Cast(); - SimplifyReplace(expr, cast.child); + SimplifyChildExpression(expr, cast.child); break; } case ExpressionClass::COLLATE: { auto &collate = expr->Cast(); - SimplifyReplace(expr, collate.child); + SimplifyChildExpression(expr, collate.child); + break; + } + case ExpressionClass::SUBQUERY: { + auto &subq = expr->Cast(); + SimplifyChildExpression(expr, subq.child); + Simplify(*subq.subquery->node); + break; + } + case ExpressionClass::COMPARISON: { + auto &comp = expr->Cast(); + SimplifyChildExpression(expr, comp.left); + SimplifyChildExpression(expr, comp.right); + break; + } + case ExpressionClass::WINDOW: { + auto &window = expr->Cast(); + SimplifyExpressionList(expr, window.children); + SimplifyExpressionList(expr, window.partitions); + SimplifyList(window.orders); + SimplifyChildExpression(expr, window.filter_expr); + SimplifyChildExpression(expr, window.start_expr); + SimplifyChildExpression(expr, window.end_expr); + SimplifyChildExpression(expr, window.offset_expr); + SimplifyChildExpression(expr, window.default_expr); break; } default: @@ -212,7 +251,7 @@ void StatementSimplifier::SimplifyExpression(duckdb::unique_ptr()); break; default: break; @@ -267,9 +306,7 @@ void StatementSimplifier::Simplify(UpdateSetInfo &info) { void StatementSimplifier::Simplify(UpdateStatement &stmt) { Simplify(stmt.cte_map); - if (stmt.from_table) { - Simplify(*stmt.from_table); - } + SimplifyOptional(stmt.from_table); D_ASSERT(stmt.set_info); Simplify(*stmt.set_info); SimplifyList(stmt.returning_list); diff --git a/scripts/reduce_sql.py b/scripts/reduce_sql.py index de30dea09872..92a449b799b1 100644 --- a/scripts/reduce_sql.py +++ b/scripts/reduce_sql.py @@ -6,6 +6,10 @@ import multiprocessing import sqlite3 +# this script can be used as a library, but can also be directly called +# example usage: +# python3 scripts/reduce_sql.py --load load.sql --exec exec.sql + try: multiprocessing.set_start_method('fork') except RuntimeError: From f8f5387da9c5aa272a7d272a3974339ba5b6940e Mon Sep 17 00:00:00 2001 From: Joel Lubinitsky Date: Thu, 11 Apr 2024 10:51:48 -0400 Subject: [PATCH 136/147] remove catalog/schema filtering from adbc getobjects --- src/common/adbc/adbc.cpp | 74 ++-- test/api/adbc/test_adbc.cpp | 155 ++++++--- tools/pythonpkg/tests/fast/adbc/test_adbc.py | 336 ++++++++++++++++++- 3 files changed, 445 insertions(+), 120 deletions(-) diff --git a/src/common/adbc/adbc.cpp b/src/common/adbc/adbc.cpp index eb388dc0a305..65a147b08d3e 100644 --- a/src/common/adbc/adbc.cpp +++ b/src/common/adbc/adbc.cpp @@ -994,14 +994,6 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth case ADBC_OBJECT_DEPTH_CATALOGS: // Return metadata on catalogs. 
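		// note (illustrative, not part of the patch itself): with the
		// filtered_schemata CTE removed below, the system and temp catalogs
		// (and the information_schema / pg_catalog schemas) are no longer
		// filtered out of GetObjects results; the updated tests in
		// test_adbc.cpp assert exactly that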
query = duckdb::StringUtil::Format(R"( - WITH filtered_schemata AS ( - SELECT - catalog_name, - schema_name, - FROM - information_schema.schemata - WHERE catalog_name NOT IN ('system', 'temp') AND schema_name NOT IN ('information_schema', 'pg_catalog') - ) SELECT catalog_name, []::STRUCT( @@ -1039,7 +1031,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth )[] )[] catalog_db_schemas FROM - filtered_schemata + information_schema.schemata WHERE catalog_name LIKE '%s' GROUP BY catalog_name )", @@ -1048,26 +1040,18 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth case ADBC_OBJECT_DEPTH_DB_SCHEMAS: // Return metadata on catalogs and schemas. query = duckdb::StringUtil::Format(R"( - WITH filtered_schemata AS ( + WITH db_schemas AS ( SELECT catalog_name, schema_name, - FROM - information_schema.schemata - WHERE catalog_name NOT IN ('system', 'temp') AND schema_name NOT IN ('information_schema', 'pg_catalog') - ), - db_schemas AS ( - SELECT - * - FROM - filtered_schemata + FROM information_schema.schemata WHERE schema_name LIKE '%s' ) SELECT catalog_name, LIST({ - db_schema_name: dbs.schema_name, + db_schema_name: schema_name, db_schema_tables: []::STRUCT( table_name VARCHAR, table_type VARCHAR, @@ -1099,11 +1083,11 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth constraint_column_usage STRUCT(fk_catalog VARCHAR, fk_db_schema VARCHAR, fk_table VARCHAR, fk_column_name VARCHAR)[] )[] )[], - }) FILTER (dbs.schema_name IS NOT null) AS catalog_db_schemas + }) FILTER (dbs.schema_name is not null) catalog_db_schemas FROM - filtered_schemata + information_schema.schemata LEFT JOIN db_schemas dbs - USING (catalog_name) + USING (catalog_name, schema_name) WHERE catalog_name LIKE '%s' GROUP BY catalog_name )", @@ -1112,15 +1096,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth case ADBC_OBJECT_DEPTH_TABLES: // Return metadata on catalogs, schemas, and tables. query = duckdb::StringUtil::Format(R"( - WITH filtered_schemata AS ( - SELECT - catalog_name, - schema_name, - FROM - information_schema.schemata - WHERE catalog_name NOT IN ('system', 'temp') AND schema_name NOT IN ('information_schema', 'pg_catalog') - ), - tables AS ( + WITH tables AS ( SELECT table_catalog catalog_name, table_schema schema_name, @@ -1164,8 +1140,8 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth catalog_name, schema_name, db_schema_tables, - FROM filtered_schemata fs - LEFT JOIN tables t + FROM information_schema.schemata + LEFT JOIN tables USING (catalog_name, schema_name) WHERE schema_name LIKE '%s' ) @@ -1173,13 +1149,13 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth SELECT catalog_name, LIST({ - db_schema_name: dbs.schema_name, + db_schema_name: schema_name, db_schema_tables: db_schema_tables, - }) FILTER (dbs.schema_name is not null) AS catalog_db_schemas + }) FILTER (dbs.schema_name is not null) catalog_db_schemas FROM - filtered_schemata + information_schema.schemata LEFT JOIN db_schemas dbs - USING (catalog_name) + USING (catalog_name, schema_name) WHERE catalog_name LIKE '%s' GROUP BY catalog_name )", @@ -1188,15 +1164,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth case ADBC_OBJECT_DEPTH_COLUMNS: // Return metadata on catalogs, schemas, tables, and columns. 
query = duckdb::StringUtil::Format(R"( - WITH filtered_schemata AS ( - SELECT - catalog_name, - schema_name, - FROM - information_schema.schemata - WHERE catalog_name NOT IN ('system', 'temp') AND schema_name NOT IN ('information_schema', 'pg_catalog') - ), - columns AS ( + WITH columns AS ( SELECT table_catalog, table_schema, @@ -1265,8 +1233,8 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth catalog_name, schema_name, db_schema_tables, - FROM filtered_schemata fs - LEFT JOIN tables t + FROM information_schema.schemata + LEFT JOIN tables USING (catalog_name, schema_name) WHERE schema_name LIKE '%s' ) @@ -1274,13 +1242,13 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth SELECT catalog_name, LIST({ - db_schema_name: dbs.schema_name, + db_schema_name: schema_name, db_schema_tables: db_schema_tables, - }) FILTER (dbs.schema_name is not null) AS catalog_db_schemas + }) FILTER (dbs.schema_name is not null) catalog_db_schemas FROM - filtered_schemata + information_schema.schemata LEFT JOIN db_schemas dbs - USING (catalog_name) + USING (catalog_name, schema_name) WHERE catalog_name LIKE '%s' GROUP BY catalog_name )", diff --git a/test/api/adbc/test_adbc.cpp b/test/api/adbc/test_adbc.cpp index fd840bced89f..1aeac5e272f8 100644 --- a/test/api/adbc/test_adbc.cpp +++ b/test/api/adbc/test_adbc.cpp @@ -1064,18 +1064,22 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_CATALOGS, nullptr, nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - auto res = db.Query("Select * from result"); + auto res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(0, 0).ToString() == "test_catalog_depth"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(0, 0).ToString() == "system"); + REQUIRE(res->GetValue(0, 1).ToString() == "temp"); + REQUIRE(res->GetValue(0, 2).ToString() == "test_catalog_depth"); REQUIRE(res->GetValue(1, 0).ToString() == "[]"); + REQUIRE(res->GetValue(1, 1).ToString() == "[]"); + REQUIRE(res->GetValue(1, 2).ToString() == "[]"); db.Query("Drop table result;"); // Test Filters AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_CATALOGS, "bla", nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 0); db.Query("Drop table result;"); @@ -1095,25 +1099,39 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_DB_SCHEMAS, nullptr, nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - auto res = db.Query("Select * from result"); + auto res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); + REQUIRE(res->RowCount() == 3); REQUIRE(res->GetValue(0, 0).ToString() == "ADBC_OBJECT_DEPTH_DB_SCHEMAS"); + REQUIRE(res->GetValue(0, 1).ToString() == "system"); + REQUIRE(res->GetValue(0, 2).ToString() == "temp"); string expected = R"([ + { + 'db_schema_name': information_schema, + 'db_schema_tables': [] + }, { 'db_schema_name': main, 'db_schema_tables': [] + }, + { + 'db_schema_name': 
pg_catalog, + 'db_schema_tables': [] } ])"; REQUIRE(StringUtil::Replace(res->GetValue(1, 0).ToString(), " ", "") == StringUtil::Replace(StringUtil::Replace(StringUtil::Replace(expected, "\n", ""), "\t", ""), " ", "")); + REQUIRE(StringUtil::Replace(res->GetValue(1, 1).ToString(), " ", "") == + StringUtil::Replace(StringUtil::Replace(StringUtil::Replace(expected, "\n", ""), "\t", ""), " ", "")); + REQUIRE(StringUtil::Replace(res->GetValue(1, 2).ToString(), " ", "") == + StringUtil::Replace(StringUtil::Replace(StringUtil::Replace(expected, "\n", ""), "\t", ""), " ", "")); db.Query("Drop table result;"); // Test Filters AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_DB_SCHEMAS, "bla", nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 0); db.Query("Drop table result;"); @@ -1121,10 +1139,12 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_DB_SCHEMAS, nullptr, "bla", nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); + REQUIRE(res->RowCount() == 3); REQUIRE(res->GetValue(1, 0).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 1).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 2).ToString() == "NULL"); db.Query("Drop table result;"); } // 3. Test ADBC_OBJECT_DEPTH_TABLES @@ -1141,11 +1161,17 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_TABLES, nullptr, nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - auto res = db.Query("Select * from result"); + auto res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(0, 0).ToString() == "test_table_depth"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(0, 0).ToString() == "system"); + REQUIRE(res->GetValue(0, 1).ToString() == "temp"); + REQUIRE(res->GetValue(0, 2).ToString() == "test_table_depth"); string expected = R"([ + { + 'db_schema_name': information_schema, + 'db_schema_tables': NULL + }, { 'db_schema_name': main, 'db_schema_tables': [ @@ -1156,9 +1182,13 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { 'table_constraints': [] } ] + }, + { + 'db_schema_name': pg_catalog, + 'db_schema_tables': NULL } ])"; - REQUIRE(StringUtil::Replace(res->GetValue(1, 0).ToString(), " ", "") == + REQUIRE(StringUtil::Replace(res->GetValue(1, 2).ToString(), " ", "") == StringUtil::Replace(StringUtil::Replace(StringUtil::Replace(expected, "\n", ""), "\t", ""), " ", "")); db.Query("Drop table result;"); @@ -1166,7 +1196,7 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_TABLES, "bla", nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 0); db.Query("Drop table result;"); @@ -1174,19 +1204,21 @@ 
TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_TABLES, nullptr, "bla", nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); + REQUIRE(res->RowCount() == 3); REQUIRE(res->GetValue(1, 0).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 1).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 2).ToString() == "NULL"); db.Query("Drop table result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_TABLES, nullptr, nullptr, "bla", nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(1, 0).ToString() == "[{'db_schema_name': main, 'db_schema_tables': NULL}]"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(1, 2).ToString() == "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); } // 4.Test ADBC_OBJECT_DEPTH_COLUMNS @@ -1203,11 +1235,17 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - auto res = db.Query("Select * from result"); + auto res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(0, 0).ToString() == "test_column_depth"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(0, 0).ToString() == "system"); + REQUIRE(res->GetValue(0, 1).ToString() == "temp"); + REQUIRE(res->GetValue(0, 2).ToString() == "test_column_depth"); string expected = R"([ + { + 'db_schema_name': information_schema, + 'db_schema_tables': NULL + }, { 'db_schema_name': main, 'db_schema_tables': [ @@ -1240,9 +1278,13 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { 'table_constraints': NULL } ] + }, + { + 'db_schema_name': pg_catalog, + 'db_schema_tables': NULL } ])"; - REQUIRE(StringUtil::Replace(res->GetValue(1, 0).ToString(), " ", "") == + REQUIRE(StringUtil::Replace(res->GetValue(1, 2).ToString(), " ", "") == StringUtil::Replace(StringUtil::Replace(StringUtil::Replace(expected, "\n", ""), "\t", ""), " ", "")); db.Query("Drop table result;"); @@ -1250,7 +1292,7 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, "bla", nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 0); db.Query("Drop table result;"); @@ -1258,30 +1300,32 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, "bla", nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = 
db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); + REQUIRE(res->RowCount() == 3); REQUIRE(res->GetValue(1, 0).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 1).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 2).ToString() == "NULL"); db.Query("Drop table result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, "bla", nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(1, 0).ToString() == "[{'db_schema_name': main, 'db_schema_tables': NULL}]"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(1, 2).ToString() == "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, nullptr, nullptr, "bla", &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(1, 0).ToString() == - "[{'db_schema_name': main, 'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " - "'table_columns': NULL, 'table_constraints': NULL}]}]"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(1, 2).ToString() == + "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " + "'table_columns': NULL, 'table_constraints': NULL}]}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); } // 5.Test ADBC_OBJECT_DEPTH_ALL @@ -1298,11 +1342,16 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_ALL, nullptr, nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - auto res = db.Query("Select * from result"); - REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(0, 0).ToString() == "test_all_depth"); + auto res = db.Query("Select * from result order by catalog_name asc"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(0, 0).ToString() == "system"); + REQUIRE(res->GetValue(0, 1).ToString() == "temp"); + REQUIRE(res->GetValue(0, 2).ToString() == "test_all_depth"); string expected = R"([ + { + 'db_schema_name': information_schema, + 'db_schema_tables': NULL + }, { 'db_schema_name': main, 'db_schema_tables': [ @@ -1335,9 +1384,13 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { 'table_constraints': NULL } ] + }, + { + 'db_schema_name': pg_catalog, + 'db_schema_tables': NULL } ])"; - REQUIRE(StringUtil::Replace(res->GetValue(1, 0).ToString(), " ", "") == + REQUIRE(StringUtil::Replace(res->GetValue(1, 2).ToString(), " ", "") == StringUtil::Replace(StringUtil::Replace(StringUtil::Replace(expected, "\n", ""), "\t", ""), " ", "")); db.Query("Drop table result;"); @@ -1345,7 +1398,7 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, 
ADBC_OBJECT_DEPTH_COLUMNS, "bla", nullptr, nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 0); db.Query("Drop table result;"); @@ -1353,30 +1406,32 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, "bla", nullptr, nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); + REQUIRE(res->RowCount() == 3); REQUIRE(res->GetValue(1, 0).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 1).ToString() == "NULL"); + REQUIRE(res->GetValue(1, 2).ToString() == "NULL"); db.Query("Drop table result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, "bla", nullptr, nullptr, &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(1, 0).ToString() == "[{'db_schema_name': main, 'db_schema_tables': NULL}]"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(1, 2).ToString() == "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, nullptr, nullptr, "bla", &arrow_stream, &adbc_error); db.CreateTable("result", arrow_stream); - res = db.Query("Select * from result"); + res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); - REQUIRE(res->RowCount() == 1); - REQUIRE(res->GetValue(1, 0).ToString() == - "[{'db_schema_name': main, 'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " - "'table_columns': NULL, 'table_constraints': NULL}]}]"); + REQUIRE(res->RowCount() == 3); + REQUIRE(res->GetValue(1, 2).ToString() == + "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " + "'table_columns': NULL, 'table_constraints': NULL}]}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); } // Now lets test some errors diff --git a/tools/pythonpkg/tests/fast/adbc/test_adbc.py b/tools/pythonpkg/tests/fast/adbc/test_adbc.py index e0e0ee3830b4..708e00a94d36 100644 --- a/tools/pythonpkg/tests/fast/adbc/test_adbc.py +++ b/tools/pythonpkg/tests/fast/adbc/test_adbc.py @@ -47,10 +47,48 @@ def test_connection_get_objects(duck_conn): with duck_conn.cursor() as cursor: cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") depth_all = duck_conn.adbc_get_objects(depth="all").read_all() - assert depth_all.to_pylist() == [ + assert sorted_get_objects(depth_all.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 
'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, { 'catalog_name': 'memory', 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, { 'db_schema_name': 'main', 'db_schema_tables': [ @@ -97,15 +135,61 @@ def test_connection_get_objects(duck_conn): } ], }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, ], } - ] + ]) depth_tables = duck_conn.adbc_get_objects(depth="tables").read_all() - assert depth_tables.to_pylist() == [ + assert sorted_get_objects(depth_tables.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, { 'catalog_name': 'memory', 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, { 'db_schema_name': 'main', 'db_schema_tables': [ @@ -119,28 +203,78 @@ def test_connection_get_objects(duck_conn): }, ], } - ] + ]) depth_db_schemas = duck_conn.adbc_get_objects(depth="db_schemas").read_all() - assert depth_db_schemas.to_pylist() == [ + assert sorted_get_objects(depth_db_schemas.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, { 'catalog_name': 'memory', 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, { 'db_schema_name': 'main', 'db_schema_tables': [], }, ], } - ] + ]) depth_catalogs = duck_conn.adbc_get_objects(depth="catalogs").read_all() - assert depth_catalogs.to_pylist() == [ + assert sorted_get_objects(depth_catalogs.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [], + }, { 'catalog_name': 'memory', 'catalog_db_schemas': [], - } - ] + }, + ]) # All result schemas should be the same assert depth_all.schema == depth_tables.schema @@ -153,10 +287,48 @@ def test_connection_get_objects_filters(duck_conn): cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") no_filter = duck_conn.adbc_get_objects(depth="all").read_all() - assert no_filter.to_pylist() == [ + assert 
sorted_get_objects(no_filter.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, { 'catalog_name': 'memory', 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, { 'db_schema_name': 'main', 'db_schema_tables': [ @@ -203,15 +375,61 @@ def test_connection_get_objects_filters(duck_conn): } ], }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, ], } - ] + ]) column_filter = duck_conn.adbc_get_objects(depth="all", column_name_filter="notexist").read_all() - assert column_filter.to_pylist() == [ + assert sorted_get_objects(column_filter.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, { 'catalog_name': 'memory', 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, { 'db_schema_name': 'main', 'db_schema_tables': [ @@ -238,28 +456,78 @@ def test_connection_get_objects_filters(duck_conn): }, ], } - ] + ]) table_name_filter = duck_conn.adbc_get_objects(depth="all", table_name_filter="notexist").read_all() - assert table_name_filter.to_pylist() == [ + assert sorted_get_objects(table_name_filter.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, { 'catalog_name': 'memory', 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, { 'db_schema_name': 'main', 'db_schema_tables': None, }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, ], } - ] + ]) db_schema_filter = duck_conn.adbc_get_objects(depth="all", db_schema_filter="notexist").read_all() - assert db_schema_filter.to_pylist() == [ + assert sorted_get_objects(db_schema_filter.to_pylist()) == sorted_get_objects([ + { + 'catalog_name': 'system', + 'catalog_db_schemas': None, + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': None, + }, { 'catalog_name': 'memory', 
'catalog_db_schemas': None, } - ] + ]) catalog_filter = duck_conn.adbc_get_objects(depth="all", catalog_filter="notexist").read_all() assert catalog_filter.to_pylist() == [] @@ -450,3 +718,37 @@ def test_read(duck_conn): datetime.datetime(2006, 2, 15, 4, 46, 27), ], } + +def sorted_get_objects(catalogs): + res = [] + for catalog in sorted(catalogs, key=lambda cat: cat['catalog_name']): + new_catalog = { + "catalog_name": catalog['catalog_name'], + "catalog_db_schemas": [], + } + + for db_schema in sorted(catalog['catalog_db_schemas'] or [], key=lambda sch: sch['db_schema_name']): + new_db_schema = { + "db_schema_name": db_schema['db_schema_name'], + "db_schema_tables": [], + } + + for table in sorted(db_schema['db_schema_tables'] or [], key=lambda tab: tab['table_name']): + new_table = { + "table_name": table['table_name'], + "table_type": table['table_type'], + "table_columns": [], + "table_constraints": [], + } + + for column in sorted(table['table_columns'] or [], key=lambda col: col['ordinal_position']): + new_table["table_columns"].append(column) + + for constraint in sorted(table['table_constraints'] or [], key=lambda con: con['constraint_name']): + new_table["table_constraints"].append(constraint) + + new_db_schema["db_schema_tables"].append(new_table) + new_catalog["catalog_db_schemas"].append(new_db_schema) + res.append(new_catalog) + + return res \ No newline at end of file From 3ff7cb2a63f1f9d2bb2fd427beefab07674b29af Mon Sep 17 00:00:00 2001 From: Joel Lubinitsky Date: Thu, 11 Apr 2024 11:07:32 -0400 Subject: [PATCH 137/147] run formatter --- test/api/adbc/test_adbc.cpp | 24 +- tools/pythonpkg/tests/fast/adbc/test_adbc.py | 943 ++++++++++--------- 2 files changed, 497 insertions(+), 470 deletions(-) diff --git a/test/api/adbc/test_adbc.cpp b/test/api/adbc/test_adbc.cpp index 1aeac5e272f8..d612a43d7fc6 100644 --- a/test/api/adbc/test_adbc.cpp +++ b/test/api/adbc/test_adbc.cpp @@ -1218,7 +1218,9 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 3); - REQUIRE(res->GetValue(1, 2).ToString() == "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); + REQUIRE(res->GetValue(1, 2).ToString() == + "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, " + "'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); } // 4.Test ADBC_OBJECT_DEPTH_COLUMNS @@ -1314,7 +1316,9 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 3); - REQUIRE(res->GetValue(1, 2).ToString() == "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); + REQUIRE(res->GetValue(1, 2).ToString() == + "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, " + "'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, nullptr, nullptr, @@ -1324,8 +1328,10 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { 
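	// since the system and temp catalogs are no longer filtered out, every GetObjects
	// result now carries three catalogs (system, temp, and the attached database);
	// the queries order by catalog_name so the row indices asserted below are deterministic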
REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 3); REQUIRE(res->GetValue(1, 2).ToString() == - "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " - "'table_columns': NULL, 'table_constraints': NULL}]}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); + "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, " + "'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " + "'table_columns': NULL, 'table_constraints': NULL}]}, {'db_schema_name': pg_catalog, " + "'db_schema_tables': NULL}]"); db.Query("Drop table result;"); } // 5.Test ADBC_OBJECT_DEPTH_ALL @@ -1420,7 +1426,9 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { res = db.Query("Select * from result order by catalog_name asc"); REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 3); - REQUIRE(res->GetValue(1, 2).ToString() == "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); + REQUIRE(res->GetValue(1, 2).ToString() == + "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, " + "'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); db.Query("Drop table result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, nullptr, nullptr, @@ -1430,8 +1438,10 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { REQUIRE(res->ColumnCount() == 2); REQUIRE(res->RowCount() == 3); REQUIRE(res->GetValue(1, 2).ToString() == - "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " - "'table_columns': NULL, 'table_constraints': NULL}]}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]"); + "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, " + "'db_schema_tables': [{'table_name': my_table, 'table_type': BASE TABLE, " + "'table_columns': NULL, 'table_constraints': NULL}]}, {'db_schema_name': pg_catalog, " + "'db_schema_tables': NULL}]"); db.Query("Drop table result;"); } // Now lets test some errors diff --git a/tools/pythonpkg/tests/fast/adbc/test_adbc.py b/tools/pythonpkg/tests/fast/adbc/test_adbc.py index 708e00a94d36..9b05345f0bb3 100644 --- a/tools/pythonpkg/tests/fast/adbc/test_adbc.py +++ b/tools/pythonpkg/tests/fast/adbc/test_adbc.py @@ -47,234 +47,242 @@ def test_connection_get_objects(duck_conn): with duck_conn.cursor() as cursor: cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") depth_all = duck_conn.adbc_get_objects(depth="all").read_all() - assert sorted_get_objects(depth_all.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'memory', - 
'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': [ - { - 'table_name': 'getobjects', - 'table_type': 'BASE TABLE', - 'table_columns': [ - { - 'column_name': 'ints', - 'ordinal_position': 1, - 'remarks': '', - 'xdbc_char_octet_length': None, - 'xdbc_column_def': None, - 'xdbc_column_size': None, - 'xdbc_data_type': None, - 'xdbc_datetime_sub': None, - 'xdbc_decimal_digits': None, - 'xdbc_is_autoincrement': None, - 'xdbc_is_generatedcolumn': None, - 'xdbc_is_nullable': None, - 'xdbc_nullable': None, - 'xdbc_num_prec_radix': None, - 'xdbc_scope_catalog': None, - 'xdbc_scope_schema': None, - 'xdbc_scope_table': None, - 'xdbc_sql_data_type': None, - 'xdbc_type_name': None, - }, - ], - 'table_constraints': [ - { - 'constraint_column_names': [], - 'constraint_column_usage': [], - 'constraint_name': 'getobjects_ints_pkey', - 'constraint_type': 'PRIMARY KEY', - }, - { - 'constraint_column_names': [], - 'constraint_column_usage': [], - 'constraint_name': 'getobjects_ints_not_null', - 'constraint_type': 'CHECK', - }, - ], - } - ], - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - } - ]) + assert sorted_get_objects(depth_all.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': [ + { + 'table_name': 'getobjects', + 'table_type': 'BASE TABLE', + 'table_columns': [ + { + 'column_name': 'ints', + 'ordinal_position': 1, + 'remarks': '', + 'xdbc_char_octet_length': None, + 'xdbc_column_def': None, + 'xdbc_column_size': None, + 'xdbc_data_type': None, + 'xdbc_datetime_sub': None, + 'xdbc_decimal_digits': None, + 'xdbc_is_autoincrement': None, + 'xdbc_is_generatedcolumn': None, + 'xdbc_is_nullable': None, + 'xdbc_nullable': None, + 'xdbc_num_prec_radix': None, + 'xdbc_scope_catalog': None, + 'xdbc_scope_schema': None, + 'xdbc_scope_table': None, + 'xdbc_sql_data_type': None, + 'xdbc_type_name': None, + }, + ], + 'table_constraints': [ + { + 'constraint_column_names': [], + 'constraint_column_usage': [], + 'constraint_name': 'getobjects_ints_pkey', + 'constraint_type': 'PRIMARY KEY', + }, + { + 'constraint_column_names': [], + 'constraint_column_usage': [], + 'constraint_name': 'getobjects_ints_not_null', + 'constraint_type': 'CHECK', + }, + ], + } + ], + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + ] + ) depth_tables = duck_conn.adbc_get_objects(depth="tables").read_all() - assert sorted_get_objects(depth_tables.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 
'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'memory', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': [ - { - 'table_name': 'getobjects', - 'table_type': 'BASE TABLE', - 'table_columns': [], - 'table_constraints': [], - } - ], - }, - ], - } - ]) + assert sorted_get_objects(depth_tables.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': [ + { + 'table_name': 'getobjects', + 'table_type': 'BASE TABLE', + 'table_columns': [], + 'table_constraints': [], + } + ], + }, + ], + }, + ] + ) depth_db_schemas = duck_conn.adbc_get_objects(depth="db_schemas").read_all() - assert sorted_get_objects(depth_db_schemas.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'memory', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': [], - }, - ], - } - ]) + assert sorted_get_objects(depth_db_schemas.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 
'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': [], + }, + ], + }, + ] + ) depth_catalogs = duck_conn.adbc_get_objects(depth="catalogs").read_all() - assert sorted_get_objects(depth_catalogs.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': [], - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': [], - }, - { - 'catalog_name': 'memory', - 'catalog_db_schemas': [], - }, - ]) + assert sorted_get_objects(depth_catalogs.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [], + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': [], + }, + ] + ) # All result schemas should be the same assert depth_all.schema == depth_tables.schema @@ -287,247 +295,255 @@ def test_connection_get_objects_filters(duck_conn): cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") no_filter = duck_conn.adbc_get_objects(depth="all").read_all() - assert sorted_get_objects(no_filter.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'memory', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': [ - { - 'table_name': 'getobjects', - 'table_type': 'BASE TABLE', - 'table_columns': [ - { - 'column_name': 'ints', - 'ordinal_position': 1, - 'remarks': '', - 'xdbc_char_octet_length': None, - 'xdbc_column_def': None, - 'xdbc_column_size': None, - 'xdbc_data_type': None, - 'xdbc_datetime_sub': None, - 'xdbc_decimal_digits': None, - 'xdbc_is_autoincrement': None, - 'xdbc_is_generatedcolumn': None, - 'xdbc_is_nullable': None, - 'xdbc_nullable': None, - 'xdbc_num_prec_radix': None, - 'xdbc_scope_catalog': None, - 'xdbc_scope_schema': None, - 'xdbc_scope_table': None, - 'xdbc_sql_data_type': None, - 'xdbc_type_name': None, - }, - ], - 'table_constraints': [ - { - 'constraint_column_names': [], - 'constraint_column_usage': [], - 'constraint_name': 'getobjects_ints_pkey', - 'constraint_type': 'PRIMARY KEY', - }, - { - 'constraint_column_names': [], - 'constraint_column_usage': [], - 'constraint_name': 'getobjects_ints_not_null', - 'constraint_type': 'CHECK', - }, - ], - } - ], - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - } - ]) + assert sorted_get_objects(no_filter.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 
'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': [ + { + 'table_name': 'getobjects', + 'table_type': 'BASE TABLE', + 'table_columns': [ + { + 'column_name': 'ints', + 'ordinal_position': 1, + 'remarks': '', + 'xdbc_char_octet_length': None, + 'xdbc_column_def': None, + 'xdbc_column_size': None, + 'xdbc_data_type': None, + 'xdbc_datetime_sub': None, + 'xdbc_decimal_digits': None, + 'xdbc_is_autoincrement': None, + 'xdbc_is_generatedcolumn': None, + 'xdbc_is_nullable': None, + 'xdbc_nullable': None, + 'xdbc_num_prec_radix': None, + 'xdbc_scope_catalog': None, + 'xdbc_scope_schema': None, + 'xdbc_scope_table': None, + 'xdbc_sql_data_type': None, + 'xdbc_type_name': None, + }, + ], + 'table_constraints': [ + { + 'constraint_column_names': [], + 'constraint_column_usage': [], + 'constraint_name': 'getobjects_ints_pkey', + 'constraint_type': 'PRIMARY KEY', + }, + { + 'constraint_column_names': [], + 'constraint_column_usage': [], + 'constraint_name': 'getobjects_ints_not_null', + 'constraint_type': 'CHECK', + }, + ], + } + ], + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + ] + ) column_filter = duck_conn.adbc_get_objects(depth="all", column_name_filter="notexist").read_all() - assert sorted_get_objects(column_filter.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'memory', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': [ - { - 'table_name': 'getobjects', - 'table_type': 'BASE TABLE', - 'table_columns': None, - 'table_constraints': [ - { - 'constraint_column_names': [], - 'constraint_column_usage': [], - 'constraint_name': 'getobjects_ints_pkey', - 'constraint_type': 'PRIMARY KEY', - }, - { - 'constraint_column_names': [], - 'constraint_column_usage': [], - 'constraint_name': 'getobjects_ints_not_null', - 'constraint_type': 'CHECK', - }, - ], - } - ], - }, - ], - } - ]) + assert sorted_get_objects(column_filter.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': [ + { 
+ 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': [ + { + 'table_name': 'getobjects', + 'table_type': 'BASE TABLE', + 'table_columns': None, + 'table_constraints': [ + { + 'constraint_column_names': [], + 'constraint_column_usage': [], + 'constraint_name': 'getobjects_ints_pkey', + 'constraint_type': 'PRIMARY KEY', + }, + { + 'constraint_column_names': [], + 'constraint_column_usage': [], + 'constraint_name': 'getobjects_ints_not_null', + 'constraint_type': 'CHECK', + }, + ], + } + ], + }, + ], + }, + ] + ) table_name_filter = duck_conn.adbc_get_objects(depth="all", table_name_filter="notexist").read_all() - assert sorted_get_objects(table_name_filter.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - }, - { - 'catalog_name': 'memory', - 'catalog_db_schemas': [ - { - 'db_schema_name': 'information_schema', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'main', - 'db_schema_tables': None, - }, - { - 'db_schema_name': 'pg_catalog', - 'db_schema_tables': None, - }, - ], - } - ]) + assert sorted_get_objects(table_name_filter.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': [ + { + 'db_schema_name': 'information_schema', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'main', + 'db_schema_tables': None, + }, + { + 'db_schema_name': 'pg_catalog', + 'db_schema_tables': None, + }, + ], + }, + ] + ) db_schema_filter = duck_conn.adbc_get_objects(depth="all", db_schema_filter="notexist").read_all() - assert sorted_get_objects(db_schema_filter.to_pylist()) == sorted_get_objects([ - { - 'catalog_name': 'system', - 'catalog_db_schemas': None, - }, - { - 'catalog_name': 'temp', - 'catalog_db_schemas': None, - }, - { - 'catalog_name': 'memory', - 'catalog_db_schemas': None, - } - ]) + assert sorted_get_objects(db_schema_filter.to_pylist()) == sorted_get_objects( + [ + { + 'catalog_name': 'system', + 'catalog_db_schemas': None, + }, + { + 'catalog_name': 'temp', + 'catalog_db_schemas': None, + }, + { + 'catalog_name': 'memory', + 'catalog_db_schemas': None, + }, + ] + ) catalog_filter = duck_conn.adbc_get_objects(depth="all", catalog_filter="notexist").read_all() assert catalog_filter.to_pylist() == [] @@ -719,6 +735,7 @@ def test_read(duck_conn): ], } + def sorted_get_objects(catalogs): res = [] for 
catalog in sorted(catalogs, key=lambda cat: cat['catalog_name']): @@ -726,13 +743,13 @@ def sorted_get_objects(catalogs): "catalog_name": catalog['catalog_name'], "catalog_db_schemas": [], } - + for db_schema in sorted(catalog['catalog_db_schemas'] or [], key=lambda sch: sch['db_schema_name']): new_db_schema = { "db_schema_name": db_schema['db_schema_name'], "db_schema_tables": [], } - + for table in sorted(db_schema['db_schema_tables'] or [], key=lambda tab: tab['table_name']): new_table = { "table_name": table['table_name'], @@ -740,15 +757,15 @@ def sorted_get_objects(catalogs): "table_columns": [], "table_constraints": [], } - + for column in sorted(table['table_columns'] or [], key=lambda col: col['ordinal_position']): new_table["table_columns"].append(column) - + for constraint in sorted(table['table_constraints'] or [], key=lambda con: con['constraint_name']): new_table["table_constraints"].append(constraint) - + new_db_schema["db_schema_tables"].append(new_table) new_catalog["catalog_db_schemas"].append(new_db_schema) res.append(new_catalog) - return res \ No newline at end of file + return res From a2b532266b55c77e7ccd6f1851ae2e66c133eee1 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Thu, 11 Apr 2024 18:00:55 +0200 Subject: [PATCH 138/147] Use references in subquery flattening code --- .../subquery/flatten_dependent_join.hpp | 4 +- .../binder/query_node/plan_subquery.cpp | 18 +++---- .../subquery/flatten_dependent_join.cpp | 53 +++++++++---------- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp b/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp index efc41cf648a5..991e084c42ab 100644 --- a/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +++ b/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp @@ -23,7 +23,7 @@ struct FlattenDependentJoins { //! Detects which Logical Operators have correlated expressions that they are dependent upon, filling the //! has_correlated_expressions map. - bool DetectCorrelatedExpressions(LogicalOperator *op, bool lateral = false, idx_t lateral_depth = 0); + bool DetectCorrelatedExpressions(LogicalOperator &op, bool lateral = false, idx_t lateral_depth = 0); //! Mark entire subtree of Logical Operators as correlated by adding them to the has_correlated_expressions map. 
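	//! Note: has_correlated_expressions is a reference_map_t, which effectively hashes on
	//! the address of the referenced operator, so lookups take the operator itself
	//! (find(op)) rather than a pointer to it (find(&op)).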
bool MarkSubtreeCorrelated(LogicalOperator &op); @@ -35,7 +35,7 @@ struct FlattenDependentJoins { ColumnBinding base_binding; idx_t delim_offset; idx_t data_offset; - unordered_map has_correlated_expressions; + reference_map_t has_correlated_expressions; column_binding_map_t correlated_map; column_binding_map_t replacement_map; const vector &correlated_columns; diff --git a/src/planner/binder/query_node/plan_subquery.cpp b/src/planner/binder/query_node/plan_subquery.cpp index 7370457a6031..29e8f36c3063 100644 --- a/src/planner/binder/query_node/plan_subquery.cpp +++ b/src/planner/binder/query_node/plan_subquery.cpp @@ -254,7 +254,7 @@ static unique_ptr PlanCorrelatedSubquery(Binder &binder, BoundSubque FlattenDependentJoins flatten(binder, correlated_columns, perform_delim); // first we check which logical operators have correlated expressions in the first place - flatten.DetectCorrelatedExpressions(plan.get()); + flatten.DetectCorrelatedExpressions(*plan); // now we push the dependent join down auto dependent_join = flatten.PushDownDependentJoin(std::move(plan)); @@ -279,7 +279,7 @@ static unique_ptr PlanCorrelatedSubquery(Binder &binder, BoundSubque delim_join->mark_index = mark_index; // RHS FlattenDependentJoins flatten(binder, correlated_columns, perform_delim, true); - flatten.DetectCorrelatedExpressions(plan.get()); + flatten.DetectCorrelatedExpressions(*plan); auto dependent_join = flatten.PushDownDependentJoin(std::move(plan)); // fetch the set of columns @@ -307,7 +307,7 @@ static unique_ptr PlanCorrelatedSubquery(Binder &binder, BoundSubque delim_join->mark_index = mark_index; // RHS FlattenDependentJoins flatten(binder, correlated_columns, true, true); - flatten.DetectCorrelatedExpressions(plan.get()); + flatten.DetectCorrelatedExpressions(*plan); auto dependent_join = flatten.PushDownDependentJoin(std::move(plan)); // fetch the columns @@ -411,7 +411,7 @@ void Binder::PlanSubqueries(unique_ptr &expr_ptr, unique_ptr Binder::PlanLateralJoin(unique_ptr left, unique_ptr right, - vector &correlated_columns, + vector &correlated, JoinType join_type, unique_ptr condition) { // scan the right operator for correlated columns // correlated LATERAL JOIN @@ -423,13 +423,13 @@ unique_ptr Binder::PlanLateralJoin(unique_ptr arbitrary_expressions); } - auto perform_delim = PerformDuplicateElimination(*this, correlated_columns); - auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, join_type, std::move(left), perform_delim); + auto perform_delim = PerformDuplicateElimination(*this, correlated); + auto delim_join = CreateDuplicateEliminatedJoin(correlated, join_type, std::move(left), perform_delim); - FlattenDependentJoins flatten(*this, correlated_columns, perform_delim); + FlattenDependentJoins flatten(*this, correlated, perform_delim); // first we check which logical operators have correlated expressions in the first place - flatten.DetectCorrelatedExpressions(right.get(), true); + flatten.DetectCorrelatedExpressions(*right, true); // now we push the dependent join down auto dependent_join = flatten.PushDownDependentJoin(std::move(right)); @@ -448,7 +448,7 @@ unique_ptr Binder::PlanLateralJoin(unique_ptr D_ASSERT(delim_join->conditions.empty()); delim_join->conditions = std::move(conditions); // then add the delim join conditions - CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset, perform_delim); + CreateDelimJoinConditions(*delim_join, correlated, plan_columns, flatten.delim_offset, perform_delim); 
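	// at this point the lateral join has been rewritten into (roughly):
	//   DELIM_JOIN [join_type] (conditions on the correlated columns)
	//     left
	//     dependent_join   (the flattened right side)
	// i.e. the correlated columns flow from the left side into a duplicate-eliminated
	// scan on the right, which is what the conditions constructed above encode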
delim_join->AddChild(std::move(dependent_join)); // check if there are any arbitrary expressions left diff --git a/src/planner/subquery/flatten_dependent_join.cpp b/src/planner/subquery/flatten_dependent_join.cpp index 4e5623cab48d..7e863703472c 100644 --- a/src/planner/subquery/flatten_dependent_join.cpp +++ b/src/planner/subquery/flatten_dependent_join.cpp @@ -27,21 +27,20 @@ FlattenDependentJoins::FlattenDependentJoins(Binder &binder, const vectortype == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN) { + if (op.type == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN) { is_lateral_join = true; } HasCorrelatedExpressions visitor(correlated_columns, lateral, lateral_depth); - visitor.VisitOperator(*op); + visitor.VisitOperator(op); bool has_correlation = visitor.has_correlated_expressions; int child_idx = 0; // now visit the children of this entry and check if they have correlated expressions - for (auto &child : op->children) { + for (auto &child : op.children) { auto new_lateral_depth = lateral_depth; if (is_lateral_join && child_idx == 1) { new_lateral_depth = lateral_depth + 1; @@ -49,7 +48,7 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator *op, boo // we OR the property with its children such that has_correlation is true if either // (1) this node has a correlated expression or // (2) one of its children has a correlated expression - if (DetectCorrelatedExpressions(child.get(), lateral, new_lateral_depth)) { + if (DetectCorrelatedExpressions(*child, lateral, new_lateral_depth)) { has_correlation = true; } child_idx++; @@ -60,10 +59,10 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator *op, boo // If we detect correlation in a materialized or recursive CTE, the entire right side of the operator // needs to be marked as correlated. Otherwise, function PushDownDependentJoinInternal does not do the // right thing. 
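	// For example, in a (sketched) query like
	//   SELECT (WITH cte AS MATERIALIZED (SELECT i.x AS y) SELECT y FROM cte) FROM tbl i;
	// the CTE scan on the right side contains no correlated expression itself, but it
	// still has to be re-evaluated for every row of tbl, so the whole subtree is marked.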
- if (op->type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE || - op->type == LogicalOperatorType::LOGICAL_RECURSIVE_CTE) { + if (op.type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE || + op.type == LogicalOperatorType::LOGICAL_RECURSIVE_CTE) { if (has_correlation) { - MarkSubtreeCorrelated(*op->children[1].get()); + MarkSubtreeCorrelated(*op.children[1].get()); } } return has_correlation; @@ -71,7 +70,7 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator *op, boo bool FlattenDependentJoins::MarkSubtreeCorrelated(LogicalOperator &op) { // Do not mark base table scans as correlated - auto entry = has_correlated_expressions.find(&op); + auto entry = has_correlated_expressions.find(op); D_ASSERT(entry != has_correlated_expressions.end()); bool has_correlation = entry->second; for (auto &child : op.children) { @@ -79,10 +78,10 @@ bool FlattenDependentJoins::MarkSubtreeCorrelated(LogicalOperator &op) { } if (op.type != LogicalOperatorType::LOGICAL_GET || op.children.size() == 1) { if (op.type == LogicalOperatorType::LOGICAL_CTE_REF) { - has_correlated_expressions[&op] = true; + has_correlated_expressions[op] = true; return true; } else { - has_correlated_expressions[&op] = has_correlation; + has_correlated_expressions[op] = has_correlation; } } return has_correlation; @@ -99,17 +98,17 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoin(unique_ return result; } -bool SubqueryDependentFilter(Expression *expr) { - if (expr->expression_class == ExpressionClass::BOUND_CONJUNCTION && - expr->GetExpressionType() == ExpressionType::CONJUNCTION_AND) { - auto &bound_conjuction = expr->Cast(); +bool SubqueryDependentFilter(Expression &expr) { + if (expr.expression_class == ExpressionClass::BOUND_CONJUNCTION && + expr.GetExpressionType() == ExpressionType::CONJUNCTION_AND) { + auto &bound_conjuction = expr.Cast(); for (auto &child : bound_conjuction.children) { - if (SubqueryDependentFilter(child.get())) { + if (SubqueryDependentFilter(*child)) { return true; } } } - if (expr->expression_class == ExpressionClass::BOUND_SUBQUERY) { + if (expr.expression_class == ExpressionClass::BOUND_SUBQUERY) { return true; } return false; @@ -119,7 +118,7 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal bool &parent_propagate_null_values, idx_t lateral_depth) { // first check if the logical operator has correlated expressions - auto entry = has_correlated_expressions.find(plan.get()); + auto entry = has_correlated_expressions.find(*plan); D_ASSERT(entry != has_correlated_expressions.end()); if (!entry->second) { // we reached a node without correlated expressions @@ -151,7 +150,7 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal // filter // first we flatten the dependent join in the child of the filter for (auto &expr : plan->expressions) { - any_join |= SubqueryDependentFilter(expr.get()); + any_join |= SubqueryDependentFilter(*expr); } plan->children[0] = PushDownDependentJoinInternal(std::move(plan->children[0]), parent_propagate_null_values, lateral_depth); @@ -288,8 +287,8 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: { // cross product // push into both sides of the plan - bool left_has_correlation = has_correlated_expressions.find(plan->children[0].get())->second; - bool right_has_correlation = has_correlated_expressions.find(plan->children[1].get())->second; + bool left_has_correlation = has_correlated_expressions.find(*plan->children[0])->second; + bool 
+		bool right_has_correlation = has_correlated_expressions.find(*plan->children[1])->second;
 		if (!right_has_correlation) {
 			// only left has correlation: push into left
 			plan->children[0] = PushDownDependentJoinInternal(std::move(plan->children[0]),
@@ -350,8 +349,8 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 		auto &join = plan->Cast<LogicalJoin>();
 		D_ASSERT(plan->children.size() == 2);
 		// check the correlated expressions in the children of the join
-		bool left_has_correlation = has_correlated_expressions.find(plan->children[0].get())->second;
-		bool right_has_correlation = has_correlated_expressions.find(plan->children[1].get())->second;
+		bool left_has_correlation = has_correlated_expressions.find(*plan->children[0])->second;
+		bool right_has_correlation = has_correlated_expressions.find(*plan->children[1])->second;
 
 		if (join.join_type == JoinType::INNER) {
 			// inner join
@@ -433,12 +432,12 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 				auto &comparison_join = join.Cast<LogicalComparisonJoin>();
 				comparison_join.conditions.push_back(std::move(cond));
 			} else {
-				auto &any_join = join.Cast<LogicalAnyJoin>();
+				auto &logical_any_join = join.Cast<LogicalAnyJoin>();
 				auto comparison = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_NOT_DISTINCT_FROM,
 				                                                       std::move(left), std::move(right));
 				auto conjunction = make_uniq<BoundConjunctionExpression>(
-				    ExpressionType::CONJUNCTION_AND, std::move(comparison), std::move(any_join.condition));
-				any_join.condition = std::move(conjunction);
+				    ExpressionType::CONJUNCTION_AND, std::move(comparison), std::move(logical_any_join.condition));
+				logical_any_join.condition = std::move(conjunction);
 			}
 		}
 		// then we replace any correlated expressions with the corresponding entry in the correlated_map
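This patch replaces pointer-based traversal with references throughout the flattening code: DetectCorrelatedExpressions now takes a LogicalOperator &, and the has_correlated_expressions map is queried with find(*plan) and find(op) instead of raw pointers. A standalone sketch (illustrative only — Node, RefHash and RefEq are invented stand-ins, not DuckDB's actual reference_map_t) of a map keyed by object identity that supports such reference-based lookups:

// identity_map_sketch.cpp — hashes and compares by address, not by value
#include <cassert>
#include <functional>
#include <unordered_map>

struct Node {
	int id;
};

struct RefHash {
	std::size_t operator()(const std::reference_wrapper<Node> &ref) const {
		return std::hash<const Node *>()(&ref.get()); // hash the address: identity semantics
	}
};
struct RefEq {
	bool operator()(const std::reference_wrapper<Node> &a, const std::reference_wrapper<Node> &b) const {
		return &a.get() == &b.get();
	}
};

int main() {
	std::unordered_map<std::reference_wrapper<Node>, bool, RefHash, RefEq> has_correlation;
	Node op {42};
	has_correlation[op] = true;                                 // insert keyed by the object itself
	assert(has_correlation.find(op) != has_correlation.end()); // mirrors find(*plan) in the patch
	return 0;
}

Keying by reference also documents the non-null invariant in the type system, which is what the deleted runtime D_ASSERT(op) used to check.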
From 3516894d197ef49604e5337da2d28144f355770f Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Thu, 11 Apr 2024 20:06:21 +0200
Subject: [PATCH 139/147] For ExpressionDepthReducerRecursive - correctly recurse into nested subqueries

---
 .../expression_binder/lateral_binder.cpp      | 16 +++++------
 .../subquery/lateral/lateral_fuzzer_1463.test | 28 +++++++++++++++++++
 2 files changed, 36 insertions(+), 8 deletions(-)
 create mode 100644 test/sql/subquery/lateral/lateral_fuzzer_1463.test

diff --git a/src/planner/expression_binder/lateral_binder.cpp b/src/planner/expression_binder/lateral_binder.cpp
index 13e3ae59980a..e58d78afd0b7 100644
--- a/src/planner/expression_binder/lateral_binder.cpp
+++ b/src/planner/expression_binder/lateral_binder.cpp
@@ -81,11 +81,6 @@ static void ReduceColumnDepth(vector<CorrelatedColumnInfo> &columns,
 	}
 }
 
-static void ReduceExpressionSubquery(BoundSubqueryExpression &expr,
-                                     const vector<CorrelatedColumnInfo> &correlated_columns) {
-	ReduceColumnDepth(expr.binder->correlated_columns, correlated_columns);
-}
-
 class ExpressionDepthReducerRecursive : public BoundNodeVisitor {
 public:
 	explicit ExpressionDepthReducerRecursive(const vector<CorrelatedColumnInfo> &correlated)
@@ -111,6 +106,13 @@ class ExpressionDepthReducerRecursive : public BoundNodeVisitor {
 		BoundNodeVisitor::VisitBoundTableRef(ref);
 	}
 
+	static void ReduceExpressionSubquery(BoundSubqueryExpression &expr,
+	                                     const vector<CorrelatedColumnInfo> &correlated_columns) {
+		ReduceColumnDepth(expr.binder->correlated_columns, correlated_columns);
+		ExpressionDepthReducerRecursive recursive(correlated_columns);
+		recursive.VisitBoundQueryNode(*expr.subquery);
+	}
+
 private:
 	const vector<CorrelatedColumnInfo> &correlated_columns;
 };
@@ -127,9 +129,7 @@ class ExpressionDepthReducer : public LogicalOperatorVisitor {
 	}
 
 	unique_ptr<Expression> VisitReplace(BoundSubqueryExpression &expr, unique_ptr<Expression> *expr_ptr) override {
-		ReduceExpressionSubquery(expr, correlated_columns);
-		ExpressionDepthReducerRecursive recursive(correlated_columns);
-		recursive.VisitBoundQueryNode(*expr.subquery);
+		ExpressionDepthReducerRecursive::ReduceExpressionSubquery(expr, correlated_columns);
 		return nullptr;
 	}
 
diff --git a/test/sql/subquery/lateral/lateral_fuzzer_1463.test b/test/sql/subquery/lateral/lateral_fuzzer_1463.test
new file mode 100644
index 000000000000..003bfd424411
--- /dev/null
+++ b/test/sql/subquery/lateral/lateral_fuzzer_1463.test
@@ -0,0 +1,28 @@
+# name: test/sql/subquery/lateral/lateral_fuzzer_1463.test
+# description: Test case for fuzzer issue 1463: Expression with depth > 1 detected in non-lateral join
+# group: [lateral]
+
+query II
+SELECT *
+FROM
+  (SELECT 42 AS c1) AS ref,
+  (SELECT a + b + 1
+   FROM
+     (SELECT 1) t1(a),
+     (SELECT (SELECT (SELECT ref.c1 + 1)) + 1) t2(b)
+  )
+;
+----
+42	46
+
+# postgres compatible variant
+query I
+SELECT NULL
+FROM
+  (SELECT 42 AS c1) AS ref,
+  LATERAL (SELECT NULL
+           FROM
+             (SELECT NULL) AS r2,
+             (SELECT (SELECT (SELECT ref.c1))) AS r3) AS r4;
+----
+NULL
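The fix above makes ReduceExpressionSubquery a member of the recursive visitor, so reducing the depth of correlated columns also walks into subqueries nested inside the subquery — exactly the SELECT (SELECT (SELECT ref.c1)) shape exercised by the new test. A toy model (plain C++; Subquery and ReduceDepth are invented names, not DuckDB's binder classes) of why the recursion matters:

// depth_reduction_sketch.cpp — every nesting level must be adjusted, not just the outermost
#include <iostream>
#include <memory>
#include <vector>

struct Subquery {
	int correlated_depth;                            // stand-in for a correlated column's depth
	std::vector<std::unique_ptr<Subquery>> children; // nested subqueries
};

static void ReduceDepth(Subquery &sq) {
	if (sq.correlated_depth > 1) {
		sq.correlated_depth--; // the flattened lateral join removes one level of correlation
	}
	for (auto &child : sq.children) {
		ReduceDepth(*child); // the fix: recurse into nested subqueries as well
	}
}

int main() {
	Subquery outer {3, {}};
	outer.children.push_back(std::make_unique<Subquery>(Subquery {3, {}}));
	ReduceDepth(outer);
	// without the recursive call the child would stay at depth 3; prints "2 2"
	std::cout << outer.correlated_depth << " " << outer.children[0]->correlated_depth << "\n";
	return 0;
}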
From 08ac807ee9d39a1179a195060f4cf06023d5d704 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Thu, 11 Apr 2024 20:08:22 +0200
Subject: [PATCH 140/147] Fix reduce SQL test

---
 test/sqlsmith/sql_reduce.test | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/test/sqlsmith/sql_reduce.test b/test/sqlsmith/sql_reduce.test
index 73ee30559b1c..7b956e8db065 100644
--- a/test/sqlsmith/sql_reduce.test
+++ b/test/sqlsmith/sql_reduce.test
@@ -7,18 +7,33 @@
 require sqlsmith
 
 query I
 SELECT * FROM reduce_sql_statement('SELECT a, b FROM tbl') ORDER BY 1
 ----
+SELECT NULL, b FROM tbl
+SELECT NULL, b FROM tbl
 SELECT a FROM tbl
+SELECT a, NULL FROM tbl
+SELECT a, NULL FROM tbl
 SELECT a, b
 SELECT b FROM tbl
 
 query I
 SELECT * FROM reduce_sql_statement('SELECT a, b FROM tbl WHERE a AND b') ORDER BY 1
 ----
+SELECT NULL, b FROM tbl WHERE (a AND b)
+SELECT NULL, b FROM tbl WHERE (a AND b)
 SELECT a FROM tbl WHERE (a AND b)
+SELECT a, NULL FROM tbl WHERE (a AND b)
+SELECT a, NULL FROM tbl WHERE (a AND b)
 SELECT a, b FROM tbl
+SELECT a, b FROM tbl WHERE (NULL AND b)
+SELECT a, b FROM tbl WHERE (NULL AND b)
+SELECT a, b FROM tbl WHERE (a AND NULL)
+SELECT a, b FROM tbl WHERE (a AND NULL)
 SELECT a, b FROM tbl WHERE NULL
+SELECT a, b FROM tbl WHERE NULL
+SELECT a, b FROM tbl WHERE a
 SELECT a, b FROM tbl WHERE a
 SELECT a, b FROM tbl WHERE b
+SELECT a, b FROM tbl WHERE b
 SELECT a, b WHERE (a AND b)
 SELECT b FROM tbl WHERE (a AND b)
 
 INSERT INTO tbl (VALUES (1))
 INSERT INTO tbl (VALUES (2))
 INSERT INTO tbl SELECT *
 INSERT INTO tbl SELECT NULL FROM (VALUES (1, 2))
+INSERT INTO tbl SELECT NULL FROM (VALUES (1, 2)) AS valueslist
 
 query I
 SELECT * FROM reduce_sql_statement('UPDATE tbl SET i=3, j=4 WHERE z=5') ORDER BY 1
 ----
 UPDATE tbl SET i = 3 WHERE (z = 5)
 UPDATE tbl SET i = 3, j = 4
+UPDATE tbl SET i = 3, j = 4 WHERE (NULL = 5)
+UPDATE tbl SET i = 3, j = 4 WHERE 5
 UPDATE tbl SET i = 3, j = 4 WHERE NULL
+UPDATE tbl SET i = 3, j = 4 WHERE z
 UPDATE tbl SET j = 4 WHERE (z = 5)
 
 query I
 SELECT * FROM reduce_sql_statement('DELETE FROM a WHERE i >= 2000 AND i < 5000;') ORDER BY 1
 ----
 DELETE FROM a
+DELETE FROM a WHERE ((NULL >= 2000) AND (i < 5000))
+DELETE FROM a WHERE ((i >= 2000) AND (NULL < 5000))
+DELETE FROM a WHERE ((i >= 2000) AND 5000)
+DELETE FROM a WHERE ((i >= 2000) AND NULL)
+DELETE FROM a WHERE ((i >= 2000) AND i)
+DELETE FROM a WHERE (2000 AND (i < 5000))
+DELETE FROM a WHERE (NULL AND (i < 5000))
 DELETE FROM a WHERE (i < 5000)
 DELETE FROM a WHERE (i >= 2000)
+DELETE FROM a WHERE (i AND (i < 5000))
 DELETE FROM a WHERE NULL

From 1795abd305f3c538ab93d70f7e4c0299c340baf6 Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Thu, 11 Apr 2024 20:18:19 +0200
Subject: [PATCH 141/147] Format fix

---
 .../sqlsmith/include/statement_simplifier.hpp |  3 +-
 extension/sqlsmith/statement_simplifier.cpp   | 13 +++++---
 scripts/fuzzer_helper.py                      | 33 +++++++++++++++----
 scripts/reduce_sql.py                         | 32 ++++++++++++++----
 scripts/run_fuzzer.py                         | 29 ++++++++++++----
 .../binder/query_node/plan_subquery.cpp       |  4 +--
 .../expression_binder/lateral_binder.cpp      |  2 +-
 7 files changed, 87 insertions(+), 29 deletions(-)

diff --git a/extension/sqlsmith/include/statement_simplifier.hpp b/extension/sqlsmith/include/statement_simplifier.hpp
index 2bdf2715cd18..9e6fa160ede4 100644
--- a/extension/sqlsmith/include/statement_simplifier.hpp
+++ b/extension/sqlsmith/include/statement_simplifier.hpp
@@ -65,7 +65,8 @@ class StatementSimplifier {
 	void SimplifyExpression(duckdb::unique_ptr<ParsedExpression> &expr);
 	void SimplifyOptionalExpression(duckdb::unique_ptr<ParsedExpression> &expr);
 	void SimplifyChildExpression(duckdb::unique_ptr<ParsedExpression> &expr, unique_ptr<ParsedExpression> &child);
-	void SimplifyExpressionList(duckdb::unique_ptr<ParsedExpression> &expr, vector<unique_ptr<ParsedExpression>> &expression_list);
+	void SimplifyExpressionList(duckdb::unique_ptr<ParsedExpression> &expr,
+	                            vector<unique_ptr<ParsedExpression>> &expression_list);
 	void SimplifyExpressionList(vector<unique_ptr<ParsedExpression>> &expression_list, bool is_optional = true);
 
 	void Simplify(CommonTableExpressionMap &cte_map);

diff --git a/extension/sqlsmith/statement_simplifier.cpp b/extension/sqlsmith/statement_simplifier.cpp
index edfd3437ee70..998d32df9fcc 100644
--- a/extension/sqlsmith/statement_simplifier.cpp
+++ b/extension/sqlsmith/statement_simplifier.cpp
@@ -142,20 +142,23 @@ void StatementSimplifier::Simplify(QueryNode &node) {
 	SimplifyList(node.modifiers);
 }
 
-void StatementSimplifier::SimplifyExpressionList(duckdb::unique_ptr<ParsedExpression> &expr, vector<unique_ptr<ParsedExpression>> &expression_list) {
-	for(auto &child : expression_list) {
+void StatementSimplifier::SimplifyExpressionList(duckdb::unique_ptr<ParsedExpression> &expr,
+                                                 vector<unique_ptr<ParsedExpression>> &expression_list) {
+	for (auto &child : expression_list) {
 		SimplifyChildExpression(expr, child);
 	}
 }
 
-void StatementSimplifier::SimplifyExpressionList(vector<unique_ptr<ParsedExpression>> &expression_list, bool is_optional) {
+void StatementSimplifier::SimplifyExpressionList(vector<unique_ptr<ParsedExpression>> &expression_list,
+                                                 bool is_optional) {
 	SimplifyList(expression_list, is_optional);
-	for(auto &child : expression_list) {
+	for (auto &child : expression_list) {
 		SimplifyExpression(child);
 	}
 }
 
-void StatementSimplifier::SimplifyChildExpression(duckdb::unique_ptr<ParsedExpression> &expr, unique_ptr<ParsedExpression> &child) {
+void StatementSimplifier::SimplifyChildExpression(duckdb::unique_ptr<ParsedExpression> &expr,
+                                                  unique_ptr<ParsedExpression> &child) {
 	if (!child) {
 		return;
 	}

diff --git a/scripts/fuzzer_helper.py b/scripts/fuzzer_helper.py
index d457b3172d49..dd82e0622490 100644
--- a/scripts/fuzzer_helper.py
+++ b/scripts/fuzzer_helper.py
@@ -29,6 +29,7 @@ footer = '''
 ```'''
 
+
 # github stuff
 def issue_url():
     return 'https://api.github.com/repos/%s/%s/issues' % (REPO_OWNER, REPO_NAME)
@@ -47,20 +48,22 @@ def get_token():
         print("Incorrect length for FUZZEROFDUCKSKEY")
         exit(1)
     return token
+
+
 def create_session():
     # Create an authenticated session to create the issue
     session = requests.Session()
     session.headers.update({'Authorization': 'token %s' % (get_token(),)})
     return session
 
+
 def make_github_issue(title, body):
     if len(title) > 240:
         # avoid title is too long error (maximum is 256 characters)
         title = title[:240] + '...'
session = create_session() url = issue_url() - issue = {'title': title, - 'body': body} + issue = {'title': title, 'body': body} r = session.post(url, json.dumps(issue)) if r.status_code == 201: print('Successfully created Issue "%s"' % title) @@ -69,9 +72,10 @@ def make_github_issue(title, body): print('Response:', r.content.decode('utf8')) raise Exception("Failed to create issue") + def get_github_issues(page): session = create_session() - url = issue_url()+'?per_page=100&page='+str(page) + url = issue_url() + '?per_page=100&page=' + str(page) r = session.get(url) if r.status_code != 200: print('Failed to get list of issues') @@ -79,6 +83,7 @@ def get_github_issues(page): raise Exception("Failed to get list of issues") return json.loads(r.content.decode('utf8')) + def close_github_issue(number): session = create_session() url = issue_url() + '/' + str(number) @@ -91,6 +96,7 @@ def close_github_issue(number): print('Response:', r.content.decode('utf8')) raise Exception("Failed to close issue") + def label_github_issue(number, label): session = create_session() url = issue_url() + '/' + str(number) @@ -103,22 +109,26 @@ def label_github_issue(number, label): print('Response:', r.content.decode('utf8')) raise Exception("Failed to label issue") + def extract_issue(body, nr): try: splits = body.split(middle) sql = splits[0].split(header)[1] - error = splits[1][:-len(footer)] + error = splits[1][: -len(footer)] return (sql, error) except: print(f"Failed to extract SQL/error message from issue {nr}") print(body) return None + def run_shell_command_batch(shell, cmd): command = [shell, '--batch', '-init', '/dev/null'] try: - res = subprocess.run(command, input=bytearray(cmd, 'utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300) + res = subprocess.run( + command, input=bytearray(cmd, 'utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300 + ) except subprocess.TimeoutExpired: print(f"TIMEOUT... 
{cmd}") return ("", "", 0, True) @@ -126,6 +136,7 @@ def run_shell_command_batch(shell, cmd): stderr = res.stderr.decode('utf8').strip() return (stdout, stderr, res.returncode, False) + def test_reproducibility(shell, issue, current_errors, perform_check): extract = extract_issue(issue['body'], issue['number']) labels = issue['labels'] @@ -152,9 +163,10 @@ def test_reproducibility(shell, issue, current_errors, perform_check): current_errors[error] = issue return True + def extract_github_issues(shell, perform_check): current_errors = dict() - for p in range(1,10): + for p in range(1, 10): issues = get_github_issues(p) for issue in issues: # check if the github issue is still reproducible @@ -164,16 +176,23 @@ def extract_github_issues(shell, perform_check): close_github_issue(int(issue['number'])) return current_errors + def file_issue(cmd, error_msg, fuzzer, seed, hash): # issue is new, file it print("Filing new issue to Github") title = error_msg - body = fuzzer_desc.replace("${FUZZER}", fuzzer).replace("${FULL_HASH}", hash).replace("${SHORT_HASH}", hash[:5]).replace("${SEED}", str(seed)) + body = ( + fuzzer_desc.replace("${FUZZER}", fuzzer) + .replace("${FULL_HASH}", hash) + .replace("${SHORT_HASH}", hash[:5]) + .replace("${SEED}", str(seed)) + ) body += header + cmd + middle + error_msg + footer print(title, body) make_github_issue(title, body) + def is_internal_error(error): if 'differs from original result' in error: return True diff --git a/scripts/reduce_sql.py b/scripts/reduce_sql.py index 92a449b799b1..9d96bc41fca7 100644 --- a/scripts/reduce_sql.py +++ b/scripts/reduce_sql.py @@ -18,6 +18,7 @@ SELECT * FROM reduce_sql_statement('${QUERY}'); ''' + def sanitize_error(err): err = re.sub(r'Error: near line \d+: ', '', err) err = err.replace(os.getcwd() + '/', '') @@ -27,6 +28,7 @@ def sanitize_error(err): err = 'AddressSanitizer error ' + match return err + def run_shell_command(shell, cmd): command = [shell, '-csv', '--batch', '-init', '/dev/null'] @@ -35,6 +37,7 @@ def run_shell_command(shell, cmd): stderr = res.stderr.decode('utf8').strip() return (stdout, stderr, res.returncode) + def get_reduced_sql(shell, sql_query): reduce_query = get_reduced_query.replace('${QUERY}', sql_query.replace("'", "''")) (stdout, stderr, returncode) = run_shell_command(shell, reduce_query) @@ -47,6 +50,7 @@ def get_reduced_sql(shell, sql_query): reduce_candidates.append(line.strip('"').replace('""', '"')) return reduce_candidates[1:] + def reduce(sql_query, data_load, shell, error_msg, max_time_seconds=300): start = time.time() while True: @@ -73,18 +77,22 @@ def reduce(sql_query, data_load, shell, error_msg, max_time_seconds=300): break return sql_query + def is_ddl_query(query): query = query.lower() if 'create' in query or 'insert' in query or 'update' in query or 'delete' in query: return True return False + def initial_cleanup(query_log): query_log = query_log.replace('SELECT * FROM pragma_version()\n', '') return query_log + def run_queries_until_crash_mp(queries, result_file): import duckdb + con = duckdb.connect() sqlite_con = sqlite3.connect(result_file) sqlite_con.execute('CREATE TABLE queries(id INT, text VARCHAR)') @@ -109,7 +117,7 @@ def run_queries_until_crash_mp(queries, result_file): keep_query = True sqlite_con.execute('UPDATE result SET text=?', (exception_error,)) if not keep_query: - sqlite_con.execute('DELETE FROM queries WHERE id=?', (id, )) + sqlite_con.execute('DELETE FROM queries WHERE id=?', (id,)) if is_internal_error: # found internal error: no need to try further queries 
break @@ -120,6 +128,7 @@ def run_queries_until_crash_mp(queries, result_file): sqlite_con.commit() sqlite_con.close() + def run_queries_until_crash(queries): sqlite_file = 'cleaned_queries.db' if os.path.isfile(sqlite_file): @@ -147,8 +156,10 @@ def cleanup_irrelevant_queries(query_log): queries = [x for x in query_log.split(';\n') if len(x) > 0] return run_queries_until_crash(queries) + # def reduce_internal(start, sql_query, data_load, queries_final, shell, error_msg, max_time_seconds=300): + def reduce_query_log_query(start, shell, queries, query_index, max_time_seconds): new_query_list = queries[:] sql_query = queries[query_index] @@ -180,6 +191,7 @@ def reduce_query_log_query(start, shell, queries, query_index, max_time_seconds) break return sql_query + def reduce_query_log(queries, shell, max_time_seconds=300): start = time.time() current_index = 0 @@ -190,7 +202,7 @@ def reduce_query_log(queries, shell, max_time_seconds=300): if current_time - start > max_time_seconds: break # remove the query at "current_index" - new_queries = queries[:current_index] + queries[current_index + 1:] + new_queries = queries[:current_index] + queries[current_index + 1 :] # try to run the queries and check if we still get the same error (new_queries_x, current_error) = run_queries_until_crash(new_queries) if current_error is None: @@ -212,12 +224,19 @@ def reduce_query_log(queries, shell, max_time_seconds=300): if __name__ == "__main__": import argparse + parser = argparse.ArgumentParser(description='Reduce a problematic SQL query') - parser.add_argument('--shell', dest='shell', action='store', help='Path to the shell executable', default='build/debug/duckdb') + parser.add_argument( + '--shell', dest='shell', action='store', help='Path to the shell executable', default='build/debug/duckdb' + ) parser.add_argument('--load', dest='load', action='store', help='Path to the data load script', required=True) parser.add_argument('--exec', dest='exec', action='store', help='Path to the executable script', required=True) - parser.add_argument('--inplace', dest='inplace', action='store_true', help='If true, overrides the exec script with the final query') - parser.add_argument('--max-time', dest='max_time', action='store', help='Maximum time in seconds to run the reducer', default=300) + parser.add_argument( + '--inplace', dest='inplace', action='store_true', help='If true, overrides the exec script with the final query' + ) + parser.add_argument( + '--max-time', dest='max_time', action='store', help='Maximum time in seconds to run the reducer', default=300 + ) args = parser.parse_args() print("Starting reduce process") @@ -234,7 +253,6 @@ def reduce_query_log(queries, shell, max_time_seconds=300): print(expected_error) print("===================================================") - final_query = reduce(sql_query, data_load, shell, expected_error, args.max_time) print("Found final reduced query") print("===================================================") @@ -302,4 +320,4 @@ def reduce_query_log(queries, shell, max_time_seconds=300): # limit 88 # ''' # -# print(reduce(sql_query, data_load, shell, error_msg)) \ No newline at end of file +# print(reduce(sql_query, data_load, shell, error_msg)) diff --git a/scripts/run_fuzzer.py b/scripts/run_fuzzer.py index 57f7a677b3fd..c7d096f11ed9 100644 --- a/scripts/run_fuzzer.py +++ b/scripts/run_fuzzer.py @@ -50,6 +50,7 @@ git_hash = os.getenv('DUCKDB_HASH') + def create_db_script(db): if db == 'alltypes': return 'create table all_types as select * exclude(small_enum, 
medium_enum, large_enum) from test_all_types();'
     elif db == 'tpch':
         return 'call dbgen(sf=0.1);'
     elif db == 'tpcds':
         return 'call dsdgen(sf=0.1);'
     else:
         raise Exception("Unknown database creation script")
 
+
 def run_fuzzer_script(fuzzer):
     if fuzzer == 'sqlsmith':
         return "call sqlsmith(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');"
     elif fuzzer == 'duckfuzz':
         return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');"
     else:
         raise Exception("Unknown fuzzer type")
 
+
 def get_fuzzer_name(fuzzer):
     if fuzzer == 'sqlsmith':
         return 'SQLSmith'
     elif fuzzer == 'duckfuzz':
         return 'DuckFuzz'
     else:
         return 'Unknown'
 
+
 def run_shell_command(cmd):
     command = [shell, '--batch', '-init', '/dev/null']
@@ -96,13 +100,21 @@
 last_query_log_file = 'sqlsmith.log'
 complete_log_file = 'sqlsmith.complete.log'
 
-print(f'''==========================================
+print(
+    f'''==========================================
                 RUNNING {fuzzer} on {db}
-==========================================''')
+=========================================='''
+)
 
 load_script = create_db_script(db)
 fuzzer_name = get_fuzzer_name(fuzzer)
-fuzzer = run_fuzzer_script(fuzzer).replace('${MAX_QUERIES}', str(max_queries)).replace('${LAST_LOG_FILE}', last_query_log_file).replace('${COMPLETE_LOG_FILE}', complete_log_file).replace('${SEED}', str(seed))
+fuzzer = (
+    run_fuzzer_script(fuzzer)
+    .replace('${MAX_QUERIES}', str(max_queries))
+    .replace('${LAST_LOG_FILE}', last_query_log_file)
+    .replace('${COMPLETE_LOG_FILE}', complete_log_file)
+    .replace('${SEED}', str(seed))
+)
 
 print(load_script)
 print(fuzzer)
@@ -113,9 +125,11 @@
 (stdout, stderr, returncode) = run_shell_command(cmd)
 
-print(f'''==========================================
+print(
+    f'''==========================================
                 FINISHED RUNNING
-==========================================''')
+=========================================='''
+)
 print("============== STDOUT ================")
 print(stdout)
 print("============== STDERR =================")
@@ -160,7 +174,10 @@
 # check if this is a duplicate issue
 if error_msg in current_errors:
     print("Skip filing duplicate issue")
-    print("Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/" + str(current_errors[error_msg]['number']))
+    print(
+        "Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/"
+        + str(current_errors[error_msg]['number'])
+    )
     exit(0)
 
 print(last_query)

diff --git a/src/planner/binder/query_node/plan_subquery.cpp b/src/planner/binder/query_node/plan_subquery.cpp
index 29e8f36c3063..3f3aaa92c9aa 100644
--- a/src/planner/binder/query_node/plan_subquery.cpp
+++ b/src/planner/binder/query_node/plan_subquery.cpp
@@ -411,8 +411,8 @@ void Binder::PlanSubqueries(unique_ptr<Expression> &expr_ptr, unique_ptr<Logical
 unique_ptr<LogicalOperator> Binder::PlanLateralJoin(unique_ptr<LogicalOperator> left, unique_ptr<LogicalOperator> right,
-                                                    vector<CorrelatedColumnInfo> &correlated,
-                                                    JoinType join_type, unique_ptr<Expression> condition) {
+                                                    vector<CorrelatedColumnInfo> &correlated, JoinType join_type,
+                                                    unique_ptr<Expression> condition) {
 	// scan the right operator for correlated columns
 	// correlated LATERAL JOIN
 	vector<JoinCondition> conditions;

diff --git a/src/planner/expression_binder/lateral_binder.cpp b/src/planner/expression_binder/lateral_binder.cpp
index e58d78afd0b7..21ceb4e50c3f 100644
--- a/src/planner/expression_binder/lateral_binder.cpp
+++ b/src/planner/expression_binder/lateral_binder.cpp
@@ -107,7 +107,7 @@ class ExpressionDepthReducerRecursive : public BoundNodeVisitor {
 	}
 
 	static void ReduceExpressionSubquery(BoundSubqueryExpression &expr,
-	                                 const vector<CorrelatedColumnInfo> &correlated_columns) {
+	                                     const vector<CorrelatedColumnInfo> &correlated_columns) {
 		ReduceColumnDepth(expr.binder->correlated_columns, correlated_columns);
 		ExpressionDepthReducerRecursive recursive(correlated_columns);
 		recursive.VisitBoundQueryNode(*expr.subquery);

From 0d77ca409657e7a9e25ee8da812169e973b3470a Mon Sep 17 00:00:00 2001
From: Mark Raasveldt
Date: Thu, 11 Apr 2024 22:58:23 +0200
Subject: [PATCH 142/147] Fix shell tests

---
 tools/shell/tests/test_shell_basics.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tools/shell/tests/test_shell_basics.py b/tools/shell/tests/test_shell_basics.py
index 41d605017079..7fecae9cd1b0 100644
--- a/tools/shell/tests/test_shell_basics.py
+++ b/tools/shell/tests/test_shell_basics.py
@@ -326,14 +326,6 @@ def test_show_basic(shell):
     result = test.run()
     result.check_stdout("rowseparator")
 
-def test_limit_error(shell):
-    test = (
-        ShellTest(shell)
-        .statement(".limit length 42")
-    )
-    result = test.run()
-    result.check_stderr("sqlite3_limit")
-
 def test_timeout(shell):
     test = (
         ShellTest(shell)
@@ -1039,8 +1031,6 @@ def test_nullbyte_error_rendering(shell):
     result.check_stderr('INT32')
 
 @pytest.mark.parametrize("stmt", [
-    "select decimal_mul(NULL, NULL);",
-    "select decimal_mul(NULL, i) FROM range(3) t(i);",
     "select sha3(NULL);"
 ])
 def test_sqlite_udf_null(shell, stmt):

From ec6186246b94cc7bfb387c0017f0f7c322a4cbdb Mon Sep 17 00:00:00 2001
From: Carlo Piovesan
Date: Thu, 11 Apr 2024 12:10:24 +0200
Subject: [PATCH 143/147] LogicalDelete: Move from dynamic_cast to Cast

---
 src/planner/operator/logical_delete.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/planner/operator/logical_delete.cpp b/src/planner/operator/logical_delete.cpp
index a028a1ea6f36..950f2eaa2ebb 100644
--- a/src/planner/operator/logical_delete.cpp
+++ b/src/planner/operator/logical_delete.cpp
@@ -14,7 +14,7 @@ LogicalDelete::LogicalDelete(TableCatalogEntry &table, idx_t table_index)
 LogicalDelete::LogicalDelete(ClientContext &context, const unique_ptr<CreateInfo> &table_info)
     : LogicalOperator(LogicalOperatorType::LOGICAL_DELETE),
       table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
-                                                 dynamic_cast<CreateTableInfo &>(*table_info).table)) {
+                                                 table_info->Cast<CreateTableInfo>().table)) {
 }
 
 idx_t LogicalDelete::EstimateCardinality(ClientContext &context) {

From aac4b382daea9a93baa7af23bf0b4b019c7f4c62 Mon Sep 17 00:00:00 2001
From: Carlo Piovesan
Date: Thu, 11 Apr 2024 12:11:01 +0200
Subject: [PATCH 144/147] LogicalUpdate: Move from dynamic_cast to Cast

---
 src/planner/operator/logical_update.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/planner/operator/logical_update.cpp b/src/planner/operator/logical_update.cpp
index e66dd36d1a9c..edcfd5d891be 100644
--- a/src/planner/operator/logical_update.cpp
+++ b/src/planner/operator/logical_update.cpp
@@ -12,7 +12,7 @@ LogicalUpdate::LogicalUpdate(TableCatalogEntry &table)
 LogicalUpdate::LogicalUpdate(ClientContext &context, const unique_ptr<CreateInfo> &table_info)
     : LogicalOperator(LogicalOperatorType::LOGICAL_UPDATE),
       table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
-                                                 dynamic_cast<CreateTableInfo &>(*table_info).table)) {
+                                                 table_info->Cast<CreateTableInfo>().table)) {
 }
 
 idx_t LogicalUpdate::EstimateCardinality(ClientContext &context) {
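Patches 143 and 144 swap RTTI-based dynamic_cast for the Cast<TARGET>() helper that the catalog and planner classes expose. The following minimal sketch (a simplified stand-in, not the actual implementation in duckdb/common/helper.hpp; assert replaces D_ASSERT) shows the shape of the idiom these commits rely on:

// cast_idiom_sketch.cpp — a debug-checked reinterpret_cast member template
#include <cassert>

struct CreateInfo {
	virtual ~CreateInfo() = default;

	template <class TARGET>
	TARGET &Cast() {
		assert(dynamic_cast<TARGET *>(this));     // type check in debug builds only
		return reinterpret_cast<TARGET &>(*this); // no RTTI cost in release builds
	}
};

struct CreateTableInfo : CreateInfo {
	const char *table = "my_table";
};

int main() {
	CreateTableInfo info;
	CreateInfo &base = info;
	// mirrors table_info->Cast<CreateTableInfo>().table in the patches above
	return base.Cast<CreateTableInfo>().table != nullptr ? 0 : 1;
}

Because the check only runs under the debug assertion macro, release builds pay nothing for the cast, while debug builds still catch the same type mismatches the old dynamic_cast would have.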
From 55991db6925f17ea7623b8e5b6a9ef540f8945e4 Mon Sep 17 00:00:00 2001
From: Carlo Piovesan
Date: Thu, 11 Apr 2024 12:11:32 +0200
Subject: [PATCH 145/147] LogicalInsert: Move from dynamic_cast to Cast

---
 src/planner/operator/logical_insert.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/planner/operator/logical_insert.cpp b/src/planner/operator/logical_insert.cpp
index 3846ed009742..518661616058 100644
--- a/src/planner/operator/logical_insert.cpp
+++ b/src/planner/operator/logical_insert.cpp
@@ -14,7 +14,7 @@ LogicalInsert::LogicalInsert(TableCatalogEntry &table, idx_t table_index)
 LogicalInsert::LogicalInsert(ClientContext &context, const unique_ptr<CreateInfo> table_info)
     : LogicalOperator(LogicalOperatorType::LOGICAL_INSERT),
       table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
-                                                 dynamic_cast<CreateTableInfo &>(*table_info).table)) {
+                                                 table_info->Cast<CreateTableInfo>().table)) {
 }
 
 idx_t LogicalInsert::EstimateCardinality(ClientContext &context) {

From 4c78ff0e4ecf83c70b0d73891fa02b755615b380 Mon Sep 17 00:00:00 2001
From: Carlo Piovesan
Date: Thu, 11 Apr 2024 16:53:55 +0200
Subject: [PATCH 146/147] Add stricter runtime check on dynamic_cast being a no-op

---
 src/include/duckdb/common/helper.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/include/duckdb/common/helper.hpp b/src/include/duckdb/common/helper.hpp
index d4c07cc47091..c989983ba013 100644
--- a/src/include/duckdb/common/helper.hpp
+++ b/src/include/duckdb/common/helper.hpp
@@ -217,7 +217,8 @@ template <class T>
 bool RefersToSameObject(const T &a, const T &b) {
 	return &a == &b;
 }
 
 template <class TARGET, class SRC>
 void DynamicCastCheck(const SRC *source) {
 #ifndef __APPLE__
-	D_ASSERT(dynamic_cast<const TARGET *>(source));
+	// Actual check is on the fact that dynamic_cast and reinterpret_cast are equivalent
+	D_ASSERT(reinterpret_cast<const TARGET *>(source) == dynamic_cast<const TARGET *>(source));
 #endif
 }
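The stricter D_ASSERT added above is not equivalent to the old one: besides rejecting wrong types, it also rejects casts where dynamic_cast succeeds but adjusts the pointer, which a reinterpret_cast-based Cast() would silently get wrong. A small standalone example (hypothetical types A, B, C, not from the DuckDB tree) of the case the new check catches:

// mi_pointer_adjustment.cpp — with multiple inheritance the two casts diverge
#include <iostream>

struct A {
	virtual ~A() = default;
	int a = 1;
};
struct B {
	virtual ~B() = default;
	int b = 2;
};
struct C : A, B {};

int main() {
	C c;
	B *as_b = &c; // the implicit upcast shifts the pointer past the A subobject
	// dynamic_cast adjusts back to the start of C, reinterpret_cast does not: prints 0
	std::cout << (reinterpret_cast<C *>(as_b) == dynamic_cast<C *>(as_b)) << "\n";
	return 0;
}

For DuckDB's single-inheritance class hierarchies the two casts agree and the assertion is a no-op; the check exists to fail loudly in debug builds if that assumption is ever violated.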
From 39200d51d683ceb12414cc061129cb0ef706b9bf Mon Sep 17 00:00:00 2001
From: Carlo Piovesan
Date: Thu, 11 Apr 2024 14:44:23 +0200
Subject: [PATCH 147/147] More dynamic_casts mirroring non-const case

---
 src/include/duckdb/catalog/catalog_entry.hpp          |  2 +-
 src/include/duckdb/common/allocator.hpp               |  2 +-
 src/include/duckdb/common/extra_type_info.hpp         |  2 +-
 .../common/types/column/partitioned_column_data.hpp   |  2 +-
 .../common/types/row/partitioned_tuple_data.hpp       |  2 +-
 src/include/duckdb/common/types/vector_buffer.hpp     |  2 +-
 .../duckdb/execution/expression_executor_state.hpp    |  2 +-
 .../duckdb/execution/physical_operator_states.hpp     | 12 ++++++------
 src/include/duckdb/execution/window_executor.hpp      |  2 +-
 src/include/duckdb/execution/window_segment_tree.hpp  |  2 +-
 src/include/duckdb/function/cast/default_casts.hpp    |  4 ++--
 src/include/duckdb/function/compression_function.hpp  |  8 ++++----
 src/include/duckdb/function/copy_function.hpp         |  6 +++---
 src/include/duckdb/function/function.hpp              |  4 ++--
 src/include/duckdb/function/scalar_function.hpp       |  2 +-
 src/include/duckdb/function/table_function.hpp        |  6 +++---
 src/include/duckdb/main/relation.hpp                  |  2 +-
 src/include/duckdb/parallel/event.hpp                 |  2 +-
 .../duckdb/parser/parsed_data/extra_drop_info.hpp     |  2 +-
 src/include/duckdb/parser/parsed_data/parse_info.hpp  |  2 +-
 src/include/duckdb/storage/data_pointer.hpp           |  2 +-
 src/include/duckdb/storage/index.hpp                  |  2 +-
 src/include/duckdb/storage/storage_manager.hpp        |  2 +-
 .../duckdb/storage/table/column_checkpoint_state.hpp  |  2 +-
 src/include/duckdb/storage/table/scan_state.hpp       |  4 ++--
 src/include/duckdb/transaction/transaction.hpp        |  2 +-
 26 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/src/include/duckdb/catalog/catalog_entry.hpp b/src/include/duckdb/catalog/catalog_entry.hpp
index fd0ed6e222b2..29c46fa10578 100644
--- a/src/include/duckdb/catalog/catalog_entry.hpp
+++ b/src/include/duckdb/catalog/catalog_entry.hpp
@@ -102,7 +102,7 @@ class CatalogEntry {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/common/allocator.hpp b/src/include/duckdb/common/allocator.hpp
index 7c82f049aaa7..ac27d267a9db 100644
--- a/src/include/duckdb/common/allocator.hpp
+++ b/src/include/duckdb/common/allocator.hpp
@@ -36,7 +36,7 @@ struct PrivateAllocatorData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/common/extra_type_info.hpp b/src/include/duckdb/common/extra_type_info.hpp
index 5157293ca50e..8c8f8c0a62a7 100644
--- a/src/include/duckdb/common/extra_type_info.hpp
+++ b/src/include/duckdb/common/extra_type_info.hpp
@@ -50,7 +50,7 @@ struct ExtraTypeInfo {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}

diff --git a/src/include/duckdb/common/types/column/partitioned_column_data.hpp b/src/include/duckdb/common/types/column/partitioned_column_data.hpp
index 70caddbae666..058151cd56e9 100644
--- a/src/include/duckdb/common/types/column/partitioned_column_data.hpp
+++ b/src/include/duckdb/common/types/column/partitioned_column_data.hpp
@@ -117,7 +117,7 @@ class PartitionedColumnData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp b/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp
index 3a4c7f56179b..6ca89aa7ea3c 100644
--- a/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp
+++ b/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp
@@ -181,7 +181,7 @@ class PartitionedTupleData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/common/types/vector_buffer.hpp b/src/include/duckdb/common/types/vector_buffer.hpp
index e1d49aacb629..7108b8dbf185 100644
--- a/src/include/duckdb/common/types/vector_buffer.hpp
+++ b/src/include/duckdb/common/types/vector_buffer.hpp
@@ -131,7 +131,7 @@ class VectorBuffer {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/execution/expression_executor_state.hpp b/src/include/duckdb/execution/expression_executor_state.hpp
index c4bbc40c6b6b..c0802db2a308 100644
--- a/src/include/duckdb/execution/expression_executor_state.hpp
+++ b/src/include/duckdb/execution/expression_executor_state.hpp
@@ -46,7 +46,7 @@ struct ExpressionState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/execution/physical_operator_states.hpp b/src/include/duckdb/execution/physical_operator_states.hpp
index b0bb166a975d..621c8124338d 100644
--- a/src/include/duckdb/execution/physical_operator_states.hpp
+++ b/src/include/duckdb/execution/physical_operator_states.hpp
@@ -52,7 +52,7 @@ class OperatorState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -69,7 +69,7 @@ class GlobalOperatorState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -90,7 +90,7 @@ class GlobalSinkState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
@@ -114,7 +114,7 @@ class LocalSinkState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -135,7 +135,7 @@ class GlobalSourceState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -152,7 +152,7 @@ class LocalSourceState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/execution/window_executor.hpp b/src/include/duckdb/execution/window_executor.hpp
index 9a5051fa5fc0..8d134d05e188 100644
--- a/src/include/duckdb/execution/window_executor.hpp
+++ b/src/include/duckdb/execution/window_executor.hpp
@@ -131,7 +131,7 @@ class WindowExecutorState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/execution/window_segment_tree.hpp b/src/include/duckdb/execution/window_segment_tree.hpp
index 0cfbff2874b7..bcdf87439f12 100644
--- a/src/include/duckdb/execution/window_segment_tree.hpp
+++ b/src/include/duckdb/execution/window_segment_tree.hpp
@@ -31,7 +31,7 @@ class WindowAggregatorState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}

diff --git a/src/include/duckdb/function/cast/default_casts.hpp b/src/include/duckdb/function/cast/default_casts.hpp
index e3d6072d5a2c..5d13c4354f58 100644
--- a/src/include/duckdb/function/cast/default_casts.hpp
+++ b/src/include/duckdb/function/cast/default_casts.hpp
@@ -30,7 +30,7 @@ struct BindCastInfo {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -48,7 +48,7 @@ struct BoundCastData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/function/compression_function.hpp b/src/include/duckdb/function/compression_function.hpp
index 4095e1826229..e34e2d0c35ef 100644
--- a/src/include/duckdb/function/compression_function.hpp
+++ b/src/include/duckdb/function/compression_function.hpp
@@ -39,7 +39,7 @@ struct AnalyzeState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -55,7 +55,7 @@ struct CompressionState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -76,7 +76,7 @@ struct CompressedSegmentState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -96,7 +96,7 @@ struct CompressionAppendState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/function/copy_function.hpp b/src/include/duckdb/function/copy_function.hpp
index 99a01f8e0ba1..7e3a4808b435 100644
--- a/src/include/duckdb/function/copy_function.hpp
+++ b/src/include/duckdb/function/copy_function.hpp
@@ -30,7 +30,7 @@ struct LocalFunctionData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -45,7 +45,7 @@ struct GlobalFunctionData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -60,7 +60,7 @@ struct PreparedBatchData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/function/function.hpp b/src/include/duckdb/function/function.hpp
index 65a186f0525d..7e89476e2526 100644
--- a/src/include/duckdb/function/function.hpp
+++ b/src/include/duckdb/function/function.hpp
@@ -58,13 +58,13 @@ struct FunctionData {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 	// FIXME: this function should be removed in the future
 	template <class TARGET>
 	TARGET &CastNoConst() const {
-		return const_cast<TARGET &>(reinterpret_cast<const TARGET &>(*this)); // NOLINT: FIXME
+		return const_cast<TARGET &>(Cast<TARGET>()); // NOLINT: FIXME
 	}
 };

diff --git a/src/include/duckdb/function/scalar_function.hpp b/src/include/duckdb/function/scalar_function.hpp
index ab65c97a2139..917f09eed2b4 100644
--- a/src/include/duckdb/function/scalar_function.hpp
+++ b/src/include/duckdb/function/scalar_function.hpp
@@ -29,7 +29,7 @@ struct FunctionLocalState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/function/table_function.hpp b/src/include/duckdb/function/table_function.hpp
index 3321d274f902..bd8e176973ee 100644
--- a/src/include/duckdb/function/table_function.hpp
+++ b/src/include/duckdb/function/table_function.hpp
@@ -36,7 +36,7 @@ struct TableFunctionInfo {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -60,7 +60,7 @@ struct GlobalTableFunctionState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -75,7 +75,7 @@ struct LocalTableFunctionState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/main/relation.hpp b/src/include/duckdb/main/relation.hpp
index 7d1798712975..c16cb2a3e829 100644
--- a/src/include/duckdb/main/relation.hpp
+++ b/src/include/duckdb/main/relation.hpp
@@ -185,7 +185,7 @@ class Relation : public std::enable_shared_from_this<Relation> {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/parallel/event.hpp b/src/include/duckdb/parallel/event.hpp
index 89a108d98a98..794d1344f1a6 100644
--- a/src/include/duckdb/parallel/event.hpp
+++ b/src/include/duckdb/parallel/event.hpp
@@ -59,7 +59,7 @@ class Event : public std::enable_shared_from_this<Event> {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}

diff --git a/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp b/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp
index b85c6252359f..2812469deb5c 100644
--- a/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp
+++ b/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp
@@ -38,7 +38,7 @@ struct ExtraDropInfo {
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 	virtual unique_ptr<ExtraDropInfo> Copy() const = 0;

diff --git a/src/include/duckdb/parser/parsed_data/parse_info.hpp b/src/include/duckdb/parser/parsed_data/parse_info.hpp
index d547065e9ef7..5d395c6adfcf 100644
--- a/src/include/duckdb/parser/parsed_data/parse_info.hpp
+++ b/src/include/duckdb/parser/parsed_data/parse_info.hpp
@@ -48,7 +48,7 @@ struct ParseInfo {
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}

diff --git a/src/include/duckdb/storage/data_pointer.hpp b/src/include/duckdb/storage/data_pointer.hpp
index c0c51df679aa..97752ee5e141 100644
--- a/src/include/duckdb/storage/data_pointer.hpp
+++ b/src/include/duckdb/storage/data_pointer.hpp
@@ -34,7 +34,7 @@ struct ColumnSegmentState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/storage/index.hpp b/src/include/duckdb/storage/index.hpp
index 179735e8c117..f5e89486b28f 100644
--- a/src/include/duckdb/storage/index.hpp
+++ b/src/include/duckdb/storage/index.hpp
@@ -160,7 +160,7 @@ class Index {
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/storage/storage_manager.hpp b/src/include/duckdb/storage/storage_manager.hpp
index 91fc96755719..e0c07b6b8903 100644
--- a/src/include/duckdb/storage/storage_manager.hpp
+++ b/src/include/duckdb/storage/storage_manager.hpp
@@ -96,7 +96,7 @@ class StorageManager {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/storage/table/column_checkpoint_state.hpp b/src/include/duckdb/storage/table/column_checkpoint_state.hpp
index 5ac11cf7cc28..1c1a68432375 100644
--- a/src/include/duckdb/storage/table/column_checkpoint_state.hpp
+++ b/src/include/duckdb/storage/table/column_checkpoint_state.hpp
@@ -51,7 +51,7 @@ struct ColumnCheckpointState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/storage/table/scan_state.hpp b/src/include/duckdb/storage/table/scan_state.hpp
index 6c919cfba0ff..7b8160fdc089 100644
--- a/src/include/duckdb/storage/table/scan_state.hpp
+++ b/src/include/duckdb/storage/table/scan_state.hpp
@@ -44,7 +44,7 @@ struct SegmentScanState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };
@@ -60,7 +60,7 @@ struct IndexScanState {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
 };

diff --git a/src/include/duckdb/transaction/transaction.hpp b/src/include/duckdb/transaction/transaction.hpp
index 1c47725c10dd..723c99460c52 100644
--- a/src/include/duckdb/transaction/transaction.hpp
+++ b/src/include/duckdb/transaction/transaction.hpp
@@ -66,7 +66,7 @@ class Transaction {
 	}
 	template <class TARGET>
 	const TARGET &Cast() const {
-		D_ASSERT(dynamic_cast<const TARGET *>(this));
+		DynamicCastCheck<TARGET>(this);
 		return reinterpret_cast<const TARGET &>(*this);
 	}
};
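Patch 147 completes the series by giving the const Cast() overloads the same DynamicCastCheck call the non-const overloads already use. Putting the pieces together, the resulting idiom looks roughly like the following self-contained sketch (simplified: assert stands in for D_ASSERT, and ParseInfo/AlterInfo here are just example types rather than the full DuckDB declarations):

// checked_cast_pattern.cpp — the const/non-const Cast pair after this series
#include <cassert>

template <class TARGET, class SRC>
void DynamicCastCheck(const SRC *source) {
	// the cast must be a no-op: dynamic_cast may not adjust the pointer
	assert(reinterpret_cast<const TARGET *>(source) == dynamic_cast<const TARGET *>(source));
}

struct ParseInfo {
	virtual ~ParseInfo() = default;

	template <class TARGET>
	TARGET &Cast() {
		DynamicCastCheck<TARGET>(this);
		return reinterpret_cast<TARGET &>(*this);
	}
	template <class TARGET>
	const TARGET &Cast() const {
		DynamicCastCheck<TARGET>(this); // mirrors the non-const case, as the commit title says
		return reinterpret_cast<const TARGET &>(*this);
	}
};

struct AlterInfo : ParseInfo {};

int main() {
	AlterInfo info;
	const ParseInfo &base = info;
	const AlterInfo &back = base.Cast<AlterInfo>(); // debug-checked, zero-cost in release builds
	(void)back;
	return 0;
}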