Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
loadtest:
strategy:
matrix:
kind: ['csv_agg', 'csv_agg_delim', 'postgrest']
kind: ['csv_agg', 'csv_agg_delim', 'csv_agg_delim_bom', 'postgrest']
name: Loadtest
runs-on: ubuntu-24.04
steps:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ else
endif

EXTENSION = pg_csv
EXTVERSION = 0.2
EXTVERSION = 0.3

DATA = $(wildcard sql/*--*.sql)

Expand Down
25 changes: 22 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ select csv_agg(x) from projects x;
(1 row)
```

It also supports adding a custom delimiter.
### Custom Delimiter

You can use a custom delimiter.

```psql
select csv_agg(x, '|') from projects x;
select csv_agg(x, csv_options(delimiter := '|')) from projects x;
csv_agg
-------------------
id|name|client_id+
Expand All @@ -50,5 +52,22 @@ select csv_agg(x, '|') from projects x;
(1 row)
```

> [!IMPORTANT]
> [!NOTE]
> Newline, carriage return, and double quote characters are not supported as delimiters, because allowing them would break the integrity of the separated-values format.

### BOM

You can include a byte-order mark (BOM) to make the CSV compatible with Excel.

```psql
select csv_agg(x, csv_options(bom := true)) from projects x;
csv_agg
-------------------
id,name,client_id+
1,Windows 7,1 +
2,Windows 10,1 +
3,IOS,2 +
4,OSX,2 +
5,Orphan,
(1 row)
```
5 changes: 5 additions & 0 deletions bench/csv_agg_delim_bom.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- pgbench script: exercises csv_agg with both an explicit delimiter and the BOM option.
-- Each run picks a row count between 1000 and 2000 and aggregates that many rows
-- of student_emotion_assessments into a single CSV value.
\set lim random(1000, 2000)

select csv_agg(t, csv_options(delimiter:=',', bom:=true)) from (
select * from student_emotion_assessments limit :lim
) as t;
5 changes: 5 additions & 0 deletions sql/pg_csv--0.2--0.3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Upgrade pg_csv 0.2 -> 0.3: adds the `bom` option to csv_options.
alter type csv_options add attribute bom bool;

-- The 0.2 constructor has the signature csv_options("char"). `create or replace`
-- cannot change a function's argument list: it would create a second overload
-- next to the old one, and calls that omit `bom` (e.g. csv_options() or
-- csv_options(delimiter := '|')) would then fail as ambiguous.
-- Drop the old signature first so only the two-argument constructor remains.
drop function csv_options("char");

-- Both options default to NULL; the C side applies its own defaults
-- (',' delimiter, no BOM) when an attribute is NULL.
create or replace function csv_options(delimiter "char" default NULL, bom bool default NULL) returns csv_options as $$
  select row(delimiter, bom)::csv_options;
$$ language sql;
6 changes: 3 additions & 3 deletions sql/pg_csv.sql
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
create type csv_options as (
delimiter "char"
, bom bool
);

create function csv_options(delimiter "char" default ',') returns csv_options as $$
select row(delimiter)::csv_options;
create or replace function csv_options(delimiter "char" default NULL, bom bool default NULL) returns csv_options as $$
select row(delimiter, bom)::csv_options;
$$ language sql;

create function csv_agg_transfn(internal, anyelement)
Expand Down Expand Up @@ -34,4 +35,3 @@ create aggregate csv_agg(anyelement, csv_options) (
finalfunc = csv_agg_finalfn,
parallel = safe
);

15 changes: 12 additions & 3 deletions src/pg_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ PG_MODULE_MAGIC;
static const char NEWLINE = '\n';
static const char DQUOTE = '"';
static const char CR = '\r';
static const char BOM[3] = "\xEF\xBB\xBF";

typedef struct {
char delim;
bool with_bom;
} CsvOptions;

typedef struct {
Expand Down Expand Up @@ -55,15 +57,16 @@ static char *datum_to_cstring(Datum datum, Oid typeoid) {

static void parse_csv_options(HeapTupleHeader opts_hdr, CsvOptions *csv_opts) {
// defaults
csv_opts->delim = ',';
csv_opts->delim = ',';
csv_opts->with_bom = false;

if (opts_hdr == NULL) return;

TupleDesc desc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(opts_hdr),
HeapTupleHeaderGetTypMod(opts_hdr));

Datum values[1];
bool nulls[1];
Datum values[2];
bool nulls[2];

heap_deform_tuple(
&(HeapTupleData){.t_len = HeapTupleHeaderGetDatumLength(opts_hdr), .t_data = opts_hdr}, desc,
Expand All @@ -77,6 +80,10 @@ static void parse_csv_options(HeapTupleHeader opts_hdr, CsvOptions *csv_opts) {
"double quote")));
}

if (!nulls[1]) {
csv_opts->with_bom = DatumGetBool(values[1]);
}

ReleaseTupleDesc(desc);
}

Expand Down Expand Up @@ -118,6 +125,8 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
TupleDesc tdesc =
lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next));

if (state->options->with_bom) appendBinaryStringInfo(&state->accum_buf, BOM, sizeof(BOM));

// build header row
for (int i = 0; i < tdesc->natts; i++) {
Form_pg_attribute att = TupleDescAttr(tdesc, i);
Expand Down
38 changes: 38 additions & 0 deletions test/expected/bom.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- this is done to avoid failing on a pure psql change that happened on postgres 16
-- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't
\pset format unaligned
\pset tuples_only on
\echo

-- include BOM (byte-order mark)
SELECT csv_agg(x, csv_options(bom := true)) AS body
FROM projects x;
id,name,client_id
1,Windows 7,1
2,"has,comma",1
,,
4,OSX,2
,"has""quote",
5,"has,comma and ""quote""",7
6,"has
LF",7
7,"has CR",8
8,"has
CRLF""",8
\echo

-- include BOM with custom delimiter
SELECT csv_agg(x, csv_options(delimiter := ';', bom := true)) AS body
FROM projects x;
id;name;client_id
1;Windows 7;1
2;has,comma;1
;;
4;OSX;2
;"has""quote";
5;"has,comma and ""quote""";7
6;"has
LF";7
7;"has CR";8
8;"has
CRLF""";8
Expand Down
14 changes: 14 additions & 0 deletions test/sql/bom.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- this is done to avoid failing on a pure psql change that happened on postgres 16
-- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't
\pset format unaligned
\pset tuples_only on
\echo

-- include BOM (byte-order mark)
SELECT csv_agg(x, csv_options(bom := true)) AS body
FROM projects x;
\echo

-- include BOM with custom delimiter
SELECT csv_agg(x, csv_options(delimiter := ';', bom := true)) AS body
FROM projects x;
Loading