Skip to content

Commit 071a302

Browse files
committed
feat: add BOM option to csv_agg
Support for BOM, requested on PostgREST/postgrest#1371 (comment). ```sql select csv_agg(x, ',', true) from projects x; csv_agg ------------------- id,name,client_id+ 1,Windows 7,1 + 2,Windows 10,1 + 3,IOS,2 + 4,OSX,2 + 5,Orphan, (1 row) ``` Also possible to do with [media type handlers](https://docs.postgrest.org/en/v12/references/api/media_type_handlers.html#overriding-a-builtin-handler), but it's more convenient to have it builtin.
1 parent 4bc9779 commit 071a302

File tree

5 files changed

+69
-4
lines changed

5 files changed

+69
-4
lines changed

README.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ select csv_agg(x) from projects x;
3535
(1 row)
3636
```
3737

38+
### Custom Delimiter
39+
3840
It also supports adding a custom delimiter.
3941

4042
```psql
@@ -50,5 +52,22 @@ select csv_agg(x, '|') from projects x;
5052
(1 row)
5153
```
5254

53-
> [!IMPORTANT]
55+
> [!NOTE]
5456
> Newline, carriage return and double quotes are not supported as delimiters to maintain the integrity of the separated values format.
57+
58+
### BOM
59+
60+
You can include a byte-order mark (BOM) to make the CSV compatible with Excel.
61+
62+
```psql
63+
select csv_agg(x, ',', true) from projects x;
64+
csv_agg
65+
-------------------
66+
id,name,client_id+
67+
1,Windows 7,1 +
68+
2,Windows 10,1 +
69+
3,IOS,2 +
70+
4,OSX,2 +
71+
5,Orphan,
72+
(1 row)
73+
```

sql/pg_csv.sql

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,15 @@ create aggregate csv_agg(anyelement) (
2626
finalfunc = csv_agg_finalfn,
2727
parallel = safe
2828
);
29+
30+
CREATE FUNCTION csv_agg_transfn(internal, anyelement, "char", boolean)
31+
returns internal
32+
language c
33+
as 'pg_csv';
34+
35+
CREATE AGGREGATE csv_agg(anyelement, "char", boolean) (
36+
sfunc = csv_agg_transfn,
37+
stype = internal,
38+
finalfunc = csv_agg_finalfn,
39+
parallel = safe
40+
);

src/pg_csv.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ PG_MODULE_MAGIC;
55
static const char NEWLINE = '\n';
66
static const char DQUOTE = '"';
77
static const char CR = '\r';
8+
static const char BOM[3] = "\xEF\xBB\xBF";
89

910
typedef struct {
1011
StringInfoData accum_buf;
@@ -50,9 +51,10 @@ static char *datum_to_cstring(Datum datum, Oid typeoid) {
5051

5152
PG_FUNCTION_INFO_V1(csv_agg_transfn);
5253
Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
53-
CsvAggState *state = !PG_ARGISNULL(0) ? (CsvAggState *)PG_GETARG_POINTER(0) : NULL;
54-
HeapTupleHeader next = !PG_ARGISNULL(1) ? PG_GETARG_HEAPTUPLEHEADER(1) : NULL;
55-
char delim = PG_NARGS() >= 3 && !PG_ARGISNULL(2) ? PG_GETARG_CHAR(2) : ',';
54+
CsvAggState *state = !PG_ARGISNULL(0) ? (CsvAggState *)PG_GETARG_POINTER(0) : NULL;
55+
HeapTupleHeader next = !PG_ARGISNULL(1) ? PG_GETARG_HEAPTUPLEHEADER(1) : NULL;
56+
char delim = PG_NARGS() >= 3 && !PG_ARGISNULL(2) ? PG_GETARG_CHAR(2) : ',';
57+
bool with_bom = PG_NARGS() >= 4 && !PG_ARGISNULL(3) ? PG_GETARG_BOOL(3) : false;
5658

5759
// first call when the accumulator is NULL
5860
// pretty standard stuff, for example see the jsonb_agg transition function https://github.com/postgres/postgres/blob/3c4e26a62c31ebe296e3aedb13ac51a7a35103bd/src/backend/utils/adt/jsonb.c#L1521
@@ -80,6 +82,8 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
8082
if (!state->header_done) {
8183
TupleDesc tdesc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next));
8284

85+
if (with_bom) appendBinaryStringInfo(&state->accum_buf, BOM, sizeof(BOM));
86+
8387
// build header row
8488
for (int i = 0; i < tdesc->natts; i++) {
8589
Form_pg_attribute att = TupleDescAttr(tdesc, i);

test/expected/bom.out

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
-- this is done to avoid failing on a pure psql change that happened on postgres 16
2+
-- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't
3+
\pset format unaligned
4+
\pset tuples_only on
5+
\echo
6+
7+
-- include BOM (byte-order mark)
8+
SELECT csv_agg(x, ';', true) AS body
9+
FROM projects x;
10+
id;name;client_id
11+
1;Windows 7;1
12+
2;has,comma;1
13+
;;
14+
4;OSX;2
15+
;"has""quote";
16+
5;"has,comma and ""quote""";7
17+
6;"has
18+
LF";7
19+
7;"has CR";8
20+
8;"has
21+
CRLF""";8

test/sql/bom.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- this is done to avoid failing on a pure psql change that happened on postgres 16
2+
-- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't
3+
\pset format unaligned
4+
\pset tuples_only on
5+
\echo
6+
7+
-- include BOM (byte-order mark)
8+
SELECT csv_agg(x, ';', true) AS body
9+
FROM projects x;

0 commit comments

Comments
 (0)