Skip to content

Commit 66ebe21

Browse files
committed
feat: Add gzip decompression to Parquet.
1 parent cc697c3 commit 66ebe21

File tree

2 files changed

+5
-0
lines changed

2 files changed

+5
-0
lines changed

dataframe.cabal

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ library
107107
unordered-containers >= 0.1 && < 1,
108108
vector ^>= 0.13,
109109
vector-algorithms ^>= 0.9,
110+
zlib >= 0.5 && < 1,
110111
zstd >= 0.1.2.0 && < 0.2,
111112
mmap >= 0.5.8 && <= 0.5.9,
112113
parallel >= 3.2.2.0 && < 5
@@ -124,6 +125,7 @@ executable dataframe-benchmark-example
124125
dataframe ^>= 0.3,
125126
random >= 1 && < 2,
126127
time >= 1.12 && < 2,
128+
text,
127129
vector ^>= 0.13,
128130
hs-source-dirs: app
129131
default-language: Haskell2010

src/DataFrame/IO/Parquet/Page.hs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,11 @@
33

44
module DataFrame.IO.Parquet.Page where
55

6+
import qualified Codec.Compression.GZip as GZip
67
import Codec.Compression.Zstd.Streaming
78
import Data.Bits
89
import qualified Data.ByteString as BSO
10+
import qualified Data.ByteString.Lazy as LB
911
import Data.Int
1012
import Data.Maybe (fromMaybe, listToMaybe)
1113
import Data.Word
@@ -42,6 +44,7 @@ readPage c columnBytes = do
4244
Left e -> error (show e)
4345
Right res -> pure res
4446
UNCOMPRESSED -> pure (BSO.pack compressed)
47+
GZIP -> pure (LB.toStrict (GZip.decompress (LB.pack compressed)))
4548
other -> error ("Unsupported compression type: " ++ show other)
4649
pure
4750
( Just $ Page hdr (BSO.unpack fullData)

0 commit comments

Comments
 (0)