-
-
Notifications
You must be signed in to change notification settings - Fork 5
/
File.hs
65 lines (60 loc) · 3.1 KB
/
File.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
module File where
import qualified Codec.Compression.GZip as GZip
import Control.Applicative ((<|>))
import Control.Monad (guard, when)
import qualified Data.ByteString.Lazy as ByteString
import qualified Data.ByteString.Lazy.Char8 as Char8
import Data.Char (isSpace)
import System.Exit (exitFailure)
import System.IO
import qualified Option as Option
readFromFile :: Option.Option -> Handle -> IO ([String], [[String]])
readFromFile opts handle = do
contents <- joinMultiLines <$> lines <$>
if Option.gzipped opts
then Char8.unpack <$> GZip.decompress <$> ByteString.hGetContents handle
else hGetContents handle
let (headLine : secondLine : _) = contents ++ [ "", "" ]
let delimiter = guard (Option.tabDelimited opts) *> Just "\t" <|> Option.delimiter opts
when (maybe False ((/=1) . length) delimiter) $ do
hPutStrLn stderr "Invalid delimiter."
exitFailure
let splitter = case delimiter of
Just [c] -> (==) c
_ -> detectSplitter headLine secondLine
let headColumns = splitFixedSize splitter 0 headLine
let size = length headColumns
let columns = if Option.skipHeader opts then headColumns else [ 'c' : show i | i <- [1..size] ]
let skipLine = if Option.skipHeader opts then tail else id
let stripSpaces = if Option.keepLeadingWhiteSpace opts then id else dropWhile isSpace
let body = filter (not . null) $ map (map stripSpaces . splitFixedSize splitter size) (skipLine contents)
return (columns, body)
where joinMultiLines (cs:ds:css) | valid True cs = cs : joinMultiLines (ds:css)
| otherwise = joinMultiLines $ (cs ++ "\n" ++ ds) : css
where valid False ('"':'"':xs) = valid False xs
valid False ('\\':'"':xs) = valid False xs
valid b ('"':xs) = valid (not b) xs
valid b (_:xs) = valid b xs
valid b "" = b
joinMultiLines css = css
detectSplitter :: String -> String -> Char -> Bool
detectSplitter xs ys = head $ [ splitter | (x, y, splitter) <- map splitLines splitters
, 1 < length x && length x <= length y ] ++ splitters
where splitLines f = (splitFixedSize f 0 xs, splitFixedSize f 0 ys, f)
splitters = [ (==','), isSpace ]
splitFixedSize :: (Char -> Bool) -> Int -> String -> [String]
splitFixedSize f n = fill . go n
where go _ "" = []
go k (c:cs@(c':_)) | f c && f c' && not (f ' ' && isSpace c') = "" : go (k - 1) cs
| f c = go k cs
go k ('"':cs) = let (ys, xs) = takeQuotedString cs in xs : go (k - 1) ys
where takeQuotedString ('"':'"':xs) = fmap ('"':) (takeQuotedString xs)
takeQuotedString ('\\':'"':xs) = fmap ('"':) (takeQuotedString xs)
takeQuotedString ('"':xs) = (xs, "")
takeQuotedString (x:xs) = fmap (x:) (takeQuotedString xs)
takeQuotedString "" = ("", "")
go k (c:cs) | f c = go k cs
go 1 cs = [cs]
go k cs = let (xs, ys) = break f cs in xs : go (k - 1) ys
fill [] = []
fill xs = xs ++ replicate (n - length xs) ""