2022-04-18 16:29:28 -04:00
|
|
|
{-
|
|
|
|
|
2022-04-21 13:13:42 -04:00
|
|
|
csv-sip
|
2022-04-18 16:29:28 -04:00
|
|
|
Copyright (C) Jonathan Lamothe <jonathan@jlamothe.net>
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or (at
|
|
|
|
your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
-}
|
|
|
|
|
2022-04-19 16:03:14 -04:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
2022-04-21 13:13:42 -04:00
|
|
|
module Data.CSV.SipSpec (spec) where
|
2022-04-18 16:29:28 -04:00
|
|
|
|
2022-04-19 16:03:14 -04:00
|
|
|
import Conduit (runConduit, (.|))
|
2022-04-24 14:23:49 -04:00
|
|
|
import qualified Data.ByteString as BS
|
2022-04-19 20:12:12 -04:00
|
|
|
import Data.Char (ord)
|
2022-04-19 16:03:14 -04:00
|
|
|
import Data.Conduit.List (consume, sourceList)
|
2022-04-21 15:39:07 -04:00
|
|
|
import qualified Data.Map as M
|
2022-04-20 21:21:51 -04:00
|
|
|
import Test.Hspec (Spec, context, describe, it, shouldBe)
|
2022-04-19 16:03:14 -04:00
|
|
|
|
2022-04-21 13:13:42 -04:00
|
|
|
import Data.CSV.Sip
|
2022-04-18 16:29:28 -04:00
|
|
|
|
|
|
|
spec :: Spec
|
2022-04-21 13:13:42 -04:00
|
|
|
spec = describe "Data.CSV.Sip" $ do
|
2022-04-24 16:04:10 -04:00
|
|
|
encodeCSVSpec
|
|
|
|
encodeRawCSVSpec
|
2022-04-24 15:17:52 -04:00
|
|
|
encodeRowsSpec
|
2022-04-24 14:23:49 -04:00
|
|
|
encodeRawRowsSpec
|
2022-04-21 15:39:07 -04:00
|
|
|
labelFieldsSpec
|
2022-04-19 16:03:14 -04:00
|
|
|
decodeRowsSpec
|
2022-04-19 19:33:35 -04:00
|
|
|
decodeRawRowsSpec
|
2022-04-19 16:31:04 -04:00
|
|
|
decodeUTF8Spec
|
2022-04-19 20:12:12 -04:00
|
|
|
toBytesSpec
|
2022-04-19 16:03:14 -04:00
|
|
|
|
2022-04-24 16:04:10 -04:00
|
|
|
encodeCSVSpec :: Spec
|
|
|
|
encodeCSVSpec = describe "encodeCSV" $ do
|
|
|
|
result <- BS.concat <$> runConduit
|
|
|
|
(encodeCSV input .| consume)
|
|
|
|
it ("shouldBe " ++ show expected) $
|
|
|
|
result `shouldBe` expected
|
|
|
|
where
|
|
|
|
|
|
|
|
input =
|
|
|
|
[ [ "foo", "a\"b" ]
|
|
|
|
, [ "a\rb", "a\nb" ]
|
|
|
|
]
|
|
|
|
|
|
|
|
expected = BS.concat
|
|
|
|
[ "\"foo\",\"a\"\"b\"\r\n"
|
|
|
|
, "\"a\rb\",\"a\nb\"\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
encodeRawCSVSpec :: Spec
|
|
|
|
encodeRawCSVSpec = describe "encodeRawCSV" $ do
|
|
|
|
result <- BS.concat <$> runConduit
|
|
|
|
(encodeRawCSV input .| consume)
|
|
|
|
it ("shouldBe " ++ show expected) $
|
|
|
|
result `shouldBe` expected
|
|
|
|
where
|
|
|
|
|
|
|
|
input =
|
|
|
|
[ [ "foo", "a\"b" ]
|
|
|
|
, [ "a\rb", "a\nb" ]
|
|
|
|
]
|
|
|
|
|
|
|
|
expected = BS.concat
|
|
|
|
[ "\"foo\",\"a\"\"b\"\r\n"
|
|
|
|
, "\"a\rb\",\"a\nb\"\r\n"
|
|
|
|
]
|
|
|
|
|
2022-04-24 15:17:52 -04:00
|
|
|
encodeRowsSpec :: Spec
|
|
|
|
encodeRowsSpec = describe "encodeRows" $ do
|
|
|
|
result <- BS.concat <$> runConduit
|
|
|
|
(sourceList input .| encodeRows .| consume)
|
|
|
|
it ("shouldBe " ++ show expected) $
|
|
|
|
result `shouldBe` expected
|
|
|
|
where
|
|
|
|
|
|
|
|
input =
|
|
|
|
[ [ "foo", "a\"b" ]
|
|
|
|
, [ "a\rb", "a\nb" ]
|
|
|
|
]
|
|
|
|
|
|
|
|
expected = BS.concat
|
|
|
|
[ "\"foo\",\"a\"\"b\"\r\n"
|
|
|
|
, "\"a\rb\",\"a\nb\"\r\n"
|
|
|
|
]
|
|
|
|
|
2022-04-24 14:23:49 -04:00
|
|
|
encodeRawRowsSpec :: Spec
|
|
|
|
encodeRawRowsSpec = describe "encodeRawRows" $ do
|
|
|
|
result <- BS.concat <$> runConduit
|
|
|
|
(sourceList input .| encodeRawRows .| consume)
|
|
|
|
it ("should be " ++ show expected) $
|
|
|
|
result `shouldBe` expected
|
|
|
|
where
|
|
|
|
|
|
|
|
input =
|
|
|
|
[ [ "foo", "a\"b" ]
|
|
|
|
, [ "a\rb", "a\nb" ]
|
|
|
|
]
|
|
|
|
|
|
|
|
expected = BS.concat
|
|
|
|
[ "\"foo\",\"a\"\"b\"\r\n"
|
|
|
|
, "\"a\rb\",\"a\nb\"\r\n"
|
|
|
|
]
|
|
|
|
|
2022-04-21 15:39:07 -04:00
|
|
|
labelFieldsSpec :: Spec
|
|
|
|
labelFieldsSpec = describe "labelFields" $ mapM_
|
|
|
|
( \(label, input, expected) -> context label $ do
|
|
|
|
result <- runConduit $ sourceList input .| labelFields .| consume
|
|
|
|
let
|
|
|
|
expLen = length expected
|
|
|
|
resLen = length result
|
|
|
|
it ("should have " ++ show expLen ++ " rows") $
|
|
|
|
resLen `shouldBe` expLen
|
|
|
|
mapM_
|
|
|
|
( \(n, result', expected') -> context ("row " ++ show n) $
|
|
|
|
it ("should be " ++ show expected') $
|
|
|
|
result' `shouldBe` expected'
|
|
|
|
) $ zip3 [(0::Int)..] result expected
|
|
|
|
)
|
|
|
|
|
|
|
|
-- label, input, expected
|
|
|
|
[ ( "empty", [], [] )
|
|
|
|
, ( "no body", [headers], [] )
|
|
|
|
, ( "with body", withBodyIn, withBodyRes )
|
|
|
|
, ( "mixed cols", mixedColsIn, mixedColsRes )
|
|
|
|
]
|
|
|
|
|
|
|
|
where
|
|
|
|
headers = ["foo", "bar", "baz"] :: [String]
|
|
|
|
|
|
|
|
withBodyIn = headers :
|
|
|
|
[ ["a", "b", "c"]
|
|
|
|
, ["d", "e", "f"]
|
|
|
|
] :: [[String]]
|
|
|
|
|
|
|
|
mixedColsIn =
|
|
|
|
[ ["foo", "bar"]
|
|
|
|
, ["a"]
|
|
|
|
, ["b", "c"]
|
|
|
|
, ["d", "e", "f"]
|
|
|
|
] :: [[String]]
|
|
|
|
|
|
|
|
withBodyRes = map M.fromList
|
|
|
|
[ [("foo", "a"), ("bar", "b"), ("baz", "c")]
|
|
|
|
, [("foo", "d"), ("bar", "e"), ("baz", "f")]
|
|
|
|
] :: [M.Map String String]
|
|
|
|
|
|
|
|
mixedColsRes = map M.fromList
|
|
|
|
[ [("foo", "a")]
|
|
|
|
, [("foo", "b"), ("bar", "c")]
|
|
|
|
, [("foo", "d"), ("bar", "e")]
|
|
|
|
] :: [M.Map String String]
|
|
|
|
|
2022-04-19 16:03:14 -04:00
|
|
|
decodeRowsSpec :: Spec
|
|
|
|
decodeRowsSpec = describe "decodeRows" $ mapM_
|
|
|
|
( \(label, input, expected) -> context label $ do
|
|
|
|
result <- runConduit $ sourceList input .| decodeRows .| consume
|
|
|
|
let
|
|
|
|
expLen = length expected
|
|
|
|
resLen = length result
|
2022-04-20 21:21:51 -04:00
|
|
|
it ("should have " ++ show expLen ++ " rows") $
|
2022-04-19 16:03:14 -04:00
|
|
|
resLen `shouldBe` expLen
|
|
|
|
mapM_
|
|
|
|
( \(n, expected', result') -> context ("row " ++ show n) $
|
2022-04-20 21:21:51 -04:00
|
|
|
it ("should be " ++ show expected') $
|
2022-04-19 16:03:14 -04:00
|
|
|
result' `shouldBe` expected'
|
|
|
|
) $ zip3 [(0::Int)..] expected result
|
|
|
|
)
|
|
|
|
|
|
|
|
-- label, input, expected
|
2022-04-20 21:21:51 -04:00
|
|
|
[ ( "valid", validIn, validRes )
|
|
|
|
, ( "invalid", invalidIn, invalidRes )
|
|
|
|
, ( "empty", [], [] )
|
2022-04-19 16:03:14 -04:00
|
|
|
]
|
|
|
|
|
|
|
|
where
|
2022-04-20 21:21:51 -04:00
|
|
|
validIn = ["foo,bar\r\n", "baz,quux\r\n"]
|
|
|
|
invalidIn = ["\"a"]
|
|
|
|
validRes = [["foo", "bar"], ["baz", "quux"]]
|
|
|
|
invalidRes = [[""]]
|
2022-04-18 16:29:28 -04:00
|
|
|
|
2022-04-19 19:33:35 -04:00
|
|
|
decodeRawRowsSpec :: Spec
|
|
|
|
decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
|
|
|
|
( \(label, input, expected) -> context label $ do
|
|
|
|
result <- runConduit $ sourceList input .| decodeRawRows .| consume
|
|
|
|
let
|
|
|
|
expLen = length expected
|
|
|
|
resLen = length result
|
2022-04-20 21:21:51 -04:00
|
|
|
it ("should have " ++ show expLen ++ " rows") $
|
2022-04-19 19:33:35 -04:00
|
|
|
resLen `shouldBe` expLen
|
|
|
|
mapM_
|
|
|
|
( \(n, expected', result') -> context ("row " ++ show n) $
|
2022-04-20 21:21:51 -04:00
|
|
|
it ("should be " ++ show result') $
|
|
|
|
result' `shouldBe` expected'
|
2022-04-19 19:33:35 -04:00
|
|
|
) $ zip3 [(0::Int)..] expected result
|
|
|
|
)
|
|
|
|
|
|
|
|
-- label, input, expected
|
|
|
|
[ ( "unquoted", unquotedIn, normalRes )
|
|
|
|
, ( "quoted", quotedIn, normalRes )
|
|
|
|
, ( "mixed", mixedIn, normalRes )
|
|
|
|
, ( "CR only", crOnlyIn, normalRes )
|
|
|
|
, ( "LF only", lfOnlyIn, normalRes )
|
|
|
|
, ( "has quote", quoteIn, quoteRes )
|
|
|
|
, ( "has CR", crIn, crRes )
|
|
|
|
, ( "has LF", lfIn, lfRes )
|
|
|
|
, ( "has CRLF", crlfIn, crlfRes )
|
|
|
|
, ( "odd chunk", oddChunkIn, normalRes )
|
|
|
|
, ( "no newline", noNewlineIn, normalRes )
|
|
|
|
, ( "malformed", malformedIn, malformedRes )
|
2022-04-21 00:02:38 -04:00
|
|
|
, ( "blank end", blankEndIn, blankEndRes )
|
2022-04-19 19:33:35 -04:00
|
|
|
]
|
|
|
|
|
|
|
|
where
|
|
|
|
|
|
|
|
unquotedIn =
|
|
|
|
[ "foo,bar\r\n"
|
|
|
|
, "baz,quux\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
quotedIn =
|
|
|
|
[ "\"foo\",\"bar\"\r\n"
|
|
|
|
, "\"baz\",\"quux\"\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
mixedIn =
|
|
|
|
[ "\"foo\",bar\r\n"
|
|
|
|
, "baz,\"quux\"\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
crOnlyIn =
|
|
|
|
[ "foo,bar\r"
|
|
|
|
, "baz,quux\r"
|
|
|
|
]
|
|
|
|
|
|
|
|
lfOnlyIn =
|
|
|
|
[ "foo,bar\n"
|
|
|
|
, "baz,quux\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
quoteIn =
|
|
|
|
[ "\"a\"\"b\",bar\r\n"
|
|
|
|
, "baz,quux\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
crIn =
|
|
|
|
[ "\"a\rb\",bar\r\n"
|
|
|
|
, "baz,quux\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
lfIn =
|
|
|
|
[ "\"a\nb\",bar\r\n"
|
|
|
|
, "baz,quux\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
crlfIn =
|
|
|
|
[ "\"a\r\nb\",bar\r\n"
|
|
|
|
, "baz,quux\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
oddChunkIn =
|
|
|
|
[ "foo,"
|
|
|
|
, "bar\r\nbaz,"
|
|
|
|
, "quux\r\n"
|
|
|
|
]
|
|
|
|
|
|
|
|
noNewlineIn =
|
|
|
|
[ "foo,bar\r\n"
|
|
|
|
, "baz,quux"
|
|
|
|
]
|
|
|
|
|
|
|
|
malformedIn =
|
|
|
|
[ "a\"b,bar\r\n"
|
|
|
|
, "baz,quux\r\n"
|
|
|
|
]
|
|
|
|
|
2022-04-21 00:02:38 -04:00
|
|
|
blankEndIn =
|
|
|
|
[ "foo,bar,\r\n"
|
|
|
|
, "baz,quux\r\n"
|
|
|
|
]
|
|
|
|
|
2022-04-19 19:33:35 -04:00
|
|
|
normalRes =
|
|
|
|
[ ["foo", "bar"]
|
|
|
|
, ["baz", "quux"]
|
|
|
|
]
|
|
|
|
|
|
|
|
quoteRes =
|
|
|
|
[ ["a\"b", "bar"]
|
|
|
|
, ["baz", "quux"]
|
|
|
|
]
|
|
|
|
|
|
|
|
crRes =
|
|
|
|
[ ["a\rb", "bar"]
|
|
|
|
, ["baz", "quux"]
|
|
|
|
]
|
|
|
|
|
|
|
|
lfRes =
|
|
|
|
[ ["a\nb", "bar"]
|
|
|
|
, ["baz", "quux"]
|
|
|
|
]
|
|
|
|
|
|
|
|
crlfRes =
|
|
|
|
[ ["a\r\nb", "bar"]
|
|
|
|
, ["baz", "quux"]
|
|
|
|
]
|
|
|
|
|
|
|
|
malformedRes =
|
|
|
|
[ ["", "bar"]
|
|
|
|
, ["baz", "quux"]
|
|
|
|
]
|
|
|
|
|
2022-04-21 00:02:38 -04:00
|
|
|
blankEndRes =
|
|
|
|
[ ["foo", "bar", ""]
|
|
|
|
, ["baz", "quux"]
|
|
|
|
]
|
|
|
|
|
2022-04-19 16:31:04 -04:00
|
|
|
decodeUTF8Spec :: Spec
|
|
|
|
decodeUTF8Spec = describe "decodeUTF8" $ mapM_
|
|
|
|
( \(label, input, expected) -> context label $
|
|
|
|
it ("should be " ++ show expected) $
|
|
|
|
decodeUTF8 input `shouldBe` expected
|
|
|
|
)
|
|
|
|
|
|
|
|
-- label, input, expected
|
|
|
|
[ ( "plain ASCII", "hello", Just "hello" )
|
|
|
|
, ( "valid UTF8", "\xc3\xa9", Just "é" )
|
|
|
|
, ( "invalid UTF8", "\xff", Nothing )
|
|
|
|
, ( "blank", "", Just "" )
|
|
|
|
]
|
|
|
|
|
2022-04-19 20:12:12 -04:00
|
|
|
toBytesSpec :: Spec
|
|
|
|
toBytesSpec = describe "toBytes" $ let
|
|
|
|
input = ["ab", "cd"]
|
|
|
|
expected = map (fromIntegral . ord) "abcd"
|
|
|
|
in it ("should be " ++ show expected) $ do
|
|
|
|
result <- runConduit $ sourceList input .| toBytes .| consume
|
|
|
|
result `shouldBe` expected
|
|
|
|
|
2022-04-18 16:29:28 -04:00
|
|
|
--jl
|