basic structure for decodeRawRows
This commit is contained in:
parent
eea4710b80
commit
67e85f0a78
|
@ -32,6 +32,7 @@ library
|
||||||
, bytestring
|
, bytestring
|
||||||
, conduit >=1.3.4.2 && <1.4
|
, conduit >=1.3.4.2 && <1.4
|
||||||
, text
|
, text
|
||||||
|
, transformers
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
autogen-modules: Paths_csv_slurp
|
autogen-modules: Paths_csv_slurp
|
||||||
|
|
||||||
|
@ -51,5 +52,6 @@ test-suite csv-slurp-test
|
||||||
, csv-slurp
|
, csv-slurp
|
||||||
, hspec >=2.8.5 && <2.9
|
, hspec >=2.8.5 && <2.9
|
||||||
, text
|
, text
|
||||||
|
, transformers
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
autogen-modules: Paths_csv_slurp
|
autogen-modules: Paths_csv_slurp
|
||||||
|
|
|
@ -26,6 +26,7 @@ dependencies:
|
||||||
- bytestring
|
- bytestring
|
||||||
- conduit >= 1.3.4.2 && < 1.4
|
- conduit >= 1.3.4.2 && < 1.4
|
||||||
- text
|
- text
|
||||||
|
- transformers
|
||||||
|
|
||||||
library:
|
library:
|
||||||
source-dirs: src
|
source-dirs: src
|
||||||
|
|
|
@ -29,13 +29,16 @@ module Data.CSV.Slurp (
|
||||||
decodeRows,
|
decodeRows,
|
||||||
decodeRawRows,
|
decodeRawRows,
|
||||||
decodeUTF8,
|
decodeUTF8,
|
||||||
|
toBytes,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Conduit (ConduitT, mapC, (.|))
|
import Conduit (ConduitT, mapC, (.|))
|
||||||
|
import Control.Monad.Trans.State (StateT, evalStateT)
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Data.Text.Encoding (decodeUtf8')
|
import Data.Text.Encoding (decodeUtf8')
|
||||||
|
import Data.Word (Word8)
|
||||||
|
|
||||||
-- | decode the rows from a stream of ByteStrings
|
-- | decode the rows from a stream of ByteStrings
|
||||||
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
|
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
|
||||||
|
@ -43,7 +46,7 @@ decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8)
|
||||||
|
|
||||||
-- | decode the rows returning raw ByteStrings instead of text
|
-- | decode the rows returning raw ByteStrings instead of text
|
||||||
decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m ()
|
decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m ()
|
||||||
decodeRawRows = return ()
|
decodeRawRows = toBytes .| evalStateT decodeLoop newDecodeState
|
||||||
|
|
||||||
-- | decode a raw ByteString into Text (if possible)
|
-- | decode a raw ByteString into Text (if possible)
|
||||||
decodeUTF8 :: BS.ByteString -> Maybe T.Text
|
decodeUTF8 :: BS.ByteString -> Maybe T.Text
|
||||||
|
@ -51,4 +54,24 @@ decodeUTF8 bs = case decodeUtf8' bs of
|
||||||
Left _ -> Nothing
|
Left _ -> Nothing
|
||||||
Right txt -> Just txt
|
Right txt -> Just txt
|
||||||
|
|
||||||
|
-- | Convert a stream of ByteStrings into a stream of individual bytes.
--
-- NOTE(review): this is still a stub. @return ()@ finishes immediately
-- without awaiting any input or yielding any 'Word8', so the decoder that
-- 'decodeRawRows' places downstream of 'toBytes' currently sees no bytes.
|
||||||
|
toBytes :: Monad m => ConduitT BS.ByteString Word8 m ()
|
||||||
|
toBytes = return ()
|
||||||
|
|
||||||
|
-- | Decoder state carried by 'decodeLoop' through its 'StateT' layer.
--
-- Field meanings below are inferred from the names only; nothing in view
-- reads or updates them yet -- TODO confirm once 'decodeLoop' is written.
--
-- * 'isQuoted'  - presumably whether the decoder is inside a quoted field.
-- * 'collected' - presumably the bytes gathered so far for the current field.
data DecodeState = DecodeState
|
||||||
|
{ isQuoted :: Bool
|
||||||
|
, collected :: BS.ByteString
|
||||||
|
} deriving (Eq, Show)
|
||||||
|
|
||||||
|
-- | Initial 'DecodeState': 'isQuoted' is False and 'collected' is empty.
-- This is the value 'decodeRawRows' hands to 'evalStateT' as the start state.
--
-- NOTE(review): the @""@ literal is used at type 'BS.ByteString', which
-- presumably relies on OverloadedStrings being enabled for this module --
-- the pragma is not visible here, confirm.
newDecodeState :: DecodeState
|
||||||
|
newDecodeState = DecodeState
|
||||||
|
{ isQuoted = False
|
||||||
|
, collected = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | Decoding loop run by 'decodeRawRows' via 'evalStateT'.
--
-- Lives in 'StateT' 'DecodeState' layered over a conduit that takes 'Word8'
-- input and emits @[BS.ByteString]@ values (presumably one list per CSV row).
--
-- NOTE(review): currently a stub -- @return ()@ neither consumes bytes nor
-- emits rows, and leaves the state untouched.
decodeLoop
|
||||||
|
:: Monad m
|
||||||
|
=> StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
|
||||||
|
decodeLoop = return ()
|
||||||
|
|
||||||
--jl
|
--jl
|
||||||
|
|
|
@ -31,6 +31,7 @@ import Data.CSV.Slurp
|
||||||
spec :: Spec
|
spec :: Spec
|
||||||
spec = describe "Data.CSV.Slurp" $ do
|
spec = describe "Data.CSV.Slurp" $ do
|
||||||
decodeRowsSpec
|
decodeRowsSpec
|
||||||
|
decodeRawRowsSpec
|
||||||
decodeUTF8Spec
|
decodeUTF8Spec
|
||||||
|
|
||||||
decodeRowsSpec :: Spec
|
decodeRowsSpec :: Spec
|
||||||
|
@ -60,6 +61,130 @@ decodeRowsSpec = describe "decodeRows" $ mapM_
|
||||||
invalidIn = ["\"a"]
|
invalidIn = ["\"a"]
|
||||||
validRes = [["foo", "bar"], ["baz", "quux"]]
|
validRes = [["foo", "bar"], ["baz", "quux"]]
|
||||||
|
|
||||||
|
-- | Table-driven spec for 'decodeRawRows'.
--
-- Each @(label, input, expected)@ case sends @input@ (a list of chunks)
-- through 'decodeRawRows', gathers the output with @consume@, then checks the
-- number of rows and compares the rows pairwise.
--
-- NOTE(review): every example is declared with @xit@ rather than @it@, so
-- they are presumably reported as pending instead of being run -- TODO switch
-- back to @it@ once the decoder is implemented.
decodeRawRowsSpec :: Spec
|
||||||
|
decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
|
||||||
|
( \(label, input, expected) -> context label $ do
|
||||||
|
result <- runConduit $ sourceList input .| decodeRawRows .| consume
|
||||||
|
let
|
||||||
|
expLen = length expected
|
||||||
|
resLen = length result
|
||||||
|
xit ("should have " ++ show expLen ++ " rows") $
|
||||||
|
resLen `shouldBe` expLen
|
||||||
|
-- zip3 stops at the shortest list, so missing or surplus rows are only
-- caught by the row-count example above, not by the per-row examples.
mapM_
|
||||||
|
( \(n, expected', result') -> context ("row " ++ show n) $
|
||||||
|
-- NOTE(review): the label prints result' (the actual row) rather than
-- expected', and the shouldBe operands below look swapped relative to
-- hspec's usual "actual `shouldBe` expected" order -- confirm and align
-- with the other specs in this file.
xit ("should be " ++ show result') $
|
||||||
|
expected' `shouldBe` result'
|
||||||
|
) $ zip3 [(0::Int)..] expected result
|
||||||
|
)
|
||||||
|
|
||||||
|
-- label, input, expected
|
||||||
|
[ ( "unquoted", unquotedIn, normalRes )
|
||||||
|
, ( "quoted", quotedIn, normalRes )
|
||||||
|
, ( "mixed", mixedIn, normalRes )
|
||||||
|
, ( "CR only", crOnlyIn, normalRes )
|
||||||
|
, ( "LF only", lfOnlyIn, normalRes )
|
||||||
|
, ( "has quote", quoteIn, quoteRes )
|
||||||
|
, ( "has CR", crIn, crRes )
|
||||||
|
, ( "has LF", lfIn, lfRes )
|
||||||
|
, ( "has CRLF", crlfIn, crlfRes )
|
||||||
|
, ( "odd chunk", oddChunkIn, normalRes )
|
||||||
|
, ( "no newline", noNewlineIn, normalRes )
|
||||||
|
, ( "malformed", malformedIn, malformedRes )
|
||||||
|
]
|
||||||
|
|
||||||
|
where
|
||||||
|
|
||||||
|
unquotedIn =
|
||||||
|
[ "foo,bar\r\n"
|
||||||
|
, "baz,quux\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
quotedIn =
|
||||||
|
[ "\"foo\",\"bar\"\r\n"
|
||||||
|
, "\"baz\",\"quux\"\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
mixedIn =
|
||||||
|
[ "\"foo\",bar\r\n"
|
||||||
|
, "baz,\"quux\"\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
crOnlyIn =
|
||||||
|
[ "foo,bar\r"
|
||||||
|
, "baz,quux\r"
|
||||||
|
]
|
||||||
|
|
||||||
|
lfOnlyIn =
|
||||||
|
[ "foo,bar\n"
|
||||||
|
, "baz,quux\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
quoteIn =
|
||||||
|
[ "\"a\"\"b\",bar\r\n"
|
||||||
|
, "baz,quux\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
crIn =
|
||||||
|
[ "\"a\rb\",bar\r\n"
|
||||||
|
, "baz,quux\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
lfIn =
|
||||||
|
[ "\"a\nb\",bar\r\n"
|
||||||
|
, "baz,quux\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
crlfIn =
|
||||||
|
[ "\"a\r\nb\",bar\r\n"
|
||||||
|
, "baz,quux\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
-- Row and field boundaries deliberately do not line up with chunk boundaries.
oddChunkIn =
|
||||||
|
[ "foo,"
|
||||||
|
, "bar\r\nbaz,"
|
||||||
|
, "quux\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
-- The final row has no terminating line break.
noNewlineIn =
|
||||||
|
[ "foo,bar\r\n"
|
||||||
|
, "baz,quux"
|
||||||
|
]
|
||||||
|
|
||||||
|
-- A double quote appears in the middle of an unquoted field.
malformedIn =
|
||||||
|
[ "a\"b,bar\r\n"
|
||||||
|
, "baz,quux\r\n"
|
||||||
|
]
|
||||||
|
|
||||||
|
normalRes =
|
||||||
|
[ ["foo", "bar"]
|
||||||
|
, ["baz", "quux"]
|
||||||
|
]
|
||||||
|
|
||||||
|
quoteRes =
|
||||||
|
[ ["a\"b", "bar"]
|
||||||
|
, ["baz", "quux"]
|
||||||
|
]
|
||||||
|
|
||||||
|
crRes =
|
||||||
|
[ ["a\rb", "bar"]
|
||||||
|
, ["baz", "quux"]
|
||||||
|
]
|
||||||
|
|
||||||
|
lfRes =
|
||||||
|
[ ["a\nb", "bar"]
|
||||||
|
, ["baz", "quux"]
|
||||||
|
]
|
||||||
|
|
||||||
|
crlfRes =
|
||||||
|
[ ["a\r\nb", "bar"]
|
||||||
|
, ["baz", "quux"]
|
||||||
|
]
|
||||||
|
|
||||||
|
-- The malformed first field is expected to come back as an empty field.
malformedRes =
|
||||||
|
[ ["", "bar"]
|
||||||
|
, ["baz", "quux"]
|
||||||
|
]
|
||||||
|
|
||||||
decodeUTF8Spec :: Spec
|
decodeUTF8Spec :: Spec
|
||||||
decodeUTF8Spec = describe "decodeUTF8" $ mapM_
|
decodeUTF8Spec = describe "decodeUTF8" $ mapM_
|
||||||
( \(label, input, expected) -> context label $
|
( \(label, input, expected) -> context label $
|
||||||
|
|
Loading…
Reference in New Issue
Block a user