implemented decodeRawRows
This commit is contained in:
parent
63b97649a6
commit
389c206063
|
@ -33,7 +33,9 @@ module Data.CSV.Slurp (
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Conduit (ConduitT, await, mapC, yield, (.|))
|
import Conduit (ConduitT, await, mapC, yield, (.|))
|
||||||
import Control.Monad.Trans.State (StateT, evalStateT)
|
import Control.Monad (unless)
|
||||||
|
import Control.Monad.Trans.Class (lift)
|
||||||
|
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
@ -63,20 +65,141 @@ toBytes = await >>= \case
|
||||||
toBytes
|
toBytes
|
||||||
Nothing -> return ()
|
Nothing -> return ()
|
||||||
|
|
||||||
|
-- Internal
|
||||||
|
|
||||||
data DecodeState = DecodeState
|
data DecodeState = DecodeState
|
||||||
{ isQuoted :: Bool
|
{ isQuoted :: Bool
|
||||||
|
, fields :: [BS.ByteString]
|
||||||
, collected :: BS.ByteString
|
, collected :: BS.ByteString
|
||||||
} deriving (Eq, Show)
|
} deriving (Eq, Show)
|
||||||
|
|
||||||
|
type Decoder m = StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
|
||||||
|
type Modifier = DecodeState -> DecodeState
|
||||||
|
|
||||||
newDecodeState :: DecodeState
|
newDecodeState :: DecodeState
|
||||||
newDecodeState = DecodeState
|
newDecodeState = DecodeState
|
||||||
{ isQuoted = False
|
{ isQuoted = False
|
||||||
|
, fields = []
|
||||||
, collected = ""
|
, collected = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
decodeLoop
|
-- Decoders
|
||||||
:: Monad m
|
|
||||||
=> StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
|
decodeLoop :: Monad m => Decoder m
|
||||||
decodeLoop = return ()
|
decodeLoop = lift await >>= \case
|
||||||
|
Just byte -> case byte of
|
||||||
|
0x22 -> processQuote
|
||||||
|
0x2c -> processComma
|
||||||
|
0x0d -> processCR
|
||||||
|
0x0a -> processLF
|
||||||
|
_ -> performAction $ addByte byte
|
||||||
|
Nothing -> cleanup
|
||||||
|
|
||||||
|
processQuote :: Monad m => Decoder m
|
||||||
|
processQuote = gets isQuoted >>= \case
|
||||||
|
True -> processQuotedQuote
|
||||||
|
False -> processUnquotedQuote
|
||||||
|
|
||||||
|
processComma :: Monad m => Decoder m
|
||||||
|
processComma = gets isQuoted >>= performAction . \case
|
||||||
|
True -> addByte 0x2c
|
||||||
|
False -> commitField
|
||||||
|
|
||||||
|
processCR :: Monad m => Decoder m
|
||||||
|
processCR = gets isQuoted >>= \case
|
||||||
|
True -> performAction $ addByte 0xd
|
||||||
|
False -> endRow
|
||||||
|
|
||||||
|
processLF :: Monad m => Decoder m
|
||||||
|
processLF = gets isQuoted >>= \case
|
||||||
|
True -> performAction $ addByte 0xa
|
||||||
|
False -> endRow
|
||||||
|
|
||||||
|
processQuotedQuote :: Monad m => Decoder m
|
||||||
|
processQuotedQuote = lift await >>= \case
|
||||||
|
Just byte -> case byte of
|
||||||
|
0x22 -> performAction $ addByte 0x22 -- quote
|
||||||
|
0x2c -> performAction commitField -- comma
|
||||||
|
0x0d -> commitRow -- carriage return
|
||||||
|
0x0a -> commitRow -- line feed
|
||||||
|
_ -> corruptedField
|
||||||
|
Nothing -> cleanup
|
||||||
|
|
||||||
|
processUnquotedQuote :: Monad m => Decoder m
|
||||||
|
processUnquotedQuote = gets (BS.null . collected) >>= \case
|
||||||
|
True -> performAction setQuoted
|
||||||
|
False -> corruptedField
|
||||||
|
|
||||||
|
endRow :: Monad m => Decoder m
|
||||||
|
endRow = do
|
||||||
|
s <- get
|
||||||
|
if null (fields s) && BS.null (collected s)
|
||||||
|
then decodeLoop
|
||||||
|
else commitRow
|
||||||
|
|
||||||
|
commitRow :: Monad m => Decoder m
|
||||||
|
commitRow = do
|
||||||
|
modify commitField
|
||||||
|
gets fields >>= lift . yield
|
||||||
|
performAction dropFields
|
||||||
|
|
||||||
|
corruptedField :: Monad m => Decoder m
|
||||||
|
corruptedField = do
|
||||||
|
modify dropField
|
||||||
|
ignoreField
|
||||||
|
|
||||||
|
ignoreField :: Monad m => Decoder m
|
||||||
|
ignoreField = lift await >>= \case
|
||||||
|
Just byte -> case byte of
|
||||||
|
0x2c -> performAction commitField -- comma
|
||||||
|
0x0d -> commitRow
|
||||||
|
_ -> ignoreField
|
||||||
|
Nothing -> cleanup
|
||||||
|
|
||||||
|
cleanup :: Monad m => Decoder m
|
||||||
|
cleanup = do
|
||||||
|
gets isQuoted >>= \case
|
||||||
|
True -> modify $ commitField . dropField
|
||||||
|
False -> gets (BS.null . collected) >>= \case
|
||||||
|
True -> return ()
|
||||||
|
False -> modify commitField
|
||||||
|
fs <- gets fields
|
||||||
|
unless (null fs) $
|
||||||
|
lift $ yield fs
|
||||||
|
|
||||||
|
performAction :: Monad m => Modifier -> Decoder m
|
||||||
|
performAction f = do
|
||||||
|
modify f
|
||||||
|
decodeLoop
|
||||||
|
|
||||||
|
-- Modifiers
|
||||||
|
|
||||||
|
addByte :: Word8 -> Modifier
|
||||||
|
addByte b s = let
|
||||||
|
collected' = BS.snoc (collected s) b
|
||||||
|
in s { collected = collected' }
|
||||||
|
|
||||||
|
commitField :: Modifier
|
||||||
|
commitField s = let
|
||||||
|
isQuoted' = False
|
||||||
|
fields' = fields s ++ [collected s]
|
||||||
|
collected' = ""
|
||||||
|
in s
|
||||||
|
{ isQuoted = isQuoted'
|
||||||
|
, fields = fields'
|
||||||
|
, collected = collected'
|
||||||
|
}
|
||||||
|
|
||||||
|
dropFields :: Modifier
|
||||||
|
dropFields s = s { fields = [] }
|
||||||
|
|
||||||
|
dropField :: Modifier
|
||||||
|
dropField s = s
|
||||||
|
{ isQuoted = False
|
||||||
|
, collected = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
setQuoted :: Modifier
|
||||||
|
setQuoted s = s { isQuoted = True }
|
||||||
|
|
||||||
--jl
|
--jl
|
||||||
|
|
|
@ -25,7 +25,7 @@ module Data.CSV.SlurpSpec (spec) where
|
||||||
import Conduit (runConduit, (.|))
|
import Conduit (runConduit, (.|))
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
import Data.Conduit.List (consume, sourceList)
|
import Data.Conduit.List (consume, sourceList)
|
||||||
import Test.Hspec (Spec, context, describe, it, shouldBe, xit)
|
import Test.Hspec (Spec, context, describe, it, shouldBe)
|
||||||
|
|
||||||
import Data.CSV.Slurp
|
import Data.CSV.Slurp
|
||||||
|
|
||||||
|
@ -43,25 +43,26 @@ decodeRowsSpec = describe "decodeRows" $ mapM_
|
||||||
let
|
let
|
||||||
expLen = length expected
|
expLen = length expected
|
||||||
resLen = length result
|
resLen = length result
|
||||||
xit ("should have " ++ show expLen ++ " rows") $
|
it ("should have " ++ show expLen ++ " rows") $
|
||||||
resLen `shouldBe` expLen
|
resLen `shouldBe` expLen
|
||||||
mapM_
|
mapM_
|
||||||
( \(n, expected', result') -> context ("row " ++ show n) $
|
( \(n, expected', result') -> context ("row " ++ show n) $
|
||||||
xit ("should be " ++ show expected') $
|
it ("should be " ++ show expected') $
|
||||||
result' `shouldBe` expected'
|
result' `shouldBe` expected'
|
||||||
) $ zip3 [(0::Int)..] expected result
|
) $ zip3 [(0::Int)..] expected result
|
||||||
)
|
)
|
||||||
|
|
||||||
-- label, input, expected
|
-- label, input, expected
|
||||||
[ ( "valid", validIn, validRes )
|
[ ( "valid", validIn, validRes )
|
||||||
, ( "invalid", invalidIn, [] )
|
, ( "invalid", invalidIn, invalidRes )
|
||||||
, ( "empty", [], [] )
|
, ( "empty", [], [] )
|
||||||
]
|
]
|
||||||
|
|
||||||
where
|
where
|
||||||
validIn = ["foo,bar\r\n", "baz,quuux\r\n"]
|
validIn = ["foo,bar\r\n", "baz,quux\r\n"]
|
||||||
invalidIn = ["\"a"]
|
invalidIn = ["\"a"]
|
||||||
validRes = [["foo", "bar"], ["baz", "quux"]]
|
validRes = [["foo", "bar"], ["baz", "quux"]]
|
||||||
|
invalidRes = [[""]]
|
||||||
|
|
||||||
decodeRawRowsSpec :: Spec
|
decodeRawRowsSpec :: Spec
|
||||||
decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
|
decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
|
||||||
|
@ -70,12 +71,12 @@ decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
|
||||||
let
|
let
|
||||||
expLen = length expected
|
expLen = length expected
|
||||||
resLen = length result
|
resLen = length result
|
||||||
xit ("should have " ++ show expLen ++ " rows") $
|
it ("should have " ++ show expLen ++ " rows") $
|
||||||
resLen `shouldBe` expLen
|
resLen `shouldBe` expLen
|
||||||
mapM_
|
mapM_
|
||||||
( \(n, expected', result') -> context ("row " ++ show n) $
|
( \(n, expected', result') -> context ("row " ++ show n) $
|
||||||
xit ("should be " ++ show result') $
|
it ("should be " ++ show result') $
|
||||||
expected' `shouldBe` result'
|
result' `shouldBe` expected'
|
||||||
) $ zip3 [(0::Int)..] expected result
|
) $ zip3 [(0::Int)..] expected result
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user