implemented decodeRawRows
This commit is contained in:
parent
63b97649a6
commit
389c206063
|
@ -33,7 +33,9 @@ module Data.CSV.Slurp (
|
|||
) where
|
||||
|
||||
import Conduit (ConduitT, await, mapC, yield, (.|))
|
||||
import Control.Monad.Trans.State (StateT, evalStateT)
|
||||
import Control.Monad (unless)
|
||||
import Control.Monad.Trans.Class (lift)
|
||||
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
|
||||
import qualified Data.ByteString as BS
|
||||
import Data.Maybe (fromMaybe)
|
||||
import qualified Data.Text as T
|
||||
|
@ -63,20 +65,141 @@ toBytes = await >>= \case
|
|||
toBytes
|
||||
Nothing -> return ()
|
||||
|
||||
-- Internal
|
||||
|
||||
data DecodeState = DecodeState
|
||||
{ isQuoted :: Bool
|
||||
, fields :: [BS.ByteString]
|
||||
, collected :: BS.ByteString
|
||||
} deriving (Eq, Show)
|
||||
|
||||
type Decoder m = StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
|
||||
type Modifier = DecodeState -> DecodeState
|
||||
|
||||
newDecodeState :: DecodeState
|
||||
newDecodeState = DecodeState
|
||||
{ isQuoted = False
|
||||
, fields = []
|
||||
, collected = ""
|
||||
}
|
||||
|
||||
decodeLoop
|
||||
:: Monad m
|
||||
=> StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
|
||||
decodeLoop = return ()
|
||||
-- Decoders
|
||||
|
||||
decodeLoop :: Monad m => Decoder m
|
||||
decodeLoop = lift await >>= \case
|
||||
Just byte -> case byte of
|
||||
0x22 -> processQuote
|
||||
0x2c -> processComma
|
||||
0x0d -> processCR
|
||||
0x0a -> processLF
|
||||
_ -> performAction $ addByte byte
|
||||
Nothing -> cleanup
|
||||
|
||||
processQuote :: Monad m => Decoder m
|
||||
processQuote = gets isQuoted >>= \case
|
||||
True -> processQuotedQuote
|
||||
False -> processUnquotedQuote
|
||||
|
||||
processComma :: Monad m => Decoder m
|
||||
processComma = gets isQuoted >>= performAction . \case
|
||||
True -> addByte 0x2c
|
||||
False -> commitField
|
||||
|
||||
processCR :: Monad m => Decoder m
|
||||
processCR = gets isQuoted >>= \case
|
||||
True -> performAction $ addByte 0xd
|
||||
False -> endRow
|
||||
|
||||
processLF :: Monad m => Decoder m
|
||||
processLF = gets isQuoted >>= \case
|
||||
True -> performAction $ addByte 0xa
|
||||
False -> endRow
|
||||
|
||||
processQuotedQuote :: Monad m => Decoder m
|
||||
processQuotedQuote = lift await >>= \case
|
||||
Just byte -> case byte of
|
||||
0x22 -> performAction $ addByte 0x22 -- quote
|
||||
0x2c -> performAction commitField -- comma
|
||||
0x0d -> commitRow -- carriage return
|
||||
0x0a -> commitRow -- line feed
|
||||
_ -> corruptedField
|
||||
Nothing -> cleanup
|
||||
|
||||
processUnquotedQuote :: Monad m => Decoder m
|
||||
processUnquotedQuote = gets (BS.null . collected) >>= \case
|
||||
True -> performAction setQuoted
|
||||
False -> corruptedField
|
||||
|
||||
endRow :: Monad m => Decoder m
|
||||
endRow = do
|
||||
s <- get
|
||||
if null (fields s) && BS.null (collected s)
|
||||
then decodeLoop
|
||||
else commitRow
|
||||
|
||||
commitRow :: Monad m => Decoder m
|
||||
commitRow = do
|
||||
modify commitField
|
||||
gets fields >>= lift . yield
|
||||
performAction dropFields
|
||||
|
||||
corruptedField :: Monad m => Decoder m
|
||||
corruptedField = do
|
||||
modify dropField
|
||||
ignoreField
|
||||
|
||||
ignoreField :: Monad m => Decoder m
|
||||
ignoreField = lift await >>= \case
|
||||
Just byte -> case byte of
|
||||
0x2c -> performAction commitField -- comma
|
||||
0x0d -> commitRow
|
||||
_ -> ignoreField
|
||||
Nothing -> cleanup
|
||||
|
||||
cleanup :: Monad m => Decoder m
|
||||
cleanup = do
|
||||
gets isQuoted >>= \case
|
||||
True -> modify $ commitField . dropField
|
||||
False -> gets (BS.null . collected) >>= \case
|
||||
True -> return ()
|
||||
False -> modify commitField
|
||||
fs <- gets fields
|
||||
unless (null fs) $
|
||||
lift $ yield fs
|
||||
|
||||
performAction :: Monad m => Modifier -> Decoder m
|
||||
performAction f = do
|
||||
modify f
|
||||
decodeLoop
|
||||
|
||||
-- Modifiers
|
||||
|
||||
addByte :: Word8 -> Modifier
|
||||
addByte b s = let
|
||||
collected' = BS.snoc (collected s) b
|
||||
in s { collected = collected' }
|
||||
|
||||
commitField :: Modifier
|
||||
commitField s = let
|
||||
isQuoted' = False
|
||||
fields' = fields s ++ [collected s]
|
||||
collected' = ""
|
||||
in s
|
||||
{ isQuoted = isQuoted'
|
||||
, fields = fields'
|
||||
, collected = collected'
|
||||
}
|
||||
|
||||
dropFields :: Modifier
|
||||
dropFields s = s { fields = [] }
|
||||
|
||||
dropField :: Modifier
|
||||
dropField s = s
|
||||
{ isQuoted = False
|
||||
, collected = ""
|
||||
}
|
||||
|
||||
setQuoted :: Modifier
|
||||
setQuoted s = s { isQuoted = True }
|
||||
|
||||
--jl
|
||||
|
|
|
@ -25,7 +25,7 @@ module Data.CSV.SlurpSpec (spec) where
|
|||
import Conduit (runConduit, (.|))
|
||||
import Data.Char (ord)
|
||||
import Data.Conduit.List (consume, sourceList)
|
||||
import Test.Hspec (Spec, context, describe, it, shouldBe, xit)
|
||||
import Test.Hspec (Spec, context, describe, it, shouldBe)
|
||||
|
||||
import Data.CSV.Slurp
|
||||
|
||||
|
@ -43,25 +43,26 @@ decodeRowsSpec = describe "decodeRows" $ mapM_
|
|||
let
|
||||
expLen = length expected
|
||||
resLen = length result
|
||||
xit ("should have " ++ show expLen ++ " rows") $
|
||||
it ("should have " ++ show expLen ++ " rows") $
|
||||
resLen `shouldBe` expLen
|
||||
mapM_
|
||||
( \(n, expected', result') -> context ("row " ++ show n) $
|
||||
xit ("should be " ++ show expected') $
|
||||
it ("should be " ++ show expected') $
|
||||
result' `shouldBe` expected'
|
||||
) $ zip3 [(0::Int)..] expected result
|
||||
)
|
||||
|
||||
-- label, input, expected
|
||||
[ ( "valid", validIn, validRes )
|
||||
, ( "invalid", invalidIn, [] )
|
||||
, ( "empty", [], [] )
|
||||
[ ( "valid", validIn, validRes )
|
||||
, ( "invalid", invalidIn, invalidRes )
|
||||
, ( "empty", [], [] )
|
||||
]
|
||||
|
||||
where
|
||||
validIn = ["foo,bar\r\n", "baz,quuux\r\n"]
|
||||
invalidIn = ["\"a"]
|
||||
validRes = [["foo", "bar"], ["baz", "quux"]]
|
||||
validIn = ["foo,bar\r\n", "baz,quux\r\n"]
|
||||
invalidIn = ["\"a"]
|
||||
validRes = [["foo", "bar"], ["baz", "quux"]]
|
||||
invalidRes = [[""]]
|
||||
|
||||
decodeRawRowsSpec :: Spec
|
||||
decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
|
||||
|
@ -70,12 +71,12 @@ decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
|
|||
let
|
||||
expLen = length expected
|
||||
resLen = length result
|
||||
xit ("should have " ++ show expLen ++ " rows") $
|
||||
it ("should have " ++ show expLen ++ " rows") $
|
||||
resLen `shouldBe` expLen
|
||||
mapM_
|
||||
( \(n, expected', result') -> context ("row " ++ show n) $
|
||||
xit ("should be " ++ show result') $
|
||||
expected' `shouldBe` result'
|
||||
it ("should be " ++ show result') $
|
||||
result' `shouldBe` expected'
|
||||
) $ zip3 [(0::Int)..] expected result
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user