implemented decodeRawRows

This commit is contained in:
Jonathan Lamothe 2022-04-20 21:21:51 -04:00
parent 63b97649a6
commit 389c206063
2 changed files with 141 additions and 17 deletions

View File

@ -33,7 +33,9 @@ module Data.CSV.Slurp (
) where
import Conduit (ConduitT, await, mapC, yield, (.|))
import Control.Monad.Trans.State (StateT, evalStateT)
import Control.Monad (unless)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
import qualified Data.ByteString as BS
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
@ -63,20 +65,141 @@ toBytes = await >>= \case
toBytes
Nothing -> return ()
-- Internal
data DecodeState = DecodeState
{ isQuoted :: Bool
, fields :: [BS.ByteString]
, collected :: BS.ByteString
} deriving (Eq, Show)
type Decoder m = StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
type Modifier = DecodeState -> DecodeState
newDecodeState :: DecodeState
newDecodeState = DecodeState
{ isQuoted = False
, fields = []
, collected = ""
}
decodeLoop
:: Monad m
=> StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
decodeLoop = return ()
-- Decoders
decodeLoop :: Monad m => Decoder m
decodeLoop = lift await >>= \case
Just byte -> case byte of
0x22 -> processQuote
0x2c -> processComma
0x0d -> processCR
0x0a -> processLF
_ -> performAction $ addByte byte
Nothing -> cleanup
processQuote :: Monad m => Decoder m
processQuote = gets isQuoted >>= \case
True -> processQuotedQuote
False -> processUnquotedQuote
processComma :: Monad m => Decoder m
processComma = gets isQuoted >>= performAction . \case
True -> addByte 0x2c
False -> commitField
processCR :: Monad m => Decoder m
processCR = gets isQuoted >>= \case
True -> performAction $ addByte 0xd
False -> endRow
processLF :: Monad m => Decoder m
processLF = gets isQuoted >>= \case
True -> performAction $ addByte 0xa
False -> endRow
processQuotedQuote :: Monad m => Decoder m
processQuotedQuote = lift await >>= \case
Just byte -> case byte of
0x22 -> performAction $ addByte 0x22 -- quote
0x2c -> performAction commitField -- comma
0x0d -> commitRow -- carriage return
0x0a -> commitRow -- line feed
_ -> corruptedField
Nothing -> cleanup
processUnquotedQuote :: Monad m => Decoder m
processUnquotedQuote = gets (BS.null . collected) >>= \case
True -> performAction setQuoted
False -> corruptedField
endRow :: Monad m => Decoder m
endRow = do
s <- get
if null (fields s) && BS.null (collected s)
then decodeLoop
else commitRow
commitRow :: Monad m => Decoder m
commitRow = do
modify commitField
gets fields >>= lift . yield
performAction dropFields
corruptedField :: Monad m => Decoder m
corruptedField = do
modify dropField
ignoreField
ignoreField :: Monad m => Decoder m
ignoreField = lift await >>= \case
Just byte -> case byte of
0x2c -> performAction commitField -- comma
0x0d -> commitRow
_ -> ignoreField
Nothing -> cleanup
cleanup :: Monad m => Decoder m
cleanup = do
gets isQuoted >>= \case
True -> modify $ commitField . dropField
False -> gets (BS.null . collected) >>= \case
True -> return ()
False -> modify commitField
fs <- gets fields
unless (null fs) $
lift $ yield fs
performAction :: Monad m => Modifier -> Decoder m
performAction f = do
modify f
decodeLoop
-- Modifiers
addByte :: Word8 -> Modifier
addByte b s = let
collected' = BS.snoc (collected s) b
in s { collected = collected' }
commitField :: Modifier
commitField s = let
isQuoted' = False
fields' = fields s ++ [collected s]
collected' = ""
in s
{ isQuoted = isQuoted'
, fields = fields'
, collected = collected'
}
dropFields :: Modifier
dropFields s = s { fields = [] }
dropField :: Modifier
dropField s = s
{ isQuoted = False
, collected = ""
}
setQuoted :: Modifier
setQuoted s = s { isQuoted = True }
--jl

View File

@ -25,7 +25,7 @@ module Data.CSV.SlurpSpec (spec) where
import Conduit (runConduit, (.|))
import Data.Char (ord)
import Data.Conduit.List (consume, sourceList)
import Test.Hspec (Spec, context, describe, it, shouldBe, xit)
import Test.Hspec (Spec, context, describe, it, shouldBe)
import Data.CSV.Slurp
@ -43,25 +43,26 @@ decodeRowsSpec = describe "decodeRows" $ mapM_
let
expLen = length expected
resLen = length result
xit ("should have " ++ show expLen ++ " rows") $
it ("should have " ++ show expLen ++ " rows") $
resLen `shouldBe` expLen
mapM_
( \(n, expected', result') -> context ("row " ++ show n) $
xit ("should be " ++ show expected') $
it ("should be " ++ show expected') $
result' `shouldBe` expected'
) $ zip3 [(0::Int)..] expected result
)
-- label, input, expected
[ ( "valid", validIn, validRes )
, ( "invalid", invalidIn, [] )
, ( "empty", [], [] )
[ ( "valid", validIn, validRes )
, ( "invalid", invalidIn, invalidRes )
, ( "empty", [], [] )
]
where
validIn = ["foo,bar\r\n", "baz,quuux\r\n"]
invalidIn = ["\"a"]
validRes = [["foo", "bar"], ["baz", "quux"]]
validIn = ["foo,bar\r\n", "baz,quux\r\n"]
invalidIn = ["\"a"]
validRes = [["foo", "bar"], ["baz", "quux"]]
invalidRes = [[""]]
decodeRawRowsSpec :: Spec
decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
@ -70,12 +71,12 @@ decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
let
expLen = length expected
resLen = length result
xit ("should have " ++ show expLen ++ " rows") $
it ("should have " ++ show expLen ++ " rows") $
resLen `shouldBe` expLen
mapM_
( \(n, expected', result') -> context ("row " ++ show n) $
xit ("should be " ++ show result') $
expected' `shouldBe` result'
it ("should be " ++ show result') $
result' `shouldBe` expected'
) $ zip3 [(0::Int)..] expected result
)