implemented encodeRawRows

This commit is contained in:
Jonathan Lamothe 2022-04-24 14:23:49 -04:00
parent 724dfe0345
commit 5d6a7db6c5
2 changed files with 59 additions and 0 deletions

View File

@ -32,6 +32,9 @@ module Data.CSV.Sip (
slurpLabelledCSV,
slurpRawLabelledCSV,
-- * Conduits
-- ** Encoding
encodeRawRows,
-- ** Decoding
labelFields,
decodeRows,
decodeRawRows,
@ -94,6 +97,17 @@ slurpRawLabelledCSV
slurpRawLabelledCSV file = runConduit $
sourceFile file .| decodeRawRows .| labelFields .|consume
-- | encode raw CSV stream row by row, each element in the list read
-- represents a field, with the entire list representing a row
encodeRawRows :: Monad m => ConduitT [BS.ByteString] BS.ByteString m ()
encodeRawRows = await >>= \case
Just fs-> do
encodeFields fs
encodeRawRows
Nothing -> return ()
-- | read a CSV stream, using the first row as a header containing
-- field labels
labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m ()
@ -144,6 +158,16 @@ newDecodeState = DecodeState
-- Conduits
encodeFields
:: Monad m
=> [BS.ByteString]
-> ConduitT [BS.ByteString] BS.ByteString m ()
encodeFields [] = yield "\r\n"
encodeFields [f] = yield $ escapeField f `BS.append` "\r\n"
encodeFields (f:fs) = do
yield $ escapeField f `BS.append` ","
encodeFields fs
labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m ()
labelLoop headers = await >>= \case
Just values -> do
@ -270,4 +294,19 @@ dropField s = s
setQuoted :: Modifier
setQuoted s = s { isQuoted = True }
-- Helpers
escapeField :: BS.ByteString -> BS.ByteString
escapeField field = let
bytes = BS.unpack field
in BS.concat
[ "\""
, BS.pack $ escapeLoop bytes
, "\""
]
escapeLoop :: [Word8] -> [Word8]
escapeLoop [] = []
escapeLoop (0x22:bs) = [0x22, 0x22] ++ escapeLoop bs -- escape quote
escapeLoop (b:bs) = b : escapeLoop bs
--jl

View File

@ -23,6 +23,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
module Data.CSV.SipSpec (spec) where
import Conduit (runConduit, (.|))
import qualified Data.ByteString as BS
import Data.Char (ord)
import Data.Conduit.List (consume, sourceList)
import qualified Data.Map as M
@ -32,12 +33,31 @@ import Data.CSV.Sip
spec :: Spec
spec = describe "Data.CSV.Sip" $ do
encodeRawRowsSpec
labelFieldsSpec
decodeRowsSpec
decodeRawRowsSpec
decodeUTF8Spec
toBytesSpec
encodeRawRowsSpec :: Spec
encodeRawRowsSpec = describe "encodeRawRows" $ do
result <- BS.concat <$> runConduit
(sourceList input .| encodeRawRows .| consume)
it ("should be " ++ show expected) $
result `shouldBe` expected
where
input =
[ [ "foo", "a\"b" ]
, [ "a\rb", "a\nb" ]
]
expected = BS.concat
[ "\"foo\",\"a\"\"b\"\r\n"
, "\"a\rb\",\"a\nb\"\r\n"
]
labelFieldsSpec :: Spec
labelFieldsSpec = describe "labelFields" $ mapM_
( \(label, input, expected) -> context label $ do