From 5d6a7db6c5f5bee6fef8c34e38b54996d79a72f5 Mon Sep 17 00:00:00 2001 From: Jonathan Lamothe Date: Sun, 24 Apr 2022 14:23:49 -0400 Subject: [PATCH] implemented encodeRawRows --- src/Data/CSV/Sip.hs | 39 +++++++++++++++++++++++++++++++++++++++ test/Data/CSV/SipSpec.hs | 20 ++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/Data/CSV/Sip.hs b/src/Data/CSV/Sip.hs index 7a0895b..c940cd6 100644 --- a/src/Data/CSV/Sip.hs +++ b/src/Data/CSV/Sip.hs @@ -32,6 +32,9 @@ module Data.CSV.Sip ( slurpLabelledCSV, slurpRawLabelledCSV, -- * Conduits + -- ** Encoding + encodeRawRows, + -- ** Decoding labelFields, decodeRows, decodeRawRows, @@ -94,6 +97,17 @@ slurpRawLabelledCSV slurpRawLabelledCSV file = runConduit $ sourceFile file .| decodeRawRows .| labelFields .|consume +-- | encode raw CSV stream row by row, each element in the list read +-- represents a field, with the entire list representing a row +encodeRawRows :: Monad m => ConduitT [BS.ByteString] BS.ByteString m () +encodeRawRows = await >>= \case + + Just fs-> do + encodeFields fs + encodeRawRows + + Nothing -> return () + -- | read a CSV stream, using the first row as a header containing -- field labels labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m () @@ -144,6 +158,16 @@ newDecodeState = DecodeState -- Conduits +encodeFields + :: Monad m + => [BS.ByteString] + -> ConduitT [BS.ByteString] BS.ByteString m () +encodeFields [] = yield "\r\n" +encodeFields [f] = yield $ escapeField f `BS.append` "\r\n" +encodeFields (f:fs) = do + yield $ escapeField f `BS.append` "," + encodeFields fs + labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m () labelLoop headers = await >>= \case Just values -> do @@ -270,4 +294,19 @@ dropField s = s setQuoted :: Modifier setQuoted s = s { isQuoted = True } +-- Helpers +escapeField :: BS.ByteString -> BS.ByteString +escapeField field = let + bytes = BS.unpack field + in BS.concat + [ "\"" + , BS.pack $ escapeLoop bytes + , "\"" + ] + +escapeLoop :: [Word8] -> [Word8] +escapeLoop [] = [] +escapeLoop (0x22:bs) = [0x22, 0x22] ++ escapeLoop bs -- escape quote +escapeLoop (b:bs) = b : escapeLoop bs + --jl diff --git a/test/Data/CSV/SipSpec.hs b/test/Data/CSV/SipSpec.hs index 0268f90..71c749f 100644 --- a/test/Data/CSV/SipSpec.hs +++ b/test/Data/CSV/SipSpec.hs @@ -23,6 +23,7 @@ along with this program. If not, see . module Data.CSV.SipSpec (spec) where import Conduit (runConduit, (.|)) +import qualified Data.ByteString as BS import Data.Char (ord) import Data.Conduit.List (consume, sourceList) import qualified Data.Map as M @@ -32,12 +33,31 @@ import Data.CSV.Sip spec :: Spec spec = describe "Data.CSV.Sip" $ do + encodeRawRowsSpec labelFieldsSpec decodeRowsSpec decodeRawRowsSpec decodeUTF8Spec toBytesSpec +encodeRawRowsSpec :: Spec +encodeRawRowsSpec = describe "encodeRawRows" $ do + result <- BS.concat <$> runConduit + (sourceList input .| encodeRawRows .| consume) + it ("should be " ++ show expected) $ + result `shouldBe` expected + where + + input = + [ [ "foo", "a\"b" ] + , [ "a\rb", "a\nb" ] + ] + + expected = BS.concat + [ "\"foo\",\"a\"\"b\"\r\n" + , "\"a\rb\",\"a\nb\"\r\n" + ] + labelFieldsSpec :: Spec labelFieldsSpec = describe "labelFields" $ mapM_ ( \(label, input, expected) -> context label $ do