implemented encodeRawRows

This commit is contained in:
Jonathan Lamothe 2022-04-24 14:23:49 -04:00
parent 724dfe0345
commit 5d6a7db6c5
2 changed files with 59 additions and 0 deletions

View File

@ -32,6 +32,9 @@ module Data.CSV.Sip (
slurpLabelledCSV, slurpLabelledCSV,
slurpRawLabelledCSV, slurpRawLabelledCSV,
-- * Conduits -- * Conduits
-- ** Encoding
encodeRawRows,
-- ** Decoding
labelFields, labelFields,
decodeRows, decodeRows,
decodeRawRows, decodeRawRows,
@ -94,6 +97,17 @@ slurpRawLabelledCSV
slurpRawLabelledCSV file = runConduit $ slurpRawLabelledCSV file = runConduit $
sourceFile file .| decodeRawRows .| labelFields .|consume sourceFile file .| decodeRawRows .| labelFields .|consume
-- | encode raw CSV stream row by row, each element in the list read
-- represents a field, with the entire list representing a row
encodeRawRows :: Monad m => ConduitT [BS.ByteString] BS.ByteString m ()
encodeRawRows = await >>= \case
Just fs-> do
encodeFields fs
encodeRawRows
Nothing -> return ()
-- | read a CSV stream, using the first row as a header containing -- | read a CSV stream, using the first row as a header containing
-- field labels -- field labels
labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m () labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m ()
@ -144,6 +158,16 @@ newDecodeState = DecodeState
-- Conduits -- Conduits
encodeFields
:: Monad m
=> [BS.ByteString]
-> ConduitT [BS.ByteString] BS.ByteString m ()
encodeFields [] = yield "\r\n"
encodeFields [f] = yield $ escapeField f `BS.append` "\r\n"
encodeFields (f:fs) = do
yield $ escapeField f `BS.append` ","
encodeFields fs
labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m () labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m ()
labelLoop headers = await >>= \case labelLoop headers = await >>= \case
Just values -> do Just values -> do
@ -270,4 +294,19 @@ dropField s = s
setQuoted :: Modifier setQuoted :: Modifier
setQuoted s = s { isQuoted = True } setQuoted s = s { isQuoted = True }
-- Helpers
escapeField :: BS.ByteString -> BS.ByteString
escapeField field = let
bytes = BS.unpack field
in BS.concat
[ "\""
, BS.pack $ escapeLoop bytes
, "\""
]
escapeLoop :: [Word8] -> [Word8]
escapeLoop [] = []
escapeLoop (0x22:bs) = [0x22, 0x22] ++ escapeLoop bs -- escape quote
escapeLoop (b:bs) = b : escapeLoop bs
--jl --jl

View File

@ -23,6 +23,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
module Data.CSV.SipSpec (spec) where module Data.CSV.SipSpec (spec) where
import Conduit (runConduit, (.|)) import Conduit (runConduit, (.|))
import qualified Data.ByteString as BS
import Data.Char (ord) import Data.Char (ord)
import Data.Conduit.List (consume, sourceList) import Data.Conduit.List (consume, sourceList)
import qualified Data.Map as M import qualified Data.Map as M
@ -32,12 +33,31 @@ import Data.CSV.Sip
spec :: Spec spec :: Spec
spec = describe "Data.CSV.Sip" $ do spec = describe "Data.CSV.Sip" $ do
encodeRawRowsSpec
labelFieldsSpec labelFieldsSpec
decodeRowsSpec decodeRowsSpec
decodeRawRowsSpec decodeRawRowsSpec
decodeUTF8Spec decodeUTF8Spec
toBytesSpec toBytesSpec
encodeRawRowsSpec :: Spec
encodeRawRowsSpec = describe "encodeRawRows" $ do
result <- BS.concat <$> runConduit
(sourceList input .| encodeRawRows .| consume)
it ("should be " ++ show expected) $
result `shouldBe` expected
where
input =
[ [ "foo", "a\"b" ]
, [ "a\rb", "a\nb" ]
]
expected = BS.concat
[ "\"foo\",\"a\"\"b\"\r\n"
, "\"a\rb\",\"a\nb\"\r\n"
]
labelFieldsSpec :: Spec labelFieldsSpec :: Spec
labelFieldsSpec = describe "labelFields" $ mapM_ labelFieldsSpec = describe "labelFields" $ mapM_
( \(label, input, expected) -> context label $ do ( \(label, input, expected) -> context label $ do