From 76cea1e0510cef41b8634b8f69e3b5509733b81a Mon Sep 17 00:00:00 2001 From: Jonathan Lamothe Date: Thu, 21 Apr 2022 15:39:07 -0400 Subject: [PATCH] implemented labelFields --- csv-sip.cabal | 2 ++ package.yaml | 1 + src/Data/CSV/Sip.hs | 17 ++++++++++++++ test/Data/CSV/SipSpec.hs | 51 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) diff --git a/csv-sip.cabal b/csv-sip.cabal index 24f8dcf..17d2832 100644 --- a/csv-sip.cabal +++ b/csv-sip.cabal @@ -31,6 +31,7 @@ library base >=4.7 && <5 , bytestring , conduit >=1.3.4.2 && <1.4 + , containers , text , transformers default-language: Haskell2010 @@ -49,6 +50,7 @@ test-suite csv-sip-test base >=4.7 && <5 , bytestring , conduit >=1.3.4.2 && <1.4 + , containers , csv-sip , hspec >=2.8.5 && <2.9 , text diff --git a/package.yaml b/package.yaml index 1629bdf..8ec1ac4 100644 --- a/package.yaml +++ b/package.yaml @@ -25,6 +25,7 @@ dependencies: - base >= 4.7 && < 5 - bytestring - conduit >= 1.3.4.2 && < 1.4 +- containers - text - transformers diff --git a/src/Data/CSV/Sip.hs b/src/Data/CSV/Sip.hs index 8b6d7ed..976ff4f 100644 --- a/src/Data/CSV/Sip.hs +++ b/src/Data/CSV/Sip.hs @@ -26,6 +26,7 @@ along with this program. If not, see . {-# LANGUAGE LambdaCase, OverloadedStrings #-} module Data.CSV.Sip ( + labelFields, decodeRows, decodeRawRows, decodeUTF8, @@ -37,11 +38,18 @@ import Control.Monad (unless) import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify) import qualified Data.ByteString as BS +import qualified Data.Map as M import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text.Encoding (decodeUtf8') import Data.Word (Word8) +-- | read a CSV stream, using the first row as a header containing field labels +labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m () +labelFields = await >>= \case + Just headers -> labelLoop headers + Nothing -> return () + -- | decode the rows from a stream of ByteStrings decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m () decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8) @@ -83,6 +91,15 @@ newDecodeState = DecodeState , collected = "" } +-- Conduits + +labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m () +labelLoop headers = await >>= \case + Just values -> do + yield $ M.fromList $ zip headers values + labelLoop headers + Nothing -> return () + -- Decoders decodeLoop :: Monad m => Decoder m diff --git a/test/Data/CSV/SipSpec.hs b/test/Data/CSV/SipSpec.hs index a907cb1..0268f90 100644 --- a/test/Data/CSV/SipSpec.hs +++ b/test/Data/CSV/SipSpec.hs @@ -25,17 +25,68 @@ module Data.CSV.SipSpec (spec) where import Conduit (runConduit, (.|)) import Data.Char (ord) import Data.Conduit.List (consume, sourceList) +import qualified Data.Map as M import Test.Hspec (Spec, context, describe, it, shouldBe) import Data.CSV.Sip spec :: Spec spec = describe "Data.CSV.Sip" $ do + labelFieldsSpec decodeRowsSpec decodeRawRowsSpec decodeUTF8Spec toBytesSpec +labelFieldsSpec :: Spec +labelFieldsSpec = describe "labelFields" $ mapM_ + ( \(label, input, expected) -> context label $ do + result <- runConduit $ sourceList input .| labelFields .| consume + let + expLen = length expected + resLen = length result + it ("should have " ++ show expLen ++ " rows") $ + resLen `shouldBe` expLen + mapM_ + ( \(n, result', expected') -> context ("row " ++ show n) $ + it ("should be " ++ show expected') $ + result' `shouldBe` expected' + ) $ zip3 [(0::Int)..] result expected + ) + + -- label, input, expected + [ ( "empty", [], [] ) + , ( "no body", [headers], [] ) + , ( "with body", withBodyIn, withBodyRes ) + , ( "mixed cols", mixedColsIn, mixedColsRes ) + ] + + where + headers = ["foo", "bar", "baz"] :: [String] + + withBodyIn = headers : + [ ["a", "b", "c"] + , ["d", "e", "f"] + ] :: [[String]] + + mixedColsIn = + [ ["foo", "bar"] + , ["a"] + , ["b", "c"] + , ["d", "e", "f"] + ] :: [[String]] + + withBodyRes = map M.fromList + [ [("foo", "a"), ("bar", "b"), ("baz", "c")] + , [("foo", "d"), ("bar", "e"), ("baz", "f")] + ] :: [M.Map String String] + + mixedColsRes = map M.fromList + [ [("foo", "a")] + , [("foo", "b"), ("bar", "c")] + , [("foo", "d"), ("bar", "e")] + ] :: [M.Map String String] + decodeRowsSpec :: Spec decodeRowsSpec = describe "decodeRows" $ mapM_ ( \(label, input, expected) -> context label $ do