implemented labelFields

This commit is contained in:
Jonathan Lamothe 2022-04-21 15:39:07 -04:00
parent f27e190be6
commit 76cea1e051
4 changed files with 71 additions and 0 deletions

View File

@ -31,6 +31,7 @@ library
base >=4.7 && <5 base >=4.7 && <5
, bytestring , bytestring
, conduit >=1.3.4.2 && <1.4 , conduit >=1.3.4.2 && <1.4
, containers
, text , text
, transformers , transformers
default-language: Haskell2010 default-language: Haskell2010
@ -49,6 +50,7 @@ test-suite csv-sip-test
base >=4.7 && <5 base >=4.7 && <5
, bytestring , bytestring
, conduit >=1.3.4.2 && <1.4 , conduit >=1.3.4.2 && <1.4
, containers
, csv-sip , csv-sip
, hspec >=2.8.5 && <2.9 , hspec >=2.8.5 && <2.9
, text , text

View File

@ -25,6 +25,7 @@ dependencies:
- base >= 4.7 && < 5 - base >= 4.7 && < 5
- bytestring - bytestring
- conduit >= 1.3.4.2 && < 1.4 - conduit >= 1.3.4.2 && < 1.4
- containers
- text - text
- transformers - transformers

View File

@ -26,6 +26,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
{-# LANGUAGE LambdaCase, OverloadedStrings #-} {-# LANGUAGE LambdaCase, OverloadedStrings #-}
module Data.CSV.Sip ( module Data.CSV.Sip (
labelFields,
decodeRows, decodeRows,
decodeRawRows, decodeRawRows,
decodeUTF8, decodeUTF8,
@ -37,11 +38,18 @@ import Control.Monad (unless)
import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify) import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import qualified Data.Map as M
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8') import Data.Text.Encoding (decodeUtf8')
import Data.Word (Word8) import Data.Word (Word8)
-- | read a CSV stream, using the first row as a header containing field labels
labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m ()
labelFields = await >>= \case
Just headers -> labelLoop headers
Nothing -> return ()
-- | decode the rows from a stream of ByteStrings -- | decode the rows from a stream of ByteStrings
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m () decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8) decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8)
@ -83,6 +91,15 @@ newDecodeState = DecodeState
, collected = "" , collected = ""
} }
-- Conduits
labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m ()
labelLoop headers = await >>= \case
Just values -> do
yield $ M.fromList $ zip headers values
labelLoop headers
Nothing -> return ()
-- Decoders -- Decoders
decodeLoop :: Monad m => Decoder m decodeLoop :: Monad m => Decoder m

View File

@ -25,17 +25,68 @@ module Data.CSV.SipSpec (spec) where
import Conduit (runConduit, (.|)) import Conduit (runConduit, (.|))
import Data.Char (ord) import Data.Char (ord)
import Data.Conduit.List (consume, sourceList) import Data.Conduit.List (consume, sourceList)
import qualified Data.Map as M
import Test.Hspec (Spec, context, describe, it, shouldBe) import Test.Hspec (Spec, context, describe, it, shouldBe)
import Data.CSV.Sip import Data.CSV.Sip
spec :: Spec spec :: Spec
spec = describe "Data.CSV.Sip" $ do spec = describe "Data.CSV.Sip" $ do
labelFieldsSpec
decodeRowsSpec decodeRowsSpec
decodeRawRowsSpec decodeRawRowsSpec
decodeUTF8Spec decodeUTF8Spec
toBytesSpec toBytesSpec
labelFieldsSpec :: Spec
labelFieldsSpec = describe "labelFields" $ mapM_
( \(label, input, expected) -> context label $ do
result <- runConduit $ sourceList input .| labelFields .| consume
let
expLen = length expected
resLen = length result
it ("should have " ++ show expLen ++ " rows") $
resLen `shouldBe` expLen
mapM_
( \(n, result', expected') -> context ("row " ++ show n) $
it ("should be " ++ show expected') $
result' `shouldBe` expected'
) $ zip3 [(0::Int)..] result expected
)
-- label, input, expected
[ ( "empty", [], [] )
, ( "no body", [headers], [] )
, ( "with body", withBodyIn, withBodyRes )
, ( "mixed cols", mixedColsIn, mixedColsRes )
]
where
headers = ["foo", "bar", "baz"] :: [String]
withBodyIn = headers :
[ ["a", "b", "c"]
, ["d", "e", "f"]
] :: [[String]]
mixedColsIn =
[ ["foo", "bar"]
, ["a"]
, ["b", "c"]
, ["d", "e", "f"]
] :: [[String]]
withBodyRes = map M.fromList
[ [("foo", "a"), ("bar", "b"), ("baz", "c")]
, [("foo", "d"), ("bar", "e"), ("baz", "f")]
] :: [M.Map String String]
mixedColsRes = map M.fromList
[ [("foo", "a")]
, [("foo", "b"), ("bar", "c")]
, [("foo", "d"), ("bar", "e")]
] :: [M.Map String String]
decodeRowsSpec :: Spec decodeRowsSpec :: Spec
decodeRowsSpec = describe "decodeRows" $ mapM_ decodeRowsSpec = describe "decodeRows" $ mapM_
( \(label, input, expected) -> context label $ do ( \(label, input, expected) -> context label $ do