implemented labelFields

This commit is contained in:
Jonathan Lamothe 2022-04-21 15:39:07 -04:00
parent f27e190be6
commit 76cea1e051
4 changed files with 71 additions and 0 deletions

View File

@ -31,6 +31,7 @@ library
base >=4.7 && <5
, bytestring
, conduit >=1.3.4.2 && <1.4
, containers
, text
, transformers
default-language: Haskell2010
@ -49,6 +50,7 @@ test-suite csv-sip-test
base >=4.7 && <5
, bytestring
, conduit >=1.3.4.2 && <1.4
, containers
, csv-sip
, hspec >=2.8.5 && <2.9
, text

View File

@ -25,6 +25,7 @@ dependencies:
- base >= 4.7 && < 5
- bytestring
- conduit >= 1.3.4.2 && < 1.4
- containers
- text
- transformers

View File

@ -26,6 +26,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
{-# LANGUAGE LambdaCase, OverloadedStrings #-}
module Data.CSV.Sip (
labelFields,
decodeRows,
decodeRawRows,
decodeUTF8,
@ -37,11 +38,18 @@ import Control.Monad (unless)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
import qualified Data.ByteString as BS
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8')
import Data.Word (Word8)
-- | read a CSV stream, using the first row as a header containing field labels
labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m ()
labelFields = await >>= \case
Just headers -> labelLoop headers
Nothing -> return ()
-- | decode the rows from a stream of ByteStrings
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8)
@ -83,6 +91,15 @@ newDecodeState = DecodeState
, collected = ""
}
-- Conduits
labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m ()
labelLoop headers = await >>= \case
Just values -> do
yield $ M.fromList $ zip headers values
labelLoop headers
Nothing -> return ()
-- Decoders
decodeLoop :: Monad m => Decoder m

View File

@ -25,17 +25,68 @@ module Data.CSV.SipSpec (spec) where
import Conduit (runConduit, (.|))
import Data.Char (ord)
import Data.Conduit.List (consume, sourceList)
import qualified Data.Map as M
import Test.Hspec (Spec, context, describe, it, shouldBe)
import Data.CSV.Sip
spec :: Spec
spec = describe "Data.CSV.Sip" $ do
labelFieldsSpec
decodeRowsSpec
decodeRawRowsSpec
decodeUTF8Spec
toBytesSpec
labelFieldsSpec :: Spec
labelFieldsSpec = describe "labelFields" $ mapM_
( \(label, input, expected) -> context label $ do
result <- runConduit $ sourceList input .| labelFields .| consume
let
expLen = length expected
resLen = length result
it ("should have " ++ show expLen ++ " rows") $
resLen `shouldBe` expLen
mapM_
( \(n, result', expected') -> context ("row " ++ show n) $
it ("should be " ++ show expected') $
result' `shouldBe` expected'
) $ zip3 [(0::Int)..] result expected
)
-- label, input, expected
[ ( "empty", [], [] )
, ( "no body", [headers], [] )
, ( "with body", withBodyIn, withBodyRes )
, ( "mixed cols", mixedColsIn, mixedColsRes )
]
where
headers = ["foo", "bar", "baz"] :: [String]
withBodyIn = headers :
[ ["a", "b", "c"]
, ["d", "e", "f"]
] :: [[String]]
mixedColsIn =
[ ["foo", "bar"]
, ["a"]
, ["b", "c"]
, ["d", "e", "f"]
] :: [[String]]
withBodyRes = map M.fromList
[ [("foo", "a"), ("bar", "b"), ("baz", "c")]
, [("foo", "d"), ("bar", "e"), ("baz", "f")]
] :: [M.Map String String]
mixedColsRes = map M.fromList
[ [("foo", "a")]
, [("foo", "b"), ("bar", "c")]
, [("foo", "d"), ("bar", "e")]
] :: [M.Map String String]
decodeRowsSpec :: Spec
decodeRowsSpec = describe "decodeRows" $ mapM_
( \(label, input, expected) -> context label $ do