implemented labelFields
This commit is contained in:
parent
f27e190be6
commit
76cea1e051
|
@ -31,6 +31,7 @@ library
|
||||||
base >=4.7 && <5
|
base >=4.7 && <5
|
||||||
, bytestring
|
, bytestring
|
||||||
, conduit >=1.3.4.2 && <1.4
|
, conduit >=1.3.4.2 && <1.4
|
||||||
|
, containers
|
||||||
, text
|
, text
|
||||||
, transformers
|
, transformers
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
@ -49,6 +50,7 @@ test-suite csv-sip-test
|
||||||
base >=4.7 && <5
|
base >=4.7 && <5
|
||||||
, bytestring
|
, bytestring
|
||||||
, conduit >=1.3.4.2 && <1.4
|
, conduit >=1.3.4.2 && <1.4
|
||||||
|
, containers
|
||||||
, csv-sip
|
, csv-sip
|
||||||
, hspec >=2.8.5 && <2.9
|
, hspec >=2.8.5 && <2.9
|
||||||
, text
|
, text
|
||||||
|
|
|
@ -25,6 +25,7 @@ dependencies:
|
||||||
- base >= 4.7 && < 5
|
- base >= 4.7 && < 5
|
||||||
- bytestring
|
- bytestring
|
||||||
- conduit >= 1.3.4.2 && < 1.4
|
- conduit >= 1.3.4.2 && < 1.4
|
||||||
|
- containers
|
||||||
- text
|
- text
|
||||||
- transformers
|
- transformers
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
{-# LANGUAGE LambdaCase, OverloadedStrings #-}
|
{-# LANGUAGE LambdaCase, OverloadedStrings #-}
|
||||||
|
|
||||||
module Data.CSV.Sip (
|
module Data.CSV.Sip (
|
||||||
|
labelFields,
|
||||||
decodeRows,
|
decodeRows,
|
||||||
decodeRawRows,
|
decodeRawRows,
|
||||||
decodeUTF8,
|
decodeUTF8,
|
||||||
|
@ -37,11 +38,18 @@ import Control.Monad (unless)
|
||||||
import Control.Monad.Trans.Class (lift)
|
import Control.Monad.Trans.Class (lift)
|
||||||
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
|
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
|
import qualified Data.Map as M
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Data.Text.Encoding (decodeUtf8')
|
import Data.Text.Encoding (decodeUtf8')
|
||||||
import Data.Word (Word8)
|
import Data.Word (Word8)
|
||||||
|
|
||||||
|
-- | read a CSV stream, using the first row as a header containing field labels
|
||||||
|
labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m ()
|
||||||
|
labelFields = await >>= \case
|
||||||
|
Just headers -> labelLoop headers
|
||||||
|
Nothing -> return ()
|
||||||
|
|
||||||
-- | decode the rows from a stream of ByteStrings
|
-- | decode the rows from a stream of ByteStrings
|
||||||
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
|
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
|
||||||
decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8)
|
decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8)
|
||||||
|
@ -83,6 +91,15 @@ newDecodeState = DecodeState
|
||||||
, collected = ""
|
, collected = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- Conduits
|
||||||
|
|
||||||
|
labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m ()
|
||||||
|
labelLoop headers = await >>= \case
|
||||||
|
Just values -> do
|
||||||
|
yield $ M.fromList $ zip headers values
|
||||||
|
labelLoop headers
|
||||||
|
Nothing -> return ()
|
||||||
|
|
||||||
-- Decoders
|
-- Decoders
|
||||||
|
|
||||||
decodeLoop :: Monad m => Decoder m
|
decodeLoop :: Monad m => Decoder m
|
||||||
|
|
|
@ -25,17 +25,68 @@ module Data.CSV.SipSpec (spec) where
|
||||||
import Conduit (runConduit, (.|))
|
import Conduit (runConduit, (.|))
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
import Data.Conduit.List (consume, sourceList)
|
import Data.Conduit.List (consume, sourceList)
|
||||||
|
import qualified Data.Map as M
|
||||||
import Test.Hspec (Spec, context, describe, it, shouldBe)
|
import Test.Hspec (Spec, context, describe, it, shouldBe)
|
||||||
|
|
||||||
import Data.CSV.Sip
|
import Data.CSV.Sip
|
||||||
|
|
||||||
spec :: Spec
|
spec :: Spec
|
||||||
spec = describe "Data.CSV.Sip" $ do
|
spec = describe "Data.CSV.Sip" $ do
|
||||||
|
labelFieldsSpec
|
||||||
decodeRowsSpec
|
decodeRowsSpec
|
||||||
decodeRawRowsSpec
|
decodeRawRowsSpec
|
||||||
decodeUTF8Spec
|
decodeUTF8Spec
|
||||||
toBytesSpec
|
toBytesSpec
|
||||||
|
|
||||||
|
labelFieldsSpec :: Spec
|
||||||
|
labelFieldsSpec = describe "labelFields" $ mapM_
|
||||||
|
( \(label, input, expected) -> context label $ do
|
||||||
|
result <- runConduit $ sourceList input .| labelFields .| consume
|
||||||
|
let
|
||||||
|
expLen = length expected
|
||||||
|
resLen = length result
|
||||||
|
it ("should have " ++ show expLen ++ " rows") $
|
||||||
|
resLen `shouldBe` expLen
|
||||||
|
mapM_
|
||||||
|
( \(n, result', expected') -> context ("row " ++ show n) $
|
||||||
|
it ("should be " ++ show expected') $
|
||||||
|
result' `shouldBe` expected'
|
||||||
|
) $ zip3 [(0::Int)..] result expected
|
||||||
|
)
|
||||||
|
|
||||||
|
-- label, input, expected
|
||||||
|
[ ( "empty", [], [] )
|
||||||
|
, ( "no body", [headers], [] )
|
||||||
|
, ( "with body", withBodyIn, withBodyRes )
|
||||||
|
, ( "mixed cols", mixedColsIn, mixedColsRes )
|
||||||
|
]
|
||||||
|
|
||||||
|
where
|
||||||
|
headers = ["foo", "bar", "baz"] :: [String]
|
||||||
|
|
||||||
|
withBodyIn = headers :
|
||||||
|
[ ["a", "b", "c"]
|
||||||
|
, ["d", "e", "f"]
|
||||||
|
] :: [[String]]
|
||||||
|
|
||||||
|
mixedColsIn =
|
||||||
|
[ ["foo", "bar"]
|
||||||
|
, ["a"]
|
||||||
|
, ["b", "c"]
|
||||||
|
, ["d", "e", "f"]
|
||||||
|
] :: [[String]]
|
||||||
|
|
||||||
|
withBodyRes = map M.fromList
|
||||||
|
[ [("foo", "a"), ("bar", "b"), ("baz", "c")]
|
||||||
|
, [("foo", "d"), ("bar", "e"), ("baz", "f")]
|
||||||
|
] :: [M.Map String String]
|
||||||
|
|
||||||
|
mixedColsRes = map M.fromList
|
||||||
|
[ [("foo", "a")]
|
||||||
|
, [("foo", "b"), ("bar", "c")]
|
||||||
|
, [("foo", "d"), ("bar", "e")]
|
||||||
|
] :: [M.Map String String]
|
||||||
|
|
||||||
decodeRowsSpec :: Spec
|
decodeRowsSpec :: Spec
|
||||||
decodeRowsSpec = describe "decodeRows" $ mapM_
|
decodeRowsSpec = describe "decodeRows" $ mapM_
|
||||||
( \(label, input, expected) -> context label $ do
|
( \(label, input, expected) -> context label $ do
|
||||||
|
|
Loading…
Reference in New Issue
Block a user