Implement decodeUTF8 and cover it with specs.

src/Data/CSV/Slurp.hs: decodeUTF8 no longer returns Nothing for every
input; it calls Data.Text.Encoding.decodeUtf8' and maps Left to Nothing
and Right to Just.
test/Data/CSV/SlurpSpec.hs: spec now runs decodeUTF8Spec after
decodeRowsSpec; decodeUTF8Spec is a table of four cases (plain ASCII,
valid UTF8, invalid UTF8, blank).

NOTE(review): leading whitespace inside the hunks is a best-effort
reconstruction; confirm it against the target files, or apply with
`git apply --ignore-whitespace`.

diff --git a/src/Data/CSV/Slurp.hs b/src/Data/CSV/Slurp.hs
index 8803a34..da7e3fb 100644
--- a/src/Data/CSV/Slurp.hs
+++ b/src/Data/CSV/Slurp.hs
@@ -35,6 +35,7 @@ import Conduit (ConduitT, mapC, (.|))
 import qualified Data.ByteString as BS
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
+import Data.Text.Encoding (decodeUtf8')
 
 -- | decode the rows from a stream of ByteStrings
 decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
@@ -46,6 +47,8 @@ decodeRawRows = return ()
 
 -- | decode a raw ByteString into Text (if possible)
 decodeUTF8 :: BS.ByteString -> Maybe T.Text
-decodeUTF8 = const Nothing
+decodeUTF8 bs = case decodeUtf8' bs of
+  Left _ -> Nothing
+  Right txt -> Just txt
 
 --jl
diff --git a/test/Data/CSV/SlurpSpec.hs b/test/Data/CSV/SlurpSpec.hs
index 1fdc4fd..9b59ba5 100644
--- a/test/Data/CSV/SlurpSpec.hs
+++ b/test/Data/CSV/SlurpSpec.hs
@@ -24,13 +24,14 @@ module Data.CSV.SlurpSpec (spec) where
 
 import Conduit (runConduit, (.|))
 import Data.Conduit.List (consume, sourceList)
-import Test.Hspec (Spec, context, describe, shouldBe, xit)
+import Test.Hspec (Spec, context, describe, it, shouldBe, xit)
 
 import Data.CSV.Slurp
 
 spec :: Spec
-spec = describe "Data.CSV.Slurp"
+spec = describe "Data.CSV.Slurp" $ do
   decodeRowsSpec
+  decodeUTF8Spec
 
 decodeRowsSpec :: Spec
 decodeRowsSpec = describe "decodeRows" $ mapM_
@@ -59,4 +60,18 @@ decodeRowsSpec = describe "decodeRows" $ mapM_
     invalidIn = ["\"a"]
     validRes = [["foo", "bar"], ["baz", "quux"]]
 
+decodeUTF8Spec :: Spec
+decodeUTF8Spec = describe "decodeUTF8" $ mapM_
+  ( \(label, input, expected) -> context label $
+      it ("should be " ++ show expected) $
+        decodeUTF8 input `shouldBe` expected
+  )
+
+  -- label, input, expected
+  [ ( "plain ASCII", "hello", Just "hello" )
+  , ( "valid UTF8", "\xc3\xa9", Just "é" )
+  , ( "invalid UTF8", "\xff", Nothing )
+  , ( "blank", "", Just "" )
+  ]
+
 --jl