5 changed files with 4 additions and 389 deletions
--- a/csv-slurp.cabal
+++ b/csv-slurp.cabal
@ -29,10 +29,6 @@ library
  ghc-options: -Wall
  build-depends:
      base >=4.7 && <5
    , bytestring
    , conduit >=1.3.4.2 && <1.4
    , text
    , transformers
  default-language: Haskell2010
  autogen-modules: Paths_csv_slurp
@ -47,11 +43,7 @@ test-suite csv-slurp-test
  ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
  build-depends:
      base >=4.7 && <5
    , bytestring
    , conduit >=1.3.4.2 && <1.4
    , csv-slurp
    , hspec >=2.8.5 && <2.9
    , text
    , transformers
  default-language: Haskell2010
  autogen-modules: Paths_csv_slurp
--- a/package.yaml
+++ b/package.yaml
@ -23,10 +23,6 @@ ghc-options:
 dependencies:
 - base >= 4.7 && < 5
 - bytestring
 - conduit >= 1.3.4.2 && < 1.4
 - text
 - transformers
 library:
  source-dirs: src
--- a/src/Data/CSV/Slurp.hs
+++ b/src/Data/CSV/Slurp.hs
@ -23,183 +23,6 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 -}
-{-# LANGUAGE LambdaCase, OverloadedStrings #-}
+module Data.CSV.Slurp where
 module Data.CSV.Slurp (
  decodeRows,
  decodeRawRows,
  decodeUTF8,
  toBytes,
 ) where
 import Conduit (ConduitT, await, mapC, yield, (.|))
 import Control.Monad (unless)
 import Control.Monad.Trans.Class (lift)
 import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
 import qualified Data.ByteString as BS
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text.Encoding (decodeUtf8')
 import Data.Word (Word8)
 -- | Stream CSV rows as lists of 'T.Text' fields. Every raw field is passed
 -- through 'decodeUTF8'; a field that fails to decode becomes the empty text.
 decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
 decodeRows = decodeRawRows .| mapC (map toText)
  where
    -- fall back to a blank field when the bytes are not valid UTF-8
    toText = fromMaybe "" . decodeUTF8
 -- | Stream CSV rows as lists of undecoded 'BS.ByteString' fields: the input
 -- chunks are flattened to single bytes by 'toBytes' and fed to 'decodeLoop',
 -- which starts from 'newDecodeState'.
 decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m ()
 decodeRawRows = toBytes .| rowDecoder
  where
    rowDecoder = evalStateT decodeLoop newDecodeState
 -- | Interpret a 'BS.ByteString' as UTF-8, giving 'Nothing' when
 -- 'decodeUtf8'' reports a decoding error.
 decodeUTF8 :: BS.ByteString -> Maybe T.Text
 decodeUTF8 = either (const Nothing) Just . decodeUtf8'
 -- | Convert a stream of 'BS.ByteString' chunks into a stream of single
 -- bytes, preserving their order.
 toBytes :: Monad m => ConduitT BS.ByteString Word8 m ()
 toBytes = do
  chunk <- await
  case chunk of
    Nothing -> return ()
    Just bs -> mapM_ yield (BS.unpack bs) >> toBytes
 -- Internal
 -- | State threaded through the decoding loop.
 data DecodeState = DecodeState
  { isQuoted  :: Bool
    -- ^ 'True' while the field being read was opened with a quote
  , fields    :: [BS.ByteString]
    -- ^ fields already committed for the row being read, in order
  , collected :: BS.ByteString
    -- ^ bytes accumulated so far for the field being read
  } deriving (Eq, Show)
 -- | A decoding step: a 'DecodeState' computation layered over a conduit
 -- that consumes single bytes and yields rows as lists of raw fields.
 type Decoder m = StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
 -- | A pure transformation of the 'DecodeState'.
 type Modifier = DecodeState -> DecodeState
 -- | Initial state: not inside a quoted field, no committed fields, and
 -- nothing collected for the current field.
 newDecodeState :: DecodeState
 newDecodeState = DecodeState False [] ""
 -- Decoders
 -- | Main loop: pull the next byte and dispatch on it. The four bytes with
 -- special meaning go to their own handlers, any other byte is appended to
 -- the current field, and end of input triggers 'cleanup'.
 decodeLoop :: Monad m => Decoder m
 decodeLoop = do
  next <- lift await
  case next of
    Nothing   -> cleanup
    Just 0x22 -> processQuote                   -- double quote
    Just 0x2c -> processComma                   -- comma
    Just 0x0d -> processCR                      -- carriage return
    Just 0x0a -> processLF                      -- line feed
    Just byte -> performAction (addByte byte)
 -- | Handle a double quote, choosing the handler according to whether the
 -- current field is quoted.
 processQuote :: Monad m => Decoder m
 processQuote = do
  quoted <- gets isQuoted
  if quoted
    then processQuotedQuote
    else processUnquotedQuote
 -- | Handle a comma: inside a quoted field it is ordinary data, otherwise it
 -- ends the current field.
 processComma :: Monad m => Decoder m
 processComma = do
  quoted <- gets isQuoted
  performAction $ if quoted then addByte 0x2c else commitField
 -- | Handle a carriage return: inside a quoted field it is ordinary data,
 -- otherwise it ends the row.
 processCR :: Monad m => Decoder m
 processCR = do
  quoted <- gets isQuoted
  if quoted
    then performAction (addByte 0x0d)
    else endRow
 -- | Handle a line feed: inside a quoted field it is ordinary data,
 -- otherwise it ends the row.
 processLF :: Monad m => Decoder m
 processLF = do
  quoted <- gets isQuoted
  if quoted
    then performAction (addByte 0x0a)
    else endRow
 -- | Handle a double quote met inside a quoted field by inspecting the byte
 -- that follows it:
 --
 -- * another quote is an escaped quote and is added to the field;
 -- * a comma means the quote closed the field;
 -- * CR or LF means the quote closed the last field of the row;
 -- * any other byte makes the field corrupted;
 -- * end of input means the quote closed the last field of the input.
 processQuotedQuote :: Monad m => Decoder m
 processQuotedQuote = lift await >>= \case
  Just byte -> case byte of
    0x22 -> performAction $ addByte 0x22 -- quote
    0x2c -> performAction commitField    -- comma
    0x0d -> commitRow                    -- carriage return
    0x0a -> commitRow                    -- line feed
    _    -> corruptedField
  -- The field was properly closed, so commit it with the row. Going straight
  -- to 'cleanup' here would find 'isQuoted' still set and throw the field
  -- away as if its closing quote were missing. 'commitRow' re-enters
  -- 'decodeLoop', whose next 'await' reports end of input again and reaches
  -- 'cleanup' with nothing left pending.
  Nothing -> commitRow
 -- | Handle a double quote met outside a quoted field: it opens a quoted
 -- field when nothing has been collected yet, and corrupts the field
 -- otherwise.
 processUnquotedQuote :: Monad m => Decoder m
 processUnquotedQuote = do
  atFieldStart <- gets (BS.null . collected)
  if atFieldStart
    then performAction setQuoted
    else corruptedField
 -- | Handle an unquoted row terminator. When no field has been committed and
 -- nothing has been collected the terminator is skipped (this is what
 -- swallows the LF of a CRLF pair); otherwise the row is committed.
 endRow :: Monad m => Decoder m
 endRow = do
  nothingPending <- gets $ \st -> null (fields st) && BS.null (collected st)
  if nothingPending
    then decodeLoop
    else commitRow
 -- | Commit the current field, yield the finished row downstream, then clear
 -- the committed fields and resume the main loop.
 commitRow :: Monad m => Decoder m
 commitRow = do
  modify commitField
  row <- gets fields
  lift (yield row)
  performAction dropFields
 -- | Abandon a malformed field: discard what was collected for it and skip
 -- the remainder of the field.
 corruptedField :: Monad m => Decoder m
 corruptedField = modify dropField >> ignoreField
 -- | Skip the remainder of a corrupted field. Bytes are discarded until the
 -- field ends. A blank field is committed in place of the corrupted one, so
 -- the remaining fields of the row keep their positions.
 --
 -- * A comma ends the field and decoding resumes with the next field.
 -- * CR or LF ends the field and the row. LF has to be recognised here as
 --   well: without it, LF-only input would have the following row swallowed
 --   into the corrupted field.
 -- * End of input ends the field and the row too, so the placeholder is
 --   emitted exactly as it would be before a row terminator.
 ignoreField :: Monad m => Decoder m
 ignoreField = lift await >>= \case
  Just byte -> case byte of
    0x2c -> performAction commitField -- comma
    0x0d -> commitRow                 -- carriage return
    0x0a -> commitRow                 -- line feed
    _    -> ignoreField
  -- 'commitRow' re-enters 'decodeLoop', whose next 'await' reports end of
  -- input again and reaches 'cleanup' with nothing left pending
  Nothing -> commitRow
 -- | Flush whatever is pending once the input is exhausted.
 --
 -- * Still inside a quoted field: the closing quote never arrived, so the
 --   collected bytes are discarded and a blank field is committed instead.
 -- * Otherwise the current field is committed when there is anything pending
 --   for the row: either bytes were collected, or fields were already
 --   committed. The latter means the input ended right after a comma, and
 --   the row still owes its trailing blank field, just as it would if a row
 --   terminator had followed the comma.
 --
 -- A row is yielded only when at least one field ended up committed.
 cleanup :: Monad m => Decoder m
 cleanup = do
  gets isQuoted >>= \case
    True  -> modify $ commitField . dropField
    False -> do
      s <- get
      let nothingPending = BS.null (collected s) && null (fields s)
      unless nothingPending $
        modify commitField
  fs <- gets fields
  unless (null fs) $
    lift $ yield fs
 -- | Apply a 'Modifier' to the state and carry on with the main loop.
 performAction :: Monad m => Modifier -> Decoder m
 performAction f = modify f >> decodeLoop
 -- Modifiers
 -- | Append one byte to the field being collected.
 addByte :: Word8 -> Modifier
 addByte b s = s { collected = collected s `BS.snoc` b }
 -- | Finish the current field: append the collected bytes to the committed
 -- fields, clear the collection buffer and leave quoted mode.
 commitField :: Modifier
 commitField s = s
  { isQuoted  = False
  , fields    = fields s <> [collected s]
  , collected = ""
  }
 -- | Forget every committed field, leaving the rest of the state untouched.
 dropFields :: Modifier
 dropFields (DecodeState quoted _ pending) = DecodeState quoted [] pending
 -- | Discard the bytes collected for the current field and leave quoted
 -- mode; the committed fields are kept.
 dropField :: Modifier
 dropField (DecodeState _ committed _) = DecodeState False committed ""
 -- | Enter quoted mode, leaving the rest of the state untouched.
 setQuoted :: Modifier
 setQuoted (DecodeState _ committed pending) = DecodeState True committed pending
 --jl
--- a/test/Data/CSV/SlurpSpec.hs
+++ b/test/Data/CSV/SlurpSpec.hs
@ -18,207 +18,11 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 -}
 {-# LANGUAGE OverloadedStrings #-}
 module Data.CSV.SlurpSpec (spec) where
-import Conduit (runConduit, (.|))
+import Test.Hspec (Spec, describe)
 import Data.Char (ord)
 import Data.Conduit.List (consume, sourceList)
 import Test.Hspec (Spec, context, describe, it, shouldBe)
 import Data.CSV.Slurp
 -- | Top-level spec for "Data.CSV.Slurp", grouping the per-function specs
 -- listed below under a single 'describe'.
 -- NOTE(review): this hunk carries both the removed (@$ do@) and the added
 -- (@$ return ()@) definition line; confirm which one is meant to precede
 -- the four sub-specs.
 spec :: Spec
-spec = describe "Data.CSV.Slurp" $ do
+spec = describe "Data.CSV.Slurp" $ return ()
  decodeRowsSpec
  decodeRawRowsSpec
  decodeUTF8Spec
  toBytesSpec
 -- | Table-driven checks for 'decodeRows': each case feeds its input chunks
 -- through the conduit, then compares the number of rows and every row
 -- against the expectation.
 decodeRowsSpec :: Spec
 decodeRowsSpec = describe "decodeRows" $ mapM_ checkCase
  --  label,     input,     expected
  [ ( "valid",   validIn,   validRes   )
  , ( "invalid", invalidIn, invalidRes )
  , ( "empty",   [],        []         )
  ]
  where
    -- run one table entry
    checkCase (label, input, expected) = context label $ do
      result <- runConduit $ sourceList input .| decodeRows .| consume
      it ("should have " ++ show (length expected) ++ " rows") $
        length result `shouldBe` length expected
      mapM_ checkRow $ zip3 [(0::Int)..] expected result
    -- compare a single decoded row with its expectation
    checkRow (n, expected', result') = context ("row " ++ show n) $
      it ("should be " ++ show expected') $
        result' `shouldBe` expected'
    validIn    = ["foo,bar\r\n", "baz,quux\r\n"]
    invalidIn  = ["\"a"]
    validRes   = [["foo", "bar"], ["baz", "quux"]]
    invalidRes = [[""]]
 -- | Table-driven checks for 'decodeRawRows'. Each case feeds its input
 -- chunks through the conduit, then compares the number of rows and every
 -- row against the expectation. The table covers quoting, the different row
 -- terminators, special bytes inside quoted fields, chunk boundaries that
 -- do not line up with rows, a missing final terminator, a malformed field
 -- and a trailing blank field.
 decodeRawRowsSpec :: Spec
 decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
  ( \(label, input, expected) -> context label $ do
    result <- runConduit $ sourceList input .| decodeRawRows .| consume
    let
      expLen = length expected
      resLen = length result
    it ("should have " ++ show expLen ++ " rows") $
      resLen `shouldBe` expLen
    mapM_
      ( \(n, expected', result') -> context ("row " ++ show n) $
        -- name the example after the expected row, as decodeRowsSpec does;
        -- naming it after the actual result would mislabel a failing example
        it ("should be " ++ show expected') $
          result' `shouldBe` expected'
      ) $ zip3 [(0::Int)..] expected result
  )
  --  label,         input,      expected
  [ ( "unquoted",    unquotedIn,  normalRes    )
  , ( "quoted",      quotedIn,    normalRes    )
  , ( "mixed",       mixedIn,     normalRes    )
  , ( "CR only",     crOnlyIn,    normalRes    )
  , ( "LF only",     lfOnlyIn,    normalRes    )
  , ( "has quote",   quoteIn,     quoteRes     )
  , ( "has CR",      crIn,        crRes        )
  , ( "has LF",      lfIn,        lfRes        )
  , ( "has CRLF",    crlfIn,      crlfRes      )
  , ( "odd chunk",   oddChunkIn,  normalRes    )
  , ( "no newline",  noNewlineIn, normalRes    )
  , ( "malformed",   malformedIn, malformedRes )
  , ( "blank end",   blankEndIn,  blankEndRes  )
  ]
  where
    unquotedIn =
      [ "foo,bar\r\n"
      , "baz,quux\r\n"
      ]
    quotedIn =
      [ "\"foo\",\"bar\"\r\n"
      , "\"baz\",\"quux\"\r\n"
      ]
    mixedIn =
      [ "\"foo\",bar\r\n"
      , "baz,\"quux\"\r\n"
      ]
    crOnlyIn =
      [ "foo,bar\r"
      , "baz,quux\r"
      ]
    lfOnlyIn =
      [ "foo,bar\n"
      , "baz,quux\n"
      ]
    quoteIn =
      [ "\"a\"\"b\",bar\r\n"
      , "baz,quux\r\n"
      ]
    crIn =
      [ "\"a\rb\",bar\r\n"
      , "baz,quux\r\n"
      ]
    lfIn =
      [ "\"a\nb\",bar\r\n"
      , "baz,quux\r\n"
      ]
    crlfIn =
      [ "\"a\r\nb\",bar\r\n"
      , "baz,quux\r\n"
      ]
    oddChunkIn =
      [ "foo,"
      , "bar\r\nbaz,"
      , "quux\r\n"
      ]
    noNewlineIn =
      [ "foo,bar\r\n"
      , "baz,quux"
      ]
    malformedIn =
      [ "a\"b,bar\r\n"
      , "baz,quux\r\n"
      ]
    blankEndIn =
      [ "foo,bar,\r\n"
      , "baz,quux\r\n"
      ]
    normalRes =
      [ ["foo", "bar"]
      , ["baz", "quux"]
      ]
    quoteRes =
      [ ["a\"b", "bar"]
      , ["baz", "quux"]
      ]
    crRes =
      [ ["a\rb", "bar"]
      , ["baz", "quux"]
      ]
    lfRes =
      [ ["a\nb", "bar"]
      , ["baz", "quux"]
      ]
    crlfRes =
      [ ["a\r\nb", "bar"]
      , ["baz", "quux"]
      ]
    malformedRes =
      [ ["", "bar"]
      , ["baz", "quux"]
      ]
    blankEndRes =
      [ ["foo", "bar", ""]
      , ["baz", "quux"]
      ]
 -- | Table-driven checks for 'decodeUTF8', covering ASCII, multi-byte,
 -- invalid and empty input.
 decodeUTF8Spec :: Spec
 decodeUTF8Spec = describe "decodeUTF8" $ mapM_ checkCase
  --  label,          input,      expected
  [ ( "plain ASCII",  "hello",    Just "hello" )
  , ( "valid UTF8",   "\xc3\xa9", Just "é"     )
  , ( "invalid UTF8", "\xff",     Nothing      )
  , ( "blank",        "",         Just ""      )
  ]
  where
    -- run one table entry
    checkCase (label, input, expected) = context label $
      it ("should be " ++ show expected) $
        decodeUTF8 input `shouldBe` expected
 -- | Check that 'toBytes' flattens the input chunks into their bytes, in
 -- order.
 toBytesSpec :: Spec
 toBytesSpec = describe "toBytes" $
  it ("should be " ++ show expected) $ do
    result <- runConduit $ sourceList input .| toBytes .| consume
    result `shouldBe` expected
  where
    input    = ["ab", "cd"]
    expected = map (fromIntegral . ord) "abcd"
 --jl
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -25,6 +25,6 @@ import Test.Hspec (hspec)
 import qualified Data.CSV.SlurpSpec as Slurp
 -- | Test-suite entry point: runs the "Data.CSV.SlurpSpec" spec with 'hspec'.
 -- NOTE(review): the removed and added definition lines below differ only by
 -- the @$@ operator.
 main :: IO ()
-main = hspec Slurp.spec
+main = hspec $ Slurp.spec
 --jl