csv-sip/src/Data/CSV/Slurp.hs

83 lines
2.3 KiB
Haskell

{-|
Module : Data.CSV.Slurp
Description : works with CSV files
Copyright : (C) Jonathan Lamothe
License : GPL-3.0-or-later
Maintainer : jonathan@jlamothe.net
Stability : experimental
Portability : POSIX
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
-}
{-# LANGUAGE LambdaCase, OverloadedStrings #-}
module Data.CSV.Slurp (
decodeRows,
decodeRawRows,
decodeUTF8,
toBytes,
) where
import Conduit (ConduitT, await, mapC, yield, (.|))
import Control.Monad.Trans.State (StateT, evalStateT)
import qualified Data.ByteString as BS
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8')
import Data.Word (Word8)
-- | decode the rows from a stream of ByteStrings
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8)
-- | decode the rows returning raw ByteStrings instead of text
decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m ()
decodeRawRows = toBytes .| evalStateT decodeLoop newDecodeState
-- | decode a raw ByteString into Text (if possible)
decodeUTF8 :: BS.ByteString -> Maybe T.Text
decodeUTF8 bs = case decodeUtf8' bs of
Left _ -> Nothing
Right txt -> Just txt
-- | convert a stream to ByteStrings to a string of bytes
toBytes :: Monad m => ConduitT BS.ByteString Word8 m ()
toBytes = await >>= \case
Just bs -> do
let bytes = BS.unpack bs
mapM_ yield bytes
toBytes
Nothing -> return ()
data DecodeState = DecodeState
{ isQuoted :: Bool
, collected :: BS.ByteString
} deriving (Eq, Show)
newDecodeState :: DecodeState
newDecodeState = DecodeState
{ isQuoted = False
, collected = ""
}
decodeLoop
:: Monad m
=> StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
decodeLoop = return ()
--jl