csv-sip/src/Data/CSV/Slurp.hs

78 lines
2.2 KiB
Haskell
Raw Normal View History

2022-04-18 16:29:28 -04:00
{-|
Module : Data.CSV.Slurp
Description : works with CSV files
Copyright : (C) Jonathan Lamothe
License : GPL-3.0-or-later
Maintainer : jonathan@jlamothe.net
Stability : experimental
Portability : POSIX
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
-}
2022-04-19 16:03:14 -04:00
{-# LANGUAGE OverloadedStrings #-}
module Data.CSV.Slurp (
decodeRows,
decodeRawRows,
decodeUTF8,
2022-04-19 19:33:35 -04:00
toBytes,
2022-04-19 16:03:14 -04:00
) where
import Conduit (ConduitT, concatMapC, mapC, (.|))
2022-04-19 19:33:35 -04:00
import Control.Monad.Trans.State (StateT, evalStateT)
2022-04-19 16:03:14 -04:00
import qualified Data.ByteString as BS
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
2022-04-19 16:31:04 -04:00
import Data.Text.Encoding (decodeUtf8')
2022-04-19 19:33:35 -04:00
import Data.Word (Word8)
2022-04-19 16:03:14 -04:00
-- | Decode a stream of 'BS.ByteString' chunks into rows of 'T.Text' fields.
--
-- Rows are produced by 'decodeRawRows'; every field is then passed through
-- 'decodeUTF8', and a field for which that yields 'Nothing' is replaced by
-- the empty text.
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
decodeRows = decodeRawRows .| mapC (map fieldToText)
  where
    -- Fall back to empty text when a field is not valid UTF-8.
    fieldToText = fromMaybe T.empty . decodeUTF8
-- | Decode a stream of 'BS.ByteString' chunks into rows whose fields are
-- left as raw 'BS.ByteString's instead of being converted to text.
--
-- The input is first flattened with 'toBytes' and then fed to 'decodeLoop'.
decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m ()
decodeRawRows = toBytes .| rowDecoder
  where
    -- Run the stateful decoder from a fresh state; the final state is dropped.
    rowDecoder = evalStateT decodeLoop newDecodeState
2022-04-19 16:03:14 -04:00
-- | Attempt to decode a raw 'BS.ByteString' as UTF-8 text.
--
-- Returns 'Just' the decoded text when @decodeUtf8'@ succeeds, and 'Nothing'
-- when it reports a decoding error (the error itself is discarded).
decodeUTF8 :: BS.ByteString -> Maybe T.Text
decodeUTF8 = either (const Nothing) Just . decodeUtf8'
2022-04-18 16:29:28 -04:00
2022-04-19 19:33:35 -04:00
-- | Convert a stream of 'BS.ByteString' chunks into a stream of individual
-- bytes.
--
-- Bytes are emitted in their original order, chunk by chunk; an empty chunk
-- contributes no output.
toBytes :: Monad m => ConduitT BS.ByteString Word8 m ()
toBytes = concatMapC BS.unpack
-- | Working state threaded through 'decodeLoop' while rows are decoded.
data DecodeState = DecodeState
  { isQuoted :: Bool
    -- ^ Starts as 'False' in 'newDecodeState'.
    -- NOTE(review): presumably tracks whether the decoder is inside a
    -- quoted field; confirm once 'decodeLoop' reads it.
  , collected :: BS.ByteString
    -- ^ Starts empty in 'newDecodeState'.
    -- NOTE(review): presumably the bytes gathered so far for the current
    -- field; confirm once 'decodeLoop' writes it.
  }
  deriving (Eq, Show)
-- | The state a decoder starts from: the quoted flag is cleared and no bytes
-- have been collected.
newDecodeState :: DecodeState
newDecodeState =
  DecodeState
    { isQuoted = False
    , collected = BS.empty
    }
-- | Stateful decoding step that 'decodeRawRows' runs over the byte stream.
--
-- NOTE(review): this is currently a placeholder that finishes immediately.
-- It consumes no input, emits no rows and leaves the 'DecodeState' untouched.
decodeLoop
  :: Monad m
  => StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
decodeLoop = pure ()
2022-04-18 16:29:28 -04:00
--jl