Compare commits

...

10 Commits

Author SHA1 Message Date
af4066d3db updated package description/synopsis 2022-04-26 20:16:58 -04:00
3bc1be9eb9 moved decodeUTF8 into helpers section 2022-04-25 18:46:57 -04:00
d99862b3f4 updated README.md 2022-04-25 18:45:37 -04:00
22b6942900 changed types of encodeCSV and encodeRawCSV
...to be more generic, the type now allows them to take any kind of input
2022-04-24 22:14:30 -04:00
c1e9fb7b8e typo 2022-04-24 22:13:07 -04:00
82085eaaf9 note about pull requests 2022-04-24 22:11:28 -04:00
493b7dd9d4 version 0.1.0 2022-04-24 19:03:01 -04:00
723f046ea4 added homepage and bug reports information 2022-04-24 19:00:47 -04:00
bb970e4f42 removed warning about API changes 2022-04-24 18:47:53 -04:00
1b20188dfc added executive summary to README.md 2022-04-24 18:47:28 -04:00
6 changed files with 37 additions and 27 deletions

View File

@@ -1,3 +1,5 @@
# Changelog for csv-sip # Changelog for csv-sip
## Unreleased changes ## Unreleased changes
- changed the types of encodeCSV and encodeRawCSV to make them more generic
- slight re-structuring of documentation

View File

@@ -14,5 +14,8 @@ General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. along with this program. If not, see <https://www.gnu.org/licenses/>.
## Important Note ## Executive Summary
This library is not yet ready for release. As such, all code should be considered to be unstable and subject to change at any time. This library allows for reading and writing to and from CSV files in a streaming manner. Files can be read and written to on a row-by-row basis allowing larger files to be worked with, since the whole file doesn't have to be loaded to manipulate it. It is based on the [conduit](https://hackage.haskell.org/package/conduit) library.
## Pull Requests
Please make pull requests to the `dev` branch.

View File

@@ -5,10 +5,12 @@ cabal-version: 2.2
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: csv-sip name: csv-sip
version: 0.0.0 version: 0.1.0
synopsis: extracts data from a CSV file synopsis: CSV streaming library
description: extracts data from a CSV file - see README.md for more details description: CSV streaming library - see README.md for more details
category: Data category: Data
homepage: https://codeberg.org/jlamothe/csv-sip
bug-reports: https://codeberg.org/jlamothe/csv-sip/issues
author: Jonathan Lamothe author: Jonathan Lamothe
maintainer: jonathan@jlamothe.net maintainer: jonathan@jlamothe.net
copyright: (C) 2022 Jonathan Lamothe copyright: (C) 2022 Jonathan Lamothe

View File

@@ -1,5 +1,5 @@
name: csv-sip name: csv-sip
version: 0.0.0 version: 0.1.0
license: GPL-3.0-or-later license: GPL-3.0-or-later
author: "Jonathan Lamothe" author: "Jonathan Lamothe"
maintainer: "jonathan@jlamothe.net" maintainer: "jonathan@jlamothe.net"
@@ -10,13 +10,15 @@ extra-source-files:
- ChangeLog.md - ChangeLog.md
# Metadata used when publishing your package # Metadata used when publishing your package
synopsis: extracts data from a CSV file synopsis: CSV streaming library
category: Data category: Data
# To avoid duplicated efforts in documentation and dealing with the # To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is # complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file. # common to point users to the README.md file.
description: extracts data from a CSV file - see README.md for more details description: CSV streaming library - see README.md for more details
homepage: https://codeberg.org/jlamothe/csv-sip
bug-reports: https://codeberg.org/jlamothe/csv-sip/issues
ghc-options: ghc-options:
- -Wall - -Wall

View File

@@ -52,8 +52,9 @@ module Data.CSV.Sip (
labelFields, labelFields,
decodeRows, decodeRows,
decodeRawRows, decodeRawRows,
decodeUTF8,
toBytes, toBytes,
-- * Helper Functions
decodeUTF8,
) where ) where
import Conduit import Conduit
@@ -153,7 +154,7 @@ encodeCSV
:: Monad m :: Monad m
=> [[T.Text]] => [[T.Text]]
-- ^ the data being encoded, organized into rows and fields -- ^ the data being encoded, organized into rows and fields
-> ConduitT () BS.ByteString m () -> ConduitT o BS.ByteString m ()
encodeCSV csv = sourceList csv .| encodeRows encodeCSV csv = sourceList csv .| encodeRows
-- | encode an entire CSV file -- | encode an entire CSV file
@@ -161,7 +162,7 @@ encodeRawCSV
:: Monad m :: Monad m
=> [[BS.ByteString]] => [[BS.ByteString]]
-- ^ the data being encoded, organized into rows and fields -- ^ the data being encoded, organized into rows and fields
-> ConduitT () BS.ByteString m () -> ConduitT o BS.ByteString m ()
encodeRawCSV csv = sourceList csv .| encodeRawRows encodeRawCSV csv = sourceList csv .| encodeRawRows
-- | Writes a stream of Text-based rows to a CSV file -- | Writes a stream of Text-based rows to a CSV file
@@ -211,13 +212,7 @@ decodeRows = decodeRawRows .| mapC (map $ fromMaybe "" . decodeUTF8)
decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m () decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m ()
decodeRawRows = toBytes .| evalStateT decodeLoop newDecodeState decodeRawRows = toBytes .| evalStateT decodeLoop newDecodeState
-- | decode a raw ByteString into Text (if possible) -- | convert a stream of ByteStrings to a stream of bytes
decodeUTF8 :: BS.ByteString -> Maybe T.Text
decodeUTF8 bs = case decodeUtf8' bs of
Left _ -> Nothing
Right txt -> Just txt
-- | convert a stream to ByteStrings to a string of bytes
toBytes :: Monad m => ConduitT BS.ByteString Word8 m () toBytes :: Monad m => ConduitT BS.ByteString Word8 m ()
toBytes = await >>= \case toBytes = await >>= \case
Just bs -> do Just bs -> do
@@ -226,6 +221,12 @@ toBytes = await >>= \case
toBytes toBytes
Nothing -> return () Nothing -> return ()
-- | decode a raw ByteString into Text (if possible)
decodeUTF8 :: BS.ByteString -> Maybe T.Text
decodeUTF8 bs = case decodeUtf8' bs of
Left _ -> Nothing
Right txt -> Just txt
-- Internal -- Internal
data DecodeState = DecodeState data DecodeState = DecodeState

View File

@@ -40,8 +40,8 @@ spec = describe "Data.CSV.Sip" $ do
labelFieldsSpec labelFieldsSpec
decodeRowsSpec decodeRowsSpec
decodeRawRowsSpec decodeRawRowsSpec
decodeUTF8Spec
toBytesSpec toBytesSpec
decodeUTF8Spec
encodeCSVSpec :: Spec encodeCSVSpec :: Spec
encodeCSVSpec = describe "encodeCSV" $ do encodeCSVSpec = describe "encodeCSV" $ do
@@ -327,6 +327,14 @@ decodeRawRowsSpec = describe "decodeRawRows" $ mapM_
, ["baz", "quux"] , ["baz", "quux"]
] ]
toBytesSpec :: Spec
toBytesSpec = describe "toBytes" $ let
input = ["ab", "cd"]
expected = map (fromIntegral . ord) "abcd"
in it ("should be " ++ show expected) $ do
result <- runConduit $ sourceList input .| toBytes .| consume
result `shouldBe` expected
decodeUTF8Spec :: Spec decodeUTF8Spec :: Spec
decodeUTF8Spec = describe "decodeUTF8" $ mapM_ decodeUTF8Spec = describe "decodeUTF8" $ mapM_
( \(label, input, expected) -> context label $ ( \(label, input, expected) -> context label $
@@ -341,12 +349,4 @@ decodeUTF8Spec = describe "decodeUTF8" $ mapM_
, ( "blank", "", Just "" ) , ( "blank", "", Just "" )
] ]
toBytesSpec :: Spec
toBytesSpec = describe "toBytes" $ let
input = ["ab", "cd"]
expected = map (fromIntegral . ord) "abcd"
in it ("should be " ++ show expected) $ do
result <- runConduit $ sourceList input .| toBytes .| consume
result `shouldBe` expected
--jl --jl