From cb7f54e75043054e5de6f72647b5328eda8496e5 Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Sat, 27 Jul 2024 15:26:52 +0200 Subject: [PATCH] Emit an error if row and header size mismatch --- .../src/main/scala/fs2/data/csv/package.scala | 45 +++++++++++++++++++ site/documentation/csv/index.md | 4 +- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/csv/shared/src/main/scala/fs2/data/csv/package.scala b/csv/shared/src/main/scala/fs2/data/csv/package.scala index 13c250f05..0320bd9bb 100644 --- a/csv/shared/src/main/scala/fs2/data/csv/package.scala +++ b/csv/shared/src/main/scala/fs2/data/csv/package.scala @@ -198,6 +198,10 @@ package object csv { } /** Encode a specified type into a CSV prepending the given headers. */ + @deprecated( + message = + "Emits incorrect data if rows have a different length than headers. Please use `encodeWithGivenHeaders` instead.", + since = "fs2-data 1.11.1") def encodeGivenHeaders[T]: PartiallyAppliedEncodeGivenHeaders[T] = new PartiallyAppliedEncodeGivenHeaders[T](dummy = true) @@ -217,6 +221,27 @@ package object csv { } } + /** Encode a specified type into a CSV prepending the given headers. 
*/ + def encodeWithGivenHeaders[T]: PartiallyAppliedEncodeWithGivenHeaders[T] = + new PartiallyAppliedEncodeWithGivenHeaders[T](dummy = true) + + @nowarn + class PartiallyAppliedEncodeWithGivenHeaders[T](val dummy: Boolean) extends AnyVal { + def apply[F[_], Header](headers: NonEmptyList[Header], + fullRows: Boolean = false, + separator: Char = ',', + newline: String = "\n", + escape: EscapeMode = EscapeMode.Auto)(implicit + F: RaiseThrowable[F], + T: RowEncoder[T], + H: WriteableHeader[Header]): Pipe[F, T, String] = { + val stringPipe = + if (fullRows) lowlevel.toRowStrings[F](separator, newline, escape) + else lowlevel.toStrings[F](separator, newline, escape) + lowlevel.encode[F, T] andThen lowlevel.writeWithGivenHeaders(headers) andThen stringPipe + } + } + /** Encode a specified type into a CSV that contains the headers determined by encoding the first element. Empty if input is. */ def encodeUsingFirstHeaders[T]: PartiallyAppliedEncodeUsingFirstHeaders[T] = new PartiallyAppliedEncodeUsingFirstHeaders(dummy = true) @@ -316,10 +341,30 @@ package object csv { } /** Encode a given type into CSV rows using a set of explicitly given headers. */ + @deprecated( + message = + "Emits incorrect data if rows have a different length than headers. Please use `writeWithGivenHeaders` instead.", + since = "fs2-data 1.11.1") def writeWithHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] = Stream(H(headers)) ++ _.map(_.values) + /** Encode a given type into CSV rows using a set of explicitly given headers. */ + def writeWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit + F: RaiseThrowable[F], + H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] = + attemptWriteWithGivenHeaders(headers).apply(_).rethrow + + /** Encode a given type into CSV rows using a set of explicitly given headers, but signals errors as values. 
*/ + def attemptWriteWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit + H: WriteableHeader[Header]): Pipe[F, Row, Either[CsvException, NonEmptyList[String]]] = { + val headerSize = headers.size + Stream(Right(H(headers))) ++ _.map { row => + val rowSize = row.size + if (rowSize == headerSize) Right(row.values) else Left(new HeaderSizeError(headerSize, rowSize, row.line)) + } + } + /** Encode a given type into CSV rows without headers. */ def writeWithoutHeaders[F[_]]: Pipe[F, Row, NonEmptyList[String]] = _.map(_.values) diff --git a/site/documentation/csv/index.md b/site/documentation/csv/index.md index 9025e2d06..dea202583 100644 --- a/site/documentation/csv/index.md +++ b/site/documentation/csv/index.md @@ -51,7 +51,7 @@ More high-level pipes are available for the following use cases: * `decodeGivenHeaders` for CSV parsing that requires headers, but they aren't present in the input * `decodeUsingHeaders` for CSV parsing that requires headers and they're present in the input * `encodeWithoutHeaders` for CSV encoding that works entirely without headers (Note: requires `RowEncoder` instead of `CsvRowEncoder`) -* `encodeGivenHeaders` for CSV encoding that works without headers, but they should be added to the output +* `encodeWithGivenHeaders` for CSV encoding that works without headers, but they should be added to the output * `encodeUsingFirstHeaders` for CSV encoding that works with headers. Uses the headers of the first row for the output. ### Dealing with erroneous files @@ -219,7 +219,7 @@ testRows .string ``` -If you want to write headers, use `writeWithHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. For writing non-String headers, you'll need to provide an instance of `WritableHeader`, a type class analog to `ParseableHeader`. +If you want to write headers, use `writeWithGivenHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. 
For writing non-String headers, you'll need to provide an instance of `WriteableHeader`, a type class analogous to `ParseableHeader`. ## The type classes: Decoders and Encoders