gnieh · satabin · Jul 28, 2024 · Jul 27, 2024 · ybasket · Jul 28, 2024
@@ -198,6 +198,10 @@ package object csv {
   }
 
   /** Encode a specified type into a CSV prepending the given headers. Per the deprecation notice, rows whose length differs from the headers yield incorrect output. */
+  @deprecated(
+    message =
+      "Emits incorrect data if rows have a different length than headers. Please use `encodeWithGivenHeaders` instead.",
+    since = "fs2-data 1.11.1")
   def encodeGivenHeaders[T]: PartiallyAppliedEncodeGivenHeaders[T] =
     new PartiallyAppliedEncodeGivenHeaders[T](dummy = true)
 
@@ -217,6 +221,27 @@ package object csv {
     }
   }
 
+  /** Encode a specified type into a CSV prepending the given headers. Fails the stream with a `HeaderSizeError` if an encoded row's size differs from the number of headers. */
+  def encodeWithGivenHeaders[T]: PartiallyAppliedEncodeWithGivenHeaders[T] =
+    new PartiallyAppliedEncodeWithGivenHeaders[T](dummy = true)
+
+  /** Partially applied form of `encodeWithGivenHeaders`: `T` is fixed by the caller up front, while `F` and
+    * `Header` are type parameters of `apply`. `dummy` carries no information; a value class (`extends AnyVal`)
+    * must have exactly one `val` parameter.
+    */
+  @nowarn
+  class PartiallyAppliedEncodeWithGivenHeaders[T](val dummy: Boolean) extends AnyVal {
+    def apply[F[_], Header](headers: NonEmptyList[Header],
+                            fullRows: Boolean = false,
+                            separator: Char = ',',
+                            newline: String = "\n",
+                            escape: EscapeMode = EscapeMode.Auto)(implicit
+        F: RaiseThrowable[F],
+        T: RowEncoder[T],
+        H: WriteableHeader[Header]): Pipe[F, T, String] = {
+      // `fullRows` selects `lowlevel.toRowStrings` instead of `lowlevel.toStrings` as the final rendering step;
+      // both receive the same separator, newline and escape settings.
+      val stringPipe =
+        if (fullRows) lowlevel.toRowStrings[F](separator, newline, escape)
+        else lowlevel.toStrings[F](separator, newline, escape)
+      // Encode each `T` as a row, prepend the headers (raising on a row/header size mismatch), then render strings.
+      lowlevel.encode[F, T] andThen lowlevel.writeWithGivenHeaders(headers) andThen stringPipe
+    }
+  }
+
   /** Encode a specified type into a CSV that contains the headers determined by encoding the first element. Emits nothing if the input is empty. */
   def encodeUsingFirstHeaders[T]: PartiallyAppliedEncodeUsingFirstHeaders[T] =
     new PartiallyAppliedEncodeUsingFirstHeaders(dummy = true)
@@ -316,10 +341,30 @@ package object csv {
     }
 
     /** Emit the explicitly given headers, followed by the values of each row. Row sizes are not checked against the headers. */
+    @deprecated(
+      message =
+        "Emits incorrect data if rows have a different length than headers. Please use `writeWithGivenHeaders` instead.",
+      since = "fs2-data 1.11.1")
     def writeWithHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit
         H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] =
       Stream(H(headers)) ++ _.map(_.values)
 
+    /** Emit the explicitly given headers, followed by the values of each row. Raises a `HeaderSizeError` in the stream for a row whose size differs from the number of headers. */
+    def writeWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit
+        F: RaiseThrowable[F],
+        H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] =
+      attemptWriteWithGivenHeaders(headers).apply(_).rethrow
+
+    /** Emit the explicitly given headers, followed by each row, signalling errors as values: the headers and every row of matching size are emitted as `Right`, a row of any other size as a `Left` holding a `HeaderSizeError`. Later rows are still processed after an error. */
+    def attemptWriteWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit
+        H: WriteableHeader[Header]): Pipe[F, Row, Either[CsvException, NonEmptyList[String]]] = {
+      val headerSize = headers.size
+      Stream(Right(H(headers))) ++ _.map { row =>
+        val rowSize = row.size
+        if (rowSize == headerSize) Right(row.values) else Left(new HeaderSizeError(headerSize, rowSize, row.line))
+      }
+    }
+
     /** Emit the values of each row, without any header line. */
     def writeWithoutHeaders[F[_]]: Pipe[F, Row, NonEmptyList[String]] =
       _.map(_.values)

@@ -0,0 +1,51 @@
+/*
+ * Copyright 2024 fs2-data Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package fs2.data.csv
+
+import cats.data.NonEmptyList
+import weaver.*
+
+/** Checks that `lowlevel.attemptWriteWithGivenHeaders` reports rows whose size differs from the header size. */
+object RowGeneratorTest extends SimpleIOSuite {
+
+  pureTest("Emit error on wrong row size (#621)") {
+    val input = List(
+      Row(NonEmptyList.of("a", "b", "c"), Some(1)),
+      Row(NonEmptyList.of("d", "e"), Some(2)),
+      Row(NonEmptyList.of("f", "g", "h", "i"), Some(3)),
+      Row(NonEmptyList.of("j", "k", "l"), Some(4))
+    )
+    val headers = NonEmptyList.of("first", "second", "third")
-    val input = List(
-      Row(NonEmptyList.of("a", "b", "c"), Some(1)),
-      Row(NonEmptyList.of("d", "e"), Some(2)),
-      Row(NonEmptyList.of("f", "g", "h", "i"), Some(3)),
-      Row(NonEmptyList.of("j", "k", "l"), Some(4))
-    )
-    val headers = NonEmptyList.of("first", "second", "third")
+    val headers = NonEmptyList.of("first", "second", "third")
+    val input = List(
+      Row(NonEmptyList.of("a", "b", "c"), Some(1)),
+      Row(NonEmptyList.of("d", "e"), Some(2)),
+      Row(NonEmptyList.of("f", "g", "h", "i"), Some(3)),
+      Row(NonEmptyList.of("j", "k", "l"), Some(4))
+    )
-    val input = List(
-      Row(NonEmptyList.of("a", "b", "c"), Some(1)),
-      Row(NonEmptyList.of("d", "e"), Some(2)),
-      Row(NonEmptyList.of("f", "g", "h", "i"), Some(3)),
-      Row(NonEmptyList.of("j", "k", "l"), Some(4))
-    )
-    val headers = NonEmptyList.of("first", "second", "third")
+    val headers = NonEmptyList.of("first", "second", "third")
+    val input = List(
+      Row(NonEmptyList.of("a", "b", "c"), Some(1)),
+      Row(NonEmptyList.of("d", "e"), Some(2)),
+      Row(NonEmptyList.of("f", "g", "h", "i"), Some(3)),
+      Row(NonEmptyList.of("j", "k", "l"), Some(4))
+    )
+
+    // Three headers; the rows on lines 2 (two cells) and 3 (four cells) do not match, lines 1 and 4 do.
+    val result = fs2.Stream.emits(input).through(lowlevel.attemptWriteWithGivenHeaders(headers)).compile.toList
+
+    // Expect the header line first, then one element per input row in order: mismatching rows become
+    // `Left(HeaderSizeError)` in place, and the valid row after them is still emitted.
+    matches(result) {
+      case List(
+            Right(NonEmptyList("first", "second" :: "third" :: Nil)),
+            Right(NonEmptyList("a", "b" :: "c" :: Nil)),
+            Left(e1: HeaderSizeError),
+            Left(e2: HeaderSizeError),
+            Right(NonEmptyList("j", "k" :: "l" :: Nil))
+          ) =>
+        expect.all(e1.expectedColumns == 3,
+                   e1.actualColumns == 2,
+                   e1.line == Some(2L),
+                   e2.expectedColumns == 3,
+                   e2.actualColumns == 4,
+                   e2.line == Some(3L))
+    }
+  }
+}
@@ -51,7 +51,7 @@ More high-level pipes are available for the following use cases:
 * `decodeGivenHeaders` for CSV parsing that requires headers, but they aren't present in the input
 * `decodeUsingHeaders` for CSV parsing that requires headers and they're present in the input
 * `encodeWithoutHeaders` for CSV encoding that works entirely without headers (Note: requires `RowEncoder` instead of `CsvRowEncoder`)
-* `encodeGivenHeaders` for CSV encoding that works without headers, but they should be added to the output
+* `encodeWithGivenHeaders` for CSV encoding that works without headers, but the given headers are added to the output (fails if a row's size differs from the number of headers)
 * `encodeUsingFirstHeaders` for CSV encoding that works with headers. Uses the headers of the first row for the output.
 
 ### Dealing with erroneous files
@@ -219,7 +219,7 @@ testRows
   .string
 ```
 
-If you want to write headers, use `writeWithHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. For writing non-String headers, you'll need to provide an instance of `WritableHeader`, a type class analog to `ParseableHeader`.
+If you want to write headers, use `writeWithGivenHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. For writing non-String headers, you'll need to provide an instance of `WriteableHeader`, a type class analogous to `ParseableHeader`.
 
 ## The type classes: Decoders and Encoders