From 6b4e9d0329f08dd18c7ffc754bcedfffc1c12860 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 29 Oct 2024 13:31:54 +0100 Subject: [PATCH 01/13] expanded cumSum with Short, Byte, and BigInteger --- .../jetbrains/kotlinx/dataframe/api/cumSum.kt | 18 +++- .../kotlinx/dataframe/math/cumsum.kt | 91 +++++++++++++++++++ .../kotlinx/dataframe/statistics/cumsum.kt | 25 +++++ 3 files changed, 133 insertions(+), 1 deletion(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index 9d8c7ff715..aec6f276ff 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dataframe.math.cumSum import org.jetbrains.kotlinx.dataframe.math.defaultCumSumSkipNA import org.jetbrains.kotlinx.dataframe.typeClass import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KProperty import kotlin.reflect.typeOf @@ -22,15 +23,30 @@ public fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSki typeOf() -> cast().cumSum(skipNA).cast() typeOf() -> cast().cumSum().cast() typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() typeOf() -> cast().cumSum().cast() typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() typeOf() -> cast().cumSum().cast() typeOf() -> cast().cumSum(skipNA).cast() typeOf(), typeOf() -> convertToDouble().cumSum(skipNA).cast() else -> error("Cumsum for type ${type()} is not supported") } -private val supportedClasses = setOf(Double::class, Float::class, Int::class, Long::class, BigDecimal::class) +private val supportedClasses = setOf( + Double::class, + Float::class, + Int::class, + Byte::class, + Short::class, + Long::class, + BigInteger::class, + BigDecimal::class, +) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt index 58ef59329a..5efb7ff97f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.isNA import org.jetbrains.kotlinx.dataframe.api.map import java.math.BigDecimal +import java.math.BigInteger internal val defaultCumSumSkipNA: Boolean = true @@ -88,6 +89,66 @@ internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): Dat } } +@JvmName("byteCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = 0.toByte() + return map { + sum = (sum + it).toByte() + sum + } +} + +@JvmName("cumsumByteNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = 0.toByte() + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum = (sum + it).toByte() + sum + } + } + } +} + +@JvmName("shortCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = 0.toShort() + return map { + sum = (sum + it).toShort() + sum + } +} + +@JvmName("cumsumShortNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = 0.toShort() + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum = (sum + it).toShort() + sum + } + } + } +} + @JvmName("longCumsum") internal fun DataColumn.cumSum(): DataColumn { var sum = 0L @@ -118,6 +179,36 @@ internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): Da } } +@JvmName("bigIntegerCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = BigInteger.ZERO + return map { + sum += it + sum + } +} + +@JvmName("cumsumBigIntegerNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = BigInteger.ZERO + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum += it + sum + } + } + } +} + @JvmName("bigDecimalCumsum") internal fun DataColumn.cumSum(): DataColumn { var sum = BigDecimal.ZERO diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt index 385023eda8..cf42af3fe8 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.cumSum import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.map import org.junit.Test @Suppress("ktlint:standard:argument-list-wrapping") @@ -22,6 +23,30 @@ class CumsumTests { col.cumSum(skipNA = false).toList() shouldBe expectedNoSkip } + @Test + fun `short column`() { + col.map { it?.toShort() }.cumSum().toList() shouldBe expected.map { it?.toShort() } + col.map { it?.toShort() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toShort() } + } + + @Test + fun `byte column`() { + col.map { it?.toByte() }.cumSum().toList() shouldBe expected.map { it?.toByte() } + col.map { it?.toByte() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toByte() } + } + + @Test + fun `big int column`() { + col.map { it?.toBigInteger() }.cumSum().toList() shouldBe expected.map { it?.toBigInteger() } + col.map { it?.toBigInteger() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toBigInteger() } + } + + @Test + fun `big decimal column`() { + col.map { it?.toBigDecimal() }.cumSum().toList() shouldBe expected.map { it?.toBigDecimal() } + col.map { it?.toBigDecimal() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toBigDecimal() } + } + @Test fun frame() { val str by columnOf("a", "b", "c", "d", "e") From c97a97367956dc00381198319973f3de1cfd13ae Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 29 Oct 2024 15:18:48 +0100 Subject: [PATCH 02/13] added BigInteger to mean() --- .../org/jetbrains/kotlinx/dataframe/math/mean.kt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt index c7c6e95962..d2c924c6f7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.math import org.jetbrains.kotlinx.dataframe.api.skipNA_default import org.jetbrains.kotlinx.dataframe.impl.renderType import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -28,6 +29,8 @@ internal fun Sequence.mean(type: KType, skipNA: Boolean = skipNA Long::class -> (this as Sequence).map { it.toDouble() }.mean(false) + BigInteger::class -> (this as Sequence).map { it.toDouble() }.mean(false) + BigDecimal::class -> (this as Sequence).map { it.toDouble() }.mean(skipNA) Number::class -> (this as Sequence).map { it.toDouble() }.mean(skipNA) @@ -132,6 +135,19 @@ public fun Iterable.mean(): Double = if (count > 0) sum / count else Double.NaN } +@JvmName("bigIntegerMean") +public fun Iterable.mean(): Double = + if (this is Collection) { + if (size > 0) sumOf { it.toDouble() } / size else Double.NaN + } else { + var count = 0 + val sum = sumOf { + count++ + it.toDouble() + } + if (count > 0) sum / count else Double.NaN + } + @JvmName("bigDecimalMean") public fun Iterable.mean(): Double = if (this is Collection) { From ba00f0f2324a814cb5f5251bfc9b53abfe1c20bc Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 7 Nov 2024 16:47:02 +0100 Subject: [PATCH 03/13] fixup! expanded cumSum with Short, Byte, and BigInteger --- .../jetbrains/kotlinx/dataframe/api/cumSum.kt | 23 +++++-- .../kotlinx/dataframe/math/cumsum.kt | 60 ------------------- 2 files changed, 17 insertions(+), 66 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index aec6f276ff..5be3311c1a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -18,22 +18,33 @@ import kotlin.reflect.typeOf public fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn = when (type()) { typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() - typeOf() -> cast().cumSum().cast() - typeOf() -> cast().cumSum(skipNA).cast() - typeOf() -> cast().cumSum().cast() - typeOf() -> cast().cumSum(skipNA).cast() - typeOf() -> cast().cumSum().cast() - typeOf() -> cast().cumSum(skipNA).cast() + + // careful, cast to Int can occur! TODO + typeOf(), typeOf(), typeOf() -> cast().cumSum().cast() + + // careful, cast to Int can occur! TODO + typeOf(), typeOf(), typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf(), typeOf() -> convertToDouble().cumSum(skipNA).cast() + else -> error("Cumsum for type ${type()} is not supported") } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt index 5efb7ff97f..89b9f86dcd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt @@ -89,66 +89,6 @@ internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): Dat } } -@JvmName("byteCumsum") -internal fun DataColumn.cumSum(): DataColumn { - var sum = 0.toByte() - return map { - sum = (sum + it).toByte() - sum - } -} - -@JvmName("cumsumByteNullable") -internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { - var sum = 0.toByte() - var fillNull = false - return map { - when { - it == null -> { - if (!skipNA) fillNull = true - null - } - - fillNull -> null - - else -> { - sum = (sum + it).toByte() - sum - } - } - } -} - -@JvmName("shortCumsum") -internal fun DataColumn.cumSum(): DataColumn { - var sum = 0.toShort() - return map { - sum = (sum + it).toShort() - sum - } -} - -@JvmName("cumsumShortNullable") -internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { - var sum = 0.toShort() - var fillNull = false - return map { - when { - it == null -> { - if (!skipNA) fillNull = true - null - } - - fillNull -> null - - else -> { - sum = (sum + it).toShort() - sum - } - } - } -} - @JvmName("longCumsum") internal fun DataColumn.cumSum(): DataColumn { var sum = 0L From 4d4ebdac129cc6ce0bb58c6d2f59e9f6f3f0c4fb Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 7 Nov 2024 16:51:47 +0100 Subject: [PATCH 04/13] expanded sum with float, short, byte, biginteger and some warnings for conversions --- .../jetbrains/kotlinx/dataframe/math/sum.kt | 68 ++++++++++++++++++- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt index d26c155a34..470ca76cda 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt @@ -1,6 +1,7 @@ package org.jetbrains.kotlinx.dataframe.math import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType @PublishedApi @@ -11,10 +12,29 @@ internal fun Iterable.sumOf(type: KType, selector: (T) -> R?) } return when (type.classifier) { Double::class -> sumOf(selector as ((T) -> Double)) as R + + // careful, conversion to Double to Float occurs! TODO + Float::class -> sumOf { (selector as ((T) -> Float))(it).toDouble() }.toFloat() as R + Int::class -> sumOf(selector as ((T) -> Int)) as R + + // careful, cast to Int occurs! TODO + Short::class -> sumOf { (selector as ((T) -> Short))(it).toInt() } as R + + // careful, cast to Int occurs! TODO + Byte::class -> sumOf { (selector as ((T) -> Byte))(it).toInt() } as R + Long::class -> sumOf(selector as ((T) -> Long)) as R + BigDecimal::class -> sumOf(selector as ((T) -> BigDecimal)) as R - else -> TODO() + + BigInteger::class -> sumOf(selector as ((T) -> BigInteger)) as R + + Number::class -> sumOf { (selector as ((T) -> Number))(it).toDouble() } as R + + Nothing::class -> 0.0 as R + + else -> throw IllegalArgumentException("sumOf is not supported for $type") } } @@ -22,11 +42,23 @@ internal fun Iterable.sumOf(type: KType, selector: (T) -> R?) internal fun Iterable.sum(type: KType): T = when (type.classifier) { Double::class -> (this as Iterable).sum() as T + Float::class -> (this as Iterable).sum() as T + + // careful, cast to Int occurs! TODO Int::class, Short::class, Byte::class -> (this as Iterable).sum() as T + Long::class -> (this as Iterable).sum() as T + BigDecimal::class -> (this as Iterable).sum() as T - else -> throw IllegalArgumentException("Sum is not supported for $type") + + BigInteger::class -> (this as Iterable).sum() as T + + Number::class -> (this as Iterable).map { it.toDouble() }.sum() as T + + Nothing::class -> 0.0 as T + + else -> throw IllegalArgumentException("sum is not supported for $type") } @JvmName("sumNullableT") @@ -34,11 +66,23 @@ internal fun Iterable.sum(type: KType): T = internal fun Iterable.sum(type: KType): T = when (type.classifier) { Double::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + Float::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + + // careful, cast to Int occurs! TODO Int::class, Short::class, Byte::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + Long::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + BigDecimal::class -> (this as Iterable).asSequence().filterNotNull().sum() as T - else -> TODO() + + BigInteger::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + + Number::class -> (this as Iterable).asSequence().filterNotNull().map { it.toDouble() }.sum() as T + + Nothing::class -> 0.0 as T + + else -> throw IllegalArgumentException("sum is not supported for $type") } @PublishedApi @@ -58,3 +102,21 @@ internal fun Sequence.sum(): BigDecimal { } return sum } + +@PublishedApi +internal fun Iterable.sum(): BigInteger { + var sum: BigInteger = BigInteger.ZERO + for (element in this) { + sum += element + } + return sum +} + +@PublishedApi +internal fun Sequence.sum(): BigInteger { + var sum: BigInteger = BigInteger.ZERO + for (element in this) { + sum += element + } + return sum +} From 2ef56b424c97033e3a87068f29aac49e34008e6d Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 7 Nov 2024 16:52:35 +0100 Subject: [PATCH 05/13] expanded std with BigInteger and Number --- .../jetbrains/kotlinx/dataframe/math/std.kt | 6 ++++++ .../kotlinx/dataframe/math/stdMean.kt | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt index 119f0e2257..ab38f8c9c0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.api.ddof_default import org.jetbrains.kotlinx.dataframe.api.skipNA_default import org.jetbrains.kotlinx.dataframe.impl.renderType import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -26,6 +27,8 @@ internal fun Iterable.std( Int::class, Short::class, Byte::class -> (this as Iterable).std(ddof) Long::class -> (this as Iterable).std(ddof) BigDecimal::class -> (this as Iterable).std(ddof) + BigInteger::class -> (this as Iterable).std(ddof) + Number::class -> (this as Iterable).map { it.toDouble() }.std(skipNA, ddof) Nothing::class -> Double.NaN else -> throw IllegalArgumentException("Unable to compute the std for type ${renderType(type)}") } @@ -47,3 +50,6 @@ public fun Iterable.std(ddof: Int = ddof_default): Double = varianceAndMea @JvmName("bigDecimalStd") public fun Iterable.std(ddof: Int = ddof_default): Double = varianceAndMean().std(ddof) + +@JvmName("bigIntegerStd") +public fun Iterable.std(ddof: Int = ddof_default): Double = varianceAndMean().std(ddof) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt index ee95c6d1ae..8deb8c1eff 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt @@ -4,6 +4,7 @@ package org.jetbrains.kotlinx.dataframe.math import org.jetbrains.kotlinx.dataframe.api.skipNA_default import java.math.BigDecimal +import java.math.BigInteger import kotlin.math.sqrt public data class BasicStats(val count: Int, val mean: Double, val variance: Double) { @@ -114,3 +115,20 @@ public fun Iterable.varianceAndMean(): BasicStats { } return BasicStats(count, mean.toDouble(), variance.toDouble()) } + +@JvmName("bigIntegerVarianceAndMean") +public fun Iterable.varianceAndMean(): BasicStats { + var count = 0 + var sum = BigInteger.ZERO + for (element in this) { + sum += element + count++ + } + val mean = sum.toDouble() / count + var variance = .0 + for (element in this) { + val diff = element.toDouble() - mean + variance += diff * diff + } + return BasicStats(count, mean, variance) +} From ac3eb9ae3d3b2644e89257d3e83638a524be03e2 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 7 Nov 2024 20:43:20 +0100 Subject: [PATCH 06/13] Fixed ColumnDescription.type type --- .../main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt index 66b51c5881..e03c4c3129 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -16,7 +16,7 @@ import kotlin.reflect.KType public interface ColumnDescription { public val name: String public val path: ColumnPath - public val type: KType + public val type: String public val count: Int public val unique: Int public val nulls: Int From 563d3676725c19f4bc631460f28d97fad18e8ce2 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 7 Nov 2024 21:40:10 +0100 Subject: [PATCH 07/13] added describe support for incomparable number types (converting them to either double or bigdecimal) and added tests --- .../kotlinx/dataframe/api/DataColumnType.kt | 4 ++ .../kotlinx/dataframe/impl/TypeUtils.kt | 18 +++++ .../kotlinx/dataframe/impl/api/describe.kt | 68 ++++++++++++++++--- .../kotlinx/dataframe/api/describe.kt | 58 ++++++++++++++++ 4 files changed, 137 insertions(+), 11 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt index 2c7887ab3b..91cd02eb41 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -11,6 +11,8 @@ import org.jetbrains.kotlinx.dataframe.impl.isNothing import org.jetbrains.kotlinx.dataframe.impl.projectTo import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.typeClass +import java.math.BigDecimal +import java.math.BigInteger import kotlin.contracts.ExperimentalContracts import kotlin.contracts.contract import kotlin.reflect.KClass @@ -44,6 +46,8 @@ public inline fun AnyCol.isType(): Boolean = type() == typeOf() public fun AnyCol.isNumber(): Boolean = isSubtypeOf() +public fun AnyCol.isBigNumber(): Boolean = isSubtypeOf() || isSubtypeOf() + public fun AnyCol.isList(): Boolean = typeClass == List::class /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 199568630a..54c12362c5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -11,6 +11,8 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.Infer import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.util.GUESS_VALUE_TYPE +import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KClass import kotlin.reflect.KType import kotlin.reflect.KTypeParameter @@ -29,6 +31,7 @@ import kotlin.reflect.full.superclasses import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure import kotlin.reflect.typeOf +import kotlin.toBigDecimal as toBigDecimalKotlin internal inline fun KClass<*>.createTypeUsing() = typeOf().projectTo(this) @@ -646,3 +649,18 @@ internal fun Any.asArrayAsListOrNull(): List<*>? = is Array<*> -> asList() else -> null } + +internal fun Any.isBigNumber(): Boolean = this is BigInteger || this is BigDecimal + +internal fun Number.toBigDecimal(): BigDecimal = + when (this) { + is BigDecimal -> this + is BigInteger -> this.toBigDecimalKotlin() + is Int -> this.toBigDecimalKotlin() + is Byte -> this.toInt().toBigDecimalKotlin() + is Short -> this.toInt().toBigDecimalKotlin() + is Long -> this.toBigDecimalKotlin() + is Float -> this.toBigDecimalKotlin() + is Double -> this.toBigDecimalKotlin() + else -> BigDecimal(this.toString()) + } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt index dad18f4901..47dc3048cd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt @@ -5,6 +5,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ColumnDescription import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.after +import org.jetbrains.kotlinx.dataframe.api.any import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asComparable import org.jetbrains.kotlinx.dataframe.api.asNumbers @@ -12,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isNumber +import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.maxOrNull import org.jetbrains.kotlinx.dataframe.api.mean import org.jetbrains.kotlinx.dataframe.api.medianOrNull @@ -25,7 +27,9 @@ import org.jetbrains.kotlinx.dataframe.columns.size import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.columns.addPath import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import org.jetbrains.kotlinx.dataframe.impl.isBigNumber import org.jetbrains.kotlinx.dataframe.impl.renderType +import org.jetbrains.kotlinx.dataframe.impl.toBigDecimal import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.type @@ -55,12 +59,12 @@ internal fun describeImpl(cols: List): DataFrame { } } - val all = cols.collectAll(false) + val allCols = cols.collectAll(false) - val hasNumeric = all.any { it.isNumber() } - val hasComparable = all.any { it.isComparable() } - val hasLongPaths = all.any { it.path().size > 1 } - var df = all.toDataFrame { + val hasNumericCols = allCols.any { it.isNumber() } + val hasInterComparableCols = allCols.any { it.isComparable() } + val hasLongPaths = allCols.any { it.path().size > 1 } + var df = allCols.toDataFrame { ColumnDescription::name from { it.name() } if (hasLongPaths) { ColumnDescription::path from { it.path() } @@ -74,21 +78,63 @@ internal fun describeImpl(cols: List): DataFrame { .groupBy { it }.maxByOrNull { it.value.size } ?.key } - if (hasNumeric) { + if (hasNumericCols) { ColumnDescription::mean from { if (it.isNumber()) it.asNumbers().mean() else null } ColumnDescription::std from { if (it.isNumber()) it.asNumbers().std() else null } } - if (hasComparable) { - ColumnDescription::min from inferType { if (it.isComparable()) it.asComparable().minOrNull() else null } + if (hasInterComparableCols || hasNumericCols) { + ColumnDescription::min from inferType { + when { + it.isComparable() -> + it.asComparable().minOrNull() + + // Found incomparable number types, convert all to Double or BigDecimal first + it.isNumber() -> + if (it.any { it?.isBigNumber() == true }) { + it.map { (it as Number?)?.toBigDecimal() }.minOrNull() + } else { + it.map { (it as Number?)?.toDouble() }.minOrNull() + } + + else -> null + } + } ColumnDescription::median from inferType { - if (it.isComparable()) it.asComparable().medianOrNull() else null + when { + it.isComparable() -> + it.asComparable().medianOrNull() + + // Found incomparable number types, convert all to Double or BigDecimal first + it.isNumber() -> + if (it.any { it?.isBigNumber() == true }) { + it.map { (it as Number?)?.toBigDecimal() }.medianOrNull() + } else { + it.map { (it as Number?)?.toDouble() }.medianOrNull() + } + + else -> null + } + } + ColumnDescription::max from inferType { + when { + it.isComparable() -> it.asComparable().maxOrNull() + + // Found incomparable number types, convert all to Double or BigDecimal first + it.isNumber() -> + if (it.any { it?.isBigNumber() == true }) { + it.map { (it as Number?)?.toBigDecimal() }.maxOrNull() + } else { + it.map { (it as Number?)?.toDouble() }.maxOrNull() + } + + else -> null + } } - ColumnDescription::max from inferType { if (it.isComparable()) it.asComparable().maxOrNull() else null } } } df = df.add(ColumnDescription::freq) { val top = it[ColumnDescription::top] - val data = all[index] + val data = allCols[index] data.values.count { it == top } }.move(ColumnDescription::freq).after(ColumnDescription::top) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt index a418e87445..6f6366a609 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -1,7 +1,9 @@ package org.jetbrains.kotlinx.dataframe.api import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.alsoDebug import org.junit.Test +import kotlin.reflect.typeOf class DescribeTests { @@ -11,4 +13,60 @@ class DescribeTests { val df = dataFrameOf(a).drop(1) df.describe()["min"][0] shouldBe null } + + @Test + fun `describe nullable Number column`() { + val a by columnOf( + 1, + 2.0, + 3f, + 4L, + 5.toShort(), + 6.toByte(), + 7.toBigInteger(), + 8.toBigDecimal(), + null, + ) + val df = dataFrameOf(a) + val describe = df.describe() + .alsoDebug() + .single() + with(describe) { + name shouldBe "a" + type shouldBe "Number?" + count shouldBe 9 + unique shouldBe 9 + nulls shouldBe 1 + top shouldBe 1 + freq shouldBe 1 + mean shouldBe 4.5 + std shouldBe 2.449489742783178 + min shouldBe 1.toBigDecimal() + median shouldBe 4.toBigDecimal() + max shouldBe 8.toBigDecimal() + } + } + + @Test + fun `describe with NaNs`() { + val a by columnOf(1.0, 2.0, Double.NaN, 4.0) + val df = dataFrameOf(a) + val describe = df.describe() + .alsoDebug() + .single() + with(describe) { + name shouldBe "a" + type shouldBe "Double" + count shouldBe 4 + unique shouldBe 4 + nulls shouldBe 0 + top shouldBe 1 + freq shouldBe 1 + mean.isNaN() shouldBe true + std.isNaN() shouldBe true + min shouldBe 1.0 // TODO should be NaN too? + median shouldBe 3.0 + max.isNaN shouldBe true + } + } } From 966447de6a06e2fe34d8f4fe6c4c64b0186a0b86 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 8 Nov 2024 12:49:20 +0100 Subject: [PATCH 08/13] renamed isComparable to isInterComparable to better reflect what it does. (It kept confusing me) --- .../kotlinx/dataframe/api/DataColumnType.kt | 20 ++++++++++++++++--- .../jetbrains/kotlinx/dataframe/api/max.kt | 8 ++++---- .../jetbrains/kotlinx/dataframe/api/median.kt | 10 +++++----- .../jetbrains/kotlinx/dataframe/api/min.kt | 8 ++++---- .../kotlinx/dataframe/api/typeConversions.kt | 2 +- .../dataframe/impl/aggregation/getColumns.kt | 6 +++--- .../kotlinx/dataframe/impl/api/describe.kt | 10 +++++----- .../jupyter/KotlinNotebookPluginUtils.kt | 4 ++-- .../dataframe/util/deprecationMessages.kt | 8 ++++++-- .../testSets/person/DataFrameTests.kt | 14 ++++++------- 10 files changed, 54 insertions(+), 36 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt index 91cd02eb41..9c360cbc32 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -11,6 +11,9 @@ import org.jetbrains.kotlinx.dataframe.impl.isNothing import org.jetbrains.kotlinx.dataframe.impl.projectTo import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.typeClass +import org.jetbrains.kotlinx.dataframe.util.IS_COMPARABLE +import org.jetbrains.kotlinx.dataframe.util.IS_COMPARABLE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.IS_INTER_COMPARABLE_IMPORT import java.math.BigDecimal import java.math.BigInteger import kotlin.contracts.ExperimentalContracts @@ -50,11 +53,22 @@ public fun AnyCol.isBigNumber(): Boolean = isSubtypeOf() || isSubty public fun AnyCol.isList(): Boolean = typeClass == List::class +/** @include [isInterComparable] */ +@Deprecated( + message = IS_COMPARABLE, + replaceWith = ReplaceWith(IS_COMPARABLE_REPLACE, IS_INTER_COMPARABLE_IMPORT), + level = DeprecationLevel.WARNING, +) +public fun AnyCol.isComparable(): Boolean = isInterComparable() + /** - * Returns `true` if [this] column is comparable, i.e. its type is a subtype of [Comparable] and its - * type argument is not [Nothing]. + * Returns `true` if [this] column is inter-comparable, i.e. + * its elements can be compared with each other. + * + * Technically, this means the elements' common type is a subtype of [Comparable] with + * the type argument not being [Nothing]. */ -public fun AnyCol.isComparable(): Boolean = +public fun AnyCol.isInterComparable(): Boolean = isSubtypeOf?>() && type().projectTo(Comparable::class).arguments[0].let { it != KTypeProjection.STAR && diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt index 59b34e8562..96f836bc0d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -11,7 +11,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators -import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated @@ -52,7 +52,7 @@ public inline fun > AnyRow.rowMaxOf(): T = rowMaxOfOrN // region DataFrame -public fun DataFrame.max(): DataRow = maxFor(comparableColumns()) +public fun DataFrame.max(): DataRow = maxFor(interComparableColumns()) public fun > DataFrame.maxFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.max.aggregateFor(this, columns) @@ -121,7 +121,7 @@ public fun > DataFrame.maxByOrNull(column: KProperty // region GroupBy -public fun Grouped.max(): DataFrame = maxFor(comparableColumns()) +public fun Grouped.max(): DataFrame = maxFor(interComparableColumns()) public fun > Grouped.maxFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.max.aggregateFor(this, columns) @@ -218,7 +218,7 @@ public fun > Pivot.maxBy(column: KProperty): Reduced // region PivotGroupBy -public fun PivotGroupBy.max(separate: Boolean = false): DataFrame = maxFor(separate, comparableColumns()) +public fun PivotGroupBy.max(separate: Boolean = false): DataFrame = maxFor(separate, interComparableColumns()) public fun > PivotGroupBy.maxFor( separate: Boolean = false, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt index fb57e33d77..ded0944f4a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt @@ -11,7 +11,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast -import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf @@ -53,7 +53,7 @@ public inline fun > AnyRow.rowMedianOf(): T = // region DataFrame -public fun DataFrame.median(): DataRow = medianFor(comparableColumns()) +public fun DataFrame.median(): DataRow = medianFor(interComparableColumns()) public fun > DataFrame.medianFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.median.aggregateFor(this, columns) @@ -96,7 +96,7 @@ public inline fun > DataFrame.medianOf( // region GroupBy -public fun Grouped.median(): DataFrame = medianFor(comparableColumns()) +public fun Grouped.median(): DataFrame = medianFor(interComparableColumns()) public fun > Grouped.medianFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.median.aggregateFor(this, columns) @@ -134,7 +134,7 @@ public inline fun > Grouped.medianOf( // region Pivot -public fun Pivot.median(separate: Boolean = false): DataRow = medianFor(separate, comparableColumns()) +public fun Pivot.median(separate: Boolean = false): DataRow = medianFor(separate, interComparableColumns()) public fun > Pivot.medianFor( separate: Boolean = false, @@ -174,7 +174,7 @@ public inline fun > Pivot.medianOf( // region PivotGroupBy public fun PivotGroupBy.median(separate: Boolean = false): DataFrame = - medianFor(separate, comparableColumns()) + medianFor(separate, interComparableColumns()) public fun > PivotGroupBy.medianFor( separate: Boolean = false, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt index 2ec4be8943..42b1405372 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt @@ -11,7 +11,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators -import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated @@ -52,7 +52,7 @@ public inline fun > AnyRow.rowMinOf(): T = rowMinOfOrN // region DataFrame -public fun DataFrame.min(): DataRow = minFor(comparableColumns()) +public fun DataFrame.min(): DataRow = minFor(interComparableColumns()) public fun > DataFrame.minFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.min.aggregateFor(this, columns) @@ -121,7 +121,7 @@ public fun > DataFrame.minByOrNull(column: KProperty // region GroupBy -public fun Grouped.min(): DataFrame = minFor(comparableColumns()) +public fun Grouped.min(): DataFrame = minFor(interComparableColumns()) public fun > Grouped.minFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.min.aggregateFor(this, columns) @@ -219,7 +219,7 @@ public fun > Pivot.minBy(column: KProperty): Reduced // region PivotGroupBy -public fun PivotGroupBy.min(separate: Boolean = false): DataFrame = minFor(separate, comparableColumns()) +public fun PivotGroupBy.min(separate: Boolean = false): DataFrame = minFor(separate, interComparableColumns()) public fun > PivotGroupBy.minFor( separate: Boolean = false, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt index 9cd34f7ea9..adafce95cd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt @@ -83,7 +83,7 @@ public fun DataColumn.asNumbers(): ValueColumn { } public fun DataColumn.asComparable(): DataColumn> { - require(isComparable()) + require(isInterComparable()) return this as DataColumn> } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt index 0f244fec4d..60c7104e24 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt @@ -5,7 +5,7 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.aggregation.Aggregatable import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.isComparable +import org.jetbrains.kotlinx.dataframe.api.isInterComparable import org.jetbrains.kotlinx.dataframe.api.isNumber import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType @@ -14,8 +14,8 @@ internal inline fun Aggregatable.remainingColumns( crossinline predicate: (AnyCol) -> Boolean, ): ColumnsSelector = remainingColumnsSelector().filter { predicate(it.data) } -internal fun Aggregatable.comparableColumns() = - remainingColumns { it.isComparable() } as ColumnsSelector> +internal fun Aggregatable.interComparableColumns() = + remainingColumns { it.isInterComparable() } as ColumnsSelector> internal fun Aggregatable.numberColumns() = remainingColumns { it.isNumber() } as ColumnsSelector diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt index 47dc3048cd..9f4b1bc5f3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt @@ -11,7 +11,7 @@ import org.jetbrains.kotlinx.dataframe.api.asComparable import org.jetbrains.kotlinx.dataframe.api.asNumbers import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.isComparable +import org.jetbrains.kotlinx.dataframe.api.isInterComparable import org.jetbrains.kotlinx.dataframe.api.isNumber import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.maxOrNull @@ -62,7 +62,7 @@ internal fun describeImpl(cols: List): DataFrame { val allCols = cols.collectAll(false) val hasNumericCols = allCols.any { it.isNumber() } - val hasInterComparableCols = allCols.any { it.isComparable() } + val hasInterComparableCols = allCols.any { it.isInterComparable() } val hasLongPaths = allCols.any { it.path().size > 1 } var df = allCols.toDataFrame { ColumnDescription::name from { it.name() } @@ -85,7 +85,7 @@ internal fun describeImpl(cols: List): DataFrame { if (hasInterComparableCols || hasNumericCols) { ColumnDescription::min from inferType { when { - it.isComparable() -> + it.isInterComparable() -> it.asComparable().minOrNull() // Found incomparable number types, convert all to Double or BigDecimal first @@ -101,7 +101,7 @@ internal fun describeImpl(cols: List): DataFrame { } ColumnDescription::median from inferType { when { - it.isComparable() -> + it.isInterComparable() -> it.asComparable().medianOrNull() // Found incomparable number types, convert all to Double or BigDecimal first @@ -117,7 +117,7 @@ internal fun describeImpl(cols: List): DataFrame { } ColumnDescription::max from inferType { when { - it.isComparable() -> it.asComparable().maxOrNull() + it.isInterComparable() -> it.asComparable().maxOrNull() // Found incomparable number types, convert all to Double or BigDecimal first it.isNumber() -> diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index 6799f41ca0..382e60fedd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -28,7 +28,7 @@ import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.frames import org.jetbrains.kotlinx.dataframe.api.getColumn import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.isComparable +import org.jetbrains.kotlinx.dataframe.api.isInterComparable import org.jetbrains.kotlinx.dataframe.api.sortWith import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.api.values @@ -107,7 +107,7 @@ public object KotlinNotebookPluginUtils { private fun createComparator(sortKeys: List, isDesc: List): Comparator> { return Comparator { row1, row2 -> for ((key, desc) in sortKeys.zip(isDesc)) { - val comparisonResult = if (row1.df().getColumn(key).isComparable()) { + val comparisonResult = if (row1.df().getColumn(key).isInterComparable()) { compareComparableValues(row1, row2, key, desc) } else { compareStringValues(row1, row2, key, desc) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index b25e7fb604..349ad0f12c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -9,7 +9,7 @@ package org.jetbrains.kotlinx.dataframe.util // region WARNING in 0.15, ERROR in 0.16 -private const val MESSAGE_0_16 = "Will be removed in 0.16." +private const val MESSAGE_0_16 = "Will be ERROR in 0.16." internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCSV`. $MESSAGE_0_16" internal const val DF_READ_NO_CSV_REPLACE = @@ -44,11 +44,15 @@ internal const val PARSER_OPTIONS = "This constructor is only here for binary co internal const val PARSER_OPTIONS_COPY = "This function is only here for binary compatibility. $MESSAGE_0_16" +internal const val IS_COMPARABLE = "This function is replaced by `isInterComparable()` to better reflect its purpose. $MESSAGE_0_16" +internal const val IS_COMPARABLE_REPLACE = "isInterComparable()" +internal const val IS_INTER_COMPARABLE_IMPORT = "org.jetbrains.kotlinx.dataframe.api.isInterComparable" + // endregion // region WARNING in 0.16, ERROR in 0.17 -private const val MESSAGE_0_17 = "Will be removed in 0.17." +private const val MESSAGE_0_17 = "Will be ERROR in 0.17." // endregion diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index 63ee6c5e03..31671c5c90 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -81,9 +81,9 @@ import org.jetbrains.kotlinx.dataframe.api.intoColumns import org.jetbrains.kotlinx.dataframe.api.intoList import org.jetbrains.kotlinx.dataframe.api.intoRows import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isEmpty import org.jetbrains.kotlinx.dataframe.api.isFrameColumn +import org.jetbrains.kotlinx.dataframe.api.isInterComparable import org.jetbrains.kotlinx.dataframe.api.isNA import org.jetbrains.kotlinx.dataframe.api.isNumber import org.jetbrains.kotlinx.dataframe.api.keysInto @@ -2445,12 +2445,12 @@ class DataFrameTests : BaseTest() { ComparableTest(2, 2, "b", "b", "2", "2"), ).toDataFrame() - df.int.isComparable() shouldBe true - df.comparableInt.isComparable() shouldBe true - df.string.isComparable() shouldBe true - df.comparableString.isComparable() shouldBe true - df.comparableStar.isComparable() shouldBe false - df.comparableNothing.isComparable() shouldBe false + df.int.isInterComparable() shouldBe true + df.comparableInt.isInterComparable() shouldBe true + df.string.isInterComparable() shouldBe true + df.comparableString.isInterComparable() shouldBe true + df.comparableStar.isInterComparable() shouldBe false + df.comparableNothing.isInterComparable() shouldBe false } @Test From 63ee92968a65e8134895e952e93fc36ff421bb2c Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 8 Nov 2024 13:06:34 +0100 Subject: [PATCH 09/13] fixup! added describe support for incomparable number types (converting them to either double or bigdecimal) and added tests --- .../kotlinx/dataframe/impl/api/describe.kt | 109 +++++++----------- 1 file changed, 44 insertions(+), 65 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt index 9f4b1bc5f3..c2929ed08a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt @@ -1,6 +1,7 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ColumnDescription import org.jetbrains.kotlinx.dataframe.api.add @@ -35,30 +36,6 @@ import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.type internal fun describeImpl(cols: List): DataFrame { - fun List.collectAll(atAnyDepth: Boolean): List = - flatMap { col -> - when (col.kind) { - ColumnKind.Frame -> - col.asAnyFrameColumn() - .concat() - .columns() - .map { it.addPath(col.path() + it.name) } - .collectAll(true) - - ColumnKind.Group -> - if (atAnyDepth) { - col.asColumnGroup() - .columns() - .map { it.addPath(col.path() + it.name) } - .collectAll(true) - } else { - listOf(col) - } - - ColumnKind.Value -> listOf(col) - } - } - val allCols = cols.collectAll(false) val hasNumericCols = allCols.any { it.isNumber() } @@ -84,51 +61,13 @@ internal fun describeImpl(cols: List): DataFrame { } if (hasInterComparableCols || hasNumericCols) { ColumnDescription::min from inferType { - when { - it.isInterComparable() -> - it.asComparable().minOrNull() - - // Found incomparable number types, convert all to Double or BigDecimal first - it.isNumber() -> - if (it.any { it?.isBigNumber() == true }) { - it.map { (it as Number?)?.toBigDecimal() }.minOrNull() - } else { - it.map { (it as Number?)?.toDouble() }.minOrNull() - } - - else -> null - } + it.convertToInterComparableOrNull()?.minOrNull() } ColumnDescription::median from inferType { - when { - it.isInterComparable() -> - it.asComparable().medianOrNull() - - // Found incomparable number types, convert all to Double or BigDecimal first - it.isNumber() -> - if (it.any { it?.isBigNumber() == true }) { - it.map { (it as Number?)?.toBigDecimal() }.medianOrNull() - } else { - it.map { (it as Number?)?.toDouble() }.medianOrNull() - } - - else -> null - } + it.convertToInterComparableOrNull()?.medianOrNull() } ColumnDescription::max from inferType { - when { - it.isInterComparable() -> it.asComparable().maxOrNull() - - // Found incomparable number types, convert all to Double or BigDecimal first - it.isNumber() -> - if (it.any { it?.isBigNumber() == true }) { - it.map { (it as Number?)?.toBigDecimal() }.maxOrNull() - } else { - it.map { (it as Number?)?.toDouble() }.maxOrNull() - } - - else -> null - } + it.convertToInterComparableOrNull()?.maxOrNull() } } } @@ -140,3 +79,43 @@ internal fun describeImpl(cols: List): DataFrame { return df.cast() } + +private fun List.collectAll(atAnyDepth: Boolean): List = + flatMap { col -> + when (col.kind) { + ColumnKind.Frame -> + col.asAnyFrameColumn() + .concat() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + + ColumnKind.Group -> + if (atAnyDepth) { + col.asColumnGroup() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + } else { + listOf(col) + } + + ColumnKind.Value -> listOf(col) + } + } + +/** Converts a column to a comparable column if it is not already comparable. */ +private fun DataColumn.convertToInterComparableOrNull(): DataColumn>? = + when { + isInterComparable() -> asComparable() + + // Found incomparable number types, convert all to Double or BigDecimal first + isNumber() -> + if (any { it?.isBigNumber() == true }) { + map { (it as Number?)?.toBigDecimal() } + } else { + map { (it as Number?)?.toDouble() } + }.cast() + + else -> null + } From 05e0993624bdbe93916129cad86f118c0c68effc Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 13 Nov 2024 12:07:22 +0100 Subject: [PATCH 10/13] adding tests and comments, missing types for median, expanding cumSum --- .../jetbrains/kotlinx/dataframe/api/cumSum.kt | 18 +++-- .../kotlinx/dataframe/math/cumsum.kt | 60 +++++++++++++++++ .../jetbrains/kotlinx/dataframe/math/mean.kt | 2 + .../kotlinx/dataframe/math/median.kt | 13 +++- .../kotlinx/dataframe/math/minmax.kt | 25 ------- .../jetbrains/kotlinx/dataframe/math/sum.kt | 18 +++-- .../kotlinx/dataframe/statistics/cumsum.kt | 20 ++++-- .../kotlinx/dataframe/statistics/std.kt | 13 ++++ .../kotlinx/dataframe/statistics/sum.kt | 66 +++++++++++++++++++ 9 files changed, 197 insertions(+), 38 deletions(-) delete mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt create mode 100644 core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index b6c8816984..fbbcbdec1f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -27,11 +27,21 @@ public fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSki typeOf() -> cast().cumSum(skipNA).cast() - // careful, cast to Int can occur! TODO - typeOf(), typeOf(), typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum().cast() - // careful, cast to Int can occur! TODO - typeOf(), typeOf(), typeOf() -> cast().cumSum(skipNA).cast() + // TODO cumSum for Byte returns Int but is cast back to T: Byte + typeOf() -> cast().cumSum().cast() + + // TODO cumSum for Short returns Int but is cast back to T: Short + typeOf() -> cast().cumSum().cast() + + typeOf() -> cast().cumSum(skipNA).cast() + + // TODO cumSum for Byte? returns Int? but is cast back to T: Byte? + typeOf() -> cast().cumSum(skipNA).cast() + + // TODO cumSum for Short? returns Int? but is cast back to T: Short? + typeOf() -> cast().cumSum(skipNA).cast() typeOf() -> cast().cumSum().cast() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt index 89b9f86dcd..e1220c83a4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt @@ -89,6 +89,66 @@ internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): Dat } } +@JvmName("byteCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = 0 + return map { + sum += it + sum + } +} + +@JvmName("cumsumByteNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = 0 + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum += it + sum + } + } + } +} + +@JvmName("shortCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = 0 + return map { + sum += it + sum + } +} + +@JvmName("cumsumShortNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = 0 + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum += it + sum + } + } + } +} + @JvmName("longCumsum") internal fun DataColumn.cumSum(): DataColumn { var sum = 0L diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt index d2c924c6f7..d4deec31a1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt @@ -135,6 +135,7 @@ public fun Iterable.mean(): Double = if (count > 0) sum / count else Double.NaN } +// TODO result is Double, but should be BigDecimal, Issue #558 @JvmName("bigIntegerMean") public fun Iterable.mean(): Double = if (this is Collection) { @@ -148,6 +149,7 @@ public fun Iterable.mean(): Double = if (count > 0) sum / count else Double.NaN } +// TODO result is Double, but should be BigDecimal, Issue #558 @JvmName("bigDecimalMean") public fun Iterable.mean(): Double = if (this is Collection) { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt index 559b4027d6..7fe4df949b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt @@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.math import org.jetbrains.kotlinx.dataframe.impl.asList import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType import kotlin.reflect.typeOf @@ -9,6 +10,8 @@ public inline fun > Iterable.medianOrNull(): T? = m public inline fun > Iterable.median(): T = medianOrNull()!! +// TODO median always returns the same type, but this can be confusing for iterables of even length +// TODO (e.g. median of [1, 2] should be 1.5, but the type is Int, so it returns 1), Issue #558 @PublishedApi internal inline fun > Iterable.median(type: KType): T? { val list = if (type.isMarkedNullable) filterNotNull() else (this as Iterable).asList() @@ -19,14 +22,22 @@ internal inline fun > Iterable.median(type: KType) return when (type.classifier) { Double::class -> ((list.quickSelect(index - 1) as Double + list.quickSelect(index) as Double) / 2.0) as T + Float::class -> ((list.quickSelect(index - 1) as Float + list.quickSelect(index) as Float) / 2.0f) as T + Int::class -> ((list.quickSelect(index - 1) as Int + list.quickSelect(index) as Int) / 2) as T + Short::class -> ((list.quickSelect(index - 1) as Short + list.quickSelect(index) as Short) / 2) as T + Long::class -> ((list.quickSelect(index - 1) as Long + list.quickSelect(index) as Long) / 2L) as T Byte::class -> ((list.quickSelect(index - 1) as Byte + list.quickSelect(index) as Byte) / 2).toByte() as T BigDecimal::class -> ( - (list.quickSelect(index - 1) as BigDecimal + list.quickSelect(index) as BigDecimal) / BigDecimal(2) + (list.quickSelect(index - 1) as BigDecimal + list.quickSelect(index) as BigDecimal) / 2.toBigDecimal() + ) as T + + BigInteger::class -> ( + (list.quickSelect(index - 1) as BigInteger + list.quickSelect(index) as BigInteger) / 2.toBigInteger() ) as T else -> list.quickSelect(index - 1) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt deleted file mode 100644 index fb0f0871a0..0000000000 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt +++ /dev/null @@ -1,25 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.math - -import java.math.BigDecimal -import kotlin.reflect.KType -import kotlin.reflect.jvm.jvmErasure - -internal fun Iterable.min(type: KType) = - when (type.jvmErasure) { - Double::class -> (this as Iterable).minOrNull() - Float::class -> (this as Iterable).minOrNull() - Int::class, Short::class, Byte::class -> (this as Iterable).minOrNull() - Long::class -> (this as Iterable).minOrNull() - BigDecimal::class -> (this as Iterable).minOrNull() - else -> throw IllegalArgumentException() - } - -internal fun Iterable.max(type: KType) = - when (type.jvmErasure) { - Double::class -> (this as Iterable).maxOrNull() - Float::class -> (this as Iterable).maxOrNull() - Int::class, Short::class, Byte::class -> (this as Iterable).maxOrNull() - Long::class -> (this as Iterable).maxOrNull() - BigDecimal::class -> (this as Iterable).maxOrNull() - else -> throw IllegalArgumentException() - } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt index 470ca76cda..9d0d747f14 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt @@ -45,8 +45,13 @@ internal fun Iterable.sum(type: KType): T = Float::class -> (this as Iterable).sum() as T - // careful, cast to Int occurs! TODO - Int::class, Short::class, Byte::class -> (this as Iterable).sum() as T + Int::class -> (this as Iterable).sum() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Short::class -> (this as Iterable).sum().toShort() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Byte::class -> (this as Iterable).sum().toByte() as T Long::class -> (this as Iterable).sum() as T @@ -69,8 +74,13 @@ internal fun Iterable.sum(type: KType): T = Float::class -> (this as Iterable).asSequence().filterNotNull().sum() as T - // careful, cast to Int occurs! TODO - Int::class, Short::class, Byte::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + Int::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Short::class -> (this as Iterable).asSequence().filterNotNull().sum().toShort() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Byte::class -> (this as Iterable).asSequence().filterNotNull().sum().toByte() as T Long::class -> (this as Iterable).asSequence().filterNotNull().sum() as T diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt index 051296acaf..ade5ecbf66 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt @@ -25,14 +25,26 @@ class CumsumTests { @Test fun `short column`() { - col.map { it?.toShort() }.cumSum().toList() shouldBe expected.map { it?.toShort() } - col.map { it?.toShort() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toShort() } + col.map { it?.toShort() }.cumSum().toList() shouldBe expected + col.map { it?.toShort() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip + } + + @Test + fun `frame with multiple columns`() { + val col2 by columnOf(1.toShort(), 2, 3, 4, 5) + val col3 by columnOf(1.toByte(), 2, 3, 4, null) + val df = dataFrameOf(col, col2, col3) + val res = df.cumSum(skipNA = false) + + res[col].toList() shouldBe expectedNoSkip + res[col2].toList() shouldBe listOf(1, 3, 6, 10, 15) + res[col3].toList() shouldBe listOf(1, 3, 6, 10, null) } @Test fun `byte column`() { - col.map { it?.toByte() }.cumSum().toList() shouldBe expected.map { it?.toByte() } - col.map { it?.toByte() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toByte() } + col.map { it?.toByte() }.cumSum().toList() shouldBe expected + col.map { it?.toByte() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip } @Test diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt index 4d403a173c..34f9548b9f 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt @@ -28,6 +28,19 @@ class StdTests { df.std().columnTypes().single() shouldBe typeOf() } + @Test + fun `std one byte column`() { + val value by columnOf(1.toByte(), 2.toByte(), 3.toByte()) + val df = dataFrameOf(value) + val expected = 1.0 + + value.values().std(typeOf()) shouldBe expected + value.std() shouldBe expected + df[value].std() shouldBe expected + df.std { value } shouldBe expected + df.std().columnTypes().single() shouldBe typeOf() + } + @Test fun `std one double column`() { val value by columnOf(1.0, 2.0, 3.0) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt new file mode 100644 index 0000000000..5675590abf --- /dev/null +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt @@ -0,0 +1,66 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.sum +import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.junit.Test + +class SumTests { + + @Test + fun `test single column`() { + val value by columnOf(1, 2, 3) + val df = dataFrameOf(value) + val expected = 6 + + value.values().sum() shouldBe expected + value.sum() shouldBe expected + df[value].sum() shouldBe expected + df.sum { value } shouldBe expected + df.sum()[value] shouldBe expected + df.sumOf { value() } shouldBe expected + } + + @Test + fun `test single short column`() { + val value by columnOf(1.toShort(), 2.toShort(), 3.toShort()) + val df = dataFrameOf(value) + val expected = 6 + + value.values().sum() shouldBe expected + value.sum() shouldBe expected + df[value].sum() shouldBe expected + df.sum { value } shouldBe expected + df.sum()[value] shouldBe expected + df.sumOf { value() } shouldBe expected + } + + @Test + fun `test multiple columns`() { + val value1 by columnOf(1, 2, 3) + val value2 by columnOf(4.0, 5.0, 6.0) + val value3: DataColumn by columnOf(7.0, 8, null) + val df = dataFrameOf(value1, value2, value3) + val expected1 = 6 + val expected2 = 15.0 + val expected3 = 15.0 + + df.sum()[value1] shouldBe expected1 + df.sum()[value2] shouldBe expected2 + df.sum()[value3] shouldBe expected3 + df.sumOf { value1() } shouldBe expected1 + df.sumOf { value2() } shouldBe expected2 + df.sumOf { value3() } shouldBe expected3 + df.sum(value1) shouldBe expected1 + df.sum(value2) shouldBe expected2 + df.sum(value3) shouldBe expected3 + df.sum { value1 } shouldBe expected1 + df.sum { value2 } shouldBe expected2 + df.sum { value3 } shouldBe expected3 + } + + +} From c8de3396d39d041bb313e8dfb94ecb7a4ad151e7 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 12 Nov 2024 17:22:52 +0100 Subject: [PATCH 11/13] bumping ktlint engine version to 1.4.1, disabled new when-entry-bracing rule as it was a bit too aggressive --- .editorconfig | 1 + build.gradle.kts | 2 +- core/api/core.api | 14 +++++++++++--- .../org/jetbrains/kotlinx/dataframe/math/mean.kt | 1 + .../jetbrains/kotlinx/dataframe/statistics/sum.kt | 2 -- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.editorconfig b/.editorconfig index cb28f789b2..be0f4587f3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -32,6 +32,7 @@ ktlint_function_signature_rule_force_multiline_when_parameter_count_greater_or_e ktlint_standard_chain-method-continuation = disabled ktlint_ignore_back_ticked_identifier = true ktlint_standard_multiline-expression-wrapping = disabled +ktlint_standard_when-entry-bracing = disabled [{*/build/**/*,**/*keywords*/**,**/*.Generated.kt,**/*$Extensions.kt}] ktlint = disabled diff --git a/build.gradle.kts b/build.gradle.kts index 54209b4da9..bb0df363e4 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -153,7 +153,7 @@ allprojects { afterEvaluate { try { configure { - version = "1.3.0" + version = "1.4.1" // rules are set up through .editorconfig } } catch (_: UnknownDomainObjectException) { diff --git a/core/api/core.api b/core/api/core.api index 0d9aa53c8e..9711065442 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -2691,7 +2691,7 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/ColumnDescri public abstract fun getPath ()Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath; public abstract fun getStd ()D public abstract fun getTop ()Ljava/lang/Object; - public abstract fun getType ()Lkotlin/reflect/KType; + public abstract fun getType ()Ljava/lang/String; public abstract fun getUnique ()I } @@ -2719,7 +2719,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/ColumnDescription_Extensi public static final fun ColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; - public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Lkotlin/reflect/KType; + public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; public static final fun ColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/DataRow;)I public static final fun NullableColumnDescription_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; @@ -2745,7 +2745,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/ColumnDescription_Extensi public static final fun NullableColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; - public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Lkotlin/reflect/KType; + public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; public static final fun NullableColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Integer; } @@ -4146,9 +4146,11 @@ public final class org/jetbrains/kotlinx/dataframe/api/DataColumnArithmeticsKt { } public final class org/jetbrains/kotlinx/dataframe/api/DataColumnTypeKt { + public static final fun isBigNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isColumnGroup (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isComparable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isFrameColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z + public static final fun isInterComparable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isList (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isPrimitive (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z @@ -10771,6 +10773,7 @@ public final class org/jetbrains/kotlinx/dataframe/math/BasicStats { public final class org/jetbrains/kotlinx/dataframe/math/MeanKt { public static final fun bigDecimalMean (Ljava/lang/Iterable;)D + public static final fun bigIntegerMean (Ljava/lang/Iterable;)D public static final fun byteMean (Ljava/lang/Iterable;)D public static final fun doubleMean (Ljava/lang/Iterable;Z)D public static synthetic fun doubleMean$default (Ljava/lang/Iterable;ZILjava/lang/Object;)D @@ -10794,6 +10797,8 @@ public final class org/jetbrains/kotlinx/dataframe/math/MedianKt { public final class org/jetbrains/kotlinx/dataframe/math/StdKt { public static final fun bigDecimalStd (Ljava/lang/Iterable;I)D public static synthetic fun bigDecimalStd$default (Ljava/lang/Iterable;IILjava/lang/Object;)D + public static final fun bigIntegerStd (Ljava/lang/Iterable;I)D + public static synthetic fun bigIntegerStd$default (Ljava/lang/Iterable;IILjava/lang/Object;)D public static final fun doubleStd (Ljava/lang/Iterable;ZI)D public static synthetic fun doubleStd$default (Ljava/lang/Iterable;ZIILjava/lang/Object;)D public static final fun floatStd (Ljava/lang/Iterable;ZI)D @@ -10808,6 +10813,7 @@ public final class org/jetbrains/kotlinx/dataframe/math/StdKt { public final class org/jetbrains/kotlinx/dataframe/math/StdMeanKt { public static final fun bigDecimalVarianceAndMean (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; + public static final fun bigIntegerVarianceAndMean (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; public static final fun doubleVarianceAndMean (Ljava/lang/Iterable;Z)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; public static synthetic fun doubleVarianceAndMean$default (Ljava/lang/Iterable;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; public static final fun floatVarianceAndMean (Ljava/lang/Iterable;Z)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; @@ -10818,8 +10824,10 @@ public final class org/jetbrains/kotlinx/dataframe/math/StdMeanKt { public final class org/jetbrains/kotlinx/dataframe/math/SumKt { public static final fun sum (Ljava/lang/Iterable;)Ljava/math/BigDecimal; + public static final fun sum (Ljava/lang/Iterable;)Ljava/math/BigInteger; public static final fun sum (Ljava/lang/Iterable;Lkotlin/reflect/KType;)Ljava/lang/Number; public static final fun sum (Lkotlin/sequences/Sequence;)Ljava/math/BigDecimal; + public static final fun sum (Lkotlin/sequences/Sequence;)Ljava/math/BigInteger; public static final fun sumNullableT (Ljava/lang/Iterable;Lkotlin/reflect/KType;)Ljava/lang/Number; public static final fun sumOf (Ljava/lang/Iterable;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Ljava/lang/Number; } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt index d4deec31a1..40fe9bbce1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt @@ -11,6 +11,7 @@ import kotlin.reflect.full.withNullability internal fun Iterable.mean(type: KType, skipNA: Boolean = skipNA_default): Double = asSequence().mean(type, skipNA) +@Suppress("UNCHECKED_CAST") internal fun Sequence.mean(type: KType, skipNA: Boolean = skipNA_default): Double { if (type.isMarkedNullable) { return filterNotNull().mean(type.withNullability(false), skipNA) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt index 5675590abf..b93d8ab705 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt @@ -61,6 +61,4 @@ class SumTests { df.sum { value2 } shouldBe expected2 df.sum { value3 } shouldBe expected3 } - - } From ea4d035db1a6c152969f9c92783957d49f3fc11b Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 15 Nov 2024 12:59:32 +0100 Subject: [PATCH 12/13] fixup! adding tests and comments, missing types for median, expanding cumSum --- .../jetbrains/kotlinx/dataframe/api/cumSum.kt | 16 ++++++++-------- .../org/jetbrains/kotlinx/dataframe/math/sum.kt | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index fbbcbdec1f..4779d07437 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -29,19 +29,19 @@ public fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSki typeOf() -> cast().cumSum().cast() - // TODO cumSum for Byte returns Int but is cast back to T: Byte - typeOf() -> cast().cumSum().cast() + // TODO cumSum for Byte returns Int but is converted back to T: Byte, Issue #558 + typeOf() -> cast().cumSum().map { it.toByte() }.cast() - // TODO cumSum for Short returns Int but is cast back to T: Short - typeOf() -> cast().cumSum().cast() + // TODO cumSum for Short returns Int but is converted back to T: Short, Issue #558 + typeOf() -> cast().cumSum().map { it.toShort() }.cast() typeOf() -> cast().cumSum(skipNA).cast() - // TODO cumSum for Byte? returns Int? but is cast back to T: Byte? - typeOf() -> cast().cumSum(skipNA).cast() + // TODO cumSum for Byte? returns Int? but is converted back to T: Byte?, Issue #558 + typeOf() -> cast().cumSum(skipNA).map { it?.toByte() }.cast() - // TODO cumSum for Short? returns Int? but is cast back to T: Short? - typeOf() -> cast().cumSum(skipNA).cast() + // TODO cumSum for Short? returns Int? but is converted back to T: Short?, Issue #558 + typeOf() -> cast().cumSum(skipNA).map { it?.toShort() }.cast() typeOf() -> cast().cumSum().cast() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt index 9d0d747f14..08dae78937 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt @@ -13,16 +13,16 @@ internal fun Iterable.sumOf(type: KType, selector: (T) -> R?) return when (type.classifier) { Double::class -> sumOf(selector as ((T) -> Double)) as R - // careful, conversion to Double to Float occurs! TODO + // careful, conversion to Double to Float occurs! TODO, Issue #558 Float::class -> sumOf { (selector as ((T) -> Float))(it).toDouble() }.toFloat() as R Int::class -> sumOf(selector as ((T) -> Int)) as R - // careful, cast to Int occurs! TODO - Short::class -> sumOf { (selector as ((T) -> Short))(it).toInt() } as R + // careful, conversion to Int occurs! TODO, Issue #558 + Short::class -> sumOf { (selector as ((T) -> Short))(it).toInt() }.toShort() as R - // careful, cast to Int occurs! TODO - Byte::class -> sumOf { (selector as ((T) -> Byte))(it).toInt() } as R + // careful, conversion to Int occurs! TODO, Issue #558 + Byte::class -> sumOf { (selector as ((T) -> Byte))(it).toInt() }.toByte() as R Long::class -> sumOf(selector as ((T) -> Long)) as R From b64791ff9369a93016e3843ab121b9dd2bee99aa Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 19 Nov 2024 12:20:15 +0100 Subject: [PATCH 13/13] fixed tests --- .../org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt index ade5ecbf66..73282e6125 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt @@ -37,14 +37,14 @@ class CumsumTests { val res = df.cumSum(skipNA = false) res[col].toList() shouldBe expectedNoSkip - res[col2].toList() shouldBe listOf(1, 3, 6, 10, 15) - res[col3].toList() shouldBe listOf(1, 3, 6, 10, null) + res[col2].toList() shouldBe listOf(1.toShort(), 3, 6, 10, 15) + res[col3].toList() shouldBe listOf(1.toByte(), 3, 6, 10, null) } @Test fun `byte column`() { - col.map { it?.toByte() }.cumSum().toList() shouldBe expected - col.map { it?.toByte() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip + col.map { it?.toByte() }.cumSum().toList() shouldBe expected.map { it?.toByte() } + col.map { it?.toByte() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toByte() } } @Test