Skip to content

Commit

Permalink
added nothing type to cumsum for empty/null-filled columns
Browse files Browse the repository at this point in the history
  • Loading branch information
Jolanrensen committed Nov 8, 2024
2 parents 63ee929 + 79bd076 commit b2bdb4a
Show file tree
Hide file tree
Showing 14 changed files with 317 additions and 134 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.math.cumSum
import org.jetbrains.kotlinx.dataframe.math.defaultCumSumSkipNA
import org.jetbrains.kotlinx.dataframe.typeClass
Expand Down Expand Up @@ -45,6 +47,9 @@ public fun <T : Number?> DataColumn<T>.cumSum(skipNA: Boolean = defaultCumSumSki

typeOf<Number?>(), typeOf<Number>() -> convertToDouble().cumSum(skipNA).cast()

// The cumulative sum of an empty column, or of a column holding only nulls, is the column itself
nothingType, nullableNothingType -> this

else -> error("Cumsum for type ${type()} is not supported")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.api.describeImpl
import kotlin.reflect.KProperty
import kotlin.reflect.KType

// region DataSchema
@DataSchema
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -549,12 +549,10 @@ internal fun guessValueType(
/**
 * Whether this [KType] has the [Nothing] class as its classifier.
 *
 * Only the classifier is compared, so nullability and type arguments play no part in the result.
 */
internal val KType.isNothing: Boolean
    get() = Nothing::class == classifier

/**
 * Returns the [KType] of `Nothing?` when [nullable] is `true` and of `Nothing` otherwise.
 *
 * The type is read off the first type argument of `typeOf<List<Nothing?>>()` / `typeOf<List<Nothing>>()`,
 * and that lookup is repeated on every call.
 *
 * NOTE(review): another `nothingType(nullable: Boolean)` with the same signature appears a few lines
 * further down; the two cannot coexist in one file — this one looks like the removed side of the diff.
 */
internal fun nothingType(nullable: Boolean): KType =
if (nullable) {
typeOf<List<Nothing?>>()
} else {
typeOf<List<Nothing>>()
}.arguments.first().type!!
/**
 * [KType] of non-nullable `Nothing`.
 *
 * Read once from the element type argument of `typeOf<List<Nothing>>()`.
 */
internal val nothingType: KType =
    typeOf<List<Nothing>>()
        .arguments
        .first()
        .type!! // an explicit (non-star) projection always carries a type

/**
 * [KType] of `Nothing?`.
 *
 * Read once from the element type argument of `typeOf<List<Nothing?>>()`.
 */
internal val nullableNothingType: KType =
    typeOf<List<Nothing?>>()
        .arguments
        .first()
        .type!! // an explicit (non-star) projection always carries a type

/**
 * Selects between the two stored `Nothing` types.
 *
 * @param nullable `true` to get [nullableNothingType], `false` to get [nothingType].
 * @return the matching stored [KType]; nothing is recomputed per call.
 */
internal fun nothingType(nullable: Boolean): KType =
    when {
        nullable -> nullableNothingType
        else -> nothingType
    }

@OptIn(ExperimentalUnsignedTypes::class)
private val primitiveArrayClasses = setOf(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
import org.jetbrains.kotlinx.dataframe.api.toColumn
import org.jetbrains.kotlinx.dataframe.api.tryParse
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.parse
import org.jetbrains.kotlinx.dataframe.api.to
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
Expand Down Expand Up @@ -531,17 +531,16 @@ internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: Par
)
}

internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> {
val convertedCols = getColumnsWithPaths(columns).map { col ->
internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> =
convert(columns).to { col ->
when {
// when a frame column is requested to be parsed,
// parse each value/frame column at any depth inside each DataFrame in the frame column
col.isFrameColumn() ->
col.values.map {
it.parseImpl(options) {
colsAtAnyDepth { !it.isColumnGroup() }
}
}.toColumn(col.name)
col.isFrameColumn() -> col.map {
it.parseImpl(options) {
colsAtAnyDepth { !it.isColumnGroup() }
}
}

// when a column group is requested to be parsed,
// parse each column in the group
Expand All @@ -552,11 +551,8 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column

// Base case, parse the column if it's a `String?` column
col.isSubtypeOf<String?>() ->
col.cast<String?>().tryParse(options)
col.cast<String?>().tryParseImpl(options)

else -> col
}.let { ColumnToInsert(col.path, it) }
}
}

return emptyDataFrame<T>().insertImpl(convertedCols)
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ internal const val PARSER_OPTIONS = "This constructor is only here for binary co

// Message for a function that is kept only for binary compatibility.
internal const val PARSER_OPTIONS_COPY = "This function is only here for binary compatibility. $MESSAGE_0_16"

// NOTE(review): IS_COMPARABLE is declared twice below with identical text (once on a single line, once
// wrapped); only one declaration can remain — these look like the two sides of a reformatting diff.
internal const val IS_COMPARABLE = "This function is replaced by `isInterComparable()` to better reflect its purpose. $MESSAGE_0_16"
internal const val IS_COMPARABLE =
"This function is replaced by `isInterComparable()` to better reflect its purpose. $MESSAGE_0_16"
// Replacement expression and import that accompany IS_COMPARABLE — presumably passed to `ReplaceWith`;
// confirm at the use site.
internal const val IS_COMPARABLE_REPLACE = "isInterComparable()"
internal const val IS_INTER_COMPARABLE_IMPORT = "org.jetbrains.kotlinx.dataframe.api.isInterComparable"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package org.jetbrains.kotlinx.dataframe.api
import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.alsoDebug
import org.junit.Test
import kotlin.reflect.typeOf

class DescribeTests {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,15 @@ class CumsumTests {
// Checks cumSum on `col` mapped to BigInteger; `col`, `expected` and `expectedNoSkip` come from outside
// this function.
@Test
fun `big int column`() {
// cumSum() called with its default skipNA argument
col.map { it?.toBigInteger() }.cumSum().toList() shouldBe expected.map { it?.toBigInteger() }
// explicit skipNA = false, compared against the separate `expectedNoSkip` values
col.map { it?.toBigInteger() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toBigInteger() }
// NOTE(review): same assertion as the statement above, only wrapped before `.toList()` — one of the
// two is redundant (looks like both sides of a reformatting diff).
col.map { it?.toBigInteger() }.cumSum(skipNA = false)
.toList() shouldBe expectedNoSkip.map { it?.toBigInteger() }
}

// Checks cumSum on `col` mapped to BigDecimal; `col`, `expected` and `expectedNoSkip` come from outside
// this function.
@Test
fun `big decimal column`() {
// cumSum() called with its default skipNA argument
col.map { it?.toBigDecimal() }.cumSum().toList() shouldBe expected.map { it?.toBigDecimal() }
// explicit skipNA = false, compared against the separate `expectedNoSkip` values
col.map { it?.toBigDecimal() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toBigDecimal() }
// NOTE(review): same assertion as the statement above, only wrapped before `.toList()` — one of the
// two is redundant (looks like both sides of a reformatting diff).
col.map { it?.toBigDecimal() }.cumSum(skipNA = false)
.toList() shouldBe expectedNoSkip.map { it?.toBigDecimal() }
}

@Test
Expand Down
Loading

0 comments on commit b2bdb4a

Please sign in to comment.