Skip to content

filter kdocs #1288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,16 @@ public typealias ColumnsSelector<T, C> = Selector<ColumnsSelectionDsl<T>, Column
// region filters

/**
* ## Row Filter
* A lambda expression that evaluates a row of the [DataFrame]
* and returns a [Boolean] indicating whether the row should be included in the result.
*
* [RowFilter] is a lambda function expecting a [Boolean] result given an instance of [DataRow]`<T>` as context
* (`this` and `it`).
* The lambda has access to the [`DataRow<T>`][DataRow] both as `this` and as `it`,
* enabling concise and readable conditions.
*
* Return `true` if the row should be included in the result.
* Commonly used in operations such as [filter][org.jetbrains.kotlinx.dataframe.api.filter],
* [drop][org.jetbrains.kotlinx.dataframe.api.drop], and others.
*
* Shorthand for:
* Equivalent to:
* ```kotlin
* DataRow<T>.(it: DataRow<T>) -> Boolean
* ```
Expand Down
42 changes: 42 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,28 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
import org.jetbrains.kotlinx.dataframe.columns.asColumnSet
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
import org.jetbrains.kotlinx.dataframe.documentation.Indent
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet
import org.jetbrains.kotlinx.dataframe.impl.getTrueIndices
import org.jetbrains.kotlinx.dataframe.indices
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import org.jetbrains.kotlinx.dataframe.util.FILTER_BY
import org.jetbrains.kotlinx.dataframe.util.FILTER_BY_REPLACE
import kotlin.reflect.KProperty

// region DataColumn

/**
* Returns a new [DataColumn] containing only the elements that match the given [predicate].
*
* @param predicate the condition used to filter the elements in the DataColumn.
* @return a new DataColumn containing elements that satisfy the predicate.
*/
public inline fun <T> DataColumn<T>.filter(predicate: Predicate<T>): DataColumn<T> =
indices
.filter { predicate(get(it)) }
Expand All @@ -34,21 +45,52 @@ public inline fun <T> DataColumn<T>.filter(predicate: Predicate<T>): DataColumn<

// region DataFrame

/**
 * Keeps only those rows of this [DataFrame] for which the given [RowFilter] [predicate] returns `true`
 * and returns them as a new [DataFrame].
 *
 * The [predicate] is invoked once per row. The row is available inside the lambda
 * both as `this` and as `it`, and the lambda must produce a [Boolean].
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * For more information, see: {@include [DocumentationUrls.Filter]}
 *
 * See also:
 * - [drop][DataFrame.drop], which drops rows based on values within the row.
 * - [distinct][DataFrame.distinct], which filters out rows with duplicated values.
 *
 * ### Example
 * ```kotlin
 * // Select rows where the value in the "age" column is greater than 18
 * // and the "name/firstName" column starts with 'A'
 * df.filter { age > 18 && name.firstName.startsWith("A") }
 * ```
 *
 * @param predicate A [RowFilter] that receives the current row both as receiver (`this`)
 * and as argument (`it`) and returns `true` if the row should be included in the result.
 * @return A new [DataFrame] containing only the rows that satisfy the [predicate].
 */
public inline fun <T> DataFrame<T>.filter(predicate: RowFilter<T>): DataFrame<T> {
    // Collect the indices of all rows accepted by the predicate.
    val matchingIndices = indices().filter { index ->
        val row = get(index)
        // The same row instance is supplied as receiver (`this`) and as argument (`it`).
        predicate(row, row)
    }
    // Materialize the selected rows as a new DataFrame.
    return get(matchingIndices)
}

/**
 * Returns the rows of this [DataFrame] that have the value `true` in the selected [Boolean] [column].
 *
 * Deprecated with [DeprecationLevel.ERROR]; see the deprecation message for the replacement.
 *
 * @param column selector of the [Boolean] column whose values decide which rows are kept.
 * @return a [DataFrame] made of the rows at the indices reported by `getTrueIndices`.
 */
@Deprecated(message = FILTER_BY, replaceWith = ReplaceWith(FILTER_BY_REPLACE), level = DeprecationLevel.ERROR)
public fun <T> DataFrame<T>.filterBy(column: ColumnSelector<T, Boolean>): DataFrame<T> {
    // Resolve the selected column and read its values as a plain list.
    val flags = getColumn(column).toList()
    return getRows(flags.getTrueIndices())
}

/**
 * String-name overload of `filterBy`: resolves [column] by name via `toColumnOf` and delegates
 * to the selector-based `filterBy` overload.
 *
 * Deprecated with [DeprecationLevel.ERROR]; see the deprecation message for the replacement.
 *
 * @param column name of the [Boolean] column to filter by.
 * @return the [DataFrame] returned by the selector-based overload.
 */
@Suppress("DEPRECATION_ERROR")
@Deprecated(message = FILTER_BY, replaceWith = ReplaceWith(FILTER_BY_REPLACE), level = DeprecationLevel.ERROR)
public fun <T> DataFrame<T>.filterBy(column: String): DataFrame<T> {
    return filterBy { column.toColumnOf() }
}

/**
 * [ColumnReference] overload of `filterBy`: passes [column] to the selector-based `filterBy` overload.
 *
 * @param column reference to the [Boolean] column to filter by.
 * @return the [DataFrame] returned by the selector-based overload.
 */
@Suppress("DEPRECATION_ERROR")
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.filterBy(column: ColumnReference<Boolean>): DataFrame<T> {
    return filterBy { column }
}

/**
 * [KProperty] overload of `filterBy`: converts [column] with `toColumnAccessor` and delegates
 * to the selector-based `filterBy` overload.
 *
 * @param column property identifying the [Boolean] column to filter by.
 * @return the [DataFrame] returned by the selector-based overload.
 */
@Suppress("DEPRECATION_ERROR")
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.filterBy(column: KProperty<Boolean>): DataFrame<T> {
    return filterBy { column.toColumnAccessor() }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>):
* Filters out `null` values from the columns previously selected by [gather],
* keeping only non-null entries.
*
* A special case of [where].
* A special case of [Gather.where].
*
* It's an intermediate step; returns a new [Gather] with filtered value columns.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,7 @@ internal interface DocumentationUrls {

/** [See `gather` on the documentation website.]({@include [Url]}/gather.html) */
interface Gather

// Marker interface whose KDoc holds the link to the `filter` page of the documentation website;
// it is pulled into other KDocs with `{@include [DocumentationUrls.Filter]}`.
/** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */
interface Filter
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ internal const val CONVERT_TO_URL_REPLACE = "convertToUrl()"
internal const val TO_URL = "This function is replaced by `toUrl()`. $MESSAGE_1_0"
internal const val TO_URL_REPLACE = "toUrl()"

// Deprecation message and `ReplaceWith` expression for the deprecated `filterBy` overloads.
// NOTE(review): in `filterBy { column }` the lambda yields a column, whereas in `filter { column }`
// it must yield the row's Boolean value, so the automatic replacement may need manual adjustment
// by the user — confirm, especially for the String-based overload.
internal const val FILTER_BY = "This function is deprecated in favor of `filter { }`. $MESSAGE_1_0"
internal const val FILTER_BY_REPLACE = "filter { column }"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure whether `ReplaceWith` can fully handle this transition: in `filterBy { column }`, `column` is a `ColumnAccessor<T>`, whereas in `filter { column }` it's a value of type `T`. But let's keep it like this; the user can probably figure it out.


// endregion

// region WARNING in 1.0, ERROR in 1.1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import org.jetbrains.kotlinx.dataframe.api.dropNulls
import org.jetbrains.kotlinx.dataframe.api.dropWhile
import org.jetbrains.kotlinx.dataframe.api.fillNaNs
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.filterBy
import org.jetbrains.kotlinx.dataframe.api.first
import org.jetbrains.kotlinx.dataframe.api.forEach
import org.jetbrains.kotlinx.dataframe.api.gather
Expand Down Expand Up @@ -324,22 +323,6 @@ class Access : TestBase() {
// SampleEnd
}

// Sample test for `filterBy` with a column selector lambda (`isHappy`).
// NOTE(review): the SampleStart/SampleEnd region is presumably extracted into the "Properties" tab
// of the `filterBy` section in docs/StardustDocs/topics/filter.md — confirm before editing it.
@Test
@TransformDataFrameExpressions
fun filterBy_properties() {
// SampleStart
df.filterBy { isHappy }
// SampleEnd
}

// Sample test for `filterBy` with the column given by its String name ("isHappy").
// NOTE(review): the SampleStart/SampleEnd region is presumably extracted into the "Strings" tab
// of the `filterBy` section in docs/StardustDocs/topics/filter.md — confirm before editing it.
@Test
@TransformDataFrameExpressions
fun filterBy_strings() {
// SampleStart
df.filterBy("isHappy")
// SampleEnd
}

@Test
@TransformDataFrameExpressions
fun dropWhere_properties() {
Expand Down
25 changes: 0 additions & 25 deletions docs/StardustDocs/topics/filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,3 @@ df.filter { "age"<Int>() > 18 && "name"["firstName"]<String>().startsWith("A") }
</tab></tabs>
<inline-frame src="resources/org.jetbrains.kotlinx.dataframe.samples.api.Access.filter.html" width="100%"/>
<!---END-->

## filterBy

Returns a [`DataFrame`](DataFrame.md) containing only the rows that have the value `true` in the given `Boolean` column.

See [column selectors](ColumnSelectors.md) for how to select the column for this operation.

<!---FUN filterBy-->
<tabs>
<tab title="Properties">

```kotlin
df.filterBy { isHappy }
```

</tab>
<tab title="Strings">

```kotlin
df.filterBy("isHappy")
```

</tab></tabs>
<inline-frame src="resources/org.jetbrains.kotlinx.dataframe.samples.api.Access.filterBy.html" width="100%"/>
<!---END-->