Ben Chuanlong Du's Blog

It is never too late to learn.

Filter a Polars DataFrame in Rust

Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!

Tips and Traps

  1. LazyFrame.filter filters rows using an Expr while DataFrame.filter filters rows using a mask of the type ChunkedArray<BooleanType>.
In [2]:
:timing
:sccache 1
:dep polars = { version = "0.21.1", features = ["lazy", "parquet"] }
In [13]:
use polars::prelude::*;
use polars::df;
In [15]:
// Build a small example DataFrame with the `df!` macro and bind it to `frame`.
let frame = df![
    "names" => ["a", "b", "c"],
    "values" => [1, 2, 3],
    "values_nulls" => [Some(1), None, Some(3)]
].unwrap();
// Evaluate the binding (not the `df` macro name) so the notebook displays the frame.
frame
Out[15]:
shape: (3, 3)
┌───────┬────────┬──────────────┐
│ names ┆ values ┆ values_nulls │
│ ---   ┆ ---    ┆ ---          │
│ str   ┆ i32    ┆ i32          │
╞═══════╪════════╪══════════════╡
│ a     ┆ 1      ┆ 1            │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b     ┆ 2      ┆ null         │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ c     ┆ 3      ┆ 3            │
└───────┴────────┴──────────────┘
In [19]:
// Boolean mask over the `values_nulls` column: `true` where the value is present, `false` where it is null.
frame["values_nulls"].is_not_null()
Out[19]:
shape: (3,)
ChunkedArray: 'values_nulls' [bool]
[
	true
	false
	true
]
In [20]:
// Keep only the rows whose `values_nulls` entry is not null; `filter` returns a `Result` wrapping the new DataFrame.
frame.filter(&frame["values_nulls"].is_not_null())
Out[20]:
Ok(shape: (2, 3)
┌───────┬────────┬──────────────┐
│ names ┆ values ┆ values_nulls │
│ ---   ┆ ---    ┆ ---          │
│ str   ┆ i32    ┆ i32          │
╞═══════╪════════╪══════════════╡
│ a     ┆ 1      ┆ 1            │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ c     ┆ 3      ┆ 3            │
└───────┴────────┴──────────────┘)
In [21]:
/// Returns a fixed three-element boolean mask named `"x"`: `true`, `false`, null.
///
/// Both `s1` and `s2` are ignored; the function only demonstrates how to
/// build a `ChunkedArray<BooleanType>` from optional values.
fn predicator(s1: &Series, s2: &Series) -> ChunkedArray<BooleanType> {
    // The last entry is a null mask value rather than `false`.
    let mask_values = [Some(true), Some(false), None];
    ChunkedArray::new("x", &mask_values)
}
In [22]:
// Filter `frame` with the hand-built mask; rows whose mask value is `false` or null are dropped.
frame.filter(&predicator(&frame["names"], &frame["values"]))
Out[22]:
Ok(shape: (1, 3)
┌───────┬────────┬──────────────┐
│ names ┆ values ┆ values_nulls │
│ ---   ┆ ---    ┆ ---          │
│ str   ┆ i32    ┆ i32          │
╞═══════╪════════╪══════════════╡
│ a     ┆ 1      ┆ 1            │
└───────┴────────┴──────────────┘)
In [23]:
/// Returns a null-free boolean mask named `"x"`, built by testing each of the
/// hard-coded values `10, 5, 3` against `> 6` (so `true, false, false`).
///
/// Both `s1` and `s2` are ignored; the function only demonstrates
/// `BooleanChunked::from_iter_values`.
fn predicator2(s1: &Series, s2: &Series) -> ChunkedArray<BooleanType> {
    let samples = [10, 5, 3];
    let flags = samples.iter().map(|sample| *sample > 6);
    BooleanChunked::from_iter_values("x", flags)
}
In [24]:
// Filter `frame` with the mask computed from an iterator of plain `bool` values; only `true` rows are kept.
frame.filter(&predicator2(&frame["names"], &frame["values"]))
Out[24]:
Ok(shape: (1, 3)
┌───────┬────────┬──────────────┐
│ names ┆ values ┆ values_nulls │
│ ---   ┆ ---    ┆ ---          │
│ str   ┆ i32    ┆ i32          │
╞═══════╪════════╪══════════════╡
│ a     ┆ 1      ┆ 1            │
└───────┴────────┴──────────────┘)
In [ ]:

Comments