Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!
In [2]:
:dep polars = { version = "0.30", features = ["lazy", "round_series", "dtype-struct"] }
In [ ]:
fn my_filter_func(col1: &Series, col2: &Series, col2: &Series) -> ReturnType {
let it = (0..n).map(|i| {
let col1 = match col.get(i) {
AnyValue::UInt64(val) => val,
_ => panic!("Wrong type of col1!"),
};
// similar for col2 and col3
// apply user-defined function to col1, col2 and col3
}
// convert it to a collection of the required type
}
In [ ]:
/// Placeholder for an arbitrary user-defined computation on two `f32` values.
///
/// The stub ignores `b` and hands `a` back unchanged.
fn my_black_box_function(a: f32, b: f32) -> f32 {
    // do something
    let _ = b;
    a
}
/// Applies `my_black_box_function` element-wise to two `Float32` columns.
///
/// Rows are paired positionally. A row produces `None` when either input
/// value is null; otherwise it holds the function's result.
///
/// # Panics
///
/// Panics when either column's dtype is not `Float32`; the message names both
/// dtypes that were actually found.
fn apply_multiples(col_a: &Series, col_b: &Series) -> Float32Chunked {
    match (col_a.dtype(), col_b.dtype()) {
        (DataType::Float32, DataType::Float32) => {
            // The dtype check in this match arm is what makes these downcasts infallible.
            let a = col_a.f32().expect("dtype was checked to be Float32");
            let b = col_b.f32().expect("dtype was checked to be Float32");
            a.into_iter()
                .zip(b)
                .map(|(opt_a, opt_b)| {
                    // Only call the function when both sides are non-null.
                    opt_a
                        .zip(opt_b)
                        .map(|(a, b)| my_black_box_function(a, b))
                })
                .collect()
        }
        (dtype_a, dtype_b) => panic!("unexpected dtypes: {dtype_a} and {dtype_b}"),
    }
}
In [ ]:
fn apply_multiples(lf: LazyFrame) -> Result<DataFrame> {
df![
"a" => [1.0, 2.0, 3.0],
"b" => [3.0, 5.1, 0.3]
]?
.lazy()
.select([concat_lst(["col_a", "col_b"]).map(
|s| {
let ca = s.struct_()?;
let b = ca.field_by_name("col_a")?;
let a = ca.field_by_name("col_b")?;
let a = a.f32()?;
let b = b.f32()?;
let out: Float32Chunked = a
.into_iter()
.zip(b.into_iter())
.map(|(opt_a, opt_b)| match (opt_a, opt_b) {
(Some(a), Some(b)) => Some(my_black_box_function(a, b)),
_ => None,
})
.collect();
Ok(out.into_series())
},
GetOutput::from_type(DataType::Float32),
)])
.collect()
}
In [3]:
use polars::{
prelude::*,
datatypes::DataType,
};
In [12]:
// Sample frame with six rows: an integer column, two float columns and an
// optional-integer column that contains nulls (`None`).
let df = df![
    "column integers" => &[1, 2, 3, 4, 5, 6],
    "column float64 A" => [23.654, 0.319, 10.0049, 89.01999, -3.41501, 52.0766],
    "column options" => [Some(28), Some(300), None, Some(2), Some(-30), None],
    "column float64 B" => [23.6499, 0.399, 10.0061, 89.0105, -3.4331, 52.099999],
]
.unwrap();
// Trailing expression so the notebook displays the frame.
df
Out[12]:
In [13]:
// Names of the two float columns of the sample frame.
let columns_with_float64: Vec<&str> = Vec::from(["column float64 A", "column float64 B"]);
// Trailing expression so the notebook displays the list.
columns_with_float64
Out[13]:
In [ ]:
// Template only — not compilable as written: `lazyframe`, `col1 ... colN` and
// `some_function` are placeholders to be replaced by real names.
lazyframe
.with_columns([
// Select several columns with one expression.
cols(col1, col2, ..., colN)
// Run `some_function` on the selected data and declare Float64 as the output dtype.
// NOTE(review): presumably the closure receives each selected column as its own
// Series — confirm against the polars `Expr::apply` documentation.
.apply(|series|
some_function(series),
GetOutput::from_type(DataType::Float64)
)
]);
References¶
How to apply a function to multiple columns of a polars DataFrame in Rust