The content on this page consists of the author's fragmentary and immature notes and thoughts. Please read it with your own judgement!
In [2]:
:timing
:sccache 1
:dep polars = { version = "0.42.0", features = ["lazy", "parquet"] }
Out[2]:
Out[2]:
In [5]:
use polars::df;
use polars::prelude::*;
use polars::datatypes::DataType;
use std::fs::File;
use std::io::BufWriter;
use std::io::Write;
Out[5]:
In [6]:
// Lazily scan "part-000.parquet" with default scan arguments, then
// materialize the whole scan with `collect()`. Both steps propagate
// errors with `?`.
let mut frame = LazyFrame::scan_parquet(
"part-000.parquet",
ScanArgsParquet::default(),
)?
// Execute the lazy plan and bind the result to `frame`.
.collect()?;
// Trailing expression: the notebook displays the collected frame.
frame
Out[6]:
Out[6]:
Count the Total Number of Rows of All Parquet Files
In [15]:
// Scan the Parquet file lazily, apply `LazyFrame::count`, and materialize
// the result. The cell evaluates to the `Result` returned by `collect()`.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .count()
    .collect()
Out[15]:
Out[15]:
In [12]:
// Select `count()` of every column (`col("*")`), cast to UInt64, and
// materialize. The cell evaluates to the `Result` returned by `collect()`.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .select([col("*").count().cast(DataType::UInt64)])
    .collect()
Out[12]:
Out[12]:
In [16]:
// Compute `count()` of column `id0` as a UInt64 column named "n", then pull
// out the single value at row 0 as a plain `u64`. Every step panics on
// failure via `unwrap()`.
// NOTE(review): `count()` presumably excludes null values, so this equals the
// total row count only if `id0` contains no nulls — confirm.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .select([col("id0").count().cast(DataType::UInt64).alias("n")])
    .collect()
    .unwrap()["n"]
    .u64()
    .unwrap()
    .get(0)
    .unwrap()
Out[16]:
Out[16]:
In [17]:
// Count the total number of rows in the Parquet file and return it as a `u64`.
//
// `len()` evaluates to the number of rows in the frame, independent of any
// particular column and of null values. The previous expression,
// `lit(1).count()`, counted the values of a literal rather than the rows
// scanned, so it did not reflect the size of the file.
//
// The result column is cast to UInt64 and named "n" so the extraction below
// is unchanged. Every step panics on failure via `unwrap()`.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .select(&[len().cast(DataType::UInt64).alias("n")])
    .collect()
    .unwrap()["n"]
    .u64()
    .unwrap()
    .get(0)
    .unwrap()
Out[17]:
Out[17]:
In [ ]: