The content on this page consists of the author's fragmentary and immature notes and thoughts. Please read it with your own judgement!
In [2]:
:timing
:sccache 1
:dep polars = { version = "0.42.0", features = ["lazy", "parquet"] }
Out[2]:
Out[2]:
In [5]:
use polars::df;
use polars::prelude::*;
use polars::datatypes::DataType;
use std::fs::File;
use std::io::BufWriter;
use std::io::Write;
Out[5]:
In [6]:
// Build a lazy scan over the Parquet file using the default scan options,
// then execute it with `collect()` to obtain an eager DataFrame.
// Both fallible steps propagate their error with `?`.
let mut frame = LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())?
    .collect()?;
// Trailing expression: the cell evaluates to the loaded DataFrame.
frame
Out[6]:
Out[6]:
Count the Total Number of Rows of All Parquet Files
In [15]:
// Scan the Parquet file lazily, aggregate every column with
// `LazyFrame::count()`, and execute the plan. The cell evaluates to the
// `Result` returned by `collect()` (it is not unwrapped here).
// NOTE(review): per the polars docs `count()` reports non-null values per
// column, so it equals the row count only for null-free columns — confirm.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .count()
    .collect()
Out[15]:
Out[15]:
In [12]:
// Same per-column count as an explicit projection: select the wildcard
// column expression, count it, and cast the result to UInt64.
// The cell evaluates to the `Result` returned by `collect()`.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .select([col("*").count().cast(DataType::UInt64)])
    .collect()
Out[12]:
Out[12]:
In [16]:
// Count the `id0` column, expose the result as a UInt64 column named "n",
// and pull the single value out of the collected DataFrame as a scalar.
// Every fallible step is unwrapped, so any failure (missing file, missing
// column, unexpected dtype, empty result) panics.
// NOTE(review): presumably `id0` has no nulls, otherwise this is not the
// row count — confirm against the data.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .select([col("id0").count().cast(DataType::UInt64).alias("n")])
    .collect()
    .unwrap()
    .column("n")
    .unwrap()
    .u64()
    .unwrap()
    .get(0)
    .unwrap()
Out[16]:
Out[16]:
In [17]:
// Count the rows of the Parquet file without depending on any particular
// column, and return the result as a scalar.
//
// `len()` is the polars expression for the number of rows in the current
// context. The previous `lit(1).count()` counted the elements of the
// literal itself (a single value), which does not reflect the number of
// rows in the file.
//
// The cast to UInt64 is kept so that the `.u64()` accessor below matches the
// column dtype regardless of the index type polars was built with.
// Every fallible step is unwrapped, so any failure panics.
LazyFrame::scan_parquet("part-000.parquet", ScanArgsParquet::default())
    .unwrap()
    .select([len().cast(DataType::UInt64).alias("n")])
    .collect()
    .unwrap()["n"]
    .u64()
    .unwrap()
    .get(0)
    .unwrap()
Out[17]:
Out[17]:
In [ ]: