The content on this page consists of the author's fragmentary, unpolished notes and thoughts. Please read it with your own judgment!
In [1]:
import itertools as it
import polars as pl
In [2]:
# Small example frame referenced by the later cells: an integer `id` column
# plus two string columns (`color`, `shape`).
df = pl.DataFrame(
    data={
        "id": list(range(5)),
        "color": ["red", "green", "green", "red", "red"],
        "shape": ["square", "triangle", "square", "triangle", "square"],
    },
)
# Bare last expression so the notebook renders the frame.
df
Out[2]:
In [11]:
# Group by color and aggregate `id` without a reducing function, so polars
# gathers each group's ids into a list. maintain_order=True keeps the groups
# in the order they first appear in `df`.
(
    df
    .groupby("color", maintain_order=True)
    .agg(pl.col("id"))
)
Out[11]:
In [17]:
# Same grouping as a plain `pl.col("id")` aggregation, but reduce each color
# group to its first `id` value. maintain_order=True keeps the groups in the
# order they first appear in `df`.
(
    df
    .groupby("color", maintain_order=True)
    .agg(pl.col("id").first())
)
Out[17]:
In [4]:
def update_frame(frame):
    """Multiply the ``id`` value in the first row of ``frame`` by 1000.

    The update is done in place through ``frame[0, "id"]`` item assignment,
    and the very same object is returned so the function can be used as a
    per-group callback.

    Args:
        frame: An object supporting ``len()`` and ``frame[row, column]``
            item access/assignment (a polars DataFrame in this notebook).

    Returns:
        The ``frame`` object that was passed in. It is returned untouched
        when it has no rows.
    """
    if len(frame) == 0:
        # There is no first row to scale; indexing row 0 would raise.
        return frame
    frame[0, "id"] *= 1000
    return frame
In [5]:
# Call update_frame once per color group and display the combined result.
# No maintain_order is passed here, so the order of the groups in the
# output is not pinned.
(
    df
    .groupby("color")
    .apply(update_frame)
)
Out[5]:
GroupBy + Aggregation
In [6]:
# Count the rows in each color group and name the result column `n`.
# maintain_order=True keeps the groups in first-appearance order, so the
# displayed table is identical on every run (the list/first aggregation
# cells earlier in this notebook pass the same flag).
df.groupby("color", maintain_order=True).agg(pl.count().alias("n"))
Out[6]:
In [9]:
# The first three auto-named columns act as both the grouping key and the
# sort key. No schema is passed, so the code relies on the default
# `column_<i>` names.
prefix_cols = ["column_0", "column_1", "column_2"]

# One row per 4-element combination of range(52), numbered by
# with_row_count (column `row_nr`). For every distinct 3-column prefix,
# keep the smallest row number, then sort by the prefix.
(
    pl.DataFrame(
        data=it.combinations(range(52), 4),
        orient="row",
    )
    .with_row_count()
    .groupby(prefix_cols)
    .agg(pl.col("row_nr").min())
    .sort(prefix_cols)
)
Out[9]:
GroupBy as an Iterable
In [11]:
# Iterate the GroupBy object directly: each step unpacks into a group key
# and that group's sub-frame, and the (key, row count) pair is fed to
# pl.Series through a generator.
# NOTE(review): each pair mixes the group key with an int count -- confirm
# the resulting Series dtype is what you expect.
pl.Series(
    (color, group.shape[0])
    for color, group in df.groupby("color")
)
Out[11]:
In [ ]: