Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!
In [26]:
import polars as pl
In [37]:
# Load the EC2 instance table and normalise its columns in one chain, so that
# re-running this cell always rebuilds `df` from the CSV on disk.
df = (
    pl.read_csv("aws_ec2_vm_instances.csv")
    .with_columns(
        # Drop the literal leading "$" before casting the price to a float.
        # (`str.lstrip` is not available in current polars releases.)
        hourly_cost=pl.col("hourly_cost").str.strip_prefix("$").cast(pl.Float64),
        # Drop the exact " GiB" unit suffix. The former `rstrip(" GiB")` treated
        # its argument as a *set* of characters to trim, not as a suffix.
        memory=pl.col("memory").str.strip_suffix(" GiB").cast(pl.Float64),
        # A missing per-vCPU performance factor is treated as 1.
        performance_per_vcpu=pl.col("performance_per_vcpu").fill_null(1),
    )
    .with_columns(
        # vCPU count scaled by the per-vCPU performance factor.
        real_vcpu=pl.col("vcpu") * pl.col("performance_per_vcpu"),
    )
)
df
Out[37]:
VM Instances With the Highest vCPU / Memory Ratio
In [38]:
# Largest ratio of effective vCPUs to memory across all instances.
vcpu_per_memory = pl.col("real_vcpu") / pl.col("memory")
df.select(vcpu_per_memory.max()).item()
Out[38]:
In [39]:
# Keep the instances whose effective-vCPU-to-memory ratio is at least 2.
vcpu_per_memory = pl.col("real_vcpu") / pl.col("memory")
df.filter(vcpu_per_memory >= 2)
Out[39]:
Cheapest VM Instance per vCPU Hour
`t2.nano` is the cheapest instance, with an hourly cost of $0.0058 per vCPU and a monthly cost of $4.176 per vCPU (assuming a 30-day month).
In [40]:
# Lowest hourly cost per effective vCPU across all instances.
cost_per_vcpu = pl.col("hourly_cost") / pl.col("real_vcpu")
df.select(cost_per_vcpu.min()).item()
Out[40]:
In [42]:
# Select the instance(s) whose hourly cost per effective vCPU equals the
# minimum. The minimum is computed from the data instead of comparing against
# a number copied by hand from an earlier cell's output, which would go stale
# whenever the CSV changes.
cost_per_vcpu = pl.col("hourly_cost") / pl.col("real_vcpu")
df.filter(cost_per_vcpu == cost_per_vcpu.min())
Out[42]:
Monthly cost per vCPU in USD, assuming a 30-day month (24 × 30 hours).
In [43]:
# Monthly cost per vCPU: the cheapest hourly rate found above, times the hours
# in a 30-day month. The factors are multiplied left to right, as before.
cheapest_hourly_cost = 0.0058
hours_per_day = 24
days_per_month = 30
cheapest_hourly_cost * hours_per_day * days_per_month
Out[43]:
In [ ]: