In [67]:
import numpy as np
import holoviews as hv
# Load the bokeh extension for HoloViews
hv.extension("bokeh")
In [68]:
import pandas as pd
from matplotlib.cbook import get_sample_data
# get_sample_data hands back an open file object by default, so read it
# inside a context manager to make sure the handle is closed once the CSV
# has been parsed into a DataFrame.
with get_sample_data("percent_bachelors_degrees_women_usa.csv") as fname:
    gender_degree_data = pd.read_csv(fname)
# Title passed to relabel() on the final plot; the trailing newline is part
# of the string.
title = (
    "Percentage of Bachelor's degrees conferred to women "
    "in the U.S.A. by major (1970-2011)\n"
)
# Colors used in the plot, as a flat list of 20 hex strings. They are written
# here as pairs; each tuple holds two consecutive entries of the final list.
color_sequence = [
    shade
    for pair in (
        ("#1f77b4", "#aec7e8"),
        ("#ff7f0e", "#ffbb78"),
        ("#2ca02c", "#98df8a"),
        ("#d62728", "#ff9896"),
        ("#9467bd", "#c5b0d5"),
        ("#8c564b", "#c49c94"),
        ("#e377c2", "#f7b6d2"),
        ("#7f7f7f", "#c7c7c7"),
        ("#bcbd22", "#dbdb8d"),
        ("#17becf", "#9edae5"),
    )
    for shade in pair
]
# Vertical offsets added to the "conferred" value of individual degree
# labels; degrees that are not listed here get no offset.
y_offsets = dict(
    [
        ("Foreign Languages", 0.5),
        ("English", -0.5),
        ("Communications and Journalism", 0.75),
        ("Art and Performance", -0.25),
        ("Agriculture", 1.25),
        ("Social Sciences and History", 0.25),
        ("Business", -0.75),
        ("Math and Statistics", 0.75),
        ("Architecture", -0.75),
        ("Computer Science", 0.75),
        ("Engineering", -0.25),
    ]
)
# Load the data into a dataframe and use pd.melt to unpivot the degree columns
# Long form: one row per (Year, Degree) with the percentage in "conferred".
df = pd.melt(
    pd.DataFrame(gender_degree_data),
    id_vars="Year",
    var_name="Degree",
    value_name="conferred",
)
# Turn the underscore-separated column names into title-cased labels.
df["Degree"] = [" ".join(raw.split("_")).title() for raw in df["Degree"]]
# Define a formatter that works for both bokeh and matplotlib
def percent_format(x):
    """Format a numeric value as a whole-number percentage string.

    Parameters
    ----------
    x : number
        Value to format, e.g. ``12.6``.

    Returns
    -------
    str
        ``x`` rendered with zero decimals followed by ``%`` (``"13%"``).
        Values that reject the ``f`` format spec fall back to ``%d``
        formatting.
    """
    try:
        return "{:0.0f}%".format(x)
    except (TypeError, ValueError):
        # "%%" emits a literal percent sign; a lone trailing "%" is an
        # incomplete format specifier and raises ValueError.
        return "%d%%" % x
# Value dimension for the percentages: formatted through percent_format and
# declared with the range (0, 90).
vdim = hv.Dimension(
    "conferred",
    value_format=percent_format,
    range=(0, 90),
)
# Wrap the long-form frame in a Dataset, then build one Curve over "Year"
# per "Degree" group and overlay them.
ds = hv.Dataset(df, vdims=vdim)
curves = ds.to(hv.Curve, "Year", groupby="Degree").overlay()
# Build the text annotations: one label per degree, taken from the rows of
# the last year in the dataset.
max_year = ds["Year"].max()


def offset(row):
    """Return a copy of ``row`` with ``conferred`` shifted by its label offset.

    The shift is looked up in ``y_offsets`` by ``row.Degree``; degrees without
    an entry are shifted by 0. The incoming row is left untouched because
    pandas does not support functions that mutate the object handed to them
    by ``DataFrame.apply``.
    """
    shifted = row.copy()
    shifted["conferred"] += y_offsets.get(shifted.Degree, 0)
    return shifted


label_df = df[df.Year == max_year].apply(offset, axis=1)
labels = hv.Labels(label_df, ["Year", "conferred"], "Degree")
In [69]:
def grid_cb(plot, element):
    """Restyle the grid of the object stored under ``plot.handles["plot"]``.

    Hides the x grid, gives the y grid a [6, 4] dash pattern with line
    width 3, and sets the grid bounds to (1970, 2010). ``element`` is
    accepted but not used.
    """
    figure = plot.handles["plot"]
    figure.xgrid.visible = False
    y_grid = figure.ygrid
    y_grid.grid_line_dash = [6, 4]
    y_grid.grid_line_width = 3
    figure.grid.bounds = (1970, 2010)
# Define some custom options for bokeh
# Fetch the option tree for the bokeh backend; the assignments below set
# options on it for the NdOverlay and Curve types.
options = hv.Store.options(backend="bokeh")
options.NdOverlay = hv.Options("plot", batched=False)
# "plot" options for Curve: 900x900 size, no frame, no legend, hover tool,
# fixed ticks every ten years, and grid_cb registered as a finalize hook.
options.Curve = hv.Options(
    "plot",
    show_frame=False,
    labelled=[],
    tools=["hover"],
    finalize_hooks=[grid_cb],
    height=900,
    width=900,
    show_legend=False,
    xticks=[1970, 1980, 1990, 2000, 2010],
)
# NOTE(review): options.Curve is assigned a second time, now with the "style"
# group. Presumably the option tree keeps both groups rather than replacing
# the "plot" options above - confirm against the HoloViews version in use.
options.Curve = hv.Options("style", color=hv.Cycle(values=color_sequence), line_width=2)
# Compose the final figure: redeclare the Year range as (1970, 2030)
# (presumably to leave room for the label text right of the curves - confirm),
# combine the curves with the labels, and attach the title. As the last
# expression in the cell, the result is what the notebook displays.
(
    curves.redim(Year=dict(range=(1970, 2030)))
    * labels.options(color_index="Degree", cmap=color_sequence, text_align="left")
).relabel(title)
Out[69]:
In [ ]: