blackboxopt.visualizations.visualizer
create_hover_information(sections)
Create a hovertemplate which is used to render hover hints in plotly charts.
The data for the chart hover text has to be provided via the `custom_data` attribute of the chart, e.g. as a list of column names.
One quirk is that, within the template, columns can't be referenced by name, only by index. That's why the ordering in the template must match the ordering of the `custom_data`, which is also the reason both are created together in one function.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
sections | dict | Sections to render. The keys will show up as the section titles, values are expected to be a list of column names to be rendered under the section. E.g.: `{ "info": ["Objective #1", "Objective #2", "fidelity"] }` | required |
Returns:
Type | Description |
---|---|
Tuple[str, List] | (plotly hover template, data column names) |
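For orientation, a minimal usage sketch (the data frame and its column names are made up for illustration; only `create_hover_information` itself comes from this module):

```python
import pandas as pd
import plotly.express as px

from blackboxopt.visualizations.visualizer import create_hover_information

# Hypothetical data; the column names just need to match the ones in `sections`.
df = pd.DataFrame(
    {"loss": [0.3, 0.1], "fidelity": [1.0, 0.5], "duration": [12.0, 30.0]}
)

template, data_columns = create_hover_information(
    {"objectives": ["loss"], "info": ["fidelity", "duration"]}
)

# Pass the columns in exactly the order returned, since the template references
# them by index via customdata[0], customdata[1], ...
fig = px.scatter(df, x="duration", y="loss", custom_data=data_columns)
fig.update_traces(hovertemplate=template)
```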
Source code in blackboxopt/visualizations/visualizer.py
def create_hover_information(sections: dict) -> Tuple[str, List]:
    """
    Create a [hovertemplate](https://plotly.com/python/reference/pie/#pie-hovertemplate)
    which is used to render hover hints in plotly charts.

    The data for the chart hover text has to be provided via the `custom_data`
    attribute of the chart, e.g. as a list of column names.

    One quirk is that, within the template, columns can't be referenced by name, only
    by index. That's why the ordering in the template must match the ordering of the
    `custom_data`, which is also the reason both are created together in one function.

    Args:
        sections: Sections to render. The keys will show up as the section titles,
            values are expected to be a list of column names to be rendered under
            the section. E.g.: { "info": ["Objective #1", "Objective #2", "fidelity"] }

    Returns:
        (plotly hover template, data column names)
    """
    template = ""
    idx = 0
    for section, columns in sections.items():
        template += f"<br><b>{section.replace('_', ' ').title()}</b><br>"
        for column in columns:
            template += f"{column}: %{{customdata[{idx}]}}<br>"
            idx += 1

    template += "<extra></extra>"

    data_columns: list = sum(sections.values(), [])
    return template, data_columns
evaluations_to_df(evaluations)
Convert evaluations into a multi-index dataframe.
The evaluations will be cast to dictionaries, which will be normalized. The keys of the dicts will be used as the secondary column index. Evaluations with one or more missing objective values will be dropped.
Examples:

    Evaluation(objectives={'loss_1': 1.0, 'loss_2': -0.0}, stacktrace=None, ...)

Will be transformed into:

    | objectives | stacktrace | ... | <- "group" index
    | loss_1 | loss_2 | stacktrace | ... | <- "field" index
    | ------ | ------ | ---------- | --- |
    | 1.0 | -0.0 | None | ... |
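A minimal sketch of how this is typically called; the top-level import path and the `Evaluation` constructor arguments beyond `objectives` are assumptions based on the documentation above, and the timestamp fields used for the `duration` column are expected to be filled in automatically:

```python
from blackboxopt import Evaluation  # import path assumed
from blackboxopt.visualizations.visualizer import evaluations_to_df

evaluations = [
    Evaluation(configuration={"lr": 0.01}, objectives={"loss_1": 1.0, "loss_2": -0.0}),
    Evaluation(configuration={"lr": 0.10}, objectives={"loss_1": 0.7, "loss_2": -0.2}),
]

df = evaluations_to_df(evaluations)

# Columns form a two-level ("group", "field") index, e.g.
# ("objectives", "loss_1") or ("configuration", "lr").
print(df["objectives"]["loss_1"])
```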
Source code in blackboxopt/visualizations/visualizer.py
def evaluations_to_df(evaluations: List[Evaluation]) -> pd.DataFrame:
    """Convert evaluations into multi index dataframe.

    The evaluations will be cast to dictionaries which will be normalized.
    The keys of the dicts will be used as secondary column index. Evaluations
    with one or more missing objective values will be dropped.

    Example:

    ```
    Evaluation(objectives={'loss_1': 1.0, 'loss_2': -0.0}, stacktrace=None, ...)
    ```

    Will be transformed into:

    | objectives | stacktrace | ... | <- "group" index
    | loss_1 | loss_2 | stacktrace | ... | <- "field" index
    | ------ | ------ | ---------- | --- |
    | 1.0 | -0.0 | None | ... |
    """
    if not evaluations or len(evaluations) == 0:
        raise NoSuccessfulEvaluationsError

    # Filter out e.g. EvaluationSpecifications which might be passed in
    evaluations = [e for e in evaluations if isinstance(e, Evaluation)]

    # Transform to dicts, filter out evaluations with missing objectives
    evaluation_dicts = [e.__dict__ for e in evaluations if not e.any_objective_none]
    if len(evaluation_dicts) == 0:
        raise NoSuccessfulEvaluationsError

    df = pd.DataFrame(evaluation_dicts)

    # Flatten json/dict columns into single multi-index dataframe
    dfs_expanded = []
    for column in df.columns:
        # Normalize json columns, keep original column for non-json columns
        try:
            df_temp = pd.json_normalize(df[column], errors="ignore", max_level=0)
        except AttributeError:
            df_temp = df[[column]]
        # Use keys of dicts as second level of column index
        df_temp.columns = pd.MultiIndex.from_product(
            [[column], df_temp.columns], names=["group", "field"]
        )
        # Drop empty columns
        df_temp = df_temp.dropna(axis=1, how="all")
        dfs_expanded.append(df_temp)

    df = pd.concat(dfs_expanded, join="outer", axis=1)

    # Parse datetime columns
    date_columns = [c for c in df.columns if "unixtime" in str(c)]
    df[date_columns] = df[date_columns].apply(pd.to_datetime, unit="s")

    # Calculate duration in seconds
    df["duration", "duration"] = (
        df["finished_unixtime", "finished_unixtime"]
        - df["created_unixtime", "created_unixtime"]
    )

    return df
hypervolume_over_iterations(evaluations_per_optimizer, objectives, reference_point, percentiles=None, hex_colors=None)
Visualize the hypervolume over iterations.
If multiple studies per optimizer are provided, a measure of central tendency is plotted along with a shaded band indicating the variability across studies.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
evaluations_per_optimizer | Dict[str, List[List[blackboxopt.evaluation.Evaluation]]] | For each key, i.e. optimizer, a list of studies which each contain a list of evaluations for the respective study corresponding to the number of iterations. | required |
objectives | Sequence[blackboxopt.base.Objective] | The objectives to which the reported objective values correspond. | required |
reference_point | List[float] | The hypervolume reference point. | required |
percentiles | Optional[Tuple[float, float, float]] | When provided (e.g. `(25, 50, 75)`), the median is used as the measure of central tendency, while the area between the 25th and 75th percentiles is shaded. In case no percentiles are given, the mean is used as the central tendency and an area indicating the standard error of the mean is shaded. | None |
hex_colors | Optional[List[str]] | A list of hex color code strings. Defaults to plotly express' Dark24. | None |
Returns:
Type | Description |
---|---|
| Plotly figure with hypervolume over iterations and a trace per optimizer. |
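As a rough usage sketch; the `Objective` and `Evaluation` constructor signatures, the top-level import paths, and the synthetic studies are assumptions for illustration:

```python
import random

from blackboxopt import Evaluation, Objective  # import paths assumed
from blackboxopt.visualizations.visualizer import hypervolume_over_iterations

objectives = [
    Objective("loss", greater_is_better=False),
    Objective("cost", greater_is_better=False),
]

def fake_study(seed: int, n_iterations: int = 20):
    # Synthetic stand-in for the evaluations of one optimization run.
    rng = random.Random(seed)
    return [
        Evaluation(
            configuration={"x": i},
            objectives={"loss": rng.uniform(0, 5), "cost": rng.uniform(0, 5)},
        )
        for i in range(n_iterations)
    ]

fig = hypervolume_over_iterations(
    evaluations_per_optimizer={
        "random-search": [fake_study(s) for s in range(3)],
        "other-optimizer": [fake_study(s + 10) for s in range(3)],
    },
    objectives=objectives,
    reference_point=[5.0, 5.0],
    percentiles=(25, 50, 75),
)
fig.show()
```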
Source code in blackboxopt/visualizations/visualizer.py
def hypervolume_over_iterations(
    evaluations_per_optimizer: Dict[str, List[List[Evaluation]]],
    objectives: Sequence[Objective],
    reference_point: List[float],
    percentiles: Optional[Tuple[float, float, float]] = None,
    hex_colors: Optional[List[str]] = None,
):
    """Visualize the hypervolume over iterations.

    In case multiple studies per optimizer are provided, a central tendency as well as
    variability is visualized.

    Args:
        evaluations_per_optimizer: For each key i.e. optimizer, a list of studies which
            each contain a list of evaluations for the respective study corresponding
            to the number of iterations.
        objectives: The objectives to which the reported objective values correspond.
        reference_point: The hypervolume reference point.
        percentiles: When provided (e.g. `(25, 50, 75)`) the median is used as the
            measure of central tendency, while the area between the 25 and 75
            percentiles is shaded. In case no percentiles are given, the mean is used
            as the central tendency and an area indicating the standard error of the
            mean is shaded.
        hex_colors: A list of hex color code strings. Defaults to plotly express' Dark24

    Returns:
        Plotly figure with hypervolume over iterations and a trace per optimizer.
    """
    if hex_colors is None:
        hex_colors = px.colors.qualitative.Dark24
    hex_color_iterator = iter(hex_colors)

    plotly_data = []
    for optimizer, studies in evaluations_per_optimizer.items():
        hv_per_study = []
        for evaluations in studies:
            iteration_steps = len(evaluations)
            hvs = [
                compute_hypervolume(
                    evaluations[: (step + 1)], objectives, reference_point
                )
                for step in range(iteration_steps)
            ]
            hv_per_study.append(hvs)

        if percentiles is not None:
            lower = np.percentile(hv_per_study, percentiles[0], axis=0)
            central = np.percentile(hv_per_study, percentiles[1], axis=0)
            upper = np.percentile(hv_per_study, percentiles[2], axis=0)
        else:
            central = np.mean(hv_per_study, axis=0)
            sem = sps.sem(hv_per_study, axis=0)
            lower = central - sem
            upper = central + sem

        x_plotted = np.arange(len(central))

        r, g, b = plotly.colors.hex_to_rgb(next(hex_color_iterator))
        color_line = f"rgb({r}, {g}, {b})"
        color_fill = f"rgba({r}, {g}, {b}, 0.3)"

        plotly_data.extend(
            [
                go.Scatter(
                    name=optimizer,
                    x=x_plotted,
                    y=central,
                    mode="lines",
                    legendgroup=optimizer,
                    showlegend=True,
                    line=dict(color=color_line, simplify=True),
                ),
                go.Scatter(
                    x=x_plotted,
                    y=lower,
                    mode="lines",
                    marker=dict(color=color_line),
                    line=dict(width=0, simplify=True),
                    legendgroup=optimizer,
                    showlegend=False,
                    hoverinfo="skip",
                ),
                go.Scatter(
                    x=x_plotted,
                    y=upper,
                    mode="lines",
                    marker=dict(color=color_line),
                    line=dict(width=0, simplify=True),
                    legendgroup=optimizer,
                    showlegend=False,
                    hoverinfo="skip",
                    fillcolor=color_fill,
                    fill="tonexty",
                ),
            ]
        )

    fig = go.Figure(plotly_data)
    return fig
parallel_coordinate_plot_parameters(evaluations, columns=None, color_by=None)
Create an interactive parallel coordinate plot.
Useful to investigate relationships in a higher dimensional search space and the optimization's objective(s).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
evaluations | List[blackboxopt.evaluation.Evaluation] | Evaluations to plot. | required |
columns | Optional[List[str]] | Names of columns to show. Can contain parameter names, objective names and settings keys. If `None`, all parameters, objectives and settings are displayed. | None |
color_by | Optional[str] | Parameter name, objective name or settings key. The corresponding column will be shown at the very right; its value will be used for the color scale. If `None`, all lines have the same color. | None |
Returns:
Type | Description |
---|---|
| Plotly figure |
Raises:
Type | Description |
---|---|
NoSuccessfulEvaluationsError | In case `evaluations` does not contain at least one successful evaluation (an evaluation with objective value != `None`). |
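A minimal sketch, assuming the same `Evaluation` constructor arguments and import path as in the examples above:

```python
from blackboxopt import Evaluation  # import path assumed
from blackboxopt.visualizations.visualizer import parallel_coordinate_plot_parameters

evaluations = [
    Evaluation(configuration={"lr": 0.01, "optimizer": "adam"}, objectives={"loss": 0.42}),
    Evaluation(configuration={"lr": 0.10, "optimizer": "sgd"}, objectives={"loss": 0.37}),
]

# All parameter and objective columns are shown; lines are colored by the
# "loss" column, which is moved to the right-most coordinate.
fig = parallel_coordinate_plot_parameters(evaluations, color_by="loss")
fig.show()
```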
Source code in blackboxopt/visualizations/visualizer.py
def parallel_coordinate_plot_parameters(
    evaluations: List[Evaluation],
    columns: Optional[List[str]] = None,
    color_by: Optional[str] = None,
):
    """Create an interactive parallel coordinate plot.

    Useful to investigate relationships in a higher dimensional search space and the
    optimization's objective(s).

    Args:
        evaluations: Evaluations to plot.
        columns: Names of columns to show. Can contain parameter names, objective names
            and settings keys. If `None`, all parameters, objectives and settings are
            displayed.
        color_by: Parameter name, objective name or settings key. The corresponding
            column will be shown at the very right, its value will be used for the
            color scale. If `None`, all lines have the same color.

    Returns:
        Plotly figure

    Raises:
        NoSuccessfulEvaluationsError: In case `evaluations` does not contain at least
            one successful evaluation (an evaluation with objective value != `None`).
    """
    if not evaluations:
        raise NoSuccessfulEvaluationsError

    # Prepare dataframe for visualization
    df = evaluations_to_df(evaluations)

    # Drop unused columns and indices
    if "settings" in df.columns:
        df = df[["configuration", "settings", "objectives"]]
        settings_cols = df["settings"].columns.to_list()
    else:
        df = df[["configuration", "objectives"]]
        settings_cols = []
    objective_cols = df["objectives"].columns.to_list()
    df = df.droplevel(0, axis=1)

    # If no columns are specified, use all:
    if not columns:
        columns = df.columns.to_list()

    if color_by and color_by not in columns:
        raise ValueError(
            f"Unknown column name in color_by='{color_by}'. Please make sure, that this "
            + "column name is correct and one of the visible columns."
        )

    ambigious_columns = [k for k, v in Counter(df[columns].columns).items() if v > 1]
    if ambigious_columns:
        raise ValueError(
            "All columns to plot must have a unique name, but those are ambigious: "
            + f"{ambigious_columns}. Either rename parameters/settings/objective to "
            + "be unique or provide only the unambigious ones as `columns` argument."
        )

    # Prepare a coordinate (vertical line) for every column
    coordinates = []
    colored_coordinate = {}
    for column in columns:
        coordinate: Dict[str, Any] = {}

        if column in objective_cols:
            coordinate["label"] = f"<b>Objective: {column}</b>"
        elif column in settings_cols:
            coordinate["label"] = f"Setting: {column}"
        else:
            coordinate["label"] = column

        parameter_type = df[column].dtype.name
        if parameter_type.startswith("float") or parameter_type.startswith("int"):
            # Handling floats and integers the same, because unfortunately it's hard to
            # use integers only for ticks and still be robust regarding a large range
            # of values.
            coordinate["values"] = df[column]
        elif parameter_type in ["object", "bool"]:
            # Encode categorical values to integers. Unfortunately, ordinal parameters
            # lose their ordering, as there is no information about the order in the
            # evaluations.
            # The string conversion is necessary for unhashable entries, e.g. of
            # type List, which can't be cast to categories.
            df[column] = df[column].astype(str).astype("category")
            categories = df[column].cat.categories.to_list()
            encoded_categories = list(range(len(categories)))
            df[column].cat.categories = encoded_categories
            # Use integer encodings for scale and category values as tick labels
            coordinate["ticktext"] = categories
            coordinate["tickvals"] = encoded_categories
            coordinate["values"] = df[column].astype("str")
        else:
            warnings.warn(
                f"Ignoring column with unknown type: {column}<{parameter_type}>"
            )
            continue

        if column == color_by:
            colored_coordinate = coordinate
        else:
            coordinates.append(coordinate)

    # Append colored coordinate to the end (right)
    if colored_coordinate:
        coordinates.append(colored_coordinate)

    # Plot
    return go.Figure(
        data=go.Parcoords(
            line=dict(
                # Color lines by objective value
                color=df[color_by] if color_by else None,
                colorscale=px.colors.diverging.Tealrose,
                showscale=True,
                # Use colorbar as kind of colored extension to the axis
                colorbar=dict(
                    thickness=16, x=1, xpad=0, ypad=1, tickmode="array", tickvals=[]
                ),
            ),
            dimensions=coordinates,
        ),
        layout=dict(title="[BBO] Parallel coordinates plot"),
    )