from iaa_od import AnnotationTimestamp
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
[docs]
def show_fatigue_data_with_agreement(sessions: dict[str, dict[int, list[AnnotationTimestamp]]]) -> None:
    """
    Visualise annotator fatigue by plotting Alpha values against normalised timestamps.

    One line is drawn per annotation session, coloured by GT, together with a dashed red line
    showing the mean Alpha value across all plotted sessions as an overall fatigue trend.
    Sessions without any annotation timestamps are skipped, and the mean line is omitted when
    there is nothing to plot, so an empty input produces an empty figure instead of an error.

    Args:
        sessions (dict[str, dict[int, list[AnnotationTimestamp]]]): A dictionary where keys are
            GT names and values are dictionaries mapping session IDs to lists of
            AnnotationTimestamp objects. Each object must expose `timestamp` and
            `alpha_for_image`.

    Returns:
        None. The figure is displayed with `plt.show()`.
    """
    _, ax = plt.subplots()
    ax.set_xlabel('Normalised timestamps (s)')
    ax.set_ylabel('Alpha values')
    ax.set_title('Alpha values against normalised timestamps')

    n_gt = len(sessions)
    # `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
    # `plt.get_cmap` accepts the same (name, lut) arguments and is still available.
    cmap = plt.get_cmap('tab20b', n_gt)
    colours = [cmap(i) for i in range(n_gt)]

    all_ts: list[list[int]] = []
    all_alphas: list[list[float]] = []
    for i, (gt_name, session_dict) in enumerate(sessions.items()):
        for annotation_timestamps in session_dict.values():
            ts = [annotation_timestamp.timestamp for annotation_timestamp in annotation_timestamps]
            # An empty session has nothing to draw and would make the min()/max() below fail.
            if not ts:
                continue
            alpha_values = [annotation_timestamp.alpha_for_image for annotation_timestamp in annotation_timestamps]
            all_ts.append(ts)
            all_alphas.append(alpha_values)
            ax.plot(ts, alpha_values, marker='.', markersize=4, linestyle='-', label=gt_name, color=colours[i])

    # Mean trend across all sessions, projected onto a common timestamp grid.
    # NOTE(review): np.interp expects each session's timestamps in increasing order and, by
    # default, holds a session's first/last value constant outside its own range -- confirm
    # that both are acceptable for the mean line.
    if all_ts:
        common_ts = np.linspace(min(map(min, all_ts)), max(map(max, all_ts)), 300)
        interpolated = [np.interp(common_ts, ts, alphas) for ts, alphas in zip(all_ts, all_alphas)]
        mean_alpha = np.nanmean(interpolated, axis=0)
        ax.plot(common_ts, mean_alpha, linestyle='--', color='red', linewidth=2, label='Mean')

    # One legend entry per GT (plus the mean); the last handle per label wins.
    handles, labels = ax.get_legend_handles_labels()
    unique = dict(zip(labels, handles))
    ax.legend(unique.values(), unique.keys())
    plt.tight_layout()
    plt.show()
[docs]
def show_fatigue_data_with_granularity(sessions: dict[str, dict[int, list[AnnotationTimestamp]]],
                                       granularities: dict[str, "Granularities"]) -> None:
    """
    Plot per-image granularity over time as a fatigue view that does not rely on Krippendorff's alpha.

    Two vertically stacked panels share the x axis: count granularity on top and size granularity
    at the bottom. Each panel shows one line per annotation session (coloured by GT), a light grey
    reference line at 1.0, and a dashed red mean across every plotted session. With the
    dataset-wide benchmark normalisation applied by `per_image_fatigue_granularities`, 1.0
    corresponds to the benchmark (median) annotator, so a session drifting away from 1.0 suggests
    the annotator is changing granularity strategy (e.g. fewer, larger group-level boxes as
    fatigue sets in: lower count granularity, higher size granularity).

    Args:
        sessions (dict[str, dict[int, list[AnnotationTimestamp]]]): GT name -> session ID -> list
            of AnnotationTimestamp objects, as returned by `evaluate_annotator_fatigue`.
        granularities (dict[str, Granularities]): Image filename -> Granularities for that image,
            as returned by `per_image_fatigue_granularities`.
    """
    # (table selector, axis label) for each panel, listed top to bottom.
    panels = (("count", "Count granularity"), ("size", "Size granularity"))

    gt_total = len(sessions)
    palette = plt.get_cmap('tab20b', gt_total)
    colours = [palette(idx) for idx in range(gt_total)]

    fig, axes = plt.subplots(len(panels), 1, sharex=True, figsize=(8, 8))
    for panel, (which, label) in zip(axes, panels):
        session_times: list[list[int]] = []
        session_values: list[list[float]] = []

        for gt_index, (gt_name, gt_sessions) in enumerate(sessions.items()):
            for stamps in gt_sessions.values():
                # Keep only the images for which this annotator has a granularity value; NaN
                # marks a singleton-only image or an image where the annotator contributed to
                # no non-singleton unit.
                kept = [
                    (stamp.timestamp, value)
                    for stamp in stamps
                    if not np.isnan(
                        value := _granularity_value(granularities, stamp.image_filename, gt_name, which)
                    )
                ]
                # A session with no usable points contributes neither a line nor to the mean.
                if not kept:
                    continue
                times = [moment for moment, _ in kept]
                values = [reading for _, reading in kept]
                session_times.append(times)
                session_values.append(values)
                panel.plot(times, values, marker='.', markersize=4, linestyle='-', label=gt_name,
                           color=colours[gt_index])

        # Benchmark reference (1.0 under benchmark normalisation), drawn behind the data.
        panel.axhline(1.0, color='lightgray', linewidth=1, zorder=0)
        panel.set_ylabel(f'{label} (normalised)')

        # Resample every session onto one shared grid so they can be averaged point by point.
        if session_times:
            earliest = min(min(times) for times in session_times)
            latest = max(max(times) for times in session_times)
            grid = np.linspace(earliest, latest, 300)
            resampled = [
                np.interp(grid, times, values)
                for times, values in zip(session_times, session_values)
            ]
            panel.plot(grid, np.nanmean(resampled, axis=0), linestyle='--', color='red', linewidth=2,
                       label='Mean')

        # Collapse repeated labels to a single legend entry; later handles overwrite earlier ones.
        handles, labels = panel.get_legend_handles_labels()
        legend_entries = dict(zip(labels, handles))
        panel.legend(legend_entries.values(), legend_entries.keys())

    axes[-1].set_xlabel('Normalised timestamps (s)')
    fig.suptitle('Per-image granularity against normalised timestamps')
    plt.tight_layout()
    plt.show()
def _granularity_value(granularities: dict[str, "Granularities"], image_filename: str, gt_name: str, which: str) -> float:
    """
    Return one annotator's normalised granularity for one image, or NaN when there is none.

    NaN is returned in two situations, both treated as "no data" for the fatigue plot rather
    than as errors: the image has no entry in `granularities` (all of its units were
    singletons), or the selected table has no entry for `gt_name` (the annotator did not
    contribute to any non-singleton unit on that image).

    Args:
        granularities: Mapping from image filename to that image's Granularities.
        image_filename: Image to look up.
        gt_name: Annotator (GT) name used as the key in the selected table.
        which: "count" selects the `count` table; any other value selects the `size` table.
    """
    missing = float('nan')
    entry = granularities.get(image_filename)
    if entry is not None:
        per_annotator = entry.size if which != "count" else entry.count
        return per_annotator.get(gt_name, missing)
    return missing