Source code for iaa_od.visualisation.show_fatigue_evaluation

from iaa_od import AnnotationTimestamp
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

def show_fatigue_data_with_agreement(sessions: dict[str, dict[int, list[AnnotationTimestamp]]]) -> None:
    """
    Visualise the fatigue evaluation of annotators by plotting Alpha values against normalised
    timestamps for each GT.

    One line is drawn per annotation session, coloured by GT. A dashed red mean line, obtained by
    linearly interpolating every session onto a common grid of 300 timestamps and averaging, shows
    the overall trend across all sessions. Sessions without any annotation timestamps are skipped,
    and the mean line is omitted when there is nothing to average. The figure is displayed with
    `plt.show()`.

    Args:
        sessions (dict[str, dict[int, list[AnnotationTimestamp]]]): A dictionary where keys are GT
            names and values are dictionaries mapping session IDs to lists of AnnotationTimestamp
            objects.
    """
    _, ax = plt.subplots()
    ax.set_xlabel('Normalised timestamps (s)')
    ax.set_ylabel('Alpha values')
    ax.set_title('Alpha values against normalised timestamps')

    n_gt = len(sessions)
    # Use pyplot's get_cmap: matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed
    # in 3.9. The lookup table is kept at size >= 1 so an empty `sessions` mapping is harmless.
    cmap = plt.get_cmap('tab20b', max(n_gt, 1))
    colours = [cmap(i) for i in range(n_gt)]

    all_ts: list[list[int]] = []
    all_alphas: list[list[float]] = []
    for i, (gt_name, session_dict) in enumerate(sessions.items()):
        for annotation_timestamps in session_dict.values():
            # An empty session has nothing to draw and would break the min/max and the
            # interpolation used for the mean trend below.
            if not annotation_timestamps:
                continue
            ts: list[int] = [annotation_timestamp.timestamp for annotation_timestamp in annotation_timestamps]
            alpha_values: list[float] = [
                annotation_timestamp.alpha_for_image for annotation_timestamp in annotation_timestamps
            ]
            all_ts.append(ts)
            all_alphas.append(alpha_values)
            ax.plot(ts, alpha_values, marker='.', markersize=4, linestyle='-', label=gt_name, color=colours[i])

    # Mean trend across all sessions, projected onto a common timestamp grid.
    # NOTE(review): np.interp expects each session's timestamps to be increasing - confirm the
    # caller provides them in chronological order.
    if all_ts:
        common_ts = np.linspace(min(map(min, all_ts)), max(map(max, all_ts)), 300)
        interpolated = [np.interp(common_ts, ts, alphas) for ts, alphas in zip(all_ts, all_alphas)]
        mean_alpha = np.nanmean(interpolated, axis=0)
        ax.plot(common_ts, mean_alpha, linestyle='--', color='red', linewidth=2, label='Mean')

    # One legend entry per GT (plus the mean); the last handle per label wins.
    handles, labels = ax.get_legend_handles_labels()
    unique = dict(zip(labels, handles))
    if unique:
        ax.legend(unique.values(), unique.keys())

    plt.tight_layout()
    plt.show()
def show_fatigue_data_with_granularity(sessions: dict[str, dict[int, list[AnnotationTimestamp]]], granularities: dict[str, "Granularities"]) -> None:
    """
    Plot per-image granularity over time as an indicator of annotator fatigue.

    Two vertically stacked panels sharing the x-axis are drawn: count granularity on top and size
    granularity below. Within each panel every annotation session becomes one line (coloured by
    GT) of granularity values against normalised timestamps, and a dashed red line shows the mean
    across all sessions after interpolation onto a common grid of 300 timestamps.

    Under the dataset-wide benchmark normalisation used by `per_image_fatigue_granularities`, a
    value of 1.0 corresponds to the benchmark (median) annotator, which is why a light reference
    line is drawn at 1.0. Values drifting away from 1.0 during a session indicate a change in
    granularity strategy (e.g. larger, group-level boxes as fatigue sets in: lower count
    granularity, higher size granularity).

    Args:
        sessions (dict[str, dict[int, list[AnnotationTimestamp]]]): Maps each GT name to a
            dictionary of session ID -> list of AnnotationTimestamp objects, as returned by
            `evaluate_annotator_fatigue`.
        granularities (dict[str, Granularities]): Maps an image filename to that image's
            Granularities, as returned by `per_image_fatigue_granularities`.
    """
    # Which granularity table goes on which panel, and how that panel is labelled.
    panels = (("count", "Count granularity"), ("size", "Size granularity"))

    gt_total = len(sessions)
    palette = plt.get_cmap('tab20b', gt_total)
    gt_colours = [palette(idx) for idx in range(gt_total)]

    fig, axes = plt.subplots(len(panels), 1, sharex=True, figsize=(8, 8))

    for axis, (table_name, axis_label) in zip(axes, panels):
        session_times: list[list[int]] = []
        session_values: list[list[float]] = []

        for gt_index, (gt_name, session_dict) in enumerate(sessions.items()):
            for stamps in session_dict.values():
                # Keep only the images for which this annotator has a granularity value; NaN marks
                # a singleton-only image or an annotator without a non-singleton unit on it.
                kept: list[tuple[int, float]] = []
                for stamp in stamps:
                    granularity: float = _granularity_value(granularities, stamp.image_filename, gt_name, table_name)
                    if not np.isnan(granularity):
                        kept.append((stamp.timestamp, granularity))

                # A session whose points were all missing contributes nothing to the panel.
                if not kept:
                    continue

                times: list[int] = [moment for moment, _ in kept]
                values: list[float] = [level for _, level in kept]
                session_times.append(times)
                session_values.append(values)
                axis.plot(
                    times,
                    values,
                    marker='.',
                    markersize=4,
                    linestyle='-',
                    label=gt_name,
                    color=gt_colours[gt_index],
                )

        # Benchmark reference (1.0 under benchmark normalisation), drawn behind the data.
        axis.axhline(1.0, color='lightgray', linewidth=1, zorder=0)
        axis.set_ylabel(f'{axis_label} (normalised)')

        # Average trend over every plotted session, resampled onto one shared timestamp grid.
        if session_times:
            earliest = min(map(min, session_times))
            latest = max(map(max, session_times))
            grid = np.linspace(earliest, latest, 300)
            resampled = [
                np.interp(grid, times, values)
                for times, values in zip(session_times, session_values)
            ]
            trend = np.nanmean(resampled, axis=0)
            axis.plot(grid, trend, linestyle='--', color='red', linewidth=2, label='Mean')

        # Collapse repeated labels so each GT (and the mean) appears once; the last handle wins.
        legend_handles, legend_labels = axis.get_legend_handles_labels()
        by_label = {text: handle for text, handle in zip(legend_labels, legend_handles)}
        axis.legend(by_label.values(), by_label.keys())

    axes[-1].set_xlabel('Normalised timestamps (s)')
    fig.suptitle('Per-image granularity against normalised timestamps')
    plt.tight_layout()
    plt.show()
def _granularity_value(granularities: dict[str, "Granularities"], image_filename: str, gt_name: str, which: str) -> float:
    """
    Return one annotator's normalised granularity for one image, or NaN when there is none.

    NaN is returned in two situations, both treated as "no data" for the fatigue plot rather than
    as errors: the image has no entry in `granularities` (its units were all singletons), or the
    selected table has no value for `gt_name` (the annotator did not contribute to any
    non-singleton unit on that image).

    Args:
        granularities (dict[str, Granularities]): Mapping from image filename to Granularities.
        image_filename (str): The image to look up.
        gt_name (str): The annotator (GT) whose value is wanted.
        which (str): "count" selects the count table; any other value selects the size table.

    Returns:
        float: The granularity value, or NaN if it is unavailable.
    """
    entry = granularities.get(image_filename)
    if entry is not None:
        # Anything other than "count" falls through to the size table.
        per_annotator = entry.size if which != "count" else entry.count
        return per_annotator.get(gt_name, float('nan'))
    return float('nan')