# Source code for iaa_od.visualisation.show_fatigue_evaluation

from iaa_od import AnnotationTimestamp
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np


# [docs]
def show_fatigue_data_with_agreement(sessions: dict[str, dict[int, list[AnnotationTimestamp]]]) -> None:
    """
    Visualise the fatigue evaluation of annotators by plotting Alpha values against normalised
    timestamps for each GT.

    One line is drawn per annotation session, coloured by GT. A dashed red line shows the mean
    Alpha value across all sessions, computed on a common grid of 300 evenly spaced timestamps,
    to provide an overall trend of annotator fatigue over time. Sessions that contain no
    annotation timestamps are skipped, and the mean line is only drawn when at least one session
    contributed data.

    Args:
        sessions (dict[str, dict[int, list[AnnotationTimestamp]]]): A dictionary where keys are GT
            names and values are dictionaries mapping session IDs to lists of AnnotationTimestamp
            objects.
    """
    _, ax = plt.subplots()
    ax.set_xlabel('Normalised timestamps (s)')
    ax.set_ylabel('Alpha values')
    ax.set_title('Alpha values against normalised timestamps')

    n_gt = len(sessions)
    # pyplot.get_cmap instead of matplotlib.cm.get_cmap: the latter was deprecated in
    # matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap('tab20b', n_gt)
    colours = [cmap(i) for i in range(n_gt)]
    all_ts: list[list[int]] = []
    all_alphas: list[list[float]] = []

    for i, (gt_name, session_dict) in enumerate(sessions.items()):
        for annotation_timestamps in session_dict.values():
            # An empty session has nothing to draw and would make the min()/max() calls
            # used for the common grid below fail.
            if not annotation_timestamps:
                continue

            ts: list[int] = [annotation_timestamp.timestamp for annotation_timestamp in annotation_timestamps]
            alpha_values: list[float] = [annotation_timestamp.alpha_for_image for annotation_timestamp in annotation_timestamps]
            all_ts.append(ts)
            all_alphas.append(alpha_values)
            ax.plot(ts, alpha_values, marker='.', markersize=4, linestyle='-', label=gt_name, color=colours[i])

    # Mean trend across all sessions, projected onto a common timestamp grid.
    # NOTE(review): np.interp expects each session's timestamps in increasing order --
    # presumably they are chronological; verify against the caller.
    if all_ts:
        common_ts = np.linspace(min(map(min, all_ts)), max(map(max, all_ts)), 300)
        interpolated = [np.interp(common_ts, ts, alphas) for ts, alphas in zip(all_ts, all_alphas)]
        mean_alpha = np.nanmean(interpolated, axis=0)
        ax.plot(common_ts, mean_alpha, linestyle='--', color='red', linewidth=2, label='Mean')

    # One legend entry per GT (plus the mean); the last handle per label wins.
    handles, labels = ax.get_legend_handles_labels()
    unique = dict(zip(labels, handles))
    ax.legend(unique.values(), unique.keys())

    plt.tight_layout()
    plt.show()



# [docs]
def show_fatigue_data_with_granularity(sessions: dict[str, dict[int, list[AnnotationTimestamp]]],
                                       granularities: dict[str, "Granularities"]) -> None:
    """
    Plot per-image granularity over time as a fatigue signal, in place of Krippendorff's alpha.

    The figure has two stacked panels sharing the x axis: count granularity on top and size
    granularity below. Each annotation session is drawn as its own line, coloured by GT, and a
    dashed red line shows the mean across all sessions on a common grid of 300 timestamps. A
    light grey horizontal line marks 1.0, which under the dataset-wide benchmark normalisation
    used by `per_image_fatigue_granularities` is the benchmark (median) annotator. Values that
    drift away from 1.0 during a session indicate a change of granularity strategy (e.g. toward
    larger, group-level boxes as fatigue sets in: lower count granularity, higher size
    granularity).

    Args:
        sessions (dict[str, dict[int, list[AnnotationTimestamp]]]): A dictionary where keys are GT
            names and values are dictionaries mapping session IDs to lists of AnnotationTimestamp
            objects, as returned by `evaluate_annotator_fatigue`.
        granularities (dict[str, Granularities]): A mapping from image filename to that image's
            Granularities, as returned by `per_image_fatigue_granularities`.
    """
    # (lookup key, axis label) for each panel, top to bottom.
    panels = (("count", "Count granularity"), ("size", "Size granularity"))

    gt_total = len(sessions)
    colour_map = plt.get_cmap('tab20b', gt_total)
    palette = [colour_map(index) for index in range(gt_total)]

    fig, axes = plt.subplots(len(panels), 1, sharex=True, figsize=(8, 8))

    for panel, (table_name, axis_label) in zip(axes, panels):
        session_times: list[list[int]] = []
        session_values: list[list[float]] = []

        for gt_index, (gt_name, session_dict) in enumerate(sessions.items()):
            line_colour = palette[gt_index]

            for stamps in session_dict.values():
                # Keep only the images on which this annotator has a granularity value; NaN
                # marks a singleton-only image or an annotator with no non-singleton unit there.
                points = [
                    (stamp.timestamp, reading)
                    for stamp in stamps
                    if not np.isnan(
                        reading := _granularity_value(granularities, stamp.image_filename, gt_name, table_name)
                    )
                ]

                # A session whose points were all missing contributes no line.
                if not points:
                    continue

                times = [moment for moment, _ in points]
                readings = [reading for _, reading in points]

                session_times.append(times)
                session_values.append(readings)
                panel.plot(times, readings, marker='.', markersize=4, linestyle='-', label=gt_name, color=line_colour)

        # Benchmark reference (1.0 under benchmark normalisation), drawn behind the data.
        panel.axhline(1.0, color='lightgray', linewidth=1, zorder=0)
        panel.set_ylabel(f'{axis_label} (normalised)')

        # Average every session on a shared timestamp grid to obtain the overall trend.
        if session_times:
            earliest = min(min(times) for times in session_times)
            latest = max(max(times) for times in session_times)
            grid = np.linspace(earliest, latest, 300)
            resampled = [
                np.interp(grid, times, readings)
                for times, readings in zip(session_times, session_values)
            ]
            trend = np.nanmean(resampled, axis=0)
            panel.plot(grid, trend, linestyle='--', color='red', linewidth=2, label='Mean')

        # Collapse the legend to one entry per label; a later handle replaces an earlier one.
        legend_handles, legend_labels = panel.get_legend_handles_labels()
        entry_by_label = {}
        for legend_label, legend_handle in zip(legend_labels, legend_handles):
            entry_by_label[legend_label] = legend_handle
        panel.legend(entry_by_label.values(), entry_by_label.keys())

    axes[-1].set_xlabel('Normalised timestamps (s)')
    fig.suptitle('Per-image granularity against normalised timestamps')
    plt.tight_layout()
    plt.show()



def _granularity_value(granularities: dict[str, "Granularities"], image_filename: str, gt_name: str, which: str) -> float:
    """
    Fetch one annotator's normalised granularity for one image.

    The `count` table is consulted when `which` equals "count"; any other value of `which`
    selects the `size` table.

    NaN is returned in two situations, both treated as "no data" for the fatigue plot rather
    than as errors: the image has no entry in `granularities` (its units were all singletons),
    or the selected table has no entry for `gt_name` (the annotator did not contribute to any
    non-singleton unit on that image).
    """
    no_data = float('nan')

    entry = granularities.get(image_filename)
    if entry is not None:
        scores = entry.size if which != "count" else entry.count
        return scores.get(gt_name, no_data)

    return no_data