Source code for iaa_od.metrics.c_score

from iaa_od.models import CScore, Result, STD_IOM_THR, AnnotationProtocol
from iaa_od.utils import find_all_contained_bboxes
from copy import deepcopy
import numpy as np


[docs]
def c_score(result: Result, /, *, iom_thr: float = STD_IOM_THR) -> CScore:
    """
    Compute the C-Score metric for a given dataset.

    C-Score represents the agreement of annotators on categories, relaxing the spatial constraint given by object detection tasks.

    For every unit, the unit's biggest annotation and every annotation of the same image returned by
    ``find_all_contained_bboxes`` for it are tallied in an (annotator x category) count matrix. Each category
    that at least one annotator selected contributes ``n_selecting * min_count / total_count``, where
    ``n_selecting`` is the number of annotators with a positive count, ``min_count`` is the smallest positive
    count (forced to zero when only one annotator selected the category) and ``total_count`` is the column sum.
    The unit score is the mean of these contributions and the dataset score is the mean of the unit scores.

    Parameters:
        result (Result): The Result object containing all the information about the dataset and its annotations.
        iom_thr (float, optional): The Intersection over Minimum threshold forwarded to ``find_all_contained_bboxes`` for the grouping of bounding boxes.

    Returns:
        CScore: The computed C-Score metric for the dataset and for each individual unit (keyed by unit id).

    Raises:
        ZeroDivisionError: If ``result.units`` is empty, because the dataset score is an average over the units.
        ValueError: If a unit has an empty ``annotations`` collection (raised by ``max``).
    """
    # Group a private copy of every GT annotation by image. Images without annotations get no entry.
    annotations_by_image: dict[str, list[AnnotationProtocol]] = {}
    for gt in result.gts:
        for img_filename, annotations in gt.annotations.items():
            if len(annotations) > 0:
                annotations_by_image.setdefault(img_filename, []).extend(deepcopy(annotations))

    # Row of each annotator (GT) in the per-unit count matrix. Rows are only ever reduced column-wise
    # (min / sum), so the order of the rows has no influence on the score and no sorting is required.
    gt_row: dict[str, int] = {gt.name: idx for idx, gt in enumerate(result.gts)}

    # Per-unit agreement values, keyed by unit id
    per_unit_scores: dict[int, float] = {}

    for unit in result.units:
        # One row per annotator, one column per category of the first GT.
        counts: np.ndarray = np.zeros((len(result.gts), len(result.gts[0].categories_dict)), dtype=int)

        # The biggest annotation of the unit is the reference box...
        biggest: AnnotationProtocol = max(unit.annotations, key=lambda ann: ann.bbox_coords.area)

        # ... against which all annotations of the same image are grouped.
        contained: list[AnnotationProtocol] = find_all_contained_bboxes(
            biggest, annotations_by_image[unit.img_filename], iom_thr=iom_thr
        )

        # Tally the reference box first, then the contained ones.
        # NOTE(review): `category_id - 1` assumes category ids map onto matrix columns with an offset of one — confirm.
        for ann in (biggest, *contained):
            counts[gt_row[ann.gt_name], ann.category_id - 1] += 1

        # Walk the matrix column by column (i.e. category by category, in column order).
        category_scores: list[float] = []
        for column in counts.T:
            # Keep only the annotators who selected this category at least once
            selected: np.ndarray = column[column > 0]
            n_selecting: int = selected.size

            # Categories nobody selected do not take part in the average
            if n_selecting == 0:
                continue

            # A category selected by a single annotator counts as a full disagreement
            min_count: int = int(selected.min()) if n_selecting > 1 else 0

            # Strictly positive here, since at least one annotator has a positive count
            total_count: int = int(column.sum())

            # Raw agreement (min / sum), normalised by the number of annotators who selected the category
            category_scores.append(float(min_count) / float(total_count) * n_selecting)

        # Sanity check
        assert all(0.0 <= score <= 1.0 for score in category_scores)

        # The unit score is the mean over the categories that were selected at least once
        per_unit_scores[unit.id] = sum(category_scores) / float(len(category_scores))

    # The dataset score is the mean over all units
    c_score_value: float = sum(per_unit_scores.values()) / float(len(per_unit_scores))

    # Sanity check
    assert 0.0 <= c_score_value <= 1.0

    return CScore(c_score=c_score_value, per_unit_c_scores=per_unit_scores)