# Source code for iaa_od.metrics.l_score

from iaa_od.metrics import alpha
from iaa_od.models import GroundTruthProtocol, KAlphaObservationsMatrix, LScore, Result
from iaa_od.models.constants import STD_IOU_THR
from typing import Optional
import numpy as np


# [docs] (link marker left over from the rendered source listing)
def l_score(gts: list[GroundTruthProtocol], /, *, iou_threshold: float = STD_IOU_THR, use_iom: bool = False, exclude_mismatched: bool = False, precomputed_result: Optional[Result] = None) -> LScore:
    """
    Computes the L-Score metric on a given dataset.
    The L-Score metric represents how much annotators agree on where there are objects,
    regardless of what labels they assigned to those objects.
    This is essentially Localization agreement, as opposed to Categorisation agreement.

    For every rater count ``i`` in ``2..len(gts)``, ``%Agr_i`` is the fraction of units
    whose collapsed (summed over all categories) observation count equals ``i``.
    The score is a normalised, weighted sum of those fractions; unless
    ``exclude_mismatched`` is set, the fraction of mismatched units is also taken into account.

    Parameters:
        gts (list[GroundTruthProtocol]): List of Ground Truths to compare. Its length is used as the number of raters.
        iou_threshold (float): The IoU threshold to use for matching. Defaults to STD_IOU_THR.
            Ignored when ``precomputed_result`` is given (its ``iou_thr`` is used instead).
        use_iom (bool): Whether to use IoM matching. Defaults to False.
            Ignored when ``precomputed_result`` is given (its ``iom`` is used instead).
        exclude_mismatched (bool): Whether to exclude mismatched units from the L-Score calculation. Defaults to False.
        precomputed_result (Optional[Result]): An optional precomputed Result object from a previous Alpha computation
            to reuse its observations matrices and units. If any of those is missing, Alpha is recomputed
            on ``gts`` with the Result's ``iou_thr`` and ``iom``. Defaults to None.

    Returns:
        LScore: The computed L-Score object, holding the score, the per-rater-count ``%Agr_i`` values,
        the mismatched percentage and the ``exclude_mismatched`` flag.

    Raises:
        ValueError: If the observations are still missing after computing Alpha, if there are no
            units to score, or if ``exclude_mismatched`` is set with a single rater.
    """
    # Absolute tolerance for the floating-point sanity checks at the end.
    tolerance: float = 1e-9

    iou_thr: float = iou_threshold
    iom: bool = use_iom
    observations: Optional[KAlphaObservationsMatrix] = None
    unfiltered_observations: Optional[KAlphaObservationsMatrix] = None
    n_units: int = -1

    # Reuse whatever a previous Alpha computation already provides
    if precomputed_result is not None:
        iou_thr = precomputed_result.iou_thr
        iom = precomputed_result.iom
        observations = precomputed_result.observations
        unfiltered_observations = precomputed_result.unfiltered_observations

        if precomputed_result.units:
            n_units = len(precomputed_result.units)

    # If any required piece is missing, compute Alpha on the dataset with the preset parameters
    if observations is None or unfiltered_observations is None or n_units == -1:
        result: Result = alpha(
            gts,
            iou_threshold=iou_thr,
            use_iom=iom
        )

        observations = result.observations
        unfiltered_observations = result.unfiltered_observations
        n_units = len(result.units)

    if unfiltered_observations is None:
        raise ValueError("Something very wrong happened: unfiltered_observations is None despite computing Alpha.")
    if observations is None:
        raise ValueError("Something very wrong happened: observations is None despite computing Alpha.")
    if n_units == 0:
        # Every percentage below divides by n_units; fail with a clear message instead of ZeroDivisionError.
        raise ValueError("Cannot compute the L-Score: there are no units to score.")

    # A count of 0 is a legitimate value here (no mismatched units); only a missing value is an error.
    if observations.n_filtered_units is None:
        raise ValueError("Something very wrong happened: n_filtered_units is None despite computing Alpha.")
    mismatched_units: int = observations.n_filtered_units

    # Collapse all categories into one: sum the matrix over its first axis, one value per unit column
    unfiltered_obs_matrix: np.ndarray = unfiltered_observations.observations_matrix
    collapsed_observations: np.ndarray = np.sum(unfiltered_obs_matrix, axis=0, keepdims=True)
    raters_per_unit: np.ndarray = collapsed_observations[0, :n_units]

    # Compute %Agr_i for each possible number of agreeing raters (2..n_raters)
    n_raters: int = len(gts)
    agr_i_values: dict[int, float] = {
        rater_count: float(np.count_nonzero(raters_per_unit == rater_count)) / float(n_units)
        for rater_count in range(2, n_raters + 1)
    }

    # Compute L-Score
    normalisation_value: float
    if exclude_mismatched:
        if n_raters == 1:
            # n_raters / (n_raters - 1) is undefined for a single rater.
            raise ValueError("Cannot compute the L-Score with exclude_mismatched=True for a single rater.")
        normalisation_value = n_raters / (n_raters - 1)
    else:
        normalisation_value = n_raters / (2 * n_raters - 1)

    weighted_agreement: float = sum(
        (
            agr_i_val * float(rater_count - 1) / float(n_raters)
            for rater_count, agr_i_val in agr_i_values.items()
        ),
        0.0,
    )

    mismatched_percentage: float = float(mismatched_units) / float(n_units)

    l_score_value: float
    if exclude_mismatched:
        l_score_value = normalisation_value * weighted_agreement
    else:
        l_score_value = normalisation_value * (weighted_agreement - mismatched_percentage + 1.0)

    # Sanity checks. These are internal invariants; they are compared with a tolerance
    # because the fractions are floats and exact equality can fail on rounding alone.
    agr_i_sum: float = sum(agr_i_values.values(), 0.0)

    assert abs(agr_i_sum + mismatched_percentage - 1.0) <= tolerance, \
        "%Agr_i values and mismatched percentage do not add up to 1."

    assert -tolerance <= l_score_value <= 1.0 + tolerance, \
        "L-Score is outside the [0, 1] range."

    # Create LScore object and return it
    lscore: LScore = LScore(
        l_score=l_score_value,
        agr_i_values=agr_i_values,
        mismatched_percentage=mismatched_percentage,
        exclude_mismatched=exclude_mismatched
    )

    return lscore