from dataclasses import dataclass
# NOTE: a stray "[docs]" marker (documentation-viewer link residue, not Python source) was removed here.
@dataclass(slots=True, kw_only=True)
class Granularities:
"""
Class to hold Count and Size Granularity values for inter-annotator agreement analysis.
Attributes:
count (dict[str, float]): Dictionary mapping annotator names to their Count Granularity values.
count_benchmark (list[str]): List of annotator names that are benchmarks for Count Granularity.
count_median (float): Median Count Granularity value across all annotators (used to identify benchmarks).
size (dict[str, float]): Dictionary mapping annotator names to their Size Granularity values.
size_benchmark (list[str]): List of annotator names that are benchmarks for Size Granularity.
size_median (float): Median Size Granularity value across all annotators (used to identify benchmarks).
true_lone_units (int): The number of true lone units identified in the analysis.
"""
# Count granularity values
count: dict[str, float]
count_benchmark: list[str]
count_median: float
# Size granularity values
size: dict[str, float]
size_benchmark: list[str]
size_median: float
# Other values
singletons: int
[docs]
def brief(self) -> str:
"""
Returns a brief string representation of the granularities.
Returns:
str: A brief string representation of the granularities, including annotator names, benchmark indicators, and granularity values with deltas from benchmarks.
"""
s = "Granularities:\n"
s += "Ann\tBmrk\tCG\tSG\tdCG\tdSG\n"
for annotator in self.count.keys():
cg = self.count[annotator]
sg = self.size[annotator]
s += f"{annotator}\t"
if annotator in self.count_benchmark and annotator in self.size_benchmark:
s += f"(C+S)"
elif annotator in self.count_benchmark:
s += f"(C)"
elif annotator in self.size_benchmark:
s += f"(S)"
s += f"\t{cg:.4f}\t{sg:.4f}\t{cg - self.count[self.count_benchmark[0]]:.4f}\t{sg - self.size[self.size_benchmark[0]]:.4f}\n"
return s
def __str__(self) -> str:
s = "Granularities:\n"
s += "Ann\tCG\tSG\tdCG\tdSG\n"
for annotator in self.count.keys():
cg = self.count[annotator]
sg = self.size[annotator]
s += f"{annotator}\t{cg:.4f}\t{sg:.4f}\t{cg - self.count[self.count_benchmark[0]]:.4f}\t{sg - self.size[self.size_benchmark[0]]:.4f}\n"
s += "\nBenchmarks:\n"
s += "Count Granularity\t"
for benchmark in self.count_benchmark:
s += f"{benchmark}\t"
s += f"(Median: {self.count_median:.4f})\n"
s += "Size Granularity\t"
for benchmark in self.size_benchmark:
s += f"{benchmark}\t"
s += f"(Median: {self.size_median:.4f})\n"
s += f"\nSingletons: {self.singletons}\n"
return s