11from collections import Counter
22from collections .abc import Mapping , Sequence
3- from typing import TypeVar
3+ from typing import Literal , TypeVar
44
55import numpy as np
66import pandas as pd
@@ -38,7 +38,56 @@ def _maybe_coerce_to_boolean(df: T) -> T:
3838 return df
3939
4040
41- def _classify_attr_columns (names : Mapping [str , Sequence [str ]]) -> dict [str , list [dict [str , str ]]]:
41+ class MetadataColumn :
42+ __slots__ = ("prefix" , "derived_name" , "count" , "_allowed_prefixes" )
43+
44+ def __init__ (
45+ self ,
46+ * ,
47+ allowed_prefixes : Sequence [str ],
48+ prefix : str | None = None ,
49+ name : str | None = None ,
50+ count : int = 0 ,
51+ ):
52+ self ._allowed_prefixes = allowed_prefixes
53+ if prefix is None :
54+ self .name = name
55+ else :
56+ self .prefix = prefix
57+ self .derived_name = name
58+ self .count = count
59+
60+ @property
61+ def name (self ) -> str :
62+ if self .prefix is not None :
63+ return f"{ self .prefix } :{ self .derived_name } "
64+ else :
65+ return self .derived_name
66+
67+ @name .setter
68+ def name (self , new_name ):
69+ if (
70+ len (name_split := new_name .split (":" , 1 )) < 2
71+ or name_split [0 ] not in self ._allowed_prefixes
72+ ):
73+ self .prefix = None
74+ self .derived_name = new_name
75+ else :
76+ self .prefix , self .derived_name = name_split
77+
78+ @property
79+ def klass (self ) -> Literal ["common" , "unique" , "nonunique" , "unknown" ]:
80+ if self .prefix is None or self .count == len (self ._allowed_prefixes ):
81+ return "common"
82+ elif self .count == 1 :
83+ return "unique"
84+ elif self .count > 0 :
85+ return "nonunique"
86+ else :
87+ return "unknown"
88+
89+
def _classify_attr_columns(names: Mapping[str, Sequence[str]]) -> dict[str, list[MetadataColumn]]:
    """
    Classify names into common, non-unique, and unique
    with respect to the list of prefixes.

    Every name listed under a prefix becomes a ``MetadataColumn`` with that
    prefix and with ``count`` set to the number of times the same derived name
    was listed across all prefixes. The column's ``klass`` then reports:

    - "common" when the name is listed under every prefix,
    - "unique" when it is listed exactly once,
    - "nonunique" otherwise.

    Parameters
    ----------
    names
        Mapping from prefix to the (unprefixed) column names listed under it.

    Returns
    -------
    A dict with the same keys as ``names``; each value is the list of
    ``MetadataColumn`` objects for that prefix, in input order.
    """
    # Same keys view for every column; hoisted out of the loops.
    prefixes = names.keys()
    res: dict[str, list[MetadataColumn]] = {}

    derived_name_counts: Counter[str] = Counter()
    for prefix, pnames in names.items():
        columns = []
        for name in pnames:
            columns.append(MetadataColumn(allowed_prefixes=prefixes, prefix=prefix, name=name))
            derived_name_counts[name] += 1
        res[prefix] = columns

    # Counts are only final once every prefix has been seen, hence the second pass.
    for columns in res.values():
        for column in columns:
            column.count = derived_name_counts[column.derived_name]

    return res
121119
0 commit comments