# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Author: Markus Ritschel
# eMail: git@markusritschel.de
# Date: 2024-03-26
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
from abc import ABC, abstractmethod
from functools import wraps
import logging

import pandas as pd
import xarray as xr


log = logging.getLogger(__name__)


class _ColumnGroupBy:
    """Proxy around a transposed DataFrameGroupBy that transposes results back.

    Column-wise groupby semantics are preserved without ``axis=1``: the
    wrapped groupby operates on the transposed frame, and every
    :class:`pandas.DataFrame` handed back to the caller (while iterating or
    as the result of a delegated method call) is transposed again.

    Parameters
    ----------
    groupby
        The groupby object to wrap. Its attributes are exposed unchanged,
        except that callables are wrapped as described above.
    """

    def __init__(self, groupby):
        self._groupby = groupby

    def __len__(self):
        """Return the length reported by the wrapped groupby."""
        return len(self._groupby)

    def __iter__(self):
        """Yield ``(key, group)`` pairs with every group transposed back."""
        for key, group in self._groupby:
            yield key, group.T

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped groupby.

        Non-callable attributes are returned as they are. Callables are
        wrapped so that a :class:`pandas.DataFrame` result is transposed
        before it is returned; any other result is passed through untouched.

        Raises
        ------
        AttributeError
            If ``_groupby`` itself is requested, i.e. the wrapped object has
            not been set on this instance.
        """
        # __getattr__ only runs after normal lookup failed. Landing here for
        # ``_groupby`` means the instance was created without __init__ (e.g.
        # via __new__ during copy/unpickle); delegating would then look up
        # ``self._groupby`` again and recurse until RecursionError.
        if name == "_groupby":
            raise AttributeError(name)

        attr = getattr(self._groupby, name)
        if not callable(attr):
            return attr

        @wraps(attr)
        def wrapper(*args, **kwargs):
            result = attr(*args, **kwargs)
            if isinstance(result, pd.DataFrame):
                return result.T
            return result

        return wrapper
class EnsembleAccessor(ABC):
    """Abstract base class for accessors that group ensemble members.

    The accessor wraps a data object and stores its state in that object's
    ``attrs`` mapping: the key template under ``'_ens_key_template'`` and the
    member key table under ``'_ens_member_keys'``. Subclasses implement
    :meth:`_init_member_keys` to derive the member key table from the
    wrapped object.
    """

    def __init__(self, data_obj) -> None:
        super().__init__()
        self._obj = data_obj

    @property
    def key_template(self):
        """Return the key template.

        Raises
        ------
        KeyError
            If no (non-empty) template is stored on the wrapped object.
        """
        template = self._obj.attrs.get('_ens_key_template')
        if not template:
            raise KeyError("key_template not set. Make sure the attributes of the "
                           "'member' coordinate comprise a 'key_template' value. "
                           "You can set this via ds.ens.key_template = 'your.template'.")
        return template

    @key_template.setter
    def key_template(self, template_string):
        """Validate and store a dot-separated key template.

        Raises
        ------
        ValueError
            If the template contains no dot or if one of its elements is
            named ``member``.
        """
        if '.' not in template_string:
            raise ValueError("Elements must be divided by a dot (.).")
        if 'member' in template_string.split('.'):
            raise ValueError("key_template must not contain 'member'! "
                             "Please choose a different identifier.")
        self._obj.attrs['_ens_key_template'] = template_string

    @property
    def member_keys(self):
        """Return the stored member key table, or ``None`` if there is none."""
        return self._obj.attrs.get('_ens_member_keys')

    @member_keys.setter
    def member_keys(self, value):
        self._obj.attrs['_ens_member_keys'] = value

    @abstractmethod
    def _init_member_keys(self):
        """Derive and store the member key table for the wrapped object."""
        ...

    def _set_member_keys(self, member_values):
        """Build the member key table from `member_values` and store it.

        Raises
        ------
        ValueError
            If the member values fail :meth:`_verify_member_keys` or if their
            number of key elements differs from that of the key template.
        KeyError
            If no key template is set.
        """
        self._verify_member_keys(member_values)
        template = self.key_template
        template_elements = template.split('.')

        # After verification the input is non-empty and every entry has the
        # same number of elements, so the first entry is representative.
        number_of_keys = len(next(iter(member_values)).split('.'))
        if number_of_keys != len(template_elements):
            # Fail early with a readable message; otherwise the mismatch only
            # surfaces as a generic length-mismatch error while labelling the
            # columns of the mapping table.
            raise ValueError(
                f"Member keys consist of {number_of_keys} element(s) but the "
                f"key_template '{template}' defines {len(template_elements)}."
            )

        self.member_keys = _build_member_mapping_table(member_values,
                                                       template_elements)

    def _verify_member_keys(self, member_values):
        """Check that all member values share the same dot-separated pattern.

        A warning is logged (no exception) when member values are duplicated.

        Raises
        ------
        ValueError
            If the member values do not all consist of the same number of
            dot-separated elements (this includes an empty input).
        """
        key_counts = {len(value.split('.')) for value in member_values}
        if len(key_counts) != 1:
            raise ValueError("Column keys must show the same pattern. "
                             "Not all column names have the same number of keys.")

        if len(set(member_values)) != len(member_values):
            log.warning("Member IDs are not all different!")

    def groupby(self, key):
        """Group the object by a member key.

        The member keys are (re-)initialized via :meth:`_init_member_keys`
        before the entry selected by `key` is passed to the ``groupby``
        method of the wrapped object.
        """
        self._init_member_keys()
        return self._obj.groupby(self.member_keys[key])
def _build_member_mapping_table(member_values, member_id_elements):
    """Create a mapping table for member keys.

    Each member name follows the same format, e.g.
    `source_id.member_id.grid_label`. The mapping represents a table with
    each key of `(source_id, member_id, grid_label)` as a column.

    Parameters
    ----------
    member_values
        The member names, for example::

            array(['ACCESS-CM2.r1i1p1f1.gn',
                   'ACCESS-CM2.r2i1p1f1.gn',
                   'ACCESS-CM2.r3i1p1f1.gn', ...])

    member_id_elements
        The column names for the split elements, e.g.
        ``['source_id', 'member_id', 'grid_label']``.

    Returns
    -------
    pandas.DataFrame
        One row per member, indexed by the member names (index name
        ``"member"``), with one column per entry of `member_id_elements`.
    """
    member_table = pd.Series(member_values).str.split('.', expand=True)
    # Build a fresh Index instead of assigning `member_values` and renaming
    # it afterwards: if the caller passed a pandas Index, the frame would
    # share that object and setting the name would rename the caller's index.
    member_table.index = pd.Index(member_values, name="member")
    member_table.columns = member_id_elements
    return member_table
@xr.register_dataset_accessor("ens")
class XarrayEnsembleAccessor(EnsembleAccessor):
    """An :class:`xarray.Dataset` accessor, registered as ``ens``, supporting
    the grouping of ensemble members by model id and similar.

    The dataset needs a `member` coordinate whose entries are dot-separated
    keys. The names of these keys are taken from the accessor's
    `key_template` (e.g. 'source_id.member_id.grid_label'), which must follow
    the structure of the entries of the 'member' coordinate.
    """

    def _init_member_keys(self):
        """Build the member key table from the 'member' coordinate.

        The table is created from the coordinate's values and afterwards
        replaced by its ``to_xarray()`` conversion.

        Raises
        ------
        AttributeError
            If the dataset has no 'member' coordinate.
        """
        dataset = self._obj
        if 'member' not in dataset.coords:
            raise AttributeError("No coordinate 'member' found in xarray object.")

        self._set_member_keys(dataset.member.values)
        mapping_table = self.member_keys
        self.member_keys = mapping_table.to_xarray()