Source code for feathr.definition.feature_derivations

from typing import List, Optional, Union, Dict

from jinja2 import Template

from feathr.definition.dtype import FeatureType
from feathr.definition.feature import FeatureBase
from feathr.definition.transformation import RowTransformation
from feathr.definition.typed_key import DUMMY_KEY, TypedKey


class DerivedFeature(FeatureBase):
    """A derived feature is a feature defined on top of other features, rather than external data source.

    Attributes:
        name: derived feature name
        feature_type: type of derived feature
        key: All features with corresponding keys that this derived feature depends on
        input_features: features that this derived feature depends on
        transform: transformation that produces the derived feature value, based on the input_features
        registry_tags: A dict of (str, str) that you can pass to feature registry for better organization.
            For example, you can use {"deprecated": "true"} to indicate this feature is deprecated, etc.
    """

    def __init__(self,
                 name: str,
                 feature_type: FeatureType,
                 input_features: Union[FeatureBase, List[FeatureBase]],
                 transform: Union[str, RowTransformation],
                 key: Optional[Union[TypedKey, List[TypedKey]]] = [DUMMY_KEY],
                 registry_tags: Optional[Dict[str, str]] = None,
                 ):
        """Create a derived feature and validate it against its input features.

        Args:
            name: derived feature name.
            feature_type: type of the derived feature.
            input_features: a single feature or a list of features this
                derived feature is computed from; a single feature is wrapped
                into a one-element list.
            transform: transformation (or expression string) producing the
                derived value.
            key: key or list of keys of the derived feature.
            registry_tags: optional (str, str) tags forwarded to the base class.

        Raises:
            AssertionError: if a key alias of this feature is not found among
                the key aliases of the input features (see validate_feature).
        """
        # The default ``[DUMMY_KEY]`` list object is created once, when the
        # function is defined. Hand the base class a copy so that instances
        # never end up sharing (and possibly mutating) that single list.
        # The signature default itself is kept for backward compatibility.
        if isinstance(key, list):
            key = list(key)
        super().__init__(name, feature_type, key=key, transform=transform, registry_tags=registry_tags)
        self.input_features = input_features if isinstance(input_features, list) else [input_features]
        self.validate_feature()

    def validate_feature(self) -> None:
        """Validate the derived feature is valid.

        Every entry of ``self.key_alias`` must also appear in the combined
        ``key_alias`` lists of ``self.input_features``.

        Raises:
            AssertionError: if a key alias is not provided by any input feature.
        """
        # Validate key alias
        input_feature_key_alias = []
        for feature in self.input_features:
            input_feature_key_alias.extend(feature.key_alias)
        for key_alias in self.key_alias:
            if key_alias not in input_feature_key_alias:
                # Raised explicitly (instead of using ``assert``) so the check
                # is not stripped when Python runs with -O; the exception type
                # and message are the same as before.
                raise AssertionError(
                    "key alias {} in derived feature {} must come from "
                    "its input features key alias list {}".format(
                        key_alias, self.name, input_feature_key_alias))

    def to_feature_config(self) -> str:
        """Render this derived feature as a config text snippet.

        The snippet contains the feature name, its key aliases, one entry per
        input feature (alias, key aliases, name), the transform's config
        (``transform.to_feature_config(False)``) and the feature type's config.

        Returns:
            The rendered Jinja2 template as a string.
        """
        # NOTE(review): the line breaks/indentation inside this template were
        # reconstructed from a whitespace-collapsed listing; the tokens are
        # unchanged, but confirm the layout against the canonical file.
        tm = Template("""
            {{derived_feature.name}}: {
                key: [{{','.join(derived_feature.key_alias)}}]
                inputs: {
                    {% for feature in derived_feature.input_features %}
                        {{feature.feature_alias}}: {
                            key: [{{','.join(feature.key_alias)}}],
                            feature: {{feature.name}}
                        }
                    {% endfor %}
                }
                definition: {{derived_feature.transform.to_feature_config(False)}}
                {{derived_feature.feature_type.to_feature_config()}}
            }
        """)
        return tm.render(derived_feature=self)