Source code for feathr.feature

from abc import ABC, abstractmethod
from copy import deepcopy
from typing import List, Optional, Union, Dict

from jinja2 import Template

from feathr.dtype import FeatureType
from feathr.transformation import ExpressionTransformation, Transformation, WindowAggTransformation
from feathr.typed_key import DUMMY_KEY, TypedKey
from feathr.frameconfig import HoconConvertible

class FeatureBase(HoconConvertible):
    """The base class for features.

    It has a feature name, feature type, and a convenient transformation used
    to produce its feature value.

    Attributes:
        name: Unique name of the feature. Only alphabet, numbers, and '_' are
            allowed in the name. It can not start with numbers. Note that '.'
            is NOT ALLOWED!
        feature_type: The feature value type, e.g. INT32, FLOAT, etc. See
            `feathr.dtype`.
        key: The key(s) of this feature, e.g. user_id. Always stored as a list
            of `TypedKey`.
        transform: A transformation used to produce its feature value,
            e.g. amount * 10.
        registry_tags: A dict of (str, str) that you can pass to feature
            registry for better organization. For example, you can use
            {"deprecated": "true"} to indicate this feature is deprecated, etc.
        feature_alias: Name used to reference this feature in a transformation
            expression. Defaults to the feature name.
        key_alias: Alias of each key, in the same order as `key`. Defaults to
            each key's `key_column_alias`.
    """

    def __init__(self,
                 name: str,
                 feature_type: FeatureType,
                 transform: Optional[Union[str, Transformation]] = None,
                 key: Optional[Union[TypedKey, List[TypedKey]]] = [DUMMY_KEY],
                 registry_tags: Optional[Dict[str, str]] = None,
                 ):
        """Create a feature.

        Args:
            name: Unique name of the feature.
            feature_type: The feature value type.
            transform: Either a `Transformation`, or an expression string that
                is wrapped in an `ExpressionTransformation`. When omitted, the
                feature references the field that has the same name as the
                feature.
            key: A single `TypedKey` or a list of them. `None` is treated like
                the default, i.e. `[DUMMY_KEY]`.
            registry_tags: Optional tags forwarded to the feature registry.
        """
        self.name = name
        self.feature_type = feature_type
        self.registry_tags = registry_tags

        # Never store the caller's (or the shared default) list object itself:
        # the default `[DUMMY_KEY]` is created once at function definition
        # time, so keeping a reference to it would make every feature built
        # with the default share one list.
        if key is None:
            self.key = [DUMMY_KEY]
        elif isinstance(key, list):
            self.key = list(key)
        else:
            self.key = [key]

        # feature_alias: Rename the derived feature to `feature_alias`.
        # Default to feature name.
        self.feature_alias = name

        # If no transformation is specified, default to referencing the field
        # with the same name.
        if transform is None:
            self.transform = ExpressionTransformation(name)
        elif isinstance(transform, str):
            self.transform = ExpressionTransformation(transform)
        else:
            self.transform = transform

        # An alias for the key in this feature. Default to its key column
        # alias. Useful in derived features.
        self.key_alias = [k.key_column_alias for k in self.key]

    def with_key(self, key_alias: Union[str, List[str]]) -> "FeatureBase":
        """Rename the feature key with the alias.

        This is useful in derived features that depend on the same feature
        with different keys. The feature itself is not modified.

        Args:
            key_alias: One alias (a string) or a list of aliases, one per key
                of this feature and in the same order.

        Returns:
            A deep copy of this feature whose keys carry the new aliases.

        Raises:
            AssertionError: If the number of aliases differs from the number
                of keys.
        """
        cleaned_key_alias = [key_alias] if isinstance(key_alias, str) else list(key_alias)

        # Explicit check instead of an `assert` statement so the validation is
        # still performed when Python runs with -O. The exception type is kept
        # as AssertionError for callers that relied on the previous behavior.
        if len(cleaned_key_alias) != len(self.key):
            raise AssertionError(
                f"Expected {len(self.key)} key alias(es) for feature "
                f"'{self.name}', got {len(cleaned_key_alias)}."
            )

        new_key = []
        for original_key, alias in zip(self.key, cleaned_key_alias):
            # Copy each key so the keys of this feature keep their own alias.
            typed_key = deepcopy(original_key)
            typed_key.key_column_alias = alias
            new_key.append(typed_key)

        res = deepcopy(self)
        res.key = new_key
        res.key_alias = cleaned_key_alias
        return res

    def as_feature(self, feature_alias: str) -> "FeatureBase":
        """Provide the feature a different alias.

        The alias can be used to reference the feature in a transformation
        expression. This is useful in derived features that depend on the same
        feature with different keys. The feature itself is not modified.

        Args:
            feature_alias: The new alias of the feature.

        Returns:
            A deep copy of this feature with `feature_alias` set to the alias.
        """
        new_feature = deepcopy(self)
        new_feature.feature_alias = feature_alias
        return new_feature
class Feature(FeatureBase):
    """A feature is an individual measurable property or characteristic of an entity.

    It has a feature name, feature type, and a convenient row transformation
    used to produce its feature value.

    Attributes:
        name: Unique name of the feature. Only alphabet, numbers, and '_' are
            allowed in the name. It can not start with numbers. Note that '.'
            is NOT ALLOWED!
        feature_type: The feature value type, e.g. INT32, FLOAT, etc. Should be
            part of `feathr.dtype`.
        key: The key of this feature, e.g. user_id.
        transform: A row transformation used to produce its feature value,
            e.g. amount * 10.
        registry_tags: A dict of (str, str) that you can pass to feature
            registry for better organization. For example, you can use
            {"deprecated": "true"} to indicate this feature is deprecated, etc.
    """

    def __init__(self,
                 name: str,
                 feature_type: FeatureType,
                 key: Optional[Union[TypedKey, List[TypedKey]]] = [DUMMY_KEY],
                 transform: Optional[Union[str, Transformation]] = None,
                 registry_tags: Optional[Dict[str, str]] = None,
                 ):
        """Create a feature.

        Note that `key` comes before `transform` here, while the base class
        constructor takes them in the opposite order.

        Args:
            name: Unique name of the feature.
            feature_type: The feature value type.
            key: A single `TypedKey` or a list of them. `None` is treated like
                the default, i.e. `[DUMMY_KEY]`.
            transform: A `Transformation` or an expression string.
            registry_tags: Optional tags forwarded to the feature registry.
        """
        # The default `[DUMMY_KEY]` is a single list created at function
        # definition time. Hand the base class a private copy so features
        # never end up sharing (and mutating) the same list object.
        if key is None:
            key = [DUMMY_KEY]
        elif isinstance(key, list):
            key = list(key)
        super().__init__(name, feature_type, transform, key, registry_tags)

    def to_feature_config(self) -> str:
        """Render this feature as a feature-config snippet.

        Returns:
            A string of the form `<name>: { <transform config> <type config> }`
            where the inner parts are produced by the `to_feature_config()`
            methods of `transform` and `feature_type`.
        """
        # Each rendered fragment is placed on its own line inside the braces.
        tm = Template("""
            {{feature.name}}: {
                {{feature.transform.to_feature_config()}}
                {{feature.feature_type.to_feature_config()}}
            }
        """)
        return tm.render(feature=self)