Source code for feathr.definition.anchor

from typing import List, Optional, Dict
from feathr.definition.feature import Feature
from feathr.definition.source import Source
from feathr.definition.typed_key import DUMMY_KEY
from jinja2 import Template
from feathr.definition.source import INPUT_CONTEXT
from feathr.definition.feathrconfig import HoconConvertible


class FeatureAnchor(HoconConvertible):
    """
    A feature anchor defines a set of features on top of a data source, a.k.a. a set of features anchored to a source.

    The feature producer writes multiple anchors for a feature, exposing the same feature name for the feature
    consumer to reference it.

    Attributes:
        name: Unique name of the anchor.
        source: data source that the features are anchored to. Should be either of `INPUT_CONTEXT` or
            `feathr.source.Source`
        features: list of features defined within this anchor.
        registry_tags: A dict of (str, str) that you can pass to feature registry for better organization.
            For example, you can use {"deprecated": "true"} to indicate this anchor is deprecated, etc.
    """

    def __init__(self,
                 name: str,
                 source: Source,
                 features: List[Feature],
                 registry_tags: Optional[Dict[str, str]] = None,
                 ):
        """Store the anchor definition and validate its features immediately.

        Raises:
            AssertionError: if `features` is empty.
            RuntimeError: if the source is not `INPUT_CONTEXT` and a feature has no explicit key.
        """
        self.name = name
        self.features = features
        self.source = source
        self.registry_tags = registry_tags
        self.validate_features()

    def validate_features(self) -> None:
        """Validate that the anchor is non-empty and that keyed sources have explicit feature keys.

        Note: this does not verify that all features share the same key, although
        `to_feature_config` renders only the first feature's `key_alias` for the whole anchor.

        Raises:
            AssertionError: if the anchor has no features.
            RuntimeError: if the source is not `INPUT_CONTEXT` and any feature's key is `[DUMMY_KEY]`.
        """
        # Raised explicitly (rather than via `assert`) so the check still runs under `python -O`;
        # the exception type is kept as AssertionError for backward compatibility.
        if not self.features:
            raise AssertionError(f"Anchor {self.name} must define at least one feature.")
        if self.source != INPUT_CONTEXT:
            for feature in self.features:
                if feature.key == [DUMMY_KEY]:
                    raise RuntimeError(f"For anchors of non-INPUT_CONTEXT source, key of feature {feature.name} "
                                       f"should be explicitly specified and not left blank.")

    def to_feature_config(self) -> str:
        """Render this anchor, its source name, key expression and features as a config snippet.

        Returns:
            The rendered config text. The key list is built from the first feature's `key_alias` only.
        """
        # Each field sits on its own line: the config format uses newlines to separate fields.
        tm = Template("""
            {{anchor_name}}: {
                source: {{source.name}}
                key.sqlExpr: [{{key_list}}]
                features: {
                    {% for feature in features %}
                        {{feature.to_feature_config()}}
                    {% endfor %}
                }
            }
        """)
        key_list = ','.join(self.features[0].key_alias)
        return tm.render(anchor_name=self.name,
                         key_list=key_list,
                         features=self.features,
                         source=self.source)

    def __str__(self) -> str:
        """Return the rendered config snippet for this anchor."""
        return self.to_feature_config()