from typing import Optional
from jinja2 import Template
from loguru import logger
from feathr.definition.feathrconfig import HoconConvertible
class ObservationSettings(HoconConvertible):
    """Time settings of the observation data. Used in feature join.

    Attributes:
        observation_path (str): Path to the observation dataset, i.e. the input
            dataset that the features are joined onto.
        event_timestamp_column (Optional[str]): The timestamp field of your
            record. Sliding window aggregation features assume each record in
            the source data has a timestamp column. When this is None, no join
            time settings are emitted by `to_feature_config`.
        timestamp_format (str, optional): The format of the timestamp field.
            Defaults to "epoch". Possible values are:
            - `epoch` (seconds since epoch), for example `1647737463`
            - `epoch_millis` (milliseconds since epoch), for example `1647737517761`
            - Any date formats supported by [SimpleDateFormat](https://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html).
    """

    def __init__(self,
                 observation_path: str,
                 event_timestamp_column: Optional[str] = None,
                 timestamp_format: str = "epoch") -> None:
        """Store the observation settings and warn about http(s) paths.

        Args:
            observation_path: Path to the observation dataset.
            event_timestamp_column: Name of the timestamp column, or None to
                omit the join time settings from the generated config.
            timestamp_format: Format of the timestamp column; defaults to
                "epoch".
        """
        self.event_timestamp_column = event_timestamp_column
        self.timestamp_format = timestamp_format
        self.observation_path = observation_path
        # Only a warning, not an error: the settings object is still created
        # so the caller can decide how to proceed.
        if observation_path.startswith("http"):
            logger.warning("Your observation_path {} starts with http, which is not supported. Consider using paths starting with wasb[s]/abfs[s]/s3.", observation_path)

    def to_feature_config(self) -> str:
        """Render these settings as a feature-join config snippet.

        The `settings.joinTimeSettings.timestampColumn` section is emitted only
        when `event_timestamp_column` is not None; `observationPath` is always
        emitted.

        Returns:
            The rendered config text.
        """
        # NOTE: the template text is runtime output and is kept verbatim
        # (flush-left) so the rendered string is unchanged. Values are
        # interpolated as-is, without any quoting/escaping.
        tm = Template("""
{% if setting.event_timestamp_column is not none %}
settings: {
joinTimeSettings: {
timestampColumn: {
def: "{{setting.event_timestamp_column}}"
format: "{{setting.timestamp_format}}"
}
}
}
{% endif %}
observationPath: "{{setting.observation_path}}"
""")
        return tm.render(setting=self)