Source code for gluonts.transform.feature

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from typing import List

import numpy as np
import pandas as pd

from gluonts.core.component import validated, DType
from gluonts.dataset.common import DataEntry
from gluonts.time_feature import TimeFeature

from ._base import SimpleTransformation, MapTransformation
from .split import shift_timestamp


def target_transformation_length(
    target: np.array, pred_length: int, is_train: bool
) -> int:
    return target.shape[-1] + (0 if is_train else pred_length)
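
# Illustrative sketch of the helper above: during training the feature length
# equals the target length; at prediction time it is extended by pred_length.
_example_target = np.arange(5, dtype=np.float32)
assert target_transformation_length(_example_target, 2, is_train=True) == 5
assert target_transformation_length(_example_target, 2, is_train=False) == 7
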

class AddObservedValuesIndicator(SimpleTransformation):
    """
    Replaces missing values in a numpy array (NaNs) with a dummy value and
    adds an "observed"-indicator that is ``1`` when values are observed and
    ``0`` when values are missing.

    Parameters
    ----------
    target_field
        Field for which missing values will be replaced.
    output_field
        Field name to use for the indicator.
    dummy_value
        Value to use for replacing missing values.
    convert_nans
        If set to true (default) missing values will be replaced. Otherwise
        they will not be replaced. In any case the indicator is included in
        the result.
    dtype
        Numpy dtype to use for the indicator array.
    """

    @validated()
    def __init__(
        self,
        target_field: str,
        output_field: str,
        dummy_value: float = 0.0,
        convert_nans: bool = True,
        dtype: DType = np.float32,
    ) -> None:
        self.dummy_value = dummy_value
        self.target_field = target_field
        self.output_field = output_field
        self.convert_nans = convert_nans
        self.dtype = dtype

    def transform(self, data: DataEntry) -> DataEntry:
        value = data[self.target_field]
        nan_indices = np.where(np.isnan(value))
        nan_entries = np.isnan(value)

        if self.convert_nans:
            value[nan_indices] = self.dummy_value

        data[self.target_field] = value
        # Invert bool array so that missing values are zeros and store as float
        data[self.output_field] = np.invert(nan_entries).astype(self.dtype)
        return data
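
# Usage sketch (field names are illustrative choices): the NaN in "target" is
# replaced by the dummy value and a mask of observed entries is stored under
# "observed_values".
_indicator = AddObservedValuesIndicator(
    target_field="target", output_field="observed_values"
)
_entry_obs = _indicator.transform(
    {"target": np.array([1.0, np.nan, 3.0], dtype=np.float32)}
)
# _entry_obs["target"]          -> array([1., 0., 3.], dtype=float32)
# _entry_obs["observed_values"] -> array([1., 0., 1.], dtype=float32)
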

class AddConstFeature(MapTransformation):
    """
    Expands a `const` value along the time axis as a dynamic feature, where
    the T-dimension is defined as the sum of the `pred_length` parameter and
    the length of the time series specified by the `target_field`.

    If `is_train=True` the feature matrix has the same length as the `target`
    field. If `is_train=False` the feature matrix has length
    len(target) + pred_length.

    Parameters
    ----------
    output_field
        Field name for output.
    target_field
        Field containing the target array. The length of this array will be
        used.
    pred_length
        Prediction length (this is necessary since features have to be
        available in the future).
    const
        Constant value to use.
    dtype
        Numpy dtype to use for resulting array.
    """

    @validated()
    def __init__(
        self,
        output_field: str,
        target_field: str,
        pred_length: int,
        const: float = 1.0,
        dtype: DType = np.float32,
    ) -> None:
        self.pred_length = pred_length
        self.const = const
        self.dtype = dtype
        self.output_field = output_field
        self.target_field = target_field

    def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry:
        length = target_transformation_length(
            data[self.target_field], self.pred_length, is_train=is_train
        )
        data[self.output_field] = self.const * np.ones(
            shape=(1, length), dtype=self.dtype
        )
        return data
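
# Usage sketch (field names are illustrative choices): with is_train=False the
# constant feature covers the target plus pred_length future steps.
_add_const = AddConstFeature(
    output_field="feat_dynamic_const", target_field="target", pred_length=2
)
_entry_const = _add_const.map_transform(
    {"target": np.zeros(5, dtype=np.float32)}, is_train=False
)
# _entry_const["feat_dynamic_const"] has shape (1, 7) and is filled with 1.0.
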

class AddTimeFeatures(MapTransformation):
    """
    Adds a set of time features.

    If `is_train=True` the feature matrix has the same length as the `target`
    field. If `is_train=False` the feature matrix has length
    len(target) + pred_length.

    Parameters
    ----------
    start_field
        Field with the start time stamp of the time series.
    target_field
        Field with the array containing the time series values.
    output_field
        Field name for result.
    time_features
        List of time features to use.
    pred_length
        Prediction length.
    """

    @validated()
    def __init__(
        self,
        start_field: str,
        target_field: str,
        output_field: str,
        time_features: List[TimeFeature],
        pred_length: int,
    ) -> None:
        self.date_features = time_features
        self.pred_length = pred_length
        self.start_field = start_field
        self.target_field = target_field
        self.output_field = output_field
        self._min_time_point: pd.Timestamp = None
        self._max_time_point: pd.Timestamp = None
        self._full_range_date_features: np.ndarray = None
        self._date_index: pd.DatetimeIndex = None

    def _update_cache(self, start: pd.Timestamp, length: int) -> None:
        end = shift_timestamp(start, length)
        if self._min_time_point is not None:
            if self._min_time_point <= start and end <= self._max_time_point:
                return
        if self._min_time_point is None:
            self._min_time_point = start
            self._max_time_point = end
        self._min_time_point = min(
            shift_timestamp(start, -50), self._min_time_point
        )
        self._max_time_point = max(
            shift_timestamp(end, 50), self._max_time_point
        )
        self.full_date_range = pd.date_range(
            self._min_time_point, self._max_time_point, freq=start.freq
        )
        self._full_range_date_features = (
            np.vstack(
                [feat(self.full_date_range) for feat in self.date_features]
            )
            if self.date_features
            else None
        )
        self._date_index = pd.Series(
            index=self.full_date_range,
            data=np.arange(len(self.full_date_range)),
        )

    def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry:
        start = data[self.start_field]
        length = target_transformation_length(
            data[self.target_field], self.pred_length, is_train=is_train
        )
        self._update_cache(start, length)
        i0 = self._date_index[start]
        features = (
            self._full_range_date_features[..., i0 : i0 + length]
            if self.date_features
            else None
        )
        data[self.output_field] = features
        return data
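
# Usage sketch (field names, the DayOfWeek feature, and the daily frequency
# are assumptions made for this example; the start timestamp must carry a
# freq so that shift_timestamp can extend the date range).
from gluonts.time_feature import DayOfWeek

_add_time = AddTimeFeatures(
    start_field="start",
    target_field="target",
    output_field="time_feat",
    time_features=[DayOfWeek()],
    pred_length=2,
)
_entry_time = _add_time.map_transform(
    {
        "start": pd.Timestamp("2019-01-01", freq="D"),
        "target": np.zeros(5, dtype=np.float32),
    },
    is_train=False,
)
# _entry_time["time_feat"] has shape (1, 7): one row per time feature and one
# column per time step (5 observed steps + pred_length future steps).
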

class AddAgeFeature(MapTransformation):
    """
    Adds an 'age' feature to the data_entry.

    The age feature starts with a small value at the start of the time series
    and grows over time.

    If `is_train=True` the age feature has the same length as the `target`
    field. If `is_train=False` the age feature has length
    len(target) + pred_length.

    Parameters
    ----------
    target_field
        Field with target values (array) of time series.
    output_field
        Field name to use for the output.
    pred_length
        Prediction length.
    log_scale
        If set to true the age feature grows logarithmically otherwise
        linearly over time.
    dtype
        Numpy dtype to use for the resulting array.
    """

    @validated()
    def __init__(
        self,
        target_field: str,
        output_field: str,
        pred_length: int,
        log_scale: bool = True,
        dtype: DType = np.float32,
    ) -> None:
        self.pred_length = pred_length
        self.target_field = target_field
        self.feature_name = output_field
        self.log_scale = log_scale
        self._age_feature = np.zeros(0)
        self.dtype = dtype

    def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry:
        length = target_transformation_length(
            data[self.target_field], self.pred_length, is_train=is_train
        )

        if self.log_scale:
            age = np.log10(2.0 + np.arange(length, dtype=self.dtype))
        else:
            age = np.arange(length, dtype=self.dtype)

        data[self.feature_name] = age.reshape((1, length))

        return data
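
# Usage sketch (field names are illustrative choices): by default the age
# feature is log10(2 + t); passing log_scale=False yields a linear ramp.
_add_age = AddAgeFeature(
    target_field="target", output_field="age", pred_length=2
)
_entry_age = _add_age.map_transform(
    {"target": np.zeros(5, dtype=np.float32)}, is_train=True
)
# _entry_age["age"] -> log10([2, 3, 4, 5, 6]) with shape (1, 5)
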