Synthetic Data Generation Tutorial¶
In [1]:
import json
from itertools import islice
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
In [2]:
from gluonts.dataset.artificial import recipe as rcp
from gluonts.core.serde import dump_code, load_code
In [3]:
# plotting utils
def plot_recipe(recipe, length, major_tick=288, minor_ticks=12):
    """Evaluate ``recipe`` once and plot every output field in its own subplot row.

    Parameters
    ----------
    recipe
        List of ``(name, expression)`` pairs accepted by ``rcp.evaluate``.
    length
        Length forwarded to ``rcp.evaluate``.
    major_tick
        Spacing of the major x-axis ticks (default 288, the period used by the
        seasonal recipes in this notebook).
    minor_ticks
        Number of minor intervals per major interval on the x-axis.

    The figure is drawn as a side effect; nothing is returned.
    """
    output_dict = rcp.evaluate(recipe, length)
    num_rows = len(output_dict)
    # squeeze=False keeps `axs` a 2-D array even for a single-field recipe,
    # so indexing below works for any number of rows. The figure height is
    # derived from the number of rows that are actually plotted.
    fig, axs = plt.subplots(
        num_rows, 1, figsize=(16, 2 * num_rows), squeeze=False
    )
    for ax, (name, values) in zip(axs[:, 0], output_dict.items()):
        # A locator keeps a reference to the axis it is attached to, so every
        # subplot needs its own instances instead of sharing one pair.
        ax.xaxis.set_major_locator(MultipleLocator(major_tick))
        ax.xaxis.set_minor_locator(AutoMinorLocator(minor_ticks))
        ax.plot(values)
        ax.grid()
        ax.set_ylabel(name)
def plot_examples(recipe, target, length, num, anomaly_indicator=None):
    """Draw ``num`` independent evaluations of ``recipe`` and plot one field of each.

    Parameters
    ----------
    recipe
        List of ``(name, expression)`` pairs accepted by ``rcp.evaluate``.
    target
        Name of the recipe field to plot.
    length
        Length forwarded to ``rcp.evaluate``.
    num
        Number of evaluations, one subplot row each.
    anomaly_indicator
        Optional name of a recipe field; when given, that field is scaled to
        the top of the y-range and drawn as a translucent red area.

    The figure is drawn as a side effect; nothing is returned.
    """
    # squeeze=False keeps `axs` 2-D so that num == 1 does not hand back a bare
    # Axes object that cannot be indexed.
    fig, axs = plt.subplots(num, 1, figsize=(16, num * 2), squeeze=False)
    for ax in axs[:, 0]:
        sample = rcp.evaluate(recipe, length)
        series = sample[target]
        # Upper y-limit: 10% headroom above the largest value of this example.
        y_top = 1.1 * np.max(series)
        ax.plot(series)
        ax.set_ylim(0, y_top)
        ax.grid()
        if anomaly_indicator is not None:
            ax.fill_between(
                np.arange(len(series)),
                sample[anomaly_indicator] * y_top,
                np.zeros(len(series)),
                alpha=0.3,
                color="red",
            )
def print_dicts(*dicts):
    """Print each given mapping as a brace-delimited block.

    Every entry goes on its own tab-prefixed line as ``key :  value``; a blank
    line follows each closing brace.
    """
    for mapping in dicts:
        print("{")
        for key in mapping:
            print("\t", key, ": ", mapping[key])
        print("}\n")
Recipes¶
Recipes are lists of `(name, expression)` tuples. The role of a recipe
is to describe the generative process of a single time series. In order
to do so, the `expression`s in the `(name, expression)` pairs are
evaluated for each time series in the order given in the list to produce
a `{name: value}` dictionary as output.
In [4]:
# A recipe made only of plain Python values: evaluation returns them as-is
# (they are not expanded to `length` entries).
recipe = [
("myOutput1", 1.0),
("myOutput2", 42)
]
rcp.evaluate(recipe, length=5)
Out[4]:
{'myOutput1': 1.0, 'myOutput2': 42}
Expressions¶
Each `expression` can either be a Python value, a string (interpreted
as a reference to a previously defined `name`), or a special type of
`Callable` that is evaluated each time the recipe is evaluated.
In [5]:
# A bare string used as an expression is looked up among the names defined
# earlier in the same recipe, so both outputs end up with the same value.
recipe = [
("myOutput1", 1.0),
("myOutput2", "myOutput1") # reference to previously defined name
]
rcp.evaluate(recipe, length=5)
Out[5]:
{'myOutput1': 1.0, 'myOutput2': 1.0}
In [6]:
# With default arguments RandomGaussian yields one value per time step,
# i.e. a vector with `length` entries.
recipe = [
("myOutput1", rcp.RandomGaussian()), # callable as expression
]
# multiple evaluations lead to different results, due to randomness
print_dicts(
rcp.evaluate(recipe, length=5),
rcp.evaluate(recipe, length=5),
)
{
myOutput1 : [-1.69674452 -0.42197157 1.38592954 -1.13811894 -1.06239691]
}
{
myOutput1 : [-0.4850381 -0.65920443 0.68951118 1.36561071 -0.71162324]
}
Expressions with References¶
In [7]:
# Operator arguments may be given as strings naming earlier recipe entries:
# "stddev1" is a constant, while "stddev2" is itself random and is redrawn on
# every evaluation (shape=(1, ) makes it a single value rather than a series).
recipe = [
("stddev1", 2.0),
("stddev2", rcp.RandomUniform(low=0, high=1, shape=(1, ))),
("myOutput1", rcp.RandomGaussian(stddev="stddev1")),
("myOutput2", rcp.RandomGaussian(stddev="stddev2"))
]
# multiple evaluations lead to different results, due to randomness
print_dicts(
rcp.evaluate(recipe, length=5),
rcp.evaluate(recipe, length=5)
)
{
stddev1 : 2.0
stddev2 : [0.38008483]
myOutput1 : [-0.17208844 3.87596545 -0.3283231 -2.51976359 3.72707279]
myOutput2 : [-0.48293124 0.20628998 0.04645834 -0.59160444 -0.11661102]
}
{
stddev1 : 2.0
stddev2 : [0.33249927]
myOutput1 : [-0.45888149 -2.41575306 -2.32953018 0.61609625 -1.48939901]
myOutput2 : [-0.33175128 0.28107296 0.32919377 0.23818991 -0.15756851]
}
In [8]:
# Contrast between a recipe operator (re-sampled on each evaluation) and a
# plain NumPy call, which runs once while this list literal is being built.
recipe = [
("random_out", rcp.RandomGaussian(shape=(1,))),
("fixed_out", np.random.randn(1))
]
# note that fixed_out stays the same;
# it's evaluated only once when the recipe is created
print_dicts(
rcp.evaluate(recipe, length=1),
rcp.evaluate(recipe, length=1)
)
{
random_out : [0.63621815]
fixed_out : [0.09259595]
}
{
random_out : [0.98416592]
fixed_out : [0.09259595]
}
Length¶
Most operators in the `recipe` package have a `length` argument that
is automatically passed when the expression is evaluated. The idea is
that these recipes are used to generate fixed-length time series, and
most operators produce individual components of the time series that
have the same length.
In [9]:
# Neither operator is given a size here; both take it from the `length`
# passed to rcp.evaluate, so the same recipe yields 3- and 5-element vectors.
recipe = [
("random_gaussian", rcp.RandomGaussian()),
("constant_vec", rcp.ConstantVec(42))
]
print_dicts(
rcp.evaluate(recipe, length=3),
rcp.evaluate(recipe, length=5)
)
{
random_gaussian : [ 0.80892307 -0.48501767 -0.09222491]
constant_vec : [42. 42. 42.]
}
{
random_gaussian : [-0.91181503 -0.76814061 -1.14260113 -0.64974641 0.57390245]
constant_vec : [42. 42. 42. 42. 42.]
}
Operator Overloading¶
The `Callable` operators defined in the `recipe` package overload
the basic arithmetic operations (addition, subtraction, multiplication,
division).
In [10]:
# Arithmetic on recipe operators builds new operators. When one operand is a
# plain string (as in "x1" * ... or ... + "x2"), the string is kept as a
# reference to the recipe entry of that name; rcp.Ref("x1") is the explicit
# form, needed here because "x1" + "x2" alone would be string concatenation.
recipe = [
("x1", 42 * rcp.ConstantVec(1)),
("x2", "x1" * rcp.RandomUniform()),
("x3", rcp.RandomGaussian() + rcp.RandomUniform()),
("x4", rcp.Ref("x1") + "x2" + "x3")
]
rcp.evaluate(recipe, 3)
Out[10]:
{'x1': array([42., 42., 42.]),
'x2': array([40.85152138, 31.15185218, 41.71600366]),
'x3': array([ 1.74629618, -0.36399104, 0.23023716]),
'x4': array([84.59781756, 72.78786114, 83.94624082])}
SerDe¶
Recipes composed of serializable / representable components can easily be serialized / deserialized.
In [11]:
# Round trip: dump the recipe from the previous cell to its code-string form,
# print that string, rebuild a recipe from it and evaluate the rebuilt recipe.
dumped = dump_code(recipe)
print(dumped)
reconstructed = load_code(dumped)
rcp.evaluate(reconstructed, 3)
[["x1", gluonts.dataset.artificial.recipe.LiftedMul(left=42, right=gluonts.dataset.artificial.recipe.ConstantVec(constant=1))], ["x2", gluonts.dataset.artificial.recipe.LiftedMul(left="x1", right=gluonts.dataset.artificial.recipe.RandomUniform(high=1.0, low=0.0, shape=[0]))], ["x3", gluonts.dataset.artificial.recipe.LiftedAdd(left=gluonts.dataset.artificial.recipe.RandomGaussian(shape=[0], stddev=1.0), right=gluonts.dataset.artificial.recipe.RandomUniform(high=1.0, low=0.0, shape=[0]))], ["x4", gluonts.dataset.artificial.recipe.LiftedAdd(left=gluonts.dataset.artificial.recipe.LiftedAdd(left=gluonts.dataset.artificial.recipe.Ref(field_name="x1"), right="x2"), right="x3")]]
Out[11]:
{'x1': array([42., 42., 42.]),
'x2': array([18.90442409, 23.75295266, 14.56077472]),
'x3': array([ 0.10218595, -1.04778157, 3.05428979]),
'x4': array([61.00661004, 64.70517109, 59.61506451])}
Simple Examples¶
In [12]:
# Smooth seasonality with period 288 plus Gaussian noise, summed into "signal".
# The plot length 3 * 288 covers three full periods.
# NOTE(review): 288 steps per "daily" period presumably means 5-minute
# sampling - confirm.
recipe = [
("daily_smooth_seasonality", rcp.SmoothSeasonality(period=288, phase=-72)),
("noise", rcp.RandomGaussian(stddev=0.1)),
("signal", rcp.Add(["daily_smooth_seasonality", "noise"]))
]
plot_recipe(recipe, 3 * 288)

In [13]:
# Same seasonal signal as before with a linear trend added. The slope is a
# single random value (shape=(1,)) drawn anew on each evaluation, so the five
# plotted examples have different trends.
recipe = [
("slope", rcp.RandomUniform(low=0, high=3, shape=(1,))),
("trend", rcp.LinearTrend(slope="slope")),
("daily_smooth_seasonality", rcp.SmoothSeasonality(period=288, phase=-72)),
("noise", rcp.RandomGaussian(stddev=0.1)),
("signal", rcp.Add(["trend", "daily_smooth_seasonality", "noise"]))
]
plot_examples(recipe, "signal", 3 * 288, 5)

In [14]:
# Weekly pattern on top of a daily seasonality.
# - 'weekly_pattern' has 7 entries: 5 copies of the random 'weekday_scale'
#   followed by 2 ones, passed through NormalizeMax.
# - 'day_of_week' repeats that pattern and dilates it by 288.
#   NOTE(review): presumably each pattern entry is held for 288 consecutive
#   steps (one day) - confirm against rcp.Dilated.
# - 'unscaled' = level + (daily seasonality * day_of_week) + scaled noise.
# This recipe is reused by later cells under the name weekly_seasonal_unscaled.
weekly_seasonal_unscaled = [
('daily_smooth_seasonality', rcp.SmoothSeasonality(period=288, phase=-72)),
('weekday_scale', rcp.RandomUniform(0.1, 10, shape=(1,))),
('weekly_pattern', rcp.NormalizeMax(rcp.Concatenate([rcp.Ref("weekday_scale") * np.ones(5), np.ones(2)]))),
('day_of_week', rcp.Dilated(rcp.Repeated('weekly_pattern'), 288)),
('level', rcp.RandomUniform(low=0, high=10, shape=1)),
('noise_level', rcp.RandomUniform(low=0.01, high=1, shape=1)),
('noise', 'noise_level' * rcp.RandomGaussian()),
('signal', rcp.Mul(['daily_smooth_seasonality','day_of_week'])),
('unscaled', rcp.Add(['level', 'signal', 'noise']))
]
plot_recipe(weekly_seasonal_unscaled, 10 * 288)
plot_examples(weekly_seasonal_unscaled, "unscaled", 10 * 288, 5)


Composing Recipes¶
As recipes are just lists of expressions that are evaluated sequentially,
recipes can simply be composed from smaller component recipes by
concatenating the corresponding lists. It is also possible to include
the output of one recipe inside another one using the `EvalRecipe`
operator.
In [15]:
# Reusable recipe fragment: multiplies an existing "unscaled" field by a
# random "scale" and stores the result as "z". It only works when appended to
# a recipe that already defines "unscaled".
# NOTE(review): RandomUniform is called without `shape`, and with the default
# shape it yields one draw per time step (see the x2 output of the operator-
# overloading cell). "scale" is therefore a vector, not one factor per series.
# If a single per-series scale was intended, shape=(1,) would be needed -
# confirm the intent.
scaling = [
("scale", rcp.RandomUniform(0, 1000)),
("z", "scale" * rcp.Ref("unscaled"))
]
weekly_seasonal = weekly_seasonal_unscaled + scaling
plot_examples(weekly_seasonal, "z", 10 * 288, 5)

In [16]:
# Seasonality built from two random patterns: 24 daily values dilated by 12
# (24 * 12 = 288) and 7 weekly values dilated by 288. Their product is passed
# through NormalizeMax.
weekly_seasonality = [
('daily_pattern', rcp.RandomUniform(0, 1, shape=(24,))),
('daily_seasonality', rcp.Dilated(rcp.Repeated("daily_pattern"), 12)),
('weekly_pattern', rcp.RandomUniform(0, 1, shape=(7,))),
('weekly_seasonality', rcp.Dilated(rcp.Repeated("weekly_pattern"), 288)),
('unnormalized_seasonality', rcp.Mul(['daily_seasonality', 'weekly_seasonality'])),
('seasonality', rcp.NormalizeMax("unnormalized_seasonality")),
]
# Gaussian noise whose level is drawn once per evaluation from [0.01, 0.1].
gaussian_noise_low = [
('noise_level', rcp.RandomUniform(low=0.01, high=0.1, shape=1)),
('noise', rcp.Ref('noise_level') * rcp.RandomGaussian()),
]
# Full recipe by list concatenation. Order matters: each fragment may only
# refer to names defined by the fragments before it ("scaling" needs "unscaled").
complex_weekly_seasonal = (
weekly_seasonality
+ [
('level', rcp.RandomUniform(low=0, high=10, shape=1)),
('signal', rcp.Add(['level', 'seasonality']))
]
+ gaussian_noise_low
+ [("unscaled", rcp.Add(["signal", "noise"]))]
+ scaling
)
plot_examples(complex_weekly_seasonal, "z", 10 * 288, 5)

Generating Anomalies¶
Anomalies are just another effect added/multiplied to a base time series. We can define a recipe for creating certain types of anomalies, and then compose it with a base recipe.
In [17]:
# Base series: constant 1.0 stored under "z".
constant_recipe = [
("z", rcp.ConstantVec(1.0))
]
# Anomaly fragment; expects the base recipe to define "z" and writes "target".
# - 'normal_indicator' comes from a two-state Markov chain with the given
#   transition probabilities; 'anomaly_indicator' is OneMinus of it.
#   NOTE(review): presumably the indicator is 1 in normal regions and 0 in
#   anomalous ones before the inversion - confirm against BinaryMarkovChain.
# - 'anomaly_scale' is 0.5 plus one uniform draw (arguments -1.0 and 1.0).
# - 'anomaly_multiplier' is 1 + anomaly_scale * anomaly_indicator, and
#   'target' is z * anomaly_multiplier.
bmc_scale_anomalies = [
('normal_indicator', rcp.BinaryMarkovChain(one_to_zero=1/(288*7), zero_to_one=0.1)),
('anomaly_indicator', rcp.OneMinus('normal_indicator')),
('anomaly_scale', 0.5 + rcp.RandomUniform(-1.0, 1.0, shape=1)),
('anomaly_multiplier', 1 + rcp.Ref('anomaly_scale') * rcp.Ref('anomaly_indicator')),
('target', rcp.Mul(['z', 'anomaly_multiplier']))
]
plot_examples(constant_recipe + bmc_scale_anomalies, "target", 10*288, 5, "anomaly_indicator")

In [18]:
# Same anomaly fragment applied to the scaled weekly-seasonal recipe, whose
# "z" field is what the fragment multiplies; plotted over 288*7 steps.
plot_examples(weekly_seasonal + bmc_scale_anomalies, 'target', 288*7, 5, "anomaly_indicator")

Generating Changepoints¶
In [19]:
# A random level plus Gaussian noise, summed into 'unscaled'.
# NOTE(review): "noise_level" is passed to RandomGaussian positionally, while
# other cells use the keyword stddev=...; this relies on stddev being the
# first positional parameter - confirm.
homoskedastic_gaussian_noise = [
('level', rcp.RandomUniform(0, 10, shape=1)),
('noise_level', rcp.RandomUniform(0.01, 1, shape=1)),
('noise', rcp.RandomGaussian("noise_level")),
('unscaled', rcp.Add(['level', 'noise'])),
]
In [20]:
# Changepoint recipe combining two previously defined recipes.
# - 'z_1' / 'z_2' take the "unscaled" field of the nested noise recipe and the
#   nested weekly-seasonal recipe respectively.
# - NOTE(review): StackPrefix('z') presumably stacks the fields whose names
#   start with "z" (z_1, z_2), and Choose presumably selects between the
#   stacked series according to 'change' - confirm against the rcp
#   implementation.
# The result is stored as 'unscaled' so that the `scaling` fragment can be
# appended unchanged.
changepoint_noise_to_seasonal = [
('z_1', rcp.EvalRecipe(homoskedastic_gaussian_noise, "unscaled")),
('z_2', rcp.EvalRecipe(weekly_seasonal_unscaled, "unscaled")),
('z_stacked', rcp.StackPrefix('z')),
('change', rcp.RandomChangepoints(1)),
('unscaled', rcp.Choose("z_stacked", "change"))
]
changepoint_noise_to_seasonal_scaled = changepoint_noise_to_seasonal + scaling
In [21]:
# Ten examples of the scaled changepoint recipe with the anomaly fragment
# appended, each 288*7 steps long, with the anomaly indicator shaded.
plot_examples(changepoint_noise_to_seasonal_scaled + bmc_scale_anomalies, 'target', 288*7, 10, "anomaly_indicator")

Generating several time series¶
In [22]:
# Take the first 2 items produced by rcp.generate for series of length 10.
# Each item holds every recipe field plus 'item_id' and 'start'.
# NOTE(review): the trailing {} is presumably the initial global state shared
# across generated series - confirm against rcp.generate.
rcp.take_as_list(rcp.generate(10, weekly_seasonal_unscaled, "2018-01-01", {}), 2)
Out[22]:
[{'daily_smooth_seasonality': array([0. , 0.00011899, 0.00047589, 0.00107054, 0.00190265,
0.00297183, 0.00427757, 0.00581924, 0.00759612, 0.00960736]),
'weekday_scale': array([5.53325369]),
'weekly_pattern': array([1. , 1. , 1. , 1. , 1. ,
0.18072549, 0.18072549]),
'day_of_week': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
'level': array([7.15189366]),
'noise_level': array([0.60673574]),
'noise': array([-1.37627579, 0.80910965, -0.5113108 , 1.19522357, 0.76819937,
-0.30693338, 1.54426428, 0.65576722, 0.29384949, 0.35138523]),
'signal': array([0. , 0.00011899, 0.00047589, 0.00107054, 0.00190265,
0.00297183, 0.00427757, 0.00581924, 0.00759612, 0.00960736]),
'unscaled': array([5.77561787, 7.9611223 , 6.64105875, 8.34818777, 7.92199568,
6.84793212, 8.70043552, 7.81348013, 7.45333928, 7.51288625]),
'item_id': '0',
'start': '2018-01-01'},
{'daily_smooth_seasonality': array([0. , 0.00011899, 0.00047589, 0.00107054, 0.00190265,
0.00297183, 0.00427757, 0.00581924, 0.00759612, 0.00960736]),
'weekday_scale': array([8.71312027]),
'weekly_pattern': array([1. , 1. , 1. , 1. , 1. ,
0.11476945, 0.11476945]),
'day_of_week': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
'level': array([9.78618342]),
'noise_level': array([0.80116698]),
'noise': array([ 1.19700682, -0.16436603, 0.2508195 , -0.6842733 , -2.04537114,
0.52365764, 0.69255774, -0.59459811, 1.81845245, -1.16518975]),
'signal': array([0. , 0.00011899, 0.00047589, 0.00107054, 0.00190265,
0.00297183, 0.00427757, 0.00581924, 0.00759612, 0.00960736]),
'unscaled': array([10.98319024, 9.62193638, 10.03747882, 9.10298066, 7.74271494,
10.31281289, 10.48301873, 9.19740456, 11.612232 , 8.63060103]),
'item_id': '1',
'start': '2018-01-01'}]
Saving to a file¶
In [23]:
def _to_jsonable(value):
    """Convert NumPy arrays and NumPy scalars to plain Python; pass others through."""
    # isinstance also covers ndarray subclasses; np.generic covers scalars such
    # as np.int64 / np.float32, which json.dumps cannot serialize directly.
    if isinstance(value, (np.ndarray, np.generic)):
        return value.tolist()
    return value


def write_to_file(recipe, length, num_ts, fields, fn, start="2019-01-07 00:00"):
    """Generate ``num_ts`` series from ``recipe`` and write them as JSON lines.

    Two files are written, one JSON object per line and per generated series:
    ``fn`` contains only the keys listed in ``fields``, and ``fn + "-all"``
    contains every key of the generated item.

    Parameters
    ----------
    recipe
        Recipe passed to ``rcp.generate``.
    length
        Series length passed to ``rcp.generate``.
    num_ts
        Number of series taken from the generator.
    fields
        Keys copied into the reduced file ``fn``.
    fn
        Path of the reduced file; the full file is ``fn + "-all"``.
    start
        Start timestamp passed to ``rcp.generate`` (previously hard-coded).

    Raises
    ------
    KeyError
        If a name in ``fields`` is not present in a generated item.
    Exception
        Whatever ``json.dumps`` raises for the reduced record; the offending
        records are printed first to help debugging, then the error propagates.
    """
    with open(fn, 'w') as f, open(fn + "-all", 'w') as g:
        for x in islice(rcp.generate(length, recipe, start), num_ts):
            z = {k: _to_jsonable(x[k]) for k in x}
            xx = {fi: z[fi] for fi in fields}
            try:
                f.write(json.dumps(xx))
            except Exception:
                print(xx)
                print(z)
                # Bare raise re-raises the active exception with its
                # original traceback.
                raise
            f.write('\n')
            g.write(json.dumps(z))
            g.write('\n')