1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
|
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Auto-Regressive models for time series data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib import distributions
from tensorflow.contrib.timeseries.python.timeseries import model
from tensorflow.contrib.timeseries.python.timeseries import model_utils
from tensorflow.contrib.timeseries.python.timeseries.feature_keys import PredictionFeatures
from tensorflow.contrib.timeseries.python.timeseries.feature_keys import TrainEvalFeatures
from tensorflow.python.estimator import estimator_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope
class ARModel(model.TimeSeriesModel):
"""Auto-regressive model, both linear and non-linear.
Features to the model include time and values of input_window_size timesteps,
and times for output_window_size timesteps. These are passed through zero or
more hidden layers, and then fed to a loss function (e.g. squared loss).
Note that this class can also be used to regress against time only by setting
the input_window_size to zero.
"""
SQUARED_LOSS = "squared_loss"
NORMAL_LIKELIHOOD_LOSS = "normal_likelihood_loss"
def __init__(self,
periodicities,
input_window_size,
output_window_size,
num_features,
num_time_buckets=10,
loss=NORMAL_LIKELIHOOD_LOSS,
hidden_layer_sizes=None):
"""Constructs an auto-regressive model.
Args:
periodicities: periodicities of the input data, in the same units as the
time feature. Note this can be a single value or a list of values for
multiple periodicities.
input_window_size: Number of past time steps of data to look at when doing
the regression.
output_window_size: Number of future time steps to predict. Note that
setting it to > 1 empiricaly seems to give a better fit.
num_features: number of input features per time step.
num_time_buckets: Number of buckets into which to divide (time %
periodicity) for generating time based features.
loss: Loss function to use for training. Currently supported values are
SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
SQUARED_LOSS, the evaluation loss is reported based on un-scaled
observations and predictions, while the training loss is computed on
normalized data (if input statistics are available).
hidden_layer_sizes: list of sizes of hidden layers.
"""
self.input_window_size = input_window_size
self.output_window_size = output_window_size
if hidden_layer_sizes is None:
hidden_layer_sizes = []
self.hidden_layer_sizes = hidden_layer_sizes
self.window_size = self.input_window_size + self.output_window_size
self.loss = loss
super(ARModel, self).__init__(
num_features=num_features)
assert num_time_buckets > 0
self._buckets = int(num_time_buckets)
if periodicities is None or not periodicities:
periodicities = []
elif (not isinstance(periodicities, list) and
not isinstance(periodicities, tuple)):
periodicities = [periodicities]
self._periods = [int(p) for p in periodicities]
for p in self._periods:
assert p > 0
assert len(self._periods) or self.input_window_size
assert output_window_size > 0
def get_start_state(self):
# State which matches the format we'll return later. Typically this will not
# be used by the model directly, but the shapes and dtypes should match so
# that the serving input_receiver_fn gets placeholder shapes correct.
return (array_ops.zeros([self.input_window_size], dtype=dtypes.int64),
array_ops.zeros(
[self.input_window_size, self.num_features], dtype=self.dtype))
# TODO(allenl,agarwal): Support sampling for AR.
def random_model_parameters(self, seed=None):
pass
def generate(self, number_of_series, series_length,
model_parameters=None, seed=None):
pass
def _predicted_covariance_op(self, activations, num_values):
activation, activation_size = activations[-1]
if self.loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
log_sigma_square = model_utils.fully_connected(
activation,
activation_size,
self.output_window_size * num_values,
name="log_sigma_square",
activation=None)
predicted_covariance = gen_math_ops.exp(log_sigma_square)
predicted_covariance = array_ops.reshape(
predicted_covariance, [-1, self.output_window_size, num_values])
else:
shape = array_ops.stack([
array_ops.shape(activation)[0],
constant_op.constant(self.output_window_size),
constant_op.constant(num_values)
])
predicted_covariance = array_ops.ones(shape=shape, dtype=activation.dtype)
return predicted_covariance
def _predicted_mean_op(self, activations):
activation, activation_size = activations[-1]
predicted_mean = model_utils.fully_connected(
activation,
activation_size,
self.output_window_size * self.num_features,
name="predicted_mean",
activation=None)
return array_ops.reshape(predicted_mean,
[-1, self.output_window_size, self.num_features])
def _create_hidden_stack(self, activation, activation_size):
activations = []
for layer_number, layer_size in enumerate(self.hidden_layer_sizes):
# TODO(agarwal): Migrate to fully_connected in tf slim
activation = model_utils.fully_connected(
activation, activation_size, layer_size,
name="layer_{}".format(layer_number))
activation_size = layer_size
activations.append((activation, activation_size))
return activations
def prediction_ops(self, times, values):
"""Compute model predictions given input data.
Args:
times: A [batch size, self.window_size] integer Tensor, the first
self.input_window_size times in each part of the batch indicating
input features, and the last self.output_window_size times indicating
prediction times.
values: A [batch size, self.input_window_size, self.num_features] Tensor
with input features.
Returns:
Tuple (predicted_mean, predicted_covariance), where each element is a
Tensor with shape [batch size, self.output_window_size,
self.num_features].
"""
times.get_shape().assert_is_compatible_with([None, self.window_size])
activations = []
if self.input_window_size:
values.get_shape().assert_is_compatible_with(
[None, self.input_window_size, self.num_features])
# Create input features.
if self._periods:
_, time_features = self._compute_time_features(times)
activation_size = self.window_size * self._buckets * len(self._periods)
activation = array_ops.reshape(time_features, [-1, activation_size])
else:
activation_size = 0
activation = None
if self.input_window_size:
inp = array_ops.slice(values, [0, 0, 0], [-1, self.input_window_size, -1])
inp_size = self.input_window_size * self.num_features
inp = array_ops.reshape(inp, [-1, inp_size])
if activation is not None:
activation = array_ops.concat([inp, activation], 1)
else:
activation = inp
activation_size += inp_size
assert activation_size
activations.append((activation, activation_size))
# Create hidden layers.
activations += self._create_hidden_stack(activation, activation_size)
# Create mean and convariance ops.
predicted_mean = self._predicted_mean_op(activations)
predicted_covariance = self._predicted_covariance_op(activations,
self.num_features)
return {"activations": activations,
"mean": predicted_mean,
"covariance": predicted_covariance}
def loss_op(self, targets, prediction_ops):
"""Create loss_op."""
prediction = prediction_ops["mean"]
if self.loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
covariance = prediction_ops["covariance"]
sigma = math_ops.sqrt(gen_math_ops.maximum(covariance, 1e-5))
normal = distributions.Normal(loc=targets, scale=sigma)
loss_op = -math_ops.reduce_sum(normal.log_prob(prediction))
else:
assert self.loss == ARModel.SQUARED_LOSS, self.loss
loss_op = math_ops.reduce_sum(math_ops.square(prediction - targets))
loss_op /= math_ops.cast(
math_ops.reduce_prod(array_ops.shape(targets)), loss_op.dtype)
return loss_op
# TODO(allenl, agarwal): Consider better ways of warm-starting predictions.
def predict(self, features):
"""Computes predictions multiple steps into the future.
Args:
features: A dictionary with the following key/value pairs:
PredictionFeatures.TIMES: A [batch size, predict window size]
integer Tensor of times, after the window of data indicated by
`STATE_TUPLE`, to make predictions for.
PredictionFeatures.STATE_TUPLE: A tuple of (times, values), times with
shape [batch size, self.input_window_size], values with shape [batch
size, self.input_window_size, self.num_features] representing a
segment of the time series before `TIMES`. This data is used
to start of the autoregressive computation. This should have data for
at least self.input_window_size timesteps.
Returns:
A dictionary with keys, "mean", "covariance". The
values are Tensors of shape [batch_size, predict window size,
num_features] and correspond to the values passed in `TIMES`.
"""
predict_times = math_ops.cast(
ops.convert_to_tensor(features[PredictionFeatures.TIMES]), dtypes.int32)
batch_size = array_ops.shape(predict_times)[0]
num_predict_values = array_ops.shape(predict_times)[1]
prediction_iterations = ((num_predict_values + self.output_window_size - 1)
// self.output_window_size)
# Pad predict_times so as to have exact multiple of self.output_window_size
# values per example.
padding_size = (prediction_iterations * self.output_window_size -
num_predict_values)
padding = array_ops.zeros([batch_size, padding_size], predict_times.dtype)
predict_times = control_flow_ops.cond(
padding_size > 0, lambda: array_ops.concat([predict_times, padding], 1),
lambda: predict_times)
state = features[PredictionFeatures.STATE_TUPLE]
(state_times, state_values) = state
state_times = math_ops.cast(
ops.convert_to_tensor(state_times), dtypes.int32)
state_values = ops.convert_to_tensor(state_values, dtype=self.dtype)
initial_input_times = predict_times[:, :self.output_window_size]
if self.input_window_size > 0:
initial_input_times = array_ops.concat(
[state_times[:, -self.input_window_size:], initial_input_times], 1)
values_size = array_ops.shape(state_values)[1]
times_size = array_ops.shape(state_times)[1]
with ops.control_dependencies([
check_ops.assert_greater_equal(values_size, self.input_window_size),
check_ops.assert_equal(values_size, times_size)
]):
initial_input_values = state_values[:, -self.input_window_size:, :]
else:
initial_input_values = 0
# Iterate over the predict_times, predicting self.output_window_size values
# in each iteration.
def _while_condition(iteration_number, *unused_args):
return math_ops.less(iteration_number, prediction_iterations)
def _while_body(iteration_number, input_times, input_values,
mean_ta, covariance_ta):
"""Predict self.output_window_size values."""
prediction_ops = self.prediction_ops(input_times, input_values)
predicted_mean = prediction_ops["mean"]
predicted_covariance = prediction_ops["covariance"]
offset = self.output_window_size * gen_math_ops.minimum(
iteration_number + 1, prediction_iterations - 1)
if self.input_window_size > 0:
if self.output_window_size < self.input_window_size:
new_input_values = array_ops.concat(
[input_values[:, self.output_window_size:, :], predicted_mean], 1)
new_input_times = array_ops.concat([
input_times[:, self.output_window_size:],
predict_times[:, offset:offset + self.output_window_size]
], 1)
else:
new_input_values = predicted_mean[:, -self.input_window_size:, :]
new_input_times = predict_times[
:,
offset - self.input_window_size:offset + self.output_window_size]
else:
new_input_values = input_values
new_input_times = predict_times[:,
offset:offset + self.output_window_size]
new_input_times.set_shape(initial_input_times.get_shape())
new_mean_ta = mean_ta.write(iteration_number, predicted_mean)
if isinstance(covariance_ta, tensor_array_ops.TensorArray):
new_covariance_ta = covariance_ta.write(iteration_number,
predicted_covariance)
else:
new_covariance_ta = covariance_ta
return (iteration_number + 1,
new_input_times,
new_input_values,
new_mean_ta,
new_covariance_ta)
# Note that control_flow_ops.while_loop doesn't seem happy with None. Hence
# using 0 for cases where we don't want to predict covariance.
covariance_ta_init = (tensor_array_ops.TensorArray(
dtype=self.dtype, size=prediction_iterations)
if self.loss != ARModel.SQUARED_LOSS else 0.)
mean_ta_init = tensor_array_ops.TensorArray(
dtype=self.dtype, size=prediction_iterations)
_, _, _, mean_ta, covariance_ta = control_flow_ops.while_loop(
_while_condition, _while_body, [
0, initial_input_times, initial_input_values, mean_ta_init,
covariance_ta_init
])
def _parse_ta(values_ta):
"""Helper function to parse the returned TensorArrays."""
if not isinstance(values_ta, tensor_array_ops.TensorArray):
return None
predictions_length = prediction_iterations * self.output_window_size
# Shape [prediction_iterations, batch_size, self.output_window_size,
# self.num_features]
values_packed = values_ta.stack()
# Transpose to move batch dimension outside.
output_values = array_ops.reshape(
array_ops.transpose(values_packed, [1, 0, 2, 3]),
array_ops.stack([batch_size, predictions_length, -1]))
# Clip to desired size
return output_values[:, :num_predict_values, :]
predicted_mean = _parse_ta(mean_ta)
predicted_covariance = _parse_ta(covariance_ta)
if predicted_covariance is None:
predicted_covariance = array_ops.ones_like(predicted_mean)
# Transform and scale the mean and covariance appropriately.
predicted_mean = self._scale_back_data(predicted_mean)
predicted_covariance = self._scale_back_variance(predicted_covariance)
return {"mean": predicted_mean,
"covariance": predicted_covariance}
def _process_window(self, features, mode):
"""Compute model outputs on a single window of data."""
# TODO(agarwal): Use exogenous features
times = math_ops.cast(features[TrainEvalFeatures.TIMES], dtypes.int64)
values = math_ops.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype)
original_values = values
# Extra shape checking for the window size (above that in
# `head.create_estimator_spec`).
expected_times_shape = [None, self.window_size]
if not times.get_shape().is_compatible_with(expected_times_shape):
raise ValueError(
("ARModel with input_window_size={input_window_size} "
"and output_window_size={output_window_size} expects "
"feature '{times_feature}' to have shape (batch_size, "
"{window_size}) (for any batch_size), but got shape {times_shape}. "
"If you are using RandomWindowInputFn, set "
"window_size={window_size} or adjust the input_window_size and "
"output_window_size arguments to ARModel.").format(
input_window_size=self.input_window_size,
output_window_size=self.output_window_size,
times_feature=TrainEvalFeatures.TIMES,
window_size=self.window_size,
times_shape=times.get_shape()))
values = self._scale_data(values)
if self.input_window_size > 0:
input_values = values[:, :self.input_window_size, :]
else:
input_values = None
prediction_ops = self.prediction_ops(times, input_values)
prediction = prediction_ops["mean"]
covariance = prediction_ops["covariance"]
targets = array_ops.slice(values, [0, self.input_window_size, 0],
[-1, -1, -1])
targets.get_shape().assert_is_compatible_with(prediction.get_shape())
if (mode == estimator_lib.ModeKeys.EVAL
and self.loss == ARModel.SQUARED_LOSS):
# Report an evaluation loss which matches the expected
# (observed - predicted) ** 2.
# Note that this affects only evaluation; the training loss is unaffected.
loss = self.loss_op(
self._scale_back_data(targets),
{"mean": self._scale_back_data(prediction_ops["mean"])})
else:
loss = self.loss_op(targets, prediction_ops)
# Scale back the prediction.
prediction = self._scale_back_data(prediction)
covariance = self._scale_back_variance(covariance)
return model.ModelOutputs(
loss=loss,
end_state=(times[:, -self.input_window_size:],
values[:, -self.input_window_size:, :]),
predictions={"mean": prediction, "covariance": covariance,
"observed": original_values[:, -self.output_window_size:]},
prediction_times=times[:, -self.output_window_size:])
def get_batch_loss(self, features, mode, state):
"""Computes predictions and a loss.
Args:
features: A dictionary (such as is produced by a chunker) with the
following key/value pairs (shapes are given as required for training):
TrainEvalFeatures.TIMES: A [batch size, self.window_size] integer
Tensor with times for each observation. To train on longer
sequences, the data should first be chunked.
TrainEvalFeatures.VALUES: A [batch size, self.window_size,
self.num_features] Tensor with values for each observation.
When evaluating, `TIMES` and `VALUES` must have a window size of at
least self.window_size, but it may be longer, in which case the last
window_size - self.input_window_size times (or fewer if this is not
divisible by self.output_window_size) will be evaluated on with
non-overlapping output windows (and will have associated
predictions). This is primarily to support qualitative
evaluation/plotting, and is not a recommended way to compute evaluation
losses (since there is no overlap in the output windows, which for
window-based models is an undesirable bias).
mode: The tf.estimator.ModeKeys mode to use (TRAIN or EVAL).
state: Unused
Returns:
A model.ModelOutputs object.
Raises:
ValueError: If `mode` is not TRAIN or EVAL, or if static shape information
is incorrect.
"""
features = {feature_name: ops.convert_to_tensor(feature_value)
for feature_name, feature_value in features.items()}
if mode == estimator_lib.ModeKeys.TRAIN:
# For training, we require the window size to be self.window_size as
# iterating sequentially on larger windows could introduce a bias.
return self._process_window(features, mode=mode)
elif mode == estimator_lib.ModeKeys.EVAL:
# For evaluation, we allow the user to pass in a larger window, in which
# case we try to cover as much of the window as possible without
# overlap. Quantitative evaluation is more efficient/correct with fixed
# windows matching self.window_size (as with training), but this looping
# allows easy plotting of "in-sample" predictions.
times = features[TrainEvalFeatures.TIMES]
times.get_shape().assert_has_rank(2)
static_window_size = times.get_shape()[1].value
if (static_window_size is not None
and static_window_size < self.window_size):
raise ValueError(
("ARModel requires a window of at least input_window_size + "
"output_window_size to evaluate on (input_window_size={}, "
"output_window_size={}, and got shape {} for feature '{}' (batch "
"size, window size)).").format(
self.input_window_size, self.output_window_size,
times.get_shape(), TrainEvalFeatures.TIMES))
num_iterations = ((array_ops.shape(times)[1] - self.input_window_size)
// self.output_window_size)
output_size = num_iterations * self.output_window_size
# Rather than dealing with overlapping windows of output, discard a bit at
# the beginning if output windows don't cover evenly.
crop_length = output_size + self.input_window_size
features = {feature_name: feature_value[:, -crop_length:]
for feature_name, feature_value in features.items()}
# Note that, unlike the ARModel's predict() while_loop and the
# SequentialTimeSeriesModel while_loop, each iteration here can run in
# parallel, since we are not feeding predictions or state from previous
# iterations.
def _while_condition(iteration_number, loss_ta, mean_ta, covariance_ta):
del loss_ta, mean_ta, covariance_ta # unused
return iteration_number < num_iterations
def _while_body(iteration_number, loss_ta, mean_ta, covariance_ta):
"""Perform a processing step on a single window of data."""
base_offset = iteration_number * self.output_window_size
model_outputs = self._process_window(
features={
feature_name:
feature_value[:, base_offset:base_offset + self.window_size]
for feature_name, feature_value in features.items()},
mode=mode)
# This code needs to be updated if new predictions are added in
# self._process_window
assert len(model_outputs.predictions) == 3
assert "mean" in model_outputs.predictions
assert "covariance" in model_outputs.predictions
assert "observed" in model_outputs.predictions
return (iteration_number + 1,
loss_ta.write(
iteration_number, model_outputs.loss),
mean_ta.write(
iteration_number, model_outputs.predictions["mean"]),
covariance_ta.write(
iteration_number, model_outputs.predictions["covariance"]))
_, loss_ta, mean_ta, covariance_ta = control_flow_ops.while_loop(
_while_condition, _while_body,
[0,
tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations),
tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations),
tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations)])
values = math_ops.cast(features[TrainEvalFeatures.VALUES],
dtype=self.dtype)
batch_size = array_ops.shape(times)[0]
prediction_shape = [batch_size, self.output_window_size * num_iterations,
self.num_features]
previous_state_times, previous_state_values = state
# Make sure returned state always has windows of self.input_window_size,
# even if we were passed fewer than self.input_window_size points this
# time.
if self.input_window_size > 0:
new_state_times = array_ops.concat(
[previous_state_times,
math_ops.cast(times, dtype=dtypes.int64)],
axis=1)[:, -self.input_window_size:]
new_state_times.set_shape((None, self.input_window_size))
new_state_values = array_ops.concat(
[previous_state_values,
self._scale_data(values)], axis=1)[:, -self.input_window_size:, :]
new_state_values.set_shape((None, self.input_window_size,
self.num_features))
else:
# There is no state to keep, and the strided slices above do not handle
# input_window_size=0.
new_state_times = previous_state_times
new_state_values = previous_state_values
return model.ModelOutputs(
loss=math_ops.reduce_mean(loss_ta.stack(), axis=0),
end_state=(new_state_times, new_state_values),
predictions={
"mean": array_ops.reshape(
array_ops.transpose(mean_ta.stack(), [1, 0, 2, 3]),
prediction_shape),
"covariance": array_ops.reshape(
array_ops.transpose(covariance_ta.stack(), [1, 0, 2, 3]),
prediction_shape),
"observed": values[:, -output_size:]},
prediction_times=times[:, -output_size:])
else:
raise ValueError(
"Unknown mode '{}' passed to get_batch_loss.".format(mode))
def _compute_time_features(self, time):
"""Compute some features on the time value."""
batch_size = array_ops.shape(time)[0]
num_periods = len(self._periods)
# Reshape to 3D.
periods = constant_op.constant(
self._periods, shape=[1, 1, num_periods, 1], dtype=time.dtype)
time = array_ops.reshape(time, [batch_size, -1, 1, 1])
window_offset = time / self._periods
# Cast to appropriate type and scale to [0, 1) range
mod = (math_ops.cast(time % periods, self.dtype) * self._buckets /
math_ops.cast(periods, self.dtype))
# Bucketize based on some fixed width intervals. For a value t and interval
# [a, b), we return (t - a) if a <= t < b, else 0.
intervals = array_ops.reshape(
math_ops.range(self._buckets, dtype=self.dtype),
[1, 1, 1, self._buckets])
mod = nn_ops.relu(mod - intervals)
mod = array_ops.where(mod < 1.0, mod, array_ops.zeros_like(mod))
return window_offset, mod
class AnomalyMixtureARModel(ARModel):
"""Model data as a mixture of normal and anomaly distributions.
Note that this model works by changing the loss function to reduce the penalty
when predicting an anomalous target. However the predictions are still based
on anomalous input features, and this may affect the quality of fit. One
possible solution is to downweight/filter anomalous inputs, but that requires
more sequential processing instead of completely random windows.
"""
GAUSSIAN_ANOMALY = "gaussian"
CAUCHY_ANOMALY = "cauchy"
def __init__(self,
periodicities,
anomaly_prior_probability,
input_window_size,
output_window_size,
num_features,
anomaly_distribution=GAUSSIAN_ANOMALY,
num_time_buckets=10,
hidden_layer_sizes=None):
assert (anomaly_prior_probability < 1.0 and
anomaly_prior_probability > 0.0)
self._anomaly_prior_probability = anomaly_prior_probability
assert anomaly_distribution in [
AnomalyMixtureARModel.GAUSSIAN_ANOMALY,
AnomalyMixtureARModel.CAUCHY_ANOMALY]
self._anomaly_distribution = anomaly_distribution
super(AnomalyMixtureARModel, self).__init__(
periodicities=periodicities,
num_features=num_features,
num_time_buckets=num_time_buckets,
input_window_size=input_window_size,
output_window_size=output_window_size,
loss=ARModel.NORMAL_LIKELIHOOD_LOSS,
hidden_layer_sizes=hidden_layer_sizes)
def _create_anomaly_ops(self, times, values, prediction_ops_dict):
anomaly_log_param = variable_scope.get_variable(
"anomaly_log_param",
shape=[],
dtype=self.dtype,
initializer=init_ops.zeros_initializer())
# Anomaly param is the variance for Gaussian and scale for Cauchy
# distribution.
prediction_ops_dict["anomaly_params"] = gen_math_ops.exp(anomaly_log_param)
def prediction_ops(self, times, values):
prediction_ops_dict = super(AnomalyMixtureARModel, self).prediction_ops(
times, values)
self._create_anomaly_ops(times, values, prediction_ops_dict)
return prediction_ops_dict
def _anomaly_log_prob(self, targets, prediction_ops):
prediction = prediction_ops["mean"]
if self._anomaly_distribution == AnomalyMixtureARModel.GAUSSIAN_ANOMALY:
anomaly_variance = prediction_ops["anomaly_params"]
anomaly_sigma = math_ops.sqrt(
gen_math_ops.maximum(anomaly_variance, 1e-5))
normal = distributions.Normal(loc=targets, scale=anomaly_sigma)
log_prob = normal.log_prob(prediction)
else:
assert self._anomaly_distribution == AnomalyMixtureARModel.CAUCHY_ANOMALY
anomaly_scale = prediction_ops["anomaly_params"]
cauchy = distributions.StudentT(
df=array_ops.ones([], dtype=anomaly_scale.dtype),
loc=targets,
scale=anomaly_scale)
log_prob = cauchy.log_prob(prediction)
return log_prob
def loss_op(self, targets, prediction_ops):
"""Create loss_op."""
prediction = prediction_ops["mean"]
covariance = prediction_ops["covariance"]
# Normal data log probability.
sigma = math_ops.sqrt(gen_math_ops.maximum(covariance, 1e-5))
normal1 = distributions.Normal(loc=targets, scale=sigma)
log_prob1 = normal1.log_prob(prediction)
log_prob1 += math_ops.log(1 - self._anomaly_prior_probability)
# Anomaly log probability.
log_prob2 = self._anomaly_log_prob(targets, prediction_ops)
log_prob2 += math_ops.log(self._anomaly_prior_probability)
# We need to compute log(exp(log_prob1) + exp(log_prob2). For numerical
# stability, we rewrite the expression as below.
p1 = gen_math_ops.minimum(log_prob1, log_prob2)
p2 = gen_math_ops.maximum(log_prob1, log_prob2)
mixed_log_prob = p2 + math_ops.log(1 + gen_math_ops.exp(p1 - p2))
loss_op = -math_ops.reduce_sum(mixed_log_prob)
loss_op /= math_ops.cast(
math_ops.reduce_prod(array_ops.shape(targets)), self.dtype)
return loss_op
|