I am new to ML.NET forecasting. I am trying to build a simple prediction application.
The time series has a one-month step and covers 1993-1995; the values start at 100 and increase by 10 every month.
After training, I save the model and load it again to simulate closing and reopening the application.
Then I use the loaded model to run a backtest prediction for 1993. I feed it several values as examples (initial conditions for the model), but the result I get is wrong.
Full application code:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using Microsoft.ML;
using Microsoft.ML.Transforms.TimeSeries;
namespace Samples.Dynamic
{
    /// <summary>
    /// Minimal ML.NET SSA forecasting sample: trains on a synthetic monthly series,
    /// checkpoints the engine to disk, reloads it, feeds it a few 1993 observations
    /// and prints a five-step forecast.
    /// </summary>
    public static class Forecasting
    {
        /// <summary>
        /// Program entry point. Writes its diagnostics through <see cref="Debug"/> and
        /// creates "model.zip" in the current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            var ml = new MLContext();

            // Synthetic training data: 36 monthly points (Jan 1993 - Dec 1995),
            // starting at 100 and growing by 10 per month.
            var data = new List<TimeSeriesData>();
            int price = 100;
            for (int year = 1993; year <= 1995; year++)
            {
                for (int month = 1; month <= 12; month++)
                {
                    data.Add(new TimeSeriesData(new DateTime(year, month, 1), price));
                    price += 10;
                }
            }

            // Print the input data.
            foreach (TimeSeriesData point in data)
            {
                Debug.WriteLine($"Time {point.Date}, Price {point.Price}");
            }

            var dataView = ml.Data.LoadFromEnumerable(data);
            var inputColumnName = nameof(TimeSeriesData.Price);
            var outputColumnName = nameof(ForecastResult.Forecast);

            // Named arguments make the meaning of each SSA setting explicit.
            var model = ml.Forecasting.ForecastBySsa(
                outputColumnName: outputColumnName,
                inputColumnName: inputColumnName,
                windowSize: 6,
                seriesLength: 7,
                trainSize: data.Count,
                horizon: 5);
            var transformer = model.Fit(dataView);

            var forecastEngine = transformer.CreateTimeSeriesEngine<TimeSeriesData, ForecastResult>(ml);
            forecastEngine.CheckPoint(ml, "model.zip");

            // Simulate a second application run: load the saved model back from disk.
            ITransformer modelCopy;
            using (var file = File.OpenRead("model.zip"))
            {
                modelCopy = ml.Model.Load(file, out DataViewSchema _);
            }

            var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine<TimeSeriesData, ForecastResult>(ml);

            // Observations for the first seven months of 1993, used as initial conditions.
            var initialConditions = new List<TimeSeriesData>
            {
                new TimeSeriesData(new DateTime(1993, 1, 1), 100),
                new TimeSeriesData(new DateTime(1993, 2, 1), 110),
                new TimeSeriesData(new DateTime(1993, 3, 1), 120),
                new TimeSeriesData(new DateTime(1993, 4, 1), 130),
                new TimeSeriesData(new DateTime(1993, 5, 1), 140),
                new TimeSeriesData(new DateTime(1993, 6, 1), 150),
                new TimeSeriesData(new DateTime(1993, 7, 1), 160), // was a duplicated June date
            };

            // Feed the initial conditions to the engine one observation at a time;
            // the forecast returned by each call is intentionally ignored.
            // NOTE(review): the checkpointed engine presumably still carries state from the
            // end of the training series (Dec 1995), so these points may be treated as a
            // continuation rather than a reset - confirm against the ML.NET documentation.
            foreach (var condition in initialConditions)
            {
                forecastEngineCopy.Predict(condition);
            }

            Debug.WriteLine("Forecast for 1993:");
            var forecast = forecastEngineCopy.Predict(horizon: 5);

            // Build the message up front: Debug.WriteLine(string, string) would bind to the
            // (message, category) overload and never apply the "{0}" placeholder.
            Debug.WriteLine($"[{string.Join(", ", forecast.Forecast)}]");
        }

        /// <summary>Output row of the forecasting engine.</summary>
        private sealed class ForecastResult
        {
            /// <summary>Forecast values, one per step of the horizon.</summary>
            public float[] Forecast { get; set; }
        }

        /// <summary>One observation of the input time series.</summary>
        private sealed class TimeSeriesData
        {
            /// <summary>Timestamp of the observation (first day of the month in this sample).</summary>
            public DateTime Date { get; set; }

            /// <summary>Observed value; this is the column the SSA model is trained on.</summary>
            public float Price { get; set; }

            /// <summary>Creates an observation with the given timestamp and value.</summary>
            /// <param name="date">Timestamp of the observation.</param>
            /// <param name="price">Observed value.</param>
            public TimeSeriesData(DateTime date, float price)
            {
                Date = date;
                Price = price;
            }
        }
    }
}
In this code, after feeding the examples (initial conditions for the model) from the beginning of 1993 (the start of the backtest) and calling Predict(horizon: 5), I expect to get something like 170, 180, 190, 200, 210. Instead I get: 5.619335, -32.734127, -68.469055, -102.133865, -136.30606
// Observations for the first seven months of 1993, used as initial conditions.
var initialConditions = new List<TimeSeriesData>
{
    new TimeSeriesData(new DateTime(1993, 1, 1), 100),
    new TimeSeriesData(new DateTime(1993, 2, 1), 110),
    new TimeSeriesData(new DateTime(1993, 3, 1), 120),
    new TimeSeriesData(new DateTime(1993, 4, 1), 130),
    new TimeSeriesData(new DateTime(1993, 5, 1), 140),
    new TimeSeriesData(new DateTime(1993, 6, 1), 150),
    new TimeSeriesData(new DateTime(1993, 7, 1), 160), // was a duplicated June date
};
// Feed the initial conditions to the model, one observation per call;
// the forecast returned by each call is ignored.
foreach (var condition in initialConditions)
{
    forecastEngineCopy.Predict(condition);
}
// Predict the remaining five months of 1993.
var forecast = forecastEngineCopy.Predict(horizon: 5);
I found that my output depends on the parameters of the ml.Forecasting.ForecastBySsa function: windowSize and seriesLength.
// SSA forecaster configured with named arguments so each setting is self-describing.
var model = ml.Forecasting.ForecastBySsa(
    outputColumnName: outputColumnName,
    inputColumnName: inputColumnName,
    windowSize: 11,
    seriesLength: 12,
    trainSize: data.Count,
    horizon: 5);
I also found that if I set windowSize = 4 and seriesLength = 5, I get nearly correct prediction values: 169.83334, 179.6128, 189.2648, 198.84006, 208.30637
Can someone explain what I am doing wrong?
Why does even a small change of parameters, such as windowSize = 6 and seriesLength = 7 (the number of my new examples), break the result: 169.08212, 191.27914, 207.15486, 218.63303, 235.29007?
I should point out that if I use this model to forecast 1996 (not a backtest), without feeding any initial data, I get the correct result.
Can forecasting be used for backtesting at all?