PMML 4.0 - Time Series Models
A Time Series is a sequence of
data points, measured at points in time, usually, but not necessarily, forming
equidistant intervals. Time series analysis strives to understand such time
series, often with the goal of making forecasts (predictions) or of filling in
missing values between known data points. Time series prediction is the use of a model
to predict future events
based on known past events before they are measured.
Interpolation is the use of a model to complement or amend values
between known data points.
The model must contain information on the general trend, a description of
periodic behavior and an overall fitting function that can be used for
forecasting and/or interpolation. It may also contain detailed information on
various aspects of the time series and the expected forecasting accuracy.
In addition to the entries common to all models, a TimeSeriesModel
contains results of at least one time series algorithm, for example
SpectralAnalysis, ARIMA, ExponentialSmoothing or SeasonalTrendDecomposition.
In PMML 4.0, only Exponential Smoothing is defined;
the other algorithms are planned for later versions.
There are up to three TimeSeries
elements holding original or predicted time series values.
<!-- TimeSeriesModel: root element of a time series model. Besides the entries
     common to all models it holds up to three TimeSeries elements and the
     results of the time series algorithms. -->
<xs:element name="TimeSeriesModel">
  <xs:complexType>
    <xs:sequence>
      <!-- Entries common to all models. -->
      <xs:element ref="Extension" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element ref="MiningSchema"/>
      <xs:element ref="Output" minOccurs="0"/>
      <xs:element ref="ModelStats" minOccurs="0"/>
      <xs:element ref="LocalTransformations" minOccurs="0"/>

      <!-- Original or predicted time series values (at most three). -->
      <xs:element ref="TimeSeries" minOccurs="0" maxOccurs="3"/>

      <!-- Algorithm results; each may appear at most once. -->
      <xs:element ref="SpectralAnalysis" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="ARIMA" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="ExponentialSmoothing" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="SeasonalTrendDecomposition" minOccurs="0" maxOccurs="1"/>

      <xs:element ref="ModelVerification" minOccurs="0"/>
      <xs:element ref="Extension" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

    <xs:attribute name="modelName" type="xs:string" use="optional"/>
    <xs:attribute name="functionName" type="MINING-FUNCTION" use="required"/>
    <xs:attribute name="algorithmName" type="xs:string" use="optional"/>
    <!-- Required; must be one of the TIMESERIES-ALGORITHM values. -->
    <xs:attribute name="bestFit" type="TIMESERIES-ALGORITHM" use="required"/>
  </xs:complexType>
</xs:element>
<!-- TIMESERIES-ALGORITHM: names of the time series algorithms; used as the
     type of TimeSeriesModel/@bestFit. -->
<xs:simpleType name="TIMESERIES-ALGORITHM">
  <xs:restriction base="xs:string">
    <xs:enumeration value="ARIMA"/>
    <xs:enumeration value="ExponentialSmoothing"/>
    <xs:enumeration value="SeasonalTrendDecomposition"/>
    <xs:enumeration value="SpectralAnalysis"/>
  </xs:restriction>
</xs:simpleType>
<!-- TimeSeries: a time series made up of TimeValue points, optionally tied to
     a calendar through TimeAnchor and up to two TimeException elements. -->
<xs:element name="TimeSeries">
  <xs:complexType>
    <xs:sequence>
      <xs:element ref="TimeAnchor" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="TimeException" minOccurs="0" maxOccurs="2"/>
      <xs:element ref="TimeValue" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

    <!-- Role of the series; defaults to "original". -->
    <xs:attribute name="usage" type="TIMESERIES-USAGE" default="original"/>
    <!-- Bounds of the input points that were used for fitting. -->
    <xs:attribute name="startTime" type="REAL-NUMBER"/>
    <xs:attribute name="endTime" type="REAL-NUMBER"/>
    <!-- Method used to compute values between data points; defaults to "none". -->
    <xs:attribute name="interpolationMethod" type="INTERPOLATION-METHOD" default="none"/>
  </xs:complexType>
</xs:element>
<!-- TIMESERIES-USAGE: allowed values of TimeSeries/@usage. -->
<xs:simpleType name="TIMESERIES-USAGE">
  <xs:restriction base="xs:string">
    <xs:enumeration value="original"/>
    <xs:enumeration value="logical"/>
    <xs:enumeration value="prediction"/>
  </xs:restriction>
</xs:simpleType>
<!-- TimeValue: one single point of a time series. Only "value" is required by
     the schema; "index" or "time" locate the point, and "standardError" may
     accompany a predicted value. -->
<xs:element name="TimeValue">
  <xs:complexType>
    <xs:sequence>
      <!-- Optional presentation string describing the time. -->
      <xs:element ref="Timestamp" minOccurs="0" maxOccurs="1"/>
    </xs:sequence>

    <xs:attribute name="index" type="INT-NUMBER" use="optional"/>
    <xs:attribute name="time" type="NUMBER" use="optional"/>
    <xs:attribute name="value" type="REAL-NUMBER" use="required"/>
    <xs:attribute name="standardError" type="REAL-NUMBER" use="optional"/>
  </xs:complexType>
</xs:element>
<!-- TimeAnchor: optional link between the time points of a series and a
     calendar. "type" names the calendar reference point and unit, "offset"
     the anchor position and "stepsize" the size of the smallest time step. -->
<xs:element name="TimeAnchor">
  <xs:complexType>
    <xs:sequence>
      <xs:element ref="TimeCycle" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element ref="TimeException" minOccurs="0" maxOccurs="2"/>
    </xs:sequence>

    <xs:attribute name="type" type="TIME-ANCHOR"/>
    <xs:attribute name="offset" type="INT-NUMBER"/>
    <xs:attribute name="stepsize" type="INT-NUMBER"/>
    <!-- Optional name for the time step, e.g. "day". -->
    <xs:attribute name="displayName" use="optional"/>
  </xs:complexType>
</xs:element>
<!-- TimeCycle: splits the sequence of time steps into cycles of "length"
     steps (indexed 0 to length - 1) and states which steps are valid. -->
<xs:element name="TimeCycle">
  <xs:complexType>
    <xs:sequence>
      <!-- Interval boundaries or enumerated step indexes, depending on "type". -->
      <xs:group ref="INT-ARRAY" minOccurs="0" maxOccurs="1"/>
    </xs:sequence>

    <xs:attribute name="length" type="INT-NUMBER"/>
    <xs:attribute name="type" type="VALID-TIME-SPEC"/>
    <xs:attribute name="displayName" use="optional"/>
  </xs:complexType>
</xs:element>
<!-- TIME-ANCHOR: calendar reference point and unit for TimeAnchor/@type.
     The value names follow the pattern <unit>Since[<year>].
     The milliseconds and seconds values were previously misspelled
     ("Millisecdonds", "Secdonds"), which rejected correctly spelled values
     such as dateTimeSecondsSince[1960] used in the examples. -->
<xs:simpleType name="TIME-ANCHOR">
  <xs:restriction base="xs:string">
    <xs:enumeration value="dateTimeMillisecondsSince[0]"/>
    <xs:enumeration value="dateTimeMillisecondsSince[1960]"/>
    <xs:enumeration value="dateTimeMillisecondsSince[1970]"/>
    <xs:enumeration value="dateTimeMillisecondsSince[1980]"/>
    <xs:enumeration value="dateTimeSecondsSince[0]"/>
    <xs:enumeration value="dateTimeSecondsSince[1960]"/>
    <xs:enumeration value="dateTimeSecondsSince[1970]"/>
    <xs:enumeration value="dateTimeSecondsSince[1980]"/>
    <xs:enumeration value="dateDaysSince[0]"/>
    <xs:enumeration value="dateDaysSince[1960]"/>
    <xs:enumeration value="dateDaysSince[1970]"/>
    <xs:enumeration value="dateDaysSince[1980]"/>
    <xs:enumeration value="dateMonthsSince[0]"/>
    <xs:enumeration value="dateMonthsSince[1960]"/>
    <xs:enumeration value="dateMonthsSince[1970]"/>
    <xs:enumeration value="dateMonthsSince[1980]"/>
    <xs:enumeration value="dateYearsSince[0]"/>
  </xs:restriction>
</xs:simpleType>
<!-- VALID-TIME-SPEC: how a TimeCycle states its valid steps, by interval
     (FromTo) or enumeration (Set), and by inclusion or exclusion. -->
<xs:simpleType name="VALID-TIME-SPEC">
  <xs:restriction base="xs:string">
    <xs:enumeration value="includeAll"/>
    <xs:enumeration value="includeFromTo"/>
    <xs:enumeration value="excludeFromTo"/>
    <xs:enumeration value="includeSet"/>
    <xs:enumeration value="excludeSet"/>
  </xs:restriction>
</xs:simpleType>
<!-- TimeException: unsystematic exclusions from, or inclusions into, the
     regular time step pattern, given as an array of index values.
     NOTE(review): the content model admits exactly one array, whereas the
     accompanying example shows two arrays with count="2". Confirm whether
     several arrays per element are intended. -->
<xs:element name="TimeException">
  <xs:complexType>
    <xs:sequence>
      <xs:group ref="INT-ARRAY" minOccurs="1"/>
    </xs:sequence>

    <xs:attribute name="type" type="TIME-EXCEPTION-TYPE"/>
    <xs:attribute name="count" type="INT-NUMBER"/>
  </xs:complexType>
</xs:element>
<!-- TIME-EXCEPTION-TYPE: whether a TimeException removes or adds time steps. -->
<xs:simpleType name="TIME-EXCEPTION-TYPE">
  <xs:restriction base="xs:string">
    <xs:enumeration value="exclude"/>
    <xs:enumeration value="include"/>
  </xs:restriction>
</xs:simpleType>
<!-- INTERPOLATION-METHOD: method used to compute values between existing
     (or predicted) data points; type of TimeSeries/@interpolationMethod. -->
<xs:simpleType name="INTERPOLATION-METHOD">
  <xs:restriction base="xs:string">
    <xs:enumeration value="none"/>
    <xs:enumeration value="linear"/>
    <xs:enumeration value="exponentialSpline"/>
    <xs:enumeration value="cubicSpline"/>
  </xs:restriction>
</xs:simpleType>
|
The element TimeSeries contains a time series consisting of several
TimeValue objects. The time series can
be an original time series as read from the input data; in this case the
attribute interpolationMethod is 'none'.
Or it can be a pre-processed and interpolated time series;
pre-processing and interpolation may be necessary to produce a logical time series,
for most time series algorithms require a sequence of logically
equidistant time steps.
If a logical time series is present, it was used as input into the algorithm.
Finally, the time series (usage = 'prediction') may hold values predicted by the best-fitting model.
The attributes startTime and endTime refer to points
in an input time series between which the points were used for fitting.
They can be integers indicating the index into a logical time series
or real numbers indicating original points in time.
The attribute interpolationMethod
names the interpolation method used to compute values between
existing (or predicted) data points. It is one of {'none', 'linear',
'exponentialSpline', 'cubicSpline'}.
TimeValue contains one single point of a time series.
The point can either be a known point from the past; in
this case, only the attribute value
is required. In addition, time or index must be used.
In case of a logical TimeSeries, index values must be present.
Or the time point is a predicted future value;
in this case, the attribute standardError
can contain the uncertainty (predicted standard error)
of the prediction, based on the empirically determined error.
Note: TimeAnchor and TimeCycle define the correlation to
calendar times. Optionally, a contained element Timestamp
may hold a string describing the time for presentation purposes,
see Header.
TimeAnchor optionally defines the relationship between time points in
a time series and a calendar. It is not used for computing predictions, but it may be
used by applications or visualization tools that want to come up with predictions based on
points of time in a calendar as opposed to just a look-ahead index.
Time is anchored at an offset with respect
to a specified calendar point given by type.
And the flow of time is defined in smallest steps of size stepsize.
Both offset and stepsize are (long) integer values in the
units specified in type. An optional displayName,
e.g. "day" can be provided as a name for the time step.
TimeCycle makes it possible to express the
situation where time steps are not contiguous on a calendar. As an
example, consider hourly revenue data of a store that opens Mo-Sat from 7am to 9pm.
One has to represent hours as being the step size of the data, but one also
wants to be able to specify that Sundays and night-shifts should be disregarded, i.e.
for the time series prediction, the value for Monday 8am (aggregated revenue
between 7am and 8am) immediately follows that of Saturday 9pm.
Each TimeCycle divides the sequence of time steps defined by the previous
TimeCycle (or the TimeAnchor) into cycles of equal length, each cycle
consisting of length steps. Index
values for these steps run from 0 to length - 1, and are used in the
specification of valid steps. Type
defines whether this definition is by interval or enumeration and whether by
inclusion or exclusion, and the contained Array
element provides the interval boundaries or enumerates the values. The
following is the specification of the shop hours in the example:
<!-- Shop hours: hourly steps, open from hour index 7 through 20 of each day,
     with day index 6 of each week excluded. -->
<TimeAnchor type="dateTimeSecondsSince[1960]" offset="1530543600"
            stepsize="3600" displayName="hour">
  <TimeCycle length="24" type="includeFromTo" displayName="day">
    <Array n="2" type="int">7 20</Array>
  </TimeCycle>
  <TimeCycle length="7" type="excludeSet" displayName="week">
    <Array n="1" type="int">6</Array>
  </TimeCycle>
</TimeAnchor>
|
Calendar entries can now be described as a sequence of values. The 15th
hour of the 6th day in the 30th week since the time
anchor (1530543600 seconds after the beginning of 1960) would become <29, 5,
14>.
In addition to the regular behavior, there may be exceptions to the TimeCycle
specification. The store may, for example, be closed on July 4th,
but exceptionally open late because of an event on some other day. This is
captured by up to two TimeException elements,
which contain lists of unsystematic exclusions or inclusions as arrays of index
values. All index values of a certain TimeCycle can be specified by using the
length value instead of a valid index; -1 is used for the regular indexes.
The following TimeExceptions specify additional shop closure and opening hours.
<TimeException type="exclude" count="2">
  <!-- closed in the 5th week on the 6th day at the 8th hour -->
  <Array type="int">4 5 7</Array>
  <!-- closed in the 33rd week, throughout the 1st day -->
  <Array type="int">32 0 24</Array>
</TimeException>
<TimeException type="include" count="2">
  <!-- open in the 1st week on the 7th day at regular hours -->
  <Array type="int">0 6 -1</Array>
  <!-- open in the 34th week on the 6th day at the 20th hour -->
  <Array type="int">33 5 19</Array>
</TimeException>
|
ExponentialSmoothing
contains an exponential smoothing model for the time series.
It is one out of the
15 possible model type combinations (no trend N,
additive trend A,
damped additive trend DA,
multiplicative trend M, damped
multiplicative trend DM) *
(no seasonality N, additive seasonality A,
multiplicative seasonality M).
If the model contains a seasonality,
the seasonality info is captured in the Seasonality
sub-element. Each TimeValue
sub-element contains one predicted time point. The predicted time points are
calculated from Gardner's (1)
model for the given Trend combined
with the Seasonality type. The
number of predicted time points contained in the model may be determined by the
modeling kernel, for example by using the uncertainty ranges of each
prediction.
<!-- ExponentialSmoothing: an exponential smoothing model for the time series.
     Level is mandatory; Trend and Seasonality_ExpoSmooth are present only if
     the model has those components. Each TimeValue holds one predicted point. -->
<xs:element name="ExponentialSmoothing">
  <xs:complexType>
    <xs:sequence>
      <xs:element ref="Level" minOccurs="1" maxOccurs="1"/>
      <xs:element ref="Trend" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="Seasonality_ExpoSmooth" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="TimeValue" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

    <!-- Root mean squared error of the predictions. -->
    <xs:attribute name="RMSE" type="REAL-NUMBER"/>

    <!-- Transformation applied to the series before running the algorithm;
         defaults to "none". -->
    <xs:attribute name="transformation" default="none">
      <xs:simpleType>
        <xs:restriction base="xs:NMTOKEN">
          <xs:enumeration value="none"/>
          <xs:enumeration value="logarithmic"/>
          <xs:enumeration value="squareroot"/>
        </xs:restriction>
      </xs:simpleType>
    </xs:attribute>
  </xs:complexType>
</xs:element>
|
RMSE is the root mean squared error of the predictions.
Alpha is the smoothing parameter for the level.
Gamma is the smoothing parameter for the trend.
Delta is the smoothing parameter for seasonal indices.
Phi is the autoregressive or damping parameter.
Transformation specifies what
transformation has been applied to the time series prior to executing the
algorithm. Possible values are "none", "logarithmic"
and "squareroot".
Seasonality_ExpoSmooth describes a periodic
oscillation cycle with a length of period
time units, where period must be a positive integer. The
phase indicates the season index of the last known data point; it
defaults to period. The oscillation can be additive, that means of the
form 'trend + oscillation' or multiplicative, that means of the form
'trend * oscillation'. Unit is a string
used for naming the cycles, such as "week" or "year". It is
optional and serves only for explanatory purposes. The sub-element
RealArray (of size period) contains
floating point numbers which describe the local values of the oscillation at
each of the season indices. In the additive case, the sum of all these numbers
may be normalized to 0. In the multiplicative case, the product of all these
numbers may be normalized to 1.
<!-- Seasonality_ExpoSmooth: a periodic oscillation, additive or
     multiplicative, with a cycle length of "period" time units. The
     contained real array holds the oscillation values at the season indices. -->
<xs:element name="Seasonality_ExpoSmooth">
  <xs:complexType>
    <xs:sequence>
      <xs:group ref="REAL-ARRAY"/>
    </xs:sequence>

    <xs:attribute name="type" use="required">
      <xs:simpleType>
        <xs:restriction base="xs:NMTOKEN">
          <xs:enumeration value="additive"/>
          <xs:enumeration value="multiplicative"/>
        </xs:restriction>
      </xs:simpleType>
    </xs:attribute>
    <xs:attribute name="period" type="INT-NUMBER" use="required"/>
    <!-- Name for the cycles, e.g. "week"; explanatory only. -->
    <xs:attribute name="unit" type="xs:string" use="optional"/>
    <!-- Season index of the last known data point. -->
    <xs:attribute name="phase" type="INT-NUMBER" use="optional"/>
    <!-- Smoothing parameter for the seasonal indices. -->
    <xs:attribute name="delta" type="REAL-NUMBER" use="optional"/>
  </xs:complexType>
</xs:element>
|
Level specifies smoothedValue,
the smoothed value of the time series at the last known point of the history.
The optional quadraticSmoothedValue and cubicSmoothedValue
attributes are additional smoothed values used for double and triple
exponential smoothing. The
optional attribute alpha is the optimal
smoothing parameter for the level. It can be used to continue the fitting
process if more data become known, but it is not needed for scoring.
However, it may be used to compute theoretical confidence intervals.
<!-- Level: smoothed value(s) of the time series at the last known point of
     the history, plus the optional smoothing parameter alpha. -->
<xs:element name="Level">
  <xs:complexType>
    <!-- Smoothing parameter for the level; not needed for scoring. -->
    <xs:attribute name="alpha" type="REAL-NUMBER" use="optional"/>
    <xs:attribute name="smoothedValue" type="REAL-NUMBER"/>
    <!-- Additional smoothed values for double and triple exponential smoothing. -->
    <xs:attribute name="quadraticSmoothedValue" type="REAL-NUMBER"/>
    <xs:attribute name="cubicSmoothedValue" type="REAL-NUMBER"/>
  </xs:complexType>
</xs:element>
|
Trend specifies the smoothed value of the
trend at the last known point of the history. The optional attribute
gamma is the optimal smoothing parameter for the trend. It can be used
to continue the fitting process if more data become known, but it is not needed
for scoring. The damping parameter phi
is needed for prediction.
<!-- Trend: smoothed value of the trend at the last known point of the
     history, together with the trend type and its parameters. -->
<xs:element name="Trend">
  <xs:complexType>
    <!-- Kind of trend; defaults to "additive". -->
    <xs:attribute name="trend" default="additive">
      <xs:simpleType>
        <xs:restriction base="xs:NMTOKEN">
          <xs:enumeration value="additive"/>
          <xs:enumeration value="damped_additive"/>
          <xs:enumeration value="multiplicative"/>
          <xs:enumeration value="damped_multiplicative"/>
          <xs:enumeration value="double_exponential"/>
        </xs:restriction>
      </xs:simpleType>
    </xs:attribute>
    <!-- Smoothing parameter for the trend; not needed for scoring. -->
    <xs:attribute name="gamma" type="REAL-NUMBER" use="optional"/>
    <!-- Damping parameter, needed for prediction; defaults to 1. -->
    <xs:attribute name="phi" type="REAL-NUMBER" use="optional" default="1"/>
    <xs:attribute name="smoothedValue" type="REAL-NUMBER" use="required"/>
  </xs:complexType>
</xs:element>
|
The following elements are not used in this version of PMML and only
serve as placeholders for future versions.
SpectralAnalysis describes the Fourier spectrum of a time series.
ARIMA may contain one or more ARIMA(p,d,q,P,D,Q) models of
the time series.
SeasonalTrendDecomposition contains one or more fit functions which
represent the trend component of the time series and optionally
contain information on
seasonal oscillations which are modeled on top of the trend component.
<!-- Placeholder declarations: no content model is defined for these
     elements in this version. -->
<xs:element name="SpectralAnalysis"/>
<xs:element name="ARIMA"/>
<xs:element name="SeasonalTrendDecomposition"/>
|
Example for a time series model:
<?xml version="1.0" encoding="UTF-8"?>
<PMML version="4.0" xmlns="http://www.dmg.org/PMML-4_0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <Header copyright="DMG.org">
    <Application name="test application"/>
    <Timestamp>2008-06-23 10:30:00</Timestamp>
  </Header>
  <DataDictionary numberOfFields="2">
    <DataField dataType="dateTimeSecondsSince[1970]"
               optype="continuous" name="TS" displayName="TS"/>
    <DataField dataType="double"
               optype="continuous" name="VALUE" displayName="TS-VALUE"/>
  </DataDictionary>
  <!-- bestFit is required by the TimeSeriesModel schema. -->
  <TimeSeriesModel modelName="AA2Model" functionName="timeSeries"
                   algorithmName="exponential smoothing"
                   bestFit="ExponentialSmoothing">
    <MiningSchema>
      <MiningField name="TS" usageType="order"/>
      <MiningField name="VALUE" usageType="predicted"/>
    </MiningSchema>
    <!-- Logical input series. The schema allows TimeAnchor only as the first
         child of TimeSeries, so the calendar definition lives here. -->
    <TimeSeries usage="logical" startTime="1" endTime="24"
                interpolationMethod="none">
      <TimeAnchor type="dateTimeSecondsSince[1960]"
                  offset="1530543600"
                  stepsize="3600" displayName="hour">
        <TimeCycle length="24" type="includeFromTo" displayName="day">
          <Array n="2" type="int">7 20</Array>
        </TimeCycle>
        <TimeCycle length="7" type="excludeSet" displayName="week">
          <Array n="1" type="int">6</Array>
        </TimeCycle>
      </TimeAnchor>
      <TimeValue index="1" value="112"/>
      <TimeValue index="2" value="118"/>
      <TimeValue index="3" value="132"/>
      <TimeValue index="4" value="129"/>
      <TimeValue index="5" value="121"/>
      <TimeValue index="6" value="135"/>
      <TimeValue index="7" value="148"/>
      <TimeValue index="8" value="148"/>
      <TimeValue index="9" value="136"/>
      <TimeValue index="10" value="119"/>
      <TimeValue index="11" value="104"/>
      <TimeValue index="12" value="118"/>
      <TimeValue index="13" value="115"/>
      <TimeValue index="14" value="126"/>
      <TimeValue index="15" value="141"/>
      <TimeValue index="16" value="135"/>
      <TimeValue index="17" value="125"/>
      <TimeValue index="18" value="149"/>
      <TimeValue index="19" value="170"/>
      <TimeValue index="20" value="170"/>
      <TimeValue index="21" value="158"/>
      <TimeValue index="22" value="133"/>
      <TimeValue index="23" value="114"/>
      <TimeValue index="24" value="140"/>
    </TimeSeries>
    <!-- Predicted values; standardError is the attribute the TimeValue
         schema defines for the uncertainty of a prediction. -->
    <TimeSeries usage="prediction" interpolationMethod="none">
      <TimeValue index="25" value="145" standardError="7.3"/>
      <TimeValue index="26" value="150" standardError="8.3"/>
      <TimeValue index="27" value="178" standardError="9.3"/>
      <TimeValue index="28" value="163" standardError="10.3"/>
      <TimeValue index="29" value="172" standardError="11.3"/>
      <TimeValue index="30" value="178" standardError="12.3"/>
      <TimeValue index="31" value="199" standardError="13.3"/>
      <TimeValue index="32" value="199" standardError="14.3"/>
      <TimeValue index="33" value="184" standardError="15.3"/>
      <TimeValue index="34" value="162" standardError="16.3"/>
      <TimeValue index="35" value="146" standardError="17.3"/>
      <TimeValue index="36" value="166" standardError="18.3"/>
    </TimeSeries>
    <ExponentialSmoothing RMSE="7.3">
      <Level alpha="0.233984" smoothedValue="139.8"/>
      <Trend smoothedValue="4.139" gamma="3.910E-4"
             phi="1.006" trend="damped_additive"/>
      <Seasonality_ExpoSmooth type="multiplicative" period="12"
                              unit="month" delta="0.8254" phase="12">
        <Array n="12" type="real">
          .900 .840 .924 .976 .994 1.120 0.981 1.025 1.038 1.038 0.908 1.259
        </Array>
      </Seasonality_ExpoSmooth>
    </ExponentialSmoothing>
  </TimeSeriesModel>
</PMML>
|