PMML 4.0 - Time Series Models
PMML4.0 Menu

Home

PMML Notice and License

Changes

XML Schema

Conformance

Interoperability

General Structure

Header

Data
Dictionary


Mining
Schema


Transformations

Statistics

Taxonomy

Targets

Output

Functions

Built-in Functions

Model Verification

Model Explanation

Multiple Models

Association Rules

Cluster
Models


General
Regression


Naive
Bayes


Neural
Network


Regression

Ruleset

Sequences

Text Models

Time Series

Trees

Vector Machine

PMML 4.0 - Time Series Models

A Time Series is a sequence of data points, measured at points in time, usually, but not necessarily, forming equidistant intervals. Time series analysis strives to understand such time series, often with the goal of making forecasts (predictions) or of filling in missing values between known data points. Time series prediction is the use of a model to predict future events based on known past events before they are measured. Interpolation is the use of a model to complement or amend values between known data points.

The model must contain information on the general trend, a description of periodic behavior and an overall fitting function that can be used for forecasting and/or interpolation. It may also contain detailed information on various aspects of the time series and the expected forecasting accuracy.

In addition to the entries common to all models, a TimeSeriesModel contains results of at least one time series algorithm, for example SpectralAnalysis, ARIMA, ExponentialSmoothing or SeasonalTrendDecomposition. In PMML 4.0, only ExponentialSmoothing is defined; the other algorithms are planned for later versions. There are up to three TimeSeries elements holding original or predicted time series values.


  <!-- TimeSeriesModel: model-level container.
       After the entries shared with other model types it may hold up to
       three TimeSeries elements and at most one instance of each algorithm
       element. The required bestFit attribute takes one of the
       TIMESERIES-ALGORITHM values. -->
  <xs:element name="TimeSeriesModel">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="Extension" minOccurs="0" maxOccurs="unbounded"/>
        <xs:element ref="MiningSchema"/>
        <xs:element ref="Output" minOccurs="0"/>
        <xs:element ref="ModelStats" minOccurs="0"/>
        <xs:element ref="LocalTransformations" minOccurs="0"/>
        <xs:element ref="TimeSeries" minOccurs="0" maxOccurs="3"/>
        <!-- algorithm results: each is optional and may appear once -->
        <xs:element ref="SpectralAnalysis" minOccurs="0"/>
        <xs:element ref="ARIMA" minOccurs="0"/>
        <xs:element ref="ExponentialSmoothing" minOccurs="0"/>
        <xs:element ref="SeasonalTrendDecomposition" minOccurs="0"/>
        <xs:element ref="ModelVerification" minOccurs="0"/>
        <xs:element ref="Extension" minOccurs="0" maxOccurs="unbounded"/>
      </xs:sequence>
      <xs:attribute name="modelName" type="xs:string"/>
      <xs:attribute name="functionName" type="MINING-FUNCTION" use="required"/>
      <xs:attribute name="algorithmName" type="xs:string"/>
      <xs:attribute name="bestFit" type="TIMESERIES-ALGORITHM" use="required"/>
    </xs:complexType>
  </xs:element>
    
  <!-- TIMESERIES-ALGORITHM: names of the algorithm elements a
       TimeSeriesModel may contain; used as the type of bestFit. -->
  <xs:simpleType name="TIMESERIES-ALGORITHM">
    <xs:restriction base="xs:string">
      <xs:enumeration value="ARIMA"/>
      <xs:enumeration value="ExponentialSmoothing"/>
      <xs:enumeration value="SeasonalTrendDecomposition"/>
      <xs:enumeration value="SpectralAnalysis"/>
    </xs:restriction>
  </xs:simpleType>
    
  <!-- TimeSeries: an optional TimeAnchor, up to two TimeException elements
       and any number of TimeValue points. usage defaults to "original" and
       interpolationMethod defaults to "none". -->
  <xs:element name="TimeSeries">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="TimeAnchor" minOccurs="0"/>
        <xs:element ref="TimeException" minOccurs="0" maxOccurs="2"/>
        <xs:element ref="TimeValue" minOccurs="0" maxOccurs="unbounded"/>
      </xs:sequence>
      <xs:attribute name="usage" type="TIMESERIES-USAGE" default="original"/>
      <xs:attribute name="startTime" type="REAL-NUMBER"/>
      <xs:attribute name="endTime" type="REAL-NUMBER"/>
      <xs:attribute name="interpolationMethod" type="INTERPOLATION-METHOD" default="none"/>
    </xs:complexType>
  </xs:element>
    
  <!-- TIMESERIES-USAGE: role of a TimeSeries element (type of its
       usage attribute). -->
  <xs:simpleType name="TIMESERIES-USAGE">
    <xs:restriction base="xs:string">
      <xs:enumeration value="original"/>
      <xs:enumeration value="logical"/>
      <xs:enumeration value="prediction"/>
    </xs:restriction>
  </xs:simpleType>
    
  <!-- TimeValue: a single point of a time series. Only value is required;
       index, time and standardError are optional, as is the contained
       Timestamp element. -->
  <xs:element name="TimeValue">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="Timestamp" minOccurs="0"/>
      </xs:sequence>
      <xs:attribute name="index" type="INT-NUMBER"/>
      <xs:attribute name="time" type="NUMBER"/>
      <xs:attribute name="value" type="REAL-NUMBER" use="required"/>
      <xs:attribute name="standardError" type="REAL-NUMBER"/>
    </xs:complexType>
  </xs:element>
    
  <!-- TimeAnchor: ties the steps of a time series to a calendar.
       type selects the calendar unit and origin (see TIME-ANCHOR); offset and
       stepsize are integers. It may contain any number of TimeCycle elements
       followed by up to two TimeException elements. -->
  <xs:element name="TimeAnchor">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="TimeCycle" minOccurs="0" maxOccurs="unbounded"/>
        <xs:element ref="TimeException" minOccurs="0" maxOccurs="2"/>
      </xs:sequence>
      <xs:attribute name="type" type="TIME-ANCHOR"/>
      <xs:attribute name="offset" type="INT-NUMBER"/>
      <xs:attribute name="stepsize" type="INT-NUMBER"/>
      <!-- Explicitly typed: an attribute declared without a type is
           xs:anySimpleType, which accepts the same values but gives
           data-binding tools nothing to map to. -->
      <xs:attribute name="displayName" type="xs:string" use="optional"/>
    </xs:complexType>
  </xs:element>
    
  <!-- TimeCycle: one level of cyclic subdivision of the time steps.
       length is the number of steps per cycle; type (see VALID-TIME-SPEC)
       says how the optional integer array selects the valid steps. -->
  <xs:element name="TimeCycle">
    <xs:complexType>
      <xs:sequence>
        <xs:group ref="INT-ARRAY" minOccurs="0" maxOccurs="1"/>
      </xs:sequence>
      <xs:attribute name="length" type="INT-NUMBER"/>
      <xs:attribute name="type" type="VALID-TIME-SPEC"/>
      <!-- Explicitly typed: an attribute declared without a type is
           xs:anySimpleType, which accepts the same values but gives
           data-binding tools nothing to map to. -->
      <xs:attribute name="displayName" type="xs:string" use="optional"/>
    </xs:complexType>
  </xs:element>
    
  <!-- TIME-ANCHOR: calendar unit and origin used by TimeAnchor/@type.
       The "Seconds" values were previously misspelled "Secdonds", so
       instance documents using the correct spelling (for example
       dateTimeSecondsSince[1960]) did not validate.
       NOTE(review): documents written against the misspelled values will
       need updating. -->
  <xs:simpleType name="TIME-ANCHOR">
    <xs:restriction base="xs:string">
      <xs:enumeration value="dateTimeMillisecondsSince[0]"/>
      <xs:enumeration value="dateTimeMillisecondsSince[1960]"/>
      <xs:enumeration value="dateTimeMillisecondsSince[1970]"/>
      <xs:enumeration value="dateTimeMillisecondsSince[1980]"/>
      <xs:enumeration value="dateTimeSecondsSince[0]"/>
      <xs:enumeration value="dateTimeSecondsSince[1960]"/>
      <xs:enumeration value="dateTimeSecondsSince[1970]"/>
      <xs:enumeration value="dateTimeSecondsSince[1980]"/>
      <xs:enumeration value="dateDaysSince[0]"/>
      <xs:enumeration value="dateDaysSince[1960]"/>
      <xs:enumeration value="dateDaysSince[1970]"/>
      <xs:enumeration value="dateDaysSince[1980]"/>
      <xs:enumeration value="dateMonthsSince[0]"/>
      <xs:enumeration value="dateMonthsSince[1960]"/>
      <xs:enumeration value="dateMonthsSince[1970]"/>
      <xs:enumeration value="dateMonthsSince[1980]"/>
      <xs:enumeration value="dateYearsSince[0]"/>
    </xs:restriction>
  </xs:simpleType>
    
  <!-- VALID-TIME-SPEC: how a TimeCycle's array selects valid steps,
       either by interval (FromTo) or enumeration (Set), and either by
       inclusion or exclusion. -->
  <xs:simpleType name="VALID-TIME-SPEC">
    <xs:restriction base="xs:string">
      <xs:enumeration value="includeAll"/>
      <xs:enumeration value="includeFromTo"/>
      <xs:enumeration value="excludeFromTo"/>
      <xs:enumeration value="includeSet"/>
      <xs:enumeration value="excludeSet"/>
    </xs:restriction>
  </xs:simpleType>
  
  <!-- TimeException: a list of unsystematic exclusions or inclusions, each
       given as one integer array of cycle index values.
       The array group is repeatable: a single occurrence could not express
       the several arrays that the count attribute and the accompanying
       examples call for. At least one array remains mandatory. -->
  <xs:element name="TimeException">
    <xs:complexType>
      <xs:sequence>
        <xs:group ref="INT-ARRAY" minOccurs="1" maxOccurs="unbounded"/>
      </xs:sequence>
      <xs:attribute name="type" type="TIME-EXCEPTION-TYPE"/>
      <xs:attribute name="count" type="INT-NUMBER"/>
    </xs:complexType>
  </xs:element>
  
  <!-- TIME-EXCEPTION-TYPE: whether a TimeException lists exclusions or
       inclusions. -->
  <xs:simpleType name="TIME-EXCEPTION-TYPE">
    <xs:restriction base="xs:string">
      <xs:enumeration value="exclude"/>
      <xs:enumeration value="include"/>
    </xs:restriction>
  </xs:simpleType>

  <!-- INTERPOLATION-METHOD: method used to compute values between data
       points (type of TimeSeries/@interpolationMethod). -->
  <xs:simpleType name="INTERPOLATION-METHOD">
    <xs:restriction base="xs:string">
      <xs:enumeration value="none"/>
      <xs:enumeration value="linear"/>
      <xs:enumeration value="exponentialSpline"/>
      <xs:enumeration value="cubicSpline"/>
    </xs:restriction>
  </xs:simpleType>

The element TimeSeries contains a time series consisting of several TimeValue objects. The time series can be an original time series as read from the input data; in this case the attribute interpolationMethod is 'none'. Or it can be a pre-processed and interpolated time series; pre-processing and interpolation may be necessary to produce a logical time series, for most time series algorithms require a sequence of logically equidistant time steps. If a logical time series is present, it was used as input into the algorithm. Finally, the time series (usage = 'prediction') may hold values predicted by the best-fitting model.
The attributes startTime and endTime refer to points in an input time series between which the points were used for fitting. They can be integers indicating the index into a logical time series or real numbers indicating original points in time.
The attribute interpolationMethod names the interpolation method used to compute values between existing (or predicted) data points. It is one of {'none', 'linear', 'exponentialSpline', 'cubicSpline'}.

TimeValue contains one single point of a time series. The point can be a known point from the past; in this case, only the attribute value is required. In addition, time or index must be used. In case of a logical TimeSeries, index values must be present. Alternatively, the point can be a predicted future value; in this case, the attribute standardError can contain the uncertainty (predicted standard error) of the prediction, based on the empirically determined error.
Note: TimeAnchor and TimeCycle define the correlation to calendar times. Optionally, a contained element Timestamp may hold a string describing the time for presentation purposes, see Header.

TimeAnchor optionally defines the relationship between time points in a time series and a calendar. It is not used for computing predictions, but it may be used by applications or visualization tools that want to come up with predictions based on points of time in a calendar as opposed to just a look-ahead index. Time is anchored at an offset with respect to a specified calendar point given by type. And the flow of time is defined in smallest steps of size stepsize. Both offset and stepsize are (long) integer values in the units specified in type. An optional displayName, e.g. "day" can be provided as a name for the time step.

TimeCycle makes it possible to express the situation where time steps are not contiguous on a calendar. As an example, consider hourly revenue data of a store that opens Monday to Saturday from 7am to 9pm. One has to represent hours as being the step size of the data, but one also wants to be able to specify that Sundays and night-shifts should be disregarded, i.e. for the time series prediction, the value for Monday 8am (aggregated revenue between 7am and 8am) immediately follows that of Saturday 9pm.
Each TimeCycle divides the sequence of time steps defined by the previous TimeCycle (or the TimeAnchor) into cycles of equal length, each cycle consisting of length steps. Index values for these steps run from 0 to length - 1, and are used in the specification of valid steps. Type defines whether this definition is by interval or enumeration and whether by inclusion or exclusion, and the contained Array element provides the interval boundaries or enumerates the values. The following is the specification of the shop hours in the example:


  <TimeAnchor type="dateTimeSecondsSince[1960]" offset="1530543600"
              stepsize="3600" displayName="hour">
    <!-- each day has 24 hourly steps; steps 7 through 20 are valid -->
    <TimeCycle length="24" type="includeFromTo" displayName="day">
      <Array n="2" type="int">7 20</Array>
    </TimeCycle>
    <!-- each week has 7 daily steps; step 6 is excluded -->
    <TimeCycle length="7" type="excludeSet" displayName="week">
      <Array n="1" type="int">6</Array>
    </TimeCycle>
  </TimeAnchor>

Calendar entries can now be described as a sequence of values. The 15th hour of the 6th day in the 30th week since the time anchor (1530543600 seconds after the beginning of 1960) would become <29, 5, 14>.
In addition to the regular behavior, there may be exceptions to the TimeCycle specification. The store may, for example, be closed on July 4th, but exceptionally open late because of an event on some other day. This is captured by up to two TimeException elements, which contain lists of unsystematic exclusions or inclusions as arrays of index values. All index values of a certain TimeCycle can be specified by using the length value instead of a valid index; -1 is used for the regular indexes.
The following TimeExceptions specify additional shop closure and opening hours.


  <TimeException type="exclude" count="2">
    <!-- closed in the 5th week on the 6th day at the 8th hour -->
    <Array type="int">4 5 7</Array>
    <!-- closed in the 33rd week, throughout the 1st day -->
    <Array type="int">32 0 24</Array>
  </TimeException>

  <TimeException type="include" count="2">
    <!-- open in the 1st week on the 7th day at regular hours -->
    <Array type="int">0 6 -1</Array>
    <!-- open in the 34th week on the 6th day at the 20th hour -->
    <Array type="int">33 5 19</Array>
  </TimeException>

ExponentialSmoothing contains an exponential smoothing model for the time series. It is one out of the 15 possible model type combinations (no trend N, additive trend A, damped additive trend DA, multiplicative trend M, damped multiplicative trend DM) * (no seasonality N, additive seasonality A, multiplicative seasonality M). If the model contains a seasonality, the seasonality info is captured in the Seasonality_ExpoSmooth sub-element. Each TimeValue sub-element contains one predicted time point. The predicted time points are calculated from Gardner's (1) model for the given Trend combined with the Seasonality type. The number of predicted time points contained in the model may be determined by the modeling kernel, for example by using the uncertainty ranges of each prediction.


  <!-- ExponentialSmoothing: exactly one Level, then an optional Trend, an
       optional Seasonality_ExpoSmooth and any number of TimeValue elements.
       transformation defaults to "none". -->
  <xs:element name="ExponentialSmoothing">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="Level"/>
        <xs:element ref="Trend" minOccurs="0"/>
        <xs:element ref="Seasonality_ExpoSmooth" minOccurs="0"/>
        <xs:element ref="TimeValue" minOccurs="0" maxOccurs="unbounded"/>
      </xs:sequence>
      <xs:attribute name="RMSE" type="REAL-NUMBER"/>
      <xs:attribute name="transformation" default="none">
        <xs:simpleType>
          <xs:restriction base="xs:NMTOKEN">
            <xs:enumeration value="none"/>
            <xs:enumeration value="logarithmic"/>
            <xs:enumeration value="squareroot"/>
          </xs:restriction>
        </xs:simpleType>
      </xs:attribute>
    </xs:complexType>
  </xs:element>

RMSE is the root mean squared error of the predictions.
Alpha is the smoothing parameter for the level.
Gamma is the smoothing parameter for the trend.
Delta is the smoothing parameter for seasonal indices.
Phi is the autoregressive or damping parameter.

Transformation specifies what transformation has been applied to the time series prior to executing the algorithm. Possible values are "none," "logarithmic" and "squareroot."

Seasonality_ExpoSmooth describes a periodic oscillation cycle with a length of period time units, where period must be a positive integer. The phase indicates the season index of the last known data point; it defaults to period. The oscillation can be additive, that means of the form 'trend + oscillation' or multiplicative, that means of the form 'trend * oscillation'. Unit is a string used for naming the cycles, such as "week" or "year." It is optional and serves only for explanatory purposes. The sub-element RealArray (of size period) contains floating point numbers which describe the local values of the oscillation at each of the season indices. In the additive case, the sum of all these numbers may be normalized to 0. In the multiplicative case, the product of all these numbers may be normalized to 1.


  <!-- Seasonality_ExpoSmooth: seasonal component of an exponential smoothing
       model. Holds one real-valued array; type and period are required,
       while unit, phase and delta are optional. -->
  <xs:element name="Seasonality_ExpoSmooth">
    <xs:complexType>
      <xs:sequence>
        <xs:group ref="REAL-ARRAY"/>
      </xs:sequence>
      <xs:attribute name="type" use="required">
        <xs:simpleType>
          <xs:restriction base="xs:NMTOKEN">
            <xs:enumeration value="additive"/>
            <xs:enumeration value="multiplicative"/>
          </xs:restriction>
        </xs:simpleType>
      </xs:attribute>
      <xs:attribute name="period" type="INT-NUMBER" use="required"/>
      <xs:attribute name="unit" type="xs:string"/>
      <xs:attribute name="phase" type="INT-NUMBER"/>
      <xs:attribute name="delta" type="REAL-NUMBER"/>
    </xs:complexType>
  </xs:element>

Level specifies smoothedValue, the smoothed value of the time series at the last known point of the history. The optional quadraticSmoothedValue and cubicSmoothedValue values are additional smoothed values used for double and triple exponential smoothing. The optional attribute alpha is the optimal smoothing parameter for the level. It can be used to continue the fitting process if more data become known, but it is not needed for scoring. However, it may be used to compute theoretical confidence intervals.


  <!-- Level: attribute-only element carrying the smoothed level values and
       the optional smoothing parameter alpha. All attributes are optional. -->
  <xs:element name="Level">
    <xs:complexType>
      <xs:attribute name="alpha" type="REAL-NUMBER"/>
      <xs:attribute name="smoothedValue" type="REAL-NUMBER"/>
      <xs:attribute name="quadraticSmoothedValue" type="REAL-NUMBER"/>
      <xs:attribute name="cubicSmoothedValue" type="REAL-NUMBER"/>
    </xs:complexType>
  </xs:element>

Trend specifies the smoothed value of the trend at the last known point of the history. The optional attribute gamma is the optimal smoothing parameter for the trend. It can be used to continue the fitting process if more data become known, but it is not needed for scoring. The damping parameter phi is needed for prediction.


  <!-- Trend: attribute-only element describing the trend component.
       smoothedValue is required; trend defaults to "additive" and phi
       defaults to 1; gamma is optional. -->
  <xs:element name="Trend">
    <xs:complexType>
      <xs:attribute name="trend" default="additive">
        <xs:simpleType>
          <xs:restriction base="xs:NMTOKEN">
            <xs:enumeration value="additive"/>
            <xs:enumeration value="damped_additive"/>
            <xs:enumeration value="multiplicative"/>
            <xs:enumeration value="damped_multiplicative"/>
            <xs:enumeration value="double_exponential"/>
          </xs:restriction>
        </xs:simpleType>
      </xs:attribute>
      <xs:attribute name="gamma" type="REAL-NUMBER"/>
      <xs:attribute name="phi" type="REAL-NUMBER" default="1"/>
      <xs:attribute name="smoothedValue" type="REAL-NUMBER" use="required"/>
    </xs:complexType>
  </xs:element>

The following elements are not used in this version of PMML and only serve as placeholders for future versions.
SpectralAnalysis describes the Fourier spectrum of a time series.
ARIMA may contain one or more ARIMA(p,d,q,P,D,Q) models of the time series.
SeasonalTrendDecomposition contains one or more fit functions which represent the trend component of the time series and optionally contain information on seasonal oscillations which are modeled on top of the trend component.


  <!-- Placeholder declarations: no content model is given for these
       elements. -->
  <xs:element name="SpectralAnalysis"/>

  <xs:element name="ARIMA"/>

  <xs:element name="SeasonalTrendDecomposition"/>

Example for a time series model:


  <?xml version="1.0" encoding="UTF-8"?>
  <!-- Example TimeSeriesModel: 24 logical input points, 12 predicted points
       and the exponential smoothing model that produced them. -->
  <PMML version="4.0" xmlns="http://www.dmg.org/PMML-4_0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <Header copyright="DMG.org">
      <Application name="test application"/>
      <Timestamp>2008-06-23 10:30:00</Timestamp>
    </Header>
    <DataDictionary numberOfFields="2">
      <DataField dataType="dateTimeSecondsSince[1970]"
                 optype="continuous" name="TS" displayName="TS"/>
      <DataField dataType="double"
                 optype="continuous" name="VALUE" displayName="TS-VALUE"/>
    </DataDictionary>
    <!-- bestFit is a required attribute of TimeSeriesModel -->
    <TimeSeriesModel modelName="AA2Model" functionName="timeSeries"
                     algorithmName="exponential smoothing"
                     bestFit="ExponentialSmoothing">
      <MiningSchema>
        <MiningField name="TS" usageType="order"/>
        <MiningField name="VALUE" usageType="predicted"/>
      </MiningSchema>
      <TimeSeries usage="logical" startTime="1" endTime="24"
                  interpolationMethod="none">
        <!-- TimeAnchor is declared as a child of TimeSeries, not of
             TimeSeriesModel -->
        <TimeAnchor type="dateTimeSecondsSince[1960]"
                    offset="1530543600"
                    stepsize="3600" displayName="hour">
          <TimeCycle length="24" type="includeFromTo" displayName="day">
            <Array n="2" type="int">7 20</Array>
          </TimeCycle>
          <TimeCycle length="7" type="excludeSet" displayName="week">
            <Array n="1" type="int">6</Array>
          </TimeCycle>
        </TimeAnchor>
        <TimeValue index="1" value="112"/>
        <TimeValue index="2" value="118"/>
        <TimeValue index="3" value="132"/>
        <TimeValue index="4" value="129"/>
        <TimeValue index="5" value="121"/>
        <TimeValue index="6" value="135"/>
        <TimeValue index="7" value="148"/>
        <TimeValue index="8" value="148"/>
        <TimeValue index="9" value="136"/>
        <TimeValue index="10" value="119"/>
        <TimeValue index="11" value="104"/>
        <TimeValue index="12" value="118"/>
        <TimeValue index="13" value="115"/>
        <TimeValue index="14" value="126"/>
        <TimeValue index="15" value="141"/>
        <TimeValue index="16" value="135"/>
        <TimeValue index="17" value="125"/>
        <TimeValue index="18" value="149"/>
        <TimeValue index="19" value="170"/>
        <TimeValue index="20" value="170"/>
        <TimeValue index="21" value="158"/>
        <TimeValue index="22" value="133"/>
        <TimeValue index="23" value="114"/>
        <TimeValue index="24" value="140"/>
      </TimeSeries>
      <!-- predicted points carry their uncertainty in standardError, the
           attribute declared on TimeValue -->
      <TimeSeries usage="prediction" interpolationMethod="none">
        <TimeValue index="25" value="145" standardError="7.3"/>
        <TimeValue index="26" value="150" standardError="8.3"/>
        <TimeValue index="27" value="178" standardError="9.3"/>
        <TimeValue index="28" value="163" standardError="10.3"/>
        <TimeValue index="29" value="172" standardError="11.3"/>
        <TimeValue index="30" value="178" standardError="12.3"/>
        <TimeValue index="31" value="199" standardError="13.3"/>
        <TimeValue index="32" value="199" standardError="14.3"/>
        <TimeValue index="33" value="184" standardError="15.3"/>
        <TimeValue index="34" value="162" standardError="16.3"/>
        <TimeValue index="35" value="146" standardError="17.3"/>
        <TimeValue index="36" value="166" standardError="18.3"/>
      </TimeSeries>
      <ExponentialSmoothing RMSE="7.3">
        <Level alpha="0.233984" smoothedValue="139.8"/>
        <Trend smoothedValue="4.139" gamma="3.910E-4"
               phi="1.006" trend="damped_additive"/>
        <Seasonality_ExpoSmooth type="multiplicative" period="12"
                                unit="month" delta="0.8254" phase="12">
          <Array n="12" type="real">
          .900 .840 .924 .976 .994 1.120 0.981 1.025 1.038 1.038 0.908 1.259
          </Array>
        </Seasonality_ExpoSmooth>
      </ExponentialSmoothing>
    </TimeSeriesModel>
  </PMML>
e-mail info at dmg.org