Metro traffic forecasting¶

Title: Metro traffic forecasting
Author: Michal Bezak, Tangent Works
Industry: Transportation
Area: Public transport
Type: Forecasting

Description¶

Metro is one of the most popular and useful means of public transport across the globe. It cuts travelling time for millions of people every day, and so its availability (at the right capacity and time) is critical.

Operating a metro requires a precise management system. Accurate forecasts of the volume of passengers travelling on concrete lines on a certain day (and time) support decisions about timely and right-sized dispatch of resources - having the right number of cars prepared, with the right personnel.

Business parameters¶

Business objective: Optimal resource allocation
Business value: Customer (traveler) experience, Service availability, Employee experience
KPI: -
In [1]:
import logging
import pandas as pd
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import json
import datetime

from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt

import tim_client

Credentials and logging

(Do not forget to fill in your credentials in the credentials.json file)

In [2]:
with open('credentials.json') as f:
    credentials_json = json.load(f)                     # loading the credentials from credentials.json

TIM_URL = 'https://timws.tangent.works/v4/api'          # URL to which the requests are sent

SAVE_JSON = False                                       # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/'                            # folder where the requests and responses are stored

LOGGING_LEVEL = 'INFO'
In [3]:
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
In [4]:
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)

api_client.save_json = SAVE_JSON
api_client.json_saving_folder_path = JSON_SAVING_FOLDER
[INFO] 2021-01-15 10:59:25,533 - tim_client.api_client:save_json:66 - Saving JSONs functionality has been disabled
[INFO] 2021-01-15 10:59:25,535 - tim_client.api_client:json_saving_folder_path:75 - JSON destination folder changed to logs

Dataset¶

The data contain hourly traffic volume for Interstate 94 Westbound, ATR station 301, in Minnesota, US. They are enhanced with holiday and weather predictors.

Sampling and gaps¶

Data are sampled on an hourly basis and can contain occasional gaps.
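Such gaps can be located quickly with a pandas date-range difference; a minimal sketch on a toy frame (column names mirror the dataset, the values are illustrative only):

```python
import pandas as pd

# Toy frame with one missing hour (02:00) in an hourly-sampled series
df = pd.DataFrame({
    'date_time': pd.to_datetime([
        '2016-01-01 00:00:00',
        '2016-01-01 01:00:00',
        '2016-01-01 03:00:00',   # 02:00 is missing
    ]),
    'traffic_volume': [1513.0, 1550.0, 719.0],
})

# Build the complete hourly index and subtract the timestamps we have
full_range = pd.date_range(df['date_time'].min(), df['date_time'].max(), freq='h')
missing = full_range.difference(df['date_time'])
print(missing)   # DatetimeIndex(['2016-01-01 02:00:00'], ...)
```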

Data¶

| Column name | Description | Type | Availability |
| --- | --- | --- | --- |
| date_time | Hour of the data collected, in local CST time | Timestamp column | - |
| traffic_volume | I-94 ATR 301 reported westbound traffic volume | Target | t-1 |
| holiday | US national holidays plus regional holidays, incl. Minnesota State Fair (0 or 1) | Predictor | t+34 |
| temp | Average temperature in kelvin | Predictor | t+34 |
| rain_1h | mm of rain that occurred in the hour | Predictor | t+34 |
| snow_1h | mm of snow that occurred in the hour | Predictor | t+34 |
| clouds_all | Percentage of cloud cover | Predictor | t+34 |
| Clear | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Clouds | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Drizzle | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Fog | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Haze | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Mist | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Rain | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Smoke | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Snow | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Squall | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |
| Thunderstorm | Weather descriptor stored as binary value (0 or 1) | Predictor | t+34 |

For predictors with availability of t+N where N>0, it is assumed that forecast values are available.
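This availability translates directly into the shape of the dataset: the target column ends with NaNs, while predictor columns are filled all the way to the forecast horizon. An illustrative sketch on toy data (not the actual file):

```python
import numpy as np
import pandas as pd

# Toy frame: target available up to t-1, predictors up to t+34,
# so the last 34 rows have a NaN target but filled predictor values.
idx = pd.date_range('2018-09-28 00:00', periods=72, freq='h')
df = pd.DataFrame({
    'date_time': idx,
    'traffic_volume': 4000.0,   # placeholder target values
    'temp': 280.0,              # placeholder predictor values
})
df.loc[df.index[-34:], 'traffic_volume'] = np.nan   # last known target is at t-1

print(df['traffic_volume'].isna().sum(), df['temp'].isna().sum())   # 34 0
```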

Forecasting situation¶

TIM detects the forecasting situation from the current "shape" of the data, e.g. if the last target value is available at the 13:00 timestamp, it will start forecasting as of 14:00. It also takes that last 13:00 timestamp as a reference point against which the availability of each column in the dataset is determined - this rule is then followed during back-testing when calculating results for the out-of-sample interval.

In our case all predictor values were available 34 steps ahead.

We want to back-test the forecasting situation at 14:00, predicting values for the rest of the day and the whole next day. At this hour, the last available target value is for 13:00, and we will predict values until the end of the next day (23:00).
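For experimentation, such a "situation" dataset can be derived from a complete history by blanking the target after the cutoff timestamp while keeping the predictors. A sketch under the assumption of the column names above (`make_situation` is a hypothetical helper, not part of tim_client):

```python
import numpy as np
import pandas as pd

def make_situation(df: pd.DataFrame, last_target_ts: str,
                   target_col: str = 'traffic_volume') -> pd.DataFrame:
    """Blank out the target after the last known timestamp, keeping predictors."""
    df = df.copy()
    mask = pd.to_datetime(df['date_time']) > pd.Timestamp(last_target_ts)
    df.loc[mask, target_col] = np.nan
    return df

# Toy frame: one hourly row before and one after the cutoff
toy = pd.DataFrame({
    'date_time': ['2018-09-29 13:00:00', '2018-09-29 14:00:00'],
    'traffic_volume': [4553.0, 4600.0],
    'temp': [279.66, 280.31],
})
situation = make_situation(toy, '2018-09-29 13:00:00')
print(situation['traffic_volume'].tolist())   # [4553.0, nan]
```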

CSV file used in experiments can be downloaded here.

Source¶

The dataset was adapted from the Metro Interstate Traffic Volume Data Set shared at the UCI Machine Learning Repository.

Traffic data was provided by the Minnesota Department of Transportation.

Weather data was provided by OpenWeatherMap.

Read the dataset for the given situation.

In [7]:
data = tim_client.load_dataset_from_csv_file('metro_traffic_situation_at_14_2.csv', sep=',')
In [8]:
data.head()
Out[8]:
date_time traffic_volume holiday temp rain_1h snow_1h clouds_all Clear Clouds Drizzle Fog Haze Mist Rain Smoke Snow Squall Thunderstorm
0 2016-01-01 00:00:00 1513.0 1 265.94 0.0 0.0 90 0 0 0 0 1 0 0 0 0 0 0
1 2016-01-01 01:00:00 1550.0 1 266.00 0.0 0.0 90 0 0 0 0 0 0 0 0 1 0 0
2 2016-01-01 03:00:00 719.0 1 266.01 0.0 0.0 90 0 0 0 0 0 0 0 0 1 0 0
3 2016-01-01 04:00:00 533.0 1 264.80 0.0 0.0 90 0 1 0 0 0 0 0 0 0 0 0
4 2016-01-01 05:00:00 586.0 1 264.38 0.0 0.0 90 0 1 0 0 0 0 0 0 0 0 0

The NaN values at the end of the target (traffic_volume) column depict the reality of the data at the time of forecasting, i.e. at 14:00, with the last available data point at 13:00, we want to predict volume until the end of the next day.
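A quick way to verify the length of that trailing NaN block, shown here on a toy series (`trailing_nan_count` is a hypothetical helper):

```python
import numpy as np
import pandas as pd

def trailing_nan_count(s: pd.Series) -> int:
    """Number of consecutive NaNs at the end of the series."""
    non_nan = s.notna()
    if non_nan.all():
        return 0
    last_valid = non_nan[non_nan].index[-1]          # label of last non-NaN value
    return len(s) - s.index.get_loc(last_valid) - 1  # rows after that label

# Toy series mirroring the dataset tail: one known value, then 34 NaNs
s = pd.Series([4553.0] + [np.nan] * 34)
print(trailing_nan_count(s))   # 34
```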

In [9]:
data.tail(35)
Out[9]:
date_time traffic_volume holiday temp rain_1h snow_1h clouds_all Clear Clouds Drizzle Fog Haze Mist Rain Smoke Snow Squall Thunderstorm
23049 2018-09-29 13:00:00 4553.0 0 279.66 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23050 2018-09-29 14:00:00 NaN 0 280.31 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23051 2018-09-29 15:00:00 NaN 0 280.99 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23052 2018-09-29 16:00:00 NaN 0 281.41 0.00 0.0 90 0 0 1 0 0 0 0 0 0 0 0
23053 2018-09-29 17:00:00 NaN 0 281.44 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23054 2018-09-29 18:00:00 NaN 0 281.02 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23055 2018-09-29 19:00:00 NaN 0 280.68 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23056 2018-09-29 20:00:00 NaN 0 280.55 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23057 2018-09-29 21:00:00 NaN 0 280.40 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23058 2018-09-29 22:00:00 NaN 0 280.54 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23059 2018-09-29 23:00:00 NaN 0 280.32 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23060 2018-09-30 00:00:00 NaN 0 280.30 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23061 2018-09-30 01:00:00 NaN 0 280.19 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23062 2018-09-30 02:00:00 NaN 0 280.07 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23063 2018-09-30 03:00:00 NaN 0 280.08 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23064 2018-09-30 04:00:00 NaN 0 279.88 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23065 2018-09-30 05:00:00 NaN 0 279.96 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23066 2018-09-30 06:00:00 NaN 0 280.17 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23067 2018-09-30 07:00:00 NaN 0 280.16 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23068 2018-09-30 08:00:00 NaN 0 280.28 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23069 2018-09-30 09:00:00 NaN 0 280.62 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23070 2018-09-30 10:00:00 NaN 0 281.38 0.00 0.0 75 0 1 0 0 0 0 0 0 0 0 0
23071 2018-09-30 11:00:00 NaN 0 282.18 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23072 2018-09-30 12:00:00 NaN 0 282.69 0.00 0.0 75 0 1 0 0 0 0 0 0 0 0 0
23073 2018-09-30 13:00:00 NaN 0 283.03 0.00 0.0 90 0 0 0 0 0 0 1 0 0 0 0
23074 2018-09-30 14:00:00 NaN 0 283.48 0.00 0.0 90 0 0 0 0 0 0 1 0 0 0 0
23075 2018-09-30 15:00:00 NaN 0 283.84 0.00 0.0 75 0 0 0 0 0 0 1 0 0 0 0
23076 2018-09-30 16:00:00 NaN 0 284.38 0.00 0.0 75 0 0 0 0 0 0 1 0 0 0 0
23077 2018-09-30 17:00:00 NaN 0 284.79 0.00 0.0 75 0 1 0 0 0 0 0 0 0 0 0
23078 2018-09-30 18:00:00 NaN 0 284.20 0.25 0.0 75 0 0 0 0 0 0 1 0 0 0 0
23079 2018-09-30 19:00:00 NaN 0 283.45 0.00 0.0 75 0 1 0 0 0 0 0 0 0 0 0
23080 2018-09-30 20:00:00 NaN 0 282.76 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23081 2018-09-30 21:00:00 NaN 0 282.73 0.00 0.0 90 0 0 0 0 0 0 0 0 0 0 1
23082 2018-09-30 22:00:00 NaN 0 282.09 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0
23083 2018-09-30 23:00:00 NaN 0 282.12 0.00 0.0 90 0 1 0 0 0 0 0 0 0 0 0

Visualisation of the data.

In [10]:
fig = plt.subplots.make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.02)  

fig.add_trace( go.Scatter( x = data.loc[:, "date_time"], y=data.loc[:, "traffic_volume"], name = "traffic_volume", line=dict(color='blue')), row=1, col=1) 

fig.update_layout(height=500, width=1000, title = 'Traffic volume')                           

fig.show()

Engine settings¶

Parameters that need to be set are:

  • Prediction horizon (baseUnit 'Sample', offset 34) - because we want to predict values until the end of the next day.
  • Back-test length.

We also ask the engine for additional outputs to see details of the sub-models, so we define the extendedOutputConfiguration parameter as well.

30% of the data will be used for the out-of-sample interval.

In [11]:
backtest_length = int( data.shape[0] * .3 )

backtest_length
Out[11]:
6925
In [12]:
configuration_backtest = {
    'usage': {                                 
        'predictionTo': { 
            'baseUnit': 'Sample',             
            'offset': 34                        # number of units to predict into the future (34 samples here - until the end of the next day)
        },
        'backtestLength': backtest_length       # number of samples that are used for backtesting (note that these samples are excluded from model building period)
    },
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True       # flag that specifies if the importances of features are returned in the response
    }
}

Experiment iteration(s)¶

There is only one forecasting situation, and we want to see how the default settings perform, so we will run only one iteration.

In [13]:
backtest = api_client.prediction_build_model_predict(data, configuration_backtest)     # running the RTInstantML forecasting using data and defined configuration
backtest.status                                                                        # status of the job
Out[13]:
'FinishedWithWarning'
In [14]:
backtest.result_explanations
Out[14]:
[{'index': 1,
  'message': 'Predictor traffic_volume has a value missing for timestamp 2016-01-01 02:00:00.'},
 {'index': 2,
  'message': 'Predictor rain_1h contains an outlier or a structural change in its most recent records.'}]
In [15]:
out_of_sample_predictions = backtest.aggregated_predictions[3]['values']  # index 3 points to the out-of-sample interval with next-day values only
In [16]:
out_of_sample_predictions.rename( columns = {'Prediction':'traffic_volume_pred'}, inplace=True)
In [17]:
out_of_sample_timestamps = out_of_sample_predictions.index.tolist()
In [18]:
evaluation_data = data.copy()

evaluation_data['date_time'] = pd.to_datetime(data['date_time']).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data['date_time'].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index('date_time',inplace=True)

evaluation_data = evaluation_data[ ['traffic_volume'] ]
In [19]:
evaluation_data = evaluation_data.join( out_of_sample_predictions )
evaluation_data.head()
Out[19]:
traffic_volume traffic_volume_pred
date_time
2017-12-15 01:00:00+00:00 490.0 491.966
2017-12-15 02:00:00+00:00 358.0 385.901
2017-12-15 03:00:00+00:00 387.0 379.254
2017-12-15 04:00:00+00:00 836.0 720.545
2017-12-15 05:00:00+00:00 2773.0 2540.960

Insights - inspecting ML models¶

Simple and extended importances are available, so you can see to what extent each predictor contributes to explaining the variance of the target variable.

In [20]:
simple_importances = backtest.predictors_importances['simpleImportances']
simple_importances = sorted(simple_importances, key = lambda i: i['importance'], reverse=True) 

simple_importances = pd.DataFrame.from_dict( simple_importances )

simple_importances
Out[20]:
importance predictorName
0 88.94 traffic_volume
1 6.13 holiday
2 2.12 clouds_all
3 1.55 Snow
4 0.80 Clouds
5 0.38 Clear
6 0.07 Rain
In [21]:
fig = go.Figure()

fig.add_trace(go.Bar( x = simple_importances['predictorName'],
                      y = simple_importances['importance'] )
             )

fig.update_layout(
        title='Simple importances'
)

fig.show()
In [22]:
extended_importances = backtest.predictors_importances['extendedImportances']
extended_importances = sorted(extended_importances, key = lambda i: i['importance'], reverse=True) 

extended_importances = pd.DataFrame.from_dict( extended_importances )
In [23]:
fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '11:00:00' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '11:00:00' ]['importance'] )
             )

fig.update_layout(
        title='Predictor importances for the model used for predictions of values for 11:00',
        height = 700
)

fig.show()

fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '02:00:00' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '02:00:00' ]['importance'] )
             )

fig.update_layout(
        title='Predictor importances for the model used for predictions of values for 02:00',
        height = 700
)

fig.show()

Evaluation of results¶

Results for out-of-sample interval.

In [24]:
fig = go.Figure()

fig.add_trace( go.Scatter(x=evaluation_data.index, y=evaluation_data['traffic_volume'].values, name='Actual', line_shape='linear'))

fig.add_trace( go.Scatter(x=evaluation_data.index, y=evaluation_data['traffic_volume_pred'].values, name='Predicted', line_shape='linear'))

fig.update_layout( autosize=False, width=1200, height=500 )

fig.show()
In [25]:
MAE = mean_absolute_error( evaluation_data['traffic_volume'].values , evaluation_data['traffic_volume_pred'].values )
MAE
Out[25]:
228.01552092686455
In [26]:
RMSE = sqrt( mean_squared_error( evaluation_data['traffic_volume'].values , evaluation_data['traffic_volume_pred'].values ) )
RMSE
Out[26]:
378.52242798669397
In [29]:
evaluation_data['err_pct'] = ( evaluation_data['traffic_volume'] - evaluation_data['traffic_volume_pred'] ) / evaluation_data['traffic_volume']
In [30]:
MAPE = abs(evaluation_data['err_pct']).mean()
MAPE
Out[30]:
0.10241788405181448

Summary¶

We can see that TIM, with default settings and a simple dataset, achieved a MAPE of 10.24% for day-ahead prediction.

Imagine enhancing the data with additional information relevant to a given line/region/infrastructure, such as planned events in venues surrounding metro stations, volumes on connecting means of transport, etc.