Predicting car traffic for on-ramp in LA¶

Title: Predicting car traffic for on-ramp in LA
Author: Michal Bezak - Tangent Works
Industry: Transportation, Smart cities and infrastructure
Area: Traffic management
Type: Forecasting

Description¶

Smart traffic solutions are becoming increasingly important, playing a vital role in making our cities and infrastructure smarter. They comprise multiple parts, spanning hardware, software, and, in recent years, also AI/ML.

With predictions of the utilization (and potential congestion) of particular segments of road infrastructure, it is possible to better optimize the routes taken and thus cut the time necessary to transport goods, people, etc.

The value derived from such a capability can be estimated with proxy indicators such as people's time saved or fuel expenses avoided.

Business parameters¶

Business objective: Cut the time required to deliver goods in a certain area
Business value: Higher utilization of vans (measured by goods delivered in a given time frame); shorter delivery times
KPI: -
In [2]:
import logging
import pandas as pd
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import json
import datetime
import copy
import math

import tim_client

Credentials and logging

(Do not forget to fill in your credentials in the credentials.json file)
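For reference, here is a minimal sketch that writes a placeholder credentials.json; the keys are inferred from the tim_client.Credentials call further below, and all values are placeholders you must replace with your own:

import json

with open('credentials.json', 'w') as f:
    json.dump({
        'license_key': 'YOUR_LICENSE_KEY',              # TIM license key
        'email': 'you@example.com',                     # account e-mail
        'password': 'YOUR_PASSWORD'                     # account password
    }, f, indent=2)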

In [3]:
with open('credentials.json') as f:
    credentials_json = json.load(f)                     # loading the credentials from credentials.json

TIM_URL = 'https://timws.tangent.works/v4/api'          # URL to which the requests are sent

SAVE_JSON = False                                       # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/'                            # folder where the requests and responses are stored

LOGGING_LEVEL = 'INFO'
In [4]:
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
In [5]:
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)

api_client.save_json = SAVE_JSON
api_client.json_saving_folder_path = JSON_SAVING_FOLDER
[INFO] 2021-01-15 11:56:53,486 - tim_client.api_client:save_json:66 - Saving JSONs functionality has been disabled
[INFO] 2021-01-15 11:56:53,489 - tim_client.api_client:json_saving_folder_path:75 - JSON destination folder changed to logs

Dataset¶

The dataset is a combination of two original datasets:

  • volumes (sensor data),
  • event times of games played at the nearby stadium.

It was also enhanced with holiday information relevant for the given timestamps.

Loop sensor data were collected for the Glendale on-ramp of the 101 North freeway in Los Angeles, US. The ramp is close enough to the stadium to register unusual traffic after a Dodgers game, but not so close that it is dominated by game traffic.

Sampling and gaps¶

Data are sampled at 5-minute intervals and may contain gaps (missing timestamps).
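You can verify this quickly with pandas; a minimal sketch, assuming the data.csv file used later in this notebook:

import pandas as pd

df = pd.read_csv('data.csv', parse_dates=['TS'])
deltas = df['TS'].diff().dropna()                  # time between consecutive samples
gaps = deltas[deltas > pd.Timedelta(minutes=5)]    # anything longer than 5 minutes is a gap
print(f'{len(gaps)} gaps found, longest: {gaps.max() if len(gaps) else None}')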

Data¶

Column name | Description | Type | Availability
----------- | ----------- | ---- | ------------
TS | Timestamp | Timestamp column | -
Volume | Number of cars counted during the previous five minutes | Target | t-1
Event | Binary flag indicating whether a game was played at the nearby stadium | Predictor | t+6
Holiday | Binary flag indicating a public holiday | Predictor | t+6

(Availability t+6 means the predictor's values are known for the whole 30-minute horizon; t-1 means only past values are available.)

Forecasting situations¶

Our goal is to predict the next 30 minutes of traffic.

The CSV file used in this experiment can be downloaded here.

Source¶

Data were published at the UCI Machine Learning Repository. Loop sensor measurements were obtained from the Freeway Performance Measurement System (PeMS).

In [10]:
data = tim_client.load_dataset_from_csv_file('data.csv', sep=',')

We can see that the last 6 data points are NaN, i.e. missing from the dataset, because we want to back-test predictions of the next 30 minutes (6 × 5 min = 30 min).
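If you prepare such a file yourself, the empty horizon can be appended as follows (a minimal sketch; the sample data.csv already contains these rows, so do not run it on this dataset):

import numpy as np
import pandas as pd

data['TS'] = pd.to_datetime(data['TS'])             # ensure datetime type
horizon = pd.date_range(data['TS'].iloc[-1] + pd.Timedelta(minutes=5), periods=6, freq='5min')
future = pd.DataFrame({'TS': horizon, 'Volume': np.nan,    # target left empty for TIM to predict
                       'Event': 0, 'Holiday': 0})          # predictor values must cover the horizon
data = pd.concat([data, future], ignore_index=True)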

In [11]:
data.tail(7)
Out[11]:
TS Volume Event Holiday
49728 2005-09-30 23:35:00 15.0 0 0
49729 2005-09-30 23:40:00 NaN 0 0
49730 2005-09-30 23:45:00 NaN 0 0
49731 2005-09-30 23:50:00 NaN 0 0
49732 2005-09-30 23:55:00 NaN 0 0
49733 2005-10-01 00:00:00 NaN 0 0
49734 2005-10-01 00:05:00 NaN 0 0
In [24]:
data.shape
Out[24]:
(49735, 4)

Zoom in closer to see the event lines (red) in the chart below.

In [14]:
data_for_chart_event = data['Event'].apply( lambda x: None if x==0 else x )*40    # scale the binary event flag to 40 so it is visible next to Volume
In [15]:
fig = plt.subplots.make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.02)  

fig.add_trace(go.Scatter(x = data.loc[:, "TS"], y=data.loc[:, "Volume"], name = "Volume", line=dict(color='blue')), row=1, col=1) 

fig.add_trace(go.Scatter(x = data.loc[:, "TS"], y=data_for_chart_event, name = "Event",  line=dict(color='red')), row=1, col=1) 

fig.update_layout(height=500, width=1000)                           

fig.show()

Engine settings¶

The only parameters that need to be set are:

  • Prediction horizon (Sample + 6) - because we want to predict the next 30 minutes,
  • Interpolation (data imputation) - recommended because of the gaps (missing timestamps) present in the data,
  • Back-test length.

We also ask the engine for additional data so that we can see the details of the sub-models, so we define the extendedOutputConfiguration parameter as well.

30% of the data will be used for the out-of-sample interval.

In [16]:
backtest_length = int( data.shape[0] * .3 )

backtest_length
Out[16]:
14920
In [17]:
engine_settings = {
    'usage': {                                 
        'predictionTo': { 
            'baseUnit': 'Sample',              
            'offset': 6                        # predict next 30 minutes
        },
        'backtestLength': backtest_length     
    },
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True      # flag that specifies if the importances of features are returned in the response
    },
    'interpolation': {
        'maxLength': 24*3,                 # interpolate gaps of up to 72 samples (6 hours at 5-minute sampling)
        'type': 'Linear'
    }
}

Experiment iteration(s)¶

We will run only one iteration of the experiment, using default settings.

In [18]:
backtest = api_client.prediction_build_model_predict(data, engine_settings)

backtest.status     
Out[18]:
'FinishedWithWarning'
In [19]:
backtest.result_explanations
Out[19]:
[{'index': 1,
  'message': 'Predictor Volume has a value missing for timestamp 2005-04-15 10:20:00.'}]
In [30]:
out_of_sample_timestamps = backtest.aggregated_predictions[1]['values'].index.tolist()

out_of_sample_predictions = pd.DataFrame.from_dict( backtest.aggregated_predictions[1]['values'] )
In [31]:
out_of_sample_predictions['Volume_pred'] = out_of_sample_predictions['Prediction']

out_of_sample_predictions = out_of_sample_predictions[ ['Volume_pred'] ]
In [32]:
evaluation_data = data.copy()

evaluation_data['TS'] = pd.to_datetime(data['TS']).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data['TS'].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index('TS',inplace=True)

evaluation_data = evaluation_data[ ['Volume'] ]
In [33]:
evaluation_data = evaluation_data.join( out_of_sample_predictions )
In [34]:
evaluation_data.head()
Out[34]:
Volume Volume_pred
TS
2005-08-10 04:20:00+00:00 3.0 1.81030
2005-08-10 04:25:00+00:00 2.0 1.97560
2005-08-10 04:30:00+00:00 5.0 2.69418
2005-08-10 04:35:00+00:00 6.0 3.32686
2005-08-10 04:40:00+00:00 5.0 4.46732

Inspecting ML models internals¶

TIM offers a view of which predictors it considers important. Simple and extended importances are available, showing to what extent each predictor contributes to explaining the variance of the target variable. Extended importances break this down per model (one sub-model is built for each point in the prediction horizon); term names such as Volume(t-2) denote values lagged by the given number of samples.

In [35]:
simple_importances = backtest.predictors_importances['simpleImportances']
simple_importances = sorted(simple_importances, key = lambda i: i['importance'], reverse=True) 

simple_importances = pd.DataFrame.from_dict( simple_importances )

simple_importances
Out[35]:
importance predictorName
0 83.07 Volume
1 11.20 Event
2 5.73 Holiday
In [36]:
fig = go.Figure()

fig.add_trace(go.Bar( x = simple_importances['predictorName'],
                      y = simple_importances['importance'] )
             )

fig.update_layout(
        title='Simple importances'
)

fig.show()
In [37]:
extended_importances = backtest.predictors_importances['extendedImportances']
extended_importances = sorted(extended_importances, key = lambda i: i['importance'], reverse=True) 

extended_importances = pd.DataFrame.from_dict( extended_importances )

extended_importances
Out[37]:
time type termName importance
0 [2] TargetAndTargetTransformation Volume(t-2) 29.01
1 [3] TargetAndTargetTransformation Volume(t-3) 28.65
2 [6] TargetAndTargetTransformation Volume(t-6) 28.47
3 [4] TargetAndTargetTransformation Volume(t-4) 28.17
4 [1] TargetAndTargetTransformation Volume(t-1) 27.66
... ... ... ... ...
115 [1] Interaction DoW(t-36) ≤ Sat & Volume(t-5) 1.70
116 [2] Calendar DoW(t-276) ≤ Thu 1.65
117 [2] TargetAndTargetTransformation Volume(t-5) 1.64
118 [1] Calendar DoW(t-276) ≤ Thu 1.54
119 [1] Interaction sin(2πt / 4.0 hours) & DoW(t-48) ≤ Fri 1.52

120 rows × 4 columns

In [38]:
fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[1]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[1]' ]['importance'] )
             )

fig.update_layout(
        title='Importances for the model used for predictions of the 1st point in prediction horizon',
        height = 700,
        width = 1000
)

fig.show()

fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[6]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[6]' ]['importance'] )
             )

fig.update_layout(
        title='Importances for the model used for predictions of the 6th point in prediction horizon',
        height = 700,
        width = 1000
)

fig.show()

Evaluation of results¶

Results for the out-of-sample interval.
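We evaluate with the mean absolute error and the root mean squared error, computed in the cells below:

$$\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n}\lvert y_i - \hat{y}_i\rvert \qquad \mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}$$

where $y_i$ is the actual Volume and $\hat{y}_i$ the predicted value.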

In [40]:
evaluation_data['err'] = evaluation_data['Volume'] - evaluation_data['Volume_pred'] 
evaluation_data['err_squared'] = evaluation_data['err']**2
In [41]:
MAE = abs(evaluation_data['err']).mean()
RMSE = math.sqrt( evaluation_data['err_squared'].mean() )
In [42]:
MAE
Out[42]:
4.332961734396008
In [43]:
RMSE
Out[43]:
5.878749520817067
In [44]:
fig = plt.subplots.make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.02)  

fig.add_trace(go.Scatter(x = evaluation_data.index, y=evaluation_data.loc[:, "Volume"], name = "Actual", line=dict(color='blue')), row=1, col=1) 

fig.add_trace(go.Scatter(x = evaluation_data.index, y=evaluation_data.loc[:, "Volume_pred"], name = "Predicted",  line=dict(color='red')), row=1, col=1) 

fig.update_layout(height=500, width=1000)                           

fig.show()

Summary¶

We demonstrated that, using only the historical values of the target, an indicator of events at the nearby public venue, and holiday information, TIM can automatically build quite precise forecasting models with default settings. To improve accuracy further, we recommend enhancing the dataset with meteo predictors (such as temperature, rain, wind, etc.) and with measurements from other adjacent points of the road infrastructure.
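Such an enhancement could look as follows (a hypothetical sketch; weather.csv with a TS column and meteo readings is an assumed file, not part of this example):

import pandas as pd

data = pd.read_csv('data.csv', parse_dates=['TS'])
weather = pd.read_csv('weather.csv', parse_dates=['TS'])   # hypothetical file: TS, Temperature, Rain, Wind
enhanced = data.merge(weather, on='TS', how='left')        # left join keeps the original 5-minute grid
enhanced.to_csv('data_enhanced.csv', index=False)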