Demand forecasting in retail¶

Title: Demand forecasting in retail
Author: Michal Bezak, Tangent Works
Industry: Retail
Area: Demand
Type: Forecasting
Use Case Library: Open Use Case Library

Description¶

Predicting demand, i.e., how many units of a certain product will be sold at a certain time, is one of the most important tasks in retail. It is linked to costs incurred and to sales opportunities, and thus has a direct impact on P&L. The implications of an incorrect forecast are significant. If there is too much of a particular product in the warehouse, too much capital is tied up in inventory that just sits there and occupies room (and capital) that could have been used elsewhere (or a smaller warehouse might have sufficed). On the other hand, if there is too little, the product runs out of stock too soon, which may translate into lost revenue.

It is not only about the hard numbers, though. Reducing situations where a product is out of stock helps preserve a positive customer experience. Other areas impacted by demand forecasting include:

  • Financial planning (planning budgets, forecasting revenue);
  • Optimization of purchasing process;
  • Marketing campaigns planning;
  • Logistics (delivery);
  • Staff management – planning (forecasting) of resources needed;

Business parameters¶

Demand forecasting can support a variety of use cases; we present two of them as examples.

A

Business objective: Availability of products in demand
Value: Revenue maximization
KPI: Revenue for a given product in a given timeframe


B

Business objective: Minimize turnaround time of items in stock
Value: Maximize value of the warehouse
KPI: Average turnaround time for a given product

In [2]:
import logging
import pandas as pd
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import json
import datetime

import tim_client

Credentials and logging¶

(Do not forget to fill in your credentials in the credentials.json file)
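The keys the file needs follow from how the credentials are read below (`license_key`, `email`, `password`). A minimal sketch that writes a placeholder `credentials.json` (the values are placeholders, not real credentials):

```python
import json

# Placeholder credentials file - replace the values with your own TIM account details
credentials = {
    "license_key": "YOUR_LICENSE_KEY",
    "email": "you@example.com",
    "password": "YOUR_PASSWORD",
}

with open('credentials.json', 'w') as f:
    json.dump(credentials, f, indent=2)
```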

In [3]:
with open('credentials.json') as f:
    credentials_json = json.load(f)                     # loading the credentials from credentials.json

TIM_URL = 'https://timws.tangent.works/v4/api'          # URL to which the requests are sent

SAVE_JSON = False                                        # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/'                            # folder where the requests and responses are stored

LOGGING_LEVEL = 'INFO'
In [4]:
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
In [5]:
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)

api_client.save_json = SAVE_JSON
api_client.json_saving_folder_path = JSON_SAVING_FOLDER
[INFO] 2021-01-15 13:50:24,436 - tim_client.api_client:save_json:66 - Saving JSONs functionality has been disabled
[INFO] 2021-01-15 13:50:24,438 - tim_client.api_client:json_saving_folder_path:75 - JSON destination folder changed to logs

Datasets¶

The data contain units sold for a particular product category (jackets and shorts), enhanced with information about holidays, weather, lockdowns and other factors.

Sampling and gaps¶

The data are sampled on a daily basis and may contain occasional gaps.

Data¶

| Column name | Description | Type | Availability |
| --- | --- | --- | --- |
| DATE | Date | Timestamp column | |
| UNITS | Units sold | Target | t-1 |
| Min_TEMP_C | Min. temperature for a particular day | Predictor | t+2 |
| Max_TEMP_C | Max. temperature for a particular day | Predictor | t+2 |
| Clear | Weather indicator - clear sky | Predictor | t+2 |
| Clouds | Weather indicator - clouds | Predictor | t+2 |
| Drizzle | Weather indicator - drizzle | Predictor | t+2 |
| Fog | Weather indicator - fog | Predictor | t+2 |
| Mist | Weather indicator - mist | Predictor | t+2 |
| Rain | Weather indicator - rain | Predictor | t+2 |
| Snow | Weather indicator - snow | Predictor | t+2 |
| Thunderstorm | Weather indicator - thunderstorm | Predictor | t+2 |
| Christmas | Christmas period indicator (binary) | Predictor | t+2 |
| Black Friday | Black Friday period indicator (binary) | Predictor | t+2 |
| New Year | New Year period indicator (binary) | Predictor | t+2 |
| Peak Period | Most active period indicator (binary) | Predictor | t+2 |
| Lockdown | Lockdown indicator (binary) | Predictor | t+2 |

For predictors with availability t+N where N>0, it is assumed that forecast values are available.
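To illustrate the availability pattern, a toy DataFrame in the same shape TIM expects: the target is known only up to t-1 (so the last two UNITS values are NaN), while a t+2 predictor is filled to the end of the horizon (values here taken from the tail of the jackets dataset shown later):

```python
import numpy as np
import pandas as pd

# Toy dataset mirroring the availability pattern described above:
# the target (UNITS) ends with NaNs, a t+2 predictor is filled to the end.
toy = pd.DataFrame({
    'DATE': pd.date_range('2020-10-01', periods=5, freq='D'),
    'UNITS': [1579.0, 2284.0, 2326.0, np.nan, np.nan],   # target available up to t-1
    'Max_TEMP_C': [22.8, 19.0, 21.1, 25.7, 24.8],        # forecast values available for t+1, t+2
})

print(toy)
print('Rows to forecast:', toy['UNITS'].isna().sum())
```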

Forecasting situation¶

TIM detects the forecasting situation from the current "shape" of the data: e.g., if the last available target value is for Jan 21st, it will start forecasting from Jan 22nd. It also takes the Jan 21st timestamp as the reference point against which the availability of each column in the dataset is determined; this rule is then followed in back-testing when calculating results for the out-of-sample interval.

In our case, all predictor values were available 2 steps ahead.

We want to back-test forecasting of units sold for the day after tomorrow, and will do so every 2 days. Setting the prediction horizon to 2 days ahead will automatically produce out-of-sample values predicted with a rolling window of 2.
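Conceptually, the back-test rolls the forecast origin forward and predicts two samples beyond the last known target each time. A sketch of the resulting (origin, target) pairs for an S+2 horizon (dates are illustrative, not taken from the dataset):

```python
import pandas as pd

dates = pd.date_range('2020-03-09', periods=8, freq='D')   # last known target dates (illustrative)
horizon = 2   # S+2: forecast two samples beyond the last known target

# Each forecast origin yields an out-of-sample prediction `horizon` days ahead
pairs = [(origin.date(), (origin + pd.Timedelta(days=horizon)).date())
         for origin in dates]

for origin, target in pairs[:3]:
    print(f'last known target: {origin} -> forecast for: {target}')
```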

CSV files used in experiments can be downloaded here.

In [6]:
dataset1 ='data_jackets.csv'
dataset2 ='data_shorts.csv'

Product: Jackets¶

In [7]:
data = tim_client.load_dataset_from_csv_file( dataset1 , sep=',')

The NaN values at the end of the target (UNITS) column reflect the state of the data at the time of forecasting: e.g., on October 4th, with the last available data point from October 3rd, we want to predict UNITS for October 5th.

In [8]:
data.tail()
Out[8]:
DATE UNITS Min_TEMP_C Max_TEMP_C Clear Clouds Drizzle Fog Mist Rain Snow Thunderstorm Christmas Black Friday New Year Peak Period Lockdown
688 2020-10-01 1579.0 12.491094 22.814398 36 367 0 0 0 54 0 0 0 0 0 0 0
689 2020-10-02 2284.0 12.933676 18.963764 14 229 0 0 0 214 0 0 0 0 0 0 0
690 2020-10-03 2326.0 12.688293 21.127352 41 349 0 0 0 67 0 0 0 0 0 0 0
691 2020-10-04 NaN 13.179190 25.712166 298 107 0 0 0 52 0 0 0 0 0 0 0
692 2020-10-05 NaN 15.018446 24.809737 36 384 0 0 0 37 0 0 0 0 0 0 0

Visualisation of data.

In [9]:
timestamp_column = 'DATE'
target_column = 'UNITS'
In [10]:
fig = plt.subplots.make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.02)  

fig.add_trace( go.Scatter( x = data.loc[:, timestamp_column ], y=data.loc[:, target_column ], name = target_column, line=dict(color='blue')), row=1, col=1) 

fig.update_layout(height=500, width=1000)                           

fig.show()

Engine settings¶

Parameters that need to be set:

  • Prediction horizon (Sample + 2) - because we want to predict values up to the end of the day after tomorrow.
  • Back-test length.

We also ask the engine for additional data to see details of the sub-models, so we define the extendedOutputConfiguration parameter as well.

30% of the data will be used for the out-of-sample interval.

In [11]:
backtest_length = int( data.shape[0] * .3 )

backtest_length
Out[11]:
207
In [12]:
configuration_backtest = {
    'usage': {                                 
        'predictionTo': { 
            'baseUnit': 'Sample',             
            'offset': 2                         # number of samples we want to predict into the future (2 days in this case)
        },
        'backtestLength': backtest_length       # number of samples that are used for backtesting (note that these samples are excluded from model building period)
    },
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True       # flag that specifies if the importances of features are returned in the response
    }
}

Experiment iteration¶

For both datasets, we will run just one experiment iteration with default settings.

In [13]:
backtest = api_client.prediction_build_model_predict(data, configuration_backtest)     # running the RTInstantML forecasting using data and defined configuration
backtest.status                                                                        # status of the job
Out[13]:
'FinishedWithWarning'
In [14]:
backtest.result_explanations
Out[14]:
[{'index': 1,
  'message': 'Predictor UNITS has a value missing for timestamp 2019-05-01 00:00:00.'}]
In [15]:
out_of_sample_values = backtest.aggregated_predictions[1]['values']
out_of_sample_values.rename( columns = {'Prediction': target_column+'_pred'}, inplace=True)
out_of_sample_timestamps = out_of_sample_values.index.tolist()
In [16]:
evaluation_data = data.copy()

evaluation_data[ timestamp_column ] = pd.to_datetime(data[ timestamp_column ]).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data[ timestamp_column ].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index( timestamp_column, inplace=True)

evaluation_data = evaluation_data[ [ target_column ] ].join( out_of_sample_values )

evaluation_data.head()
Out[16]:
UNITS UNITS_pred
DATE
2020-03-11 00:00:00+00:00 1199.0 1256.67
2020-03-12 00:00:00+00:00 1404.0 1290.55
2020-03-13 00:00:00+00:00 2724.0 2179.27
2020-03-14 00:00:00+00:00 3086.0 3069.69
2020-03-15 00:00:00+00:00 1261.0 1404.71
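The horizon metrics returned later by the engine can be cross-checked by hand. A sketch of MAE/RMSE/MAPE computed directly from an actual-vs-predicted frame, using only the five rows shown above (so the numbers will not match the full back-test metrics):

```python
import numpy as np
import pandas as pd

# First rows of the out-of-sample table above (actual vs predicted units)
df = pd.DataFrame({
    'UNITS':      [1199.0, 1404.0, 2724.0, 3086.0, 1261.0],
    'UNITS_pred': [1256.67, 1290.55, 2179.27, 3069.69, 1404.71],
})

err = df['UNITS_pred'] - df['UNITS']
mae = err.abs().mean()                                  # mean absolute error
rmse = np.sqrt((err ** 2).mean())                       # root mean squared error
mape = (err.abs() / df['UNITS'].abs()).mean() * 100     # mean absolute percentage error

print(f'MAE={mae:.2f}  RMSE={rmse:.2f}  MAPE={mape:.2f}%')
```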

Insights - inspecting ML models¶

TIM offers a view of which predictors are considered important. Both simple and extended importances are available, showing to what extent each predictor contributes to explaining the variance of the target variable.

In [17]:
simple_importances = backtest.predictors_importances['simpleImportances']
simple_importances = sorted(simple_importances, key = lambda i: i['importance'], reverse=True) 

simple_importances = pd.DataFrame.from_dict( simple_importances )

simple_importances
Out[17]:
importance predictorName
0 84.50 UNITS
1 7.88 Black Friday
2 6.25 Max_TEMP_C
3 1.36 Fog
In [18]:
fig = go.Figure()

fig.add_trace(go.Bar( x = simple_importances['predictorName'],
                      y = simple_importances['importance'] )
             )

fig.update_layout(
        title='Simple importances'
)

fig.show()
In [19]:
extended_importances = backtest.predictors_importances['extendedImportances']
extended_importances = sorted(extended_importances, key = lambda i: i['importance'], reverse=True) 

extended_importances = pd.DataFrame.from_dict( extended_importances )

extended_importances
Out[19]:
time type termName importance
0 [1] Interaction UNITS(t-1) & DoW(t) ≤ Sat 44.41
1 [1] TargetAndTargetTransformation UNITS(t-1) 38.12
2 [2] TargetAndTargetTransformation UNITS(t-7) 25.75
3 [2] Interaction UNITS(t-2) & DoW(t) ≤ Sat 14.61
4 [2] Interaction UNITS(t-2) & sin(2πt / 168.0 hours) 11.66
5 [2] Interaction UNITS(t-2) & cos(2πt / 168.0 hours) 11.33
6 [2] Interaction UNITS(t-2) & DoW(t) ≤ Mon 8.68
7 [2] TargetAndTargetTransformation UNITS(t-2) 8.06
8 [1] Interaction UNITS(t-1) & DoW(t) ≤ Thu 7.51
9 [2] Predictor Max_TEMP_C 6.98
10 [2] Predictor Black Friday 6.43
11 [1] Predictor Black Friday 5.31
12 [1] Interaction UNITS(t-1) & (Max_TEMP_C(t) - 19.99)⁺ 4.66
13 [2] Interaction sin(2πt / 168.0 hours) & Fog 4.06
14 [2] Interaction UNITS(t-7) & UNITS(t-2) 2.43
15 [1] TargetAndTargetTransformation Intercept 0.00
16 [2] TargetAndTargetTransformation Intercept 0.00
In [20]:
fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[1]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[1]' ]['importance'] )
             )

fig.update_layout(
        title='Importances for the 1st day of prediction horizon'
)

fig.show()

fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[2]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[2]' ]['importance'] )
             )

fig.update_layout(
        title='Importances for the 2nd day of prediction horizon'
)

fig.show()

Evaluation of results¶

Results for out-of-sample interval.

In [21]:
fig = go.Figure()

fig.add_trace( go.Scatter(x=evaluation_data.index, y=evaluation_data[ target_column ].values, name='Actual', line_shape='linear'))

fig.add_trace( go.Scatter(x=evaluation_data.index, y=evaluation_data[ target_column+'_pred'].values, name='Predicted', line_shape='linear'))

fig.update_layout( autosize=False, width=1200, height=500 )

fig.show()

Accuracy metrics for the 2nd day.

In [22]:
backtest.aggregated_predictions[1]['accuracyMetricsForHorizons']['S+2']
Out[22]:
{'MAE': 720.6999662587592,
 'MSE': 1365283.018892031,
 'MAPE': 230.38067882398673,
 'RMSE': 1168.4532591815691}

Product: Shorts¶

In [23]:
data = tim_client.load_dataset_from_csv_file( dataset2 , sep=',')

Visualisation of data.

In [24]:
fig = plt.subplots.make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.02)  

fig.add_trace( go.Scatter( x = data.loc[:, timestamp_column ], y=data.loc[:, target_column ], name = target_column, line=dict(color='blue')), row=1, col=1) 

fig.update_layout(height=500, width=1000)                           

fig.show()

Engine settings¶

We will use the very same settings as for the case above.

In [25]:
backtest_length
Out[25]:
207
In [26]:
configuration_backtest
Out[26]:
{'usage': {'predictionTo': {'baseUnit': 'Sample', 'offset': 2},
  'backtestLength': 207},
 'extendedOutputConfiguration': {'returnExtendedImportances': True}}

Experiment iteration¶

In [27]:
backtest = api_client.prediction_build_model_predict(data, configuration_backtest) 
backtest.status  
Out[27]:
'FinishedWithWarning'
In [28]:
backtest.result_explanations
Out[28]:
[{'index': 1,
  'message': 'Predictor UNITS has a value missing for timestamp 2019-05-01 00:00:00.'}]
In [29]:
out_of_sample_values = backtest.aggregated_predictions[1]['values']
out_of_sample_values.rename( columns = {'Prediction': target_column+'_pred'}, inplace=True)
out_of_sample_timestamps = out_of_sample_values.index.tolist()
In [30]:
evaluation_data = data.copy()

evaluation_data[ timestamp_column ] = pd.to_datetime(data[ timestamp_column ]).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data[ timestamp_column ].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index( timestamp_column, inplace=True)

evaluation_data = evaluation_data[ [ target_column ] ].join( out_of_sample_values )

evaluation_data.head()
Out[30]:
UNITS UNITS_pred
DATE
2020-03-11 00:00:00+00:00 2189.0 4496.91
2020-03-12 00:00:00+00:00 2223.0 2454.35
2020-03-13 00:00:00+00:00 3175.0 4708.24
2020-03-14 00:00:00+00:00 4777.0 5927.58
2020-03-15 00:00:00+00:00 2875.0 3076.07

Insights - inspecting ML models¶

In [31]:
simple_importances = backtest.predictors_importances['simpleImportances']
simple_importances = sorted(simple_importances, key = lambda i: i['importance'], reverse=True) 

simple_importances = pd.DataFrame.from_dict( simple_importances )

simple_importances
Out[31]:
importance predictorName
0 55.16 UNITS
1 20.42 Peak Period
2 7.83 Black Friday
3 7.61 Christmas
4 7.07 Max_TEMP_C
5 1.39 Clouds
6 0.52 Min_TEMP_C
In [32]:
fig = go.Figure()

fig.add_trace(go.Bar( x = simple_importances['predictorName'],
                      y = simple_importances['importance'] )
             )

fig.update_layout(
        title='Simple importances'
)

fig.show()
In [33]:
extended_importances = backtest.predictors_importances['extendedImportances']    # refresh importances for the shorts model (otherwise the jackets values would be plotted)
extended_importances = sorted(extended_importances, key = lambda i: i['importance'], reverse=True)

extended_importances = pd.DataFrame.from_dict( extended_importances )

fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[1]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[1]' ]['importance'] )
             )

fig.update_layout(
        title='Importances for the 1st day of prediction horizon'
)

fig.show()

fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[2]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[2]' ]['importance'] )
             )

fig.update_layout(
        title='Importances for the 2nd day of prediction horizon'
)

fig.show()

Evaluation of results¶

Results for out-of-sample interval.

In [34]:
fig = go.Figure()

fig.add_trace( go.Scatter(x=evaluation_data.index, y=evaluation_data[ target_column ].values, name='Actual', line_shape='linear'))

fig.add_trace( go.Scatter(x=evaluation_data.index, y=evaluation_data[ target_column+'_pred'].values, name='Predicted', line_shape='linear'))

fig.update_layout( autosize=False, width=1200, height=500 )

fig.show()

Accuracy metrics for the 2nd day.

In [35]:
backtest.aggregated_predictions[1]['accuracyMetricsForHorizons']['S+2']
Out[35]:
{'MAE': 1422.5082361228722,
 'MSE': 3534266.9658089527,
 'MAPE': 342.0953718480556,
 'RMSE': 1879.9646182332667}

Summary¶

We demonstrated how TIM can, with default settings, predict product demand even in times of significant structural change (the Covid-19 lockdowns).

To improve accuracy further, we recommend enhancing the dataset with predictors linked to relevant marketing activities/plans (e.g., a campaign for a specific product in a given area).
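A sketch of how such a predictor could be joined in, assuming a hypothetical campaign calendar with a DATE column and a binary Campaign flag (both the calendar and the values are illustrative, not part of the datasets used above):

```python
import pandas as pd

# Hypothetical campaign calendar: 1 on days a campaign for the product runs
campaigns = pd.DataFrame({
    'DATE': pd.to_datetime(['2020-09-28', '2020-09-29', '2020-09-30']),
    'Campaign': [1, 1, 1],
})

# Toy slice of a modelling dataset (DATE + target), as loaded from CSV
data = pd.DataFrame({
    'DATE': pd.date_range('2020-09-27', periods=5, freq='D'),
    'UNITS': [1500.0, 1610.0, 1720.0, 1579.0, 2284.0],
})

# Left-join onto the modelling dataset and default non-campaign days to 0
enhanced = data.merge(campaigns, on='DATE', how='left')
enhanced['Campaign'] = enhanced['Campaign'].fillna(0).astype(int)
print(enhanced)
```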