Imagine your business has tens (or even hundreds) of stores spread across multiple locations. Predicting sales for each store, on a regular basis, can be a challenging task.
Numbers for each store may be influenced by slightly different factors; even with the same set of common predictors, constructing a model unique to each store is expected to deliver optimal performance, because it captures the nuances relevant to each store. Machine learning is the obvious choice in such a case; the question is which approach/technology is right for you. Selecting the best technology has a tremendous impact in the long run. One must not forget that the areas to be addressed do not stop at accuracy. Think of the development and training process, monitoring forecast quality, scaling from one store to many, transitioning from research through development to operations, gaining the ability to add further areas/levels to forecast, robustness of the solution in case of defects in the data, and more.
This is one of the typical scenarios where TIM can show its power. The RTInstantML technology supports learning and forecasting in a single call, which typically takes (tens of) seconds. In this use case we demonstrate how easy it is to predict sales for 50 stores with just the default settings.
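In code, that single call looks roughly like the sketch below. This is only an illustration of the idea; the client object, configuration, and data are set up later in this notebook, and the complete walkthrough follows.
# Minimal sketch of the RTInstantML idea: one call builds a model and returns the forecast.
# `api_client`, `configuration` and `data_with_nan_target` are illustrative names defined further below.
result = api_client.prediction_build_model_predict( data_with_nan_target, configuration )
forecast = result.prediction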
We can think of several areas that are expected to benefit.
Business objective | Business value | KPI |
---|---|---|
Cashflow/capital management | More timely and informed decisions | Various, e.g. cost of unused (free) cash, cost of sub-optimal order management |
Pro-active coordination of marketing activities | Improved sales | Impact of marketing campaign on sales |
This use case could also be adapted to forecasting volumes of individual SKUs/categories; in that case, we could give the following example.
Business objective | Business value | KPI |
---|---|---|
Efficient operations | Optimal staffing, inventory management, warehouse utilization | Various, e.g. cost of under-/over-staffing, impact of delayed orders of certain categories of goods, cost of warehouse space, etc. |
import logging
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import json
import datetime
import random
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import tim_client
Credentials and logging
(Do not forget to fill in your credentials in the credentials.json file)
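For reference, the credentials file is expected to contain the license key, e-mail, and password that are read by the code below (illustrative structure, placeholder values):
{
    "license_key": "<your-license-key>",
    "email": "<your-email>",
    "password": "<your-password>"
}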
with open('credentials.json') as f:
    credentials_json = json.load(f)  # loading the credentials from credentials.json
TIM_URL = 'https://timws.tangent.works/v4/api' # URL to which the requests are sent
SAVE_JSON = False # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/' # folder where the requests and responses are stored
LOGGING_LEVEL = 'INFO'
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)
There is one CSV file for each store; in total, 50 CSV files are provided.
The data contain information about turnover (sales), whether the store was open, whether a marketing campaign was running, and whether there was a holiday on the given day.
Data are sampled daily.
Column name | Description | Type | Availability |
---|---|---|---|
Timestamp | Timestamp | Timestamp column | |
Sales | The turnover for each day | Target | t+0 |
IsOpen | Binary value indicating whether the store was open (1) or closed (0) | Predictor | t+48 |
IsCampaign | Binary value indicating whether a marketing campaign was running (1) or not (0) | Predictor | t+48 |
IsHoliday | Binary indicator for holiday (1 for holiday, 0 for common days) | Predictor | t+48 |
Our goal is to predict the next 48 steps (days) of sales. We assume that information about holidays, marketing campaigns, and open/closed status is available throughout the whole prediction horizon.
CSV files used in the experiment can be downloaded here.
This is a synthetic dataset generated solely for the purpose of this use case. Values were generated by following simple heuristics, assuming the presence of seasonality, trend, randomness, a relationship between sales and weekday, etc. A similar dataset can be found on the Kaggle website.
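To give an idea of the kind of heuristics involved, the sketch below generates a comparable, purely illustrative daily series with weekday seasonality, a mild trend, noise, and zero sales on closed days. It is not the actual generator used to produce the provided files.
# Illustrative sketch only -- synthetic daily sales with weekday seasonality, trend and noise
rng = np.random.default_rng( 0 )
dates = pd.date_range( '2012-11-15', periods=1096, freq='D' )
weekday_effect = np.array( [ 1.2, 1.0, 0.95, 0.9, 1.05, 1.3, 0.0 ] )[ dates.weekday ]  # store closed on Sundays
trend = np.linspace( 15000, 20000, len( dates ) )
noise = rng.normal( 0, 1500, len( dates ) )
sales = np.clip( ( trend + noise ) * weekday_effect, 0, None )
synthetic = pd.DataFrame( { 'Date': dates, 'Sales': sales, 'IsHoliday': 0,
                            'IsOpen': ( weekday_effect > 0 ).astype( int ), 'IsCampaign': 0 } )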
def read_datasets( path_to_dir ):
    datasets = dict()
    for f in os.listdir( path_to_dir ):
        store_id_ = int( f.replace('.csv','') )
        df_ = pd.read_csv( os.path.join( path_to_dir, f ) )
        datasets[ store_id_ ] = df_
    return datasets
datasets = read_datasets( 'data_synthetic_2' )
stores = list( datasets.keys() )
len(stores)
50
Preview data from the first dataset.
datasets[ stores[0] ]
Date | Sales | IsHoliday | IsOpen | IsCampaign | |
---|---|---|---|---|---|
0 | 2012-11-15 | 17457.803034 | 0 | 1 | 0 |
1 | 2012-11-16 | 16190.353214 | 0 | 1 | 0 |
2 | 2012-11-17 | 21790.792059 | 0 | 1 | 0 |
3 | 2012-11-18 | 0.000000 | 0 | 0 | 0 |
4 | 2012-11-19 | 27599.119245 | 0 | 1 | 0 |
... | ... | ... | ... | ... | ... |
1091 | 2015-11-11 | 24701.488646 | 0 | 1 | 0 |
1092 | 2015-11-12 | 18658.955443 | 0 | 1 | 0 |
1093 | 2015-11-13 | 15373.153422 | 0 | 1 | 0 |
1094 | 2015-11-14 | 16241.869082 | 0 | 1 | 0 |
1095 | 2015-11-15 | 0.000000 | 0 | 0 | 0 |
1096 rows × 5 columns
sample_store_data = datasets[ stores[0] ].iloc[:7*12]
fig = go.Figure()
fig.add_trace(go.Scatter( x = sample_store_data.index, y = sample_store_data['Sales'], name=stores[0] ) )
fig.update_layout( height = 700, width = 1200, title='Sales' )
fig.show()
Parameters of the data relevant to our scenario.
target_column = 'Sales'
timestamp_column = 'Date'
prediction_horizon = 48
The helper functions below are used to evaluate the results.
# MAPE metric
def mean_absolute_percent_error( actual, predicted ):
    # exclude 0 values
    temp = [ (x[ 0 ], x[ 1 ]) for x in zip( actual, predicted ) if x[ 0 ] != 0 ]
    actual, predicted = zip( *temp )
    actual, predicted = np.array( actual ), np.array( predicted )
    mape = np.mean( abs( actual - predicted ) / actual )
    return mape
# RMSPE metric
def root_mean_square_percent_error( actual, predicted ):
    # exclude 0 values
    temp = [ (x[ 0 ], x[ 1 ]) for x in zip( actual, predicted ) if x[ 0 ] != 0 ]
    actual, predicted = zip( *temp )
    actual, predicted = np.array( actual ), np.array( predicted )
    rmspe = np.sqrt( np.mean( np.square( (actual - predicted) / actual ), axis=0 ) )
    return rmspe
# Return all metrics for given evaluation dataframe
def extract_metrics( eval_data, target_column ):
    if target_column not in eval_data.columns:
        raise ValueError( 'Missing %s in evaluation data' % ( target_column ) )
    if target_column+'_pred' not in eval_data.columns:
        raise ValueError( 'Missing %s in evaluation data' % ( target_column+'_pred' ) )
    y = eval_data[ target_column ]
    y_pred = eval_data[ target_column+'_pred' ]
    rmse = np.sqrt( mean_squared_error( y, y_pred ) )
    mae = mean_absolute_error( y, y_pred )
    mape = mean_absolute_percent_error( y, y_pred )
    rmspe = root_mean_square_percent_error( y, y_pred )
    return { 'RMSE': rmse, 'MAE': mae, 'MAPE': mape, 'RMSPE': rmspe }
def make_report( results ):
    return pd.DataFrame.from_dict( results, orient='index' ).describe()
Helper functions to extract/collect results.
def extract_eval_data_from_tim_result( prediction, data, target_column ):
    prediction['Date'] = pd.to_datetime( prediction.index.date )
    prediction.reset_index( drop=True, inplace=True )
    prediction = prediction[ ['Date','Prediction'] ]
    data['Date'] = pd.to_datetime( data['Date'] )
    filter_actual_ = data['Date'].isin( prediction['Date'] )
    actual_values = data[ filter_actual_ ]
    actual_values = actual_values[ ['Date', target_column] ]
    result = prediction.merge( actual_values, on='Date' )
    result = result.rename( columns={ 'Prediction': target_column+'_pred' } )
    return result
def extract_explanations_from_tim_result( predictors_importances ):
    simple_importances = None
    if 'simpleImportances' in predictors_importances.keys():
        simple_importances = pd.DataFrame.from_dict( predictors_importances['simpleImportances'], orient='columns' )
    extended_importances = None
    if 'extendedImportances' in predictors_importances.keys():
        extended_importances_temp = predictors_importances['extendedImportances']
        extended_importances_temp = sorted( extended_importances_temp, key=lambda i: i['importance'], reverse=True )
        extended_importances = pd.DataFrame.from_dict( extended_importances_temp )
    return { 'simple': simple_importances, 'extended': extended_importances }
Prepare the data before they are sent to TIM -- set the last 48 values of the target column to NaN.
def prepare_data( data, timestamp_column, target_column, prediction_horizon ):
    data_for_prediction = data.copy()
    # set the target values within the prediction horizon to NaN
    data_for_prediction.iloc[ -prediction_horizon:, data_for_prediction.columns.get_loc( target_column ) ] = np.nan
    return data_for_prediction
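An optional sanity check (not part of the original notebook): after preparation, the target should be NaN over the forecast horizon while the predictor columns remain filled.
# Optional sanity check on one store: target is NaN in the horizon, predictors stay available
sample_prepared = prepare_data( datasets[ stores[0] ], timestamp_column, target_column, prediction_horizon )
assert sample_prepared[ target_column ].tail( prediction_horizon ).isna().all()
assert sample_prepared[ 'IsOpen' ].tail( prediction_horizon ).notna().all()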
The only parameters that need to be set are the prediction horizon (predictionTo) and the back-test length (backtestLength).
We also ask the engine for additional data, to see details of the sub-models, so we define the extendedOutputConfiguration parameter as well.
tim_engine_configuration = {
    'usage': {
        'predictionTo': {
            'baseUnit': 'Sample',
            'offset': prediction_horizon
        },
        'backtestLength': 0
    },
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True
    }
}
We will run a forecast for each dataset, collect the results, and at the end evaluate them in a consolidated view.
Along the way, we examine the insights returned by TIM for one sample store.
def build_model_predict( data_for_prediction, data, engine_settings ):
    backtest = api_client.prediction_build_model_predict( data_for_prediction, engine_settings )
    print( 'Status: ', backtest.status )
    print( 'Result explanations: ', backtest.result_explanations )
    print( '---------------' )
    eval_data = extract_eval_data_from_tim_result( backtest.prediction, data, target_column )
    metrics = extract_metrics( eval_data, target_column )
    explanations = extract_explanations_from_tim_result( backtest.predictors_importances )
    return eval_data, metrics, explanations
result_eval_data, result_metrics, result_explanations, result_durations = dict(), dict(), dict(), dict()

progress = 1
for store_id in stores:
    if store_id in list( result_metrics.keys() ): continue
    progress_pct = '{0:.1f}%'.format( progress / len( stores ) * 100 )
    print( 'Starting store:', store_id )
    start_time_ = time.time()
    d = datasets[ store_id ].copy()
    data_for_tim = prepare_data( d, timestamp_column, target_column, prediction_horizon )
    e_d_, m_, e_ = build_model_predict( data_for_tim, d, tim_engine_configuration )
    result_eval_data[ store_id ] = e_d_
    result_metrics[ store_id ] = m_
    result_explanations[ store_id ] = e_
    result_durations[ store_id ] = time.time() - start_time_
    print( 'Progress:', progress_pct )
    progress += 1
Starting store: 4 Status: Finished Result explanations: [] --------------- Progress: 2.0%
Starting store: 37 Status: Finished Result explanations: [] --------------- Progress: 4.0%
Starting store: 10 Status: Finished Result explanations: [] --------------- Progress: 6.0%
...
Starting store: 36 Status: Finished Result explanations: [] --------------- Progress: 98.0%
Starting store: 11 Status: Finished Result explanations: [] --------------- Progress: 100.0%
(output truncated; all 50 stores finished with status "Finished" and empty result explanations)
results = { 'eval_data': result_eval_data, 'metrics':result_metrics, 'explanations':result_explanations, 'durations':result_durations }
Results for the out-of-sample interval.
sample_store = stores[0]
focus_df = results['eval_data'][ sample_store ]
fig = go.Figure()
x_axis = focus_df[ timestamp_column ]
fig.add_trace(go.Scatter( x = x_axis, y = focus_df[ target_column ], name=target_column ) )
fig.add_trace(go.Scatter( x = x_axis, y = focus_df[ target_column+'_pred'], name=target_column+'_pred' ) )
fig.update_layout( height = 700, width = 1200, title='Actual vs. predicted' )
fig.show()
results['metrics'][ sample_store ]
{'RMSE': 2283.9537791175253, 'MAE': 1837.9248032864377, 'MAPE': 0.11193601807691991, 'RMSPE': 0.1301323412496775}
TIM offers a view of which predictors it considers important. Simple and extended importances are available, so you can see to what extent each predictor contributes to explaining the variance of the target variable.
simple_importances = results['explanations'][ sample_store ]['simple']
fig = go.Figure()
fig.add_trace(go.Bar( x = simple_importances['predictorName'],
y = simple_importances['importance'] )
)
fig.update_layout(
title='Simple importances'
)
fig.show()
extended_importances = results['explanations'][ sample_store ]['extended']
fig = go.Figure()
fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[1]' ]['termName'],
y = extended_importances[ extended_importances['time'] == '[1]' ]['importance'] )
)
fig.update_layout(
title='Importances for the model used for predictions of the 1st point in prediction horizon',
height = 700,
width = 1000
)
fig.show()
fig = go.Figure()
fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[6]' ]['termName'],
y = extended_importances[ extended_importances['time'] == '[6]' ]['importance'] )
)
fig.update_layout(
title='Importances for the model used for predictions of the 6th point in prediction horizon',
height = 700,
width = 1000
)
fig.show()
Let's see how the consolidated results look across all forecasts.
make_report( results['metrics'] )
RMSE | MAE | MAPE | RMSPE | |
---|---|---|---|---|
count | 50.000000 | 50.000000 | 50.000000 | 50.000000 |
mean | 1043.575772 | 799.979135 | 0.084623 | 0.102138 |
std | 962.818279 | 740.563755 | 0.016305 | 0.020395 |
min | 242.793187 | 189.152381 | 0.062741 | 0.072441 |
25% | 308.316449 | 229.002390 | 0.071893 | 0.087856 |
50% | 389.944103 | 304.977412 | 0.081462 | 0.099716 |
75% | 2167.450616 | 1618.720878 | 0.095476 | 0.115179 |
max | 2659.313838 | 2141.008241 | 0.132118 | 0.156156 |
residuals = list()

for k, ed in results['eval_data'].items():
    ed['err'] = ed[ target_column ] - ed[ target_column+'_pred' ]
    residuals.extend( ed['err'].values )
fig = go.Figure()
fig.add_trace( go.Histogram( x=residuals ) )
fig.update_layout( height = 700, width = 900, title='Residuals combined')
fig.show()
make_report( results['durations'] )
0 | |
---|---|
count | 50.000000 |
mean | 20.584782 |
std | 1.820380 |
min | 16.450409 |
25% | 19.605531 |
50% | 19.678936 |
75% | 22.738789 |
max | 22.993540 |
We demonstrated how TIM can be used to predict values for a vast number of entities in your operations, in this case store sales.
TIM is capable of generating a new model and calculating a prediction for each store individually in a couple of seconds (about 20 seconds on average in our case).
More importantly, no additional effort is needed to transition to operations (e.g. compiling or deploying models into containers, developing/configuring an API, etc.). Just bring your data and get your forecast; all the technology is already in place, ready to scale with your business.