Time left till battery discharge¶

Title: Time left till battery discharge
Author: Michal Bezak, Tangent Works
Industry: Electronics, Automotive, Manufacturing ...
Area: Customer experience, Utility
Type: Forecasting

Description¶

Accelerating adoption of electric vehicles (EVs) is driving improvements in battery technology at a pace not seen in decades. Bigger capacities, faster charging and longer battery lifespans are in focus. It is not only EVs that challenge the battery status quo: smartphones, household battery storage, and other areas would all benefit from progress in this field.

Until battery capacities increase substantially, knowing how much time is left until complete discharge is critical to everyone. This parameter depends on how the vehicle or device is used, the load profile, environmental conditions, etc. Knowing how much time is left helps us plan further actions, e.g. re-planning a route or charging.

In our use case, we will demonstrate how TIM can predict the time left until discharge.

TIM can be deployed on the edge and it scales: deployment scenarios range from the device level to (at scale) the cloud, to which vast battery grids could be connected.

Business parameters¶

| Business objective | Business value | KPI |
| --- | --- | --- |
| Process optimization | Operational decisions timed more accurately | - |
| Improved quality of products | Greater value and utility delivered to customers | - |
In [1]:
import logging
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import json
import datetime

from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

import tim_client
In [2]:
with open('credentials.json') as f:
    credentials_json = json.load(f)                     # loading the credentials from credentials.json

TIM_URL = 'https://timws.tangent.works/v4/api'          # URL to which the requests are sent

SAVE_JSON = False                                       # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/'                            # folder where the requests and responses are stored

LOGGING_LEVEL = 'INFO'
In [3]:
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
In [4]:
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)
In [5]:
results = dict()

Dataset(s)¶

The data contain measurements of multiple Li-ion batteries used in a charge/discharge experiment.

The batteries were operated continuously in charging, discharging and resting cycles with various modifications. Our focus was on discharging with the current changing randomly (random walk); current setpoints were selected from 4.5A, 3.75A, 3A, 2.25A, 1.5A and 0.75A.

During the experiment, each selected current setpoint was applied until either the battery voltage dropped to 3.2V or 5 minutes passed. Since we wanted to focus solely on natural discharge events, not timeouts, we selected the later part of the original data where battery aging/degradation was already visible.

Sampling and gaps¶

The data were resampled from the original (almost regular) 1-second sampling to a regular 5-second basis with mean aggregation.

The data may contain gaps; this is mainly the case for the dataset with merged data from multiple batteries (of the same type and parameters). During the experiments, the distribution of current to which the batteries were exposed differed. Merging such data means higher variance of values, but at the same time it makes the resulting models more stable.
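For illustration, such a resampling step could look as follows in pandas (a minimal sketch; the raw file name and its columns are assumptions, not the actual preprocessing pipeline used here):

    import pandas as pd

    # hypothetical raw file with ~1-second measurements
    raw = pd.read_csv('raw_battery_measurements.csv', parse_dates=['timestamp'])

    # aggregate onto a regular 5-second grid using the mean of the samples in each bin
    resampled = raw.set_index('timestamp').resample('5S').mean().reset_index()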

Data¶

| Column name | Description | Type | Availability |
| --- | --- | --- | --- |
| timestamp | Absolute timestamp of the sample | Timestamp column | |
| remaining_time_in_cycle | Time left till discharge, in seconds | Target | t-1 |
| temperature | Temperature of the battery (Celsius) | Predictor | t+0 |
| current | Current measured in Amps | Predictor | t+0 |
| voltage | Voltage measured in Volts | Predictor | t+0 |
| voltage_cumsum | Cumulative sum of voltage within given cycle (Volts) | Predictor | t+0 |
| capacity_removed | Capacity removed within given cycle (current x timeframe) | Predictor | t+0 |
| capacity_removed_cumsum | Cumulative capacity removed within given cycle | Predictor | t+0 |

remaining_time_in_cycle was calculated as the time left (at timestamp t) until the voltage dropped close to 3.2V, which essentially marks the end of the cycle.

Ambient temperature values are not provided, although it is known that for RW10 it was "room temperature", while for RW25 and RW21 it was "approximately 40°C".

The original file was obtained in Matlab format; it was transformed into CSV, enhanced with additional information derived from the original data (the capacity removed and cumsum columns), and filtered to random discharge cycles only.
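For illustration, the derived columns could be computed per discharge cycle along these lines (a minimal sketch, assuming a dataframe df with a datetime timestamp column and a hypothetical cycle_id column; the timeframe factor and units are illustrative, not the exact preparation code):

    step_seconds = 5                                                        # regular sampling step (illustrative)

    df['capacity_removed'] = df['current'] * step_seconds                   # current x timeframe
    df['capacity_removed_cumsum'] = df.groupby('cycle_id')['capacity_removed'].cumsum()
    df['voltage_cumsum'] = df.groupby('cycle_id')['voltage'].cumsum()

    # time left (seconds) until the end of the cycle, i.e. until voltage drops close to 3.2V
    cycle_end = df.groupby('cycle_id')['timestamp'].transform('max')
    df['remaining_time_in_cycle'] = (cycle_end - df['timestamp']).dt.total_seconds()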

To demonstrate results for the out-of-sample interval, the dataset was resampled to regular sampling so it was possible to match timestamps and calculate residuals. Technically, TIM is capable of working with any kind of sampling, from millisecond sampling to irregularly sampled data, data with gaps, etc.

Forecasting situation¶

TIM detects the forecasting situation from the current "shape" of the data, i.e. if values for the target and the predictors end at a particular timestamp, it assumes the same pattern of availability for model building and for the calculation of out-of-sample values.

Our forecasting situation assumes that values for all predictors are available at time t, except for the target. We frame the problem as the calculation of the target value based on predictor values only, and we will not rely on any lagged target values.
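The CSVs used below already encode this situation: the last row contains fresh predictor values while the target is missing (visible in the data.tail() output later). As a minimal sketch, assuming a dataframe df holding the dataset, the same pattern could be created like this:

    import numpy as np

    df_for_tim = df.copy()
    # leave the target missing for the timestamp we want TIM to calculate,
    # while all predictor values at time t stay filled in
    df_for_tim.loc[df_for_tim.index[-1], 'remaining_time_in_cycle'] = np.nan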

CSV files used in experiments can be downloaded here.

Source¶

The raw dataset was obtained from the NASA Prognostics Center of Excellence website.

B. Bole, C. Kulkarni, and M. Daigle "Randomized Battery Usage Data Set", NASA Ames Prognostics Data Repository (http://ti.arc.nasa.gov/project/prognostic-data-repository), NASA Ames Research Center, Moffett Field, CA. Analysis of a similar dataset is published in: Brian Bole, Chetan Kulkarni, and Matthew Daigle, "Adaptation of an Electrochemistry-based Li-Ion Battery Model to Account for Deterioration Observed Under Randomized Use", in the proceedings of the Annual Conference of the Prognostics and Health Management Society, 2014

In [7]:
datafiles = {
    'RW10': 'datasets/battery_discharge_r_RW10.csv',
    'RW25': 'datasets/battery_discharge_r_RW25.csv',
    'RW21': 'datasets/battery_discharge_r_RW21.csv',
    'merged': 'datasets/merged_r_RW10_RW25_RW21.csv',
}
In [8]:
results = dict()
In [84]:
# NOTE: re-run the cells below for each iteration, changing this key to select a different CSV file
FOCUS = 'RW10'
In [85]:
data = tim_client.load_dataset_from_csv_file( datafiles.get(FOCUS), sep=',' )

data.tail()
Out[85]:
timestamp remaining_time_in_cycle voltage current temperature capacity_removed capacity_removed_cumsum voltage_cumsum
20284 2014-06-02 06:07:10 9.650 3.24200 2.250200 36.484470 2.250200 83.250000 126.624200
20285 2014-06-02 06:07:15 4.650 3.22000 2.249600 36.533760 2.249600 94.499200 142.767800
20286 2014-06-02 06:07:20 0.575 3.19625 2.592750 36.580737 1.490375 77.425875 117.499750
20287 2014-06-02 06:07:25 5.150 3.26900 2.251333 36.628223 1.500000 2.250000 6.567333
20288 2014-06-02 06:07:30 NaN 3.20960 2.249800 36.650810 1.867130 10.865730 19.448200
In [86]:
data.shape
Out[86]:
(20289, 8)
In [87]:
prediction_horizon = 1
In [88]:
target_column    = 'remaining_time_in_cycle'

timestamp_column = 'timestamp'

Visualization

In [89]:
def apply_regular_timeline( df, sec, timestamp_column ):
    """Re-index df onto a regular timeline with a step of `sec` seconds; missing timestamps become gap rows."""
    vis_df = df.copy()

    vis_df[ timestamp_column ] = pd.to_datetime( vis_df[ timestamp_column ] )
    vis_df.set_index( timestamp_column, inplace=True )

    # generate the complete list of timestamps between the first and the last sample
    timestamps = [ vis_df.index.min() + i * datetime.timedelta( seconds=sec ) for i in range( int( ( vis_df.index.max() - vis_df.index.min() ).total_seconds()/sec ) + 1 ) ]

    temp_df = pd.DataFrame( { timestamp_column: timestamps } )
    temp_df.set_index( timestamp_column, inplace=True )

    # left-join the original data onto the regular timeline (rows without data stay NaN)
    vis_df = temp_df.join( vis_df )
    vis_df[ timestamp_column ] = vis_df.index

    return vis_df.reset_index( drop=True )
In [90]:
last_n = 10000

vis_df = apply_regular_timeline( data.iloc[-last_n:], 5, timestamp_column )
In [91]:
fig = go.Figure()

fig.add_trace(go.Scatter( x = vis_df[ timestamp_column ], y = vis_df[ target_column ], name=target_column ) )

fig.update_layout( height = 700, width = 1200, title='Target visualization (last '+str(last_n)+' records)' )

fig.show()
[Figure: Target visualization (last 10000 records)]

Engine settings¶

Parameters that need to be set are:

  • Prediction horizon = 1, as we want to calculate how much time is left at a given timestamp based on predictor values only.
  • Back-test length, to obtain values for the out-of-sample interval.
  • modelQuality and/or allowOffsets - we must not rely on lagged target values. Setting modelQuality to Medium instructs the TIM engine not to use target offsets during model building; setting allowOffsets to false does the same for all columns in the dataset (including predictors). Experiments were run with both options and accuracy was not significantly different, although it is recommended not to switch off the use of predictor offsets, as they are expected to improve overall accuracy.

We also ask for additional data from the engine to see details of the sub-models, so we define the extendedOutputConfiguration parameter as well.

In [92]:
backtest_length = int( data.shape[0] * .3 ) 

backtest_length
Out[92]:
6086
In [93]:
configuration_backtest = {
    'usage': {                                 
        'predictionTo': { 
            'baseUnit': 'Sample',             
            'offset': prediction_horizon
        }, 
        'modelQuality':  [ {'day':0, 'quality':'Medium'} ],
        'backtestLength': backtest_length   
    }, 
    #'allowOffsets': False,
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True,
    }
}

Experiment iteration(s)¶

We will run experiments for different datasets; they differ in the distribution of values for current and the data derived from it.

  • uniform distribution (RW10)
  • skewed towards high values (RW25)
  • skewed towards low values (RW21)
  • merged (all three in single dataset)

Results for accuracy and plot are shown in Evaluation section at the bottom.

In [94]:
backtest = api_client.prediction_build_model_predict( data, configuration_backtest )  

backtest.status   
Out[94]:
'FinishedWithWarning'
In [95]:
backtest.result_explanations
Out[95]:
[{'index': 1,
  'message': 'Predictor remaining_time_in_cycle has a value missing for timestamp 2014-05-29 09:31:55.'},
 {'index': 2,
  'message': 'There are 495 gaps with median length 39.0 in your target. Consider changing the imputation length to enable more transformations.'}]

Insights - inspecting ML models¶

Simple and extended importances are available so you can see to what extent each predictor contributes to explaining the variance of the target variable.

In [96]:
simple_importances = pd.DataFrame.from_dict( backtest.predictors_importances['simpleImportances'], orient='columns' )

simple_importances
Out[96]:
importance predictorName
0 34.99 voltage
1 21.97 capacity_removed_cumsum
2 19.28 current
3 15.27 voltage_cumsum
4 7.41 capacity_removed
5 1.07 temperature
In [97]:
fig = go.Figure()

fig.add_trace(go.Bar( x = simple_importances['predictorName'],
                      y = simple_importances['importance'] ) )

fig.update_layout( width = 1200, height = 700, title='Simple importances' )

fig.show()
[Figure: Simple importances]
In [98]:
extended_importances_temp = backtest.predictors_importances['extendedImportances']
extended_importances_temp = sorted( extended_importances_temp, key = lambda i: i['importance'], reverse=True ) 
extended_importances = pd.DataFrame.from_dict( extended_importances_temp )

extended_importances
Out[98]:
time type termName importance
0 [1] Interaction (voltage(t) - 3.25)⁺ & (-capacity_removed_cums... 22.42
1 [1] Interaction (current(t) - 1.50)⁺ & (voltage(t) - 3.25)⁺ 10.25
2 [1] Interaction (-capacity_removed_cumsum(t) + 211.67)⁺ & (-vo... 7.24
3 [1] Interaction (voltage(t) - 3.25)⁺ & (-voltage(t) + 3.58)⁺ 6.84
4 [1] Interaction (voltage_cumsum(t) - 432.03)⁺ & voltage_cumsum 6.80
5 [1] Interaction current & (-capacity_removed_cumsum(t) + 211.67)⁺ 6.14
6 [1] Interaction (current(t) - 2.25)⁺ & capacity_removed 3.61
7 [1] Interaction (voltage_cumsum(t) - 432.03)⁺ & (-capacity_rem... 3.54
8 [1] Interaction capacity_removed & (-voltage(t) + 3.58)⁺ 3.30
9 [1] Interaction (-voltage(t) + 3.36)⁺ & voltage_cumsum 3.17
10 [1] Interaction voltage_cumsum & voltage(t-1) 3.14
11 [1] Interaction capacity_removed_cumsum & (voltage(t) - 3.25)⁺ 2.95
12 [1] Interaction (current(t) - 1.50)⁺ & (-capacity_removed_cums... 2.89
13 [1] Interaction capacity_removed_cumsum & capacity_removed 2.86
14 [1] Interaction (-capacity_removed_cumsum(t) + 211.67)⁺ & (-vo... 2.68
15 [1] Interaction (voltage_cumsum(t) - 432.03)⁺ & (-voltage(t) +... 2.60
16 [1] Interaction current & (-voltage(t) + 3.58)⁺ 2.55
17 [1] Interaction (voltage_cumsum(t) - 432.03)⁺ & (-voltage(t) +... 2.41
18 [1] Interaction (-voltage(t) + 3.36)⁺ & current 2.40
19 [1] Interaction capacity_removed & current(t-1) 2.21
In [99]:
fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[1]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[1]' ]['importance'] ) )

fig.update_layout(
        title='Model features sorted by importance',
        width = 1200,
        height = 700
)

fig.show()
[Figure: Model features sorted by importance]

Evaluation of results¶

Results for out-of-sample interval.

In [100]:
def build_evaluation_data( backtest, data, timestamp_column, target_column ):
    # out-of-sample predictions returned by the engine
    out_of_sample_predictions = backtest.aggregated_predictions[1]['values']

    out_of_sample_predictions.rename( columns = {'Prediction':target_column+'_pred'}, inplace=True)

    out_of_sample_timestamps = out_of_sample_predictions.index.tolist()

    evaluation_data = data.copy()

    # keep only the actual values that fall into the out-of-sample interval
    evaluation_data[ timestamp_column ] = pd.to_datetime(data[ timestamp_column ]).dt.tz_localize('UTC')
    evaluation_data = evaluation_data[ evaluation_data[ timestamp_column ].isin( out_of_sample_timestamps ) ]

    evaluation_data.set_index( timestamp_column,inplace=True)

    evaluation_data = evaluation_data[ [ target_column ] ]

    # join actual and predicted values on the timestamp index
    evaluation_data = evaluation_data.join( out_of_sample_predictions )

    return evaluation_data
In [101]:
def plot_results( e ):
    fig = go.Figure()

    fig.add_trace(go.Scatter( x = e.index, y = e.iloc[:,1], name=e.columns[1] ) )   # predicted
    fig.add_trace(go.Scatter( x = e.index, y = e.iloc[:,0], name=e.columns[0] ) )   # actual

    # clip the y-axis range so that occasional spikes do not dominate the chart
    fig.update_yaxes( range=[ int( e.iloc[:,0].min()*0.33 ) ,int( e.iloc[:,0].max()*1.5 ) ] )
    fig.update_layout( height = 700, width = 1200, title='Actual vs. predicted' )

    fig.show()
In [102]:
e = build_evaluation_data( backtest, data, timestamp_column, target_column )
In [103]:
e['timestamp'] = e.index
e['timestamp'] = e['timestamp'].apply( lambda x: datetime.datetime.strftime(x, '%Y-%m-%d %H:%M:%S' ) )

# e
In [104]:
e = apply_regular_timeline( e, 5, 'timestamp')

Accuracy metrics¶

In [105]:
backtest.aggregated_predictions[1]['accuracyMetrics']
Out[105]:
{'MAE': 15.83182040866166,
 'MSE': 992.6355711814798,
 'MAPE': 271.6063984370307,
 'RMSE': 31.506119583050527}
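As a cross-check, similar numbers can be recomputed from the evaluation dataframe e prepared above, using the sklearn metrics imported at the beginning of the notebook (a sketch; gap rows introduced by apply_regular_timeline are dropped first):

    valid = e.dropna( subset=[ target_column, target_column + '_pred' ] )

    mae  = mean_absolute_error( valid[ target_column ], valid[ target_column + '_pred' ] )
    rmse = math.sqrt( mean_squared_error( valid[ target_column ], valid[ target_column + '_pred' ] ) )

    mae, rmse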
In [106]:
results[FOCUS] = backtest.aggregated_predictions[1]['accuracyMetrics']  # store results for overview

Out-of-sample chart¶

Zoom-in chart to compare actual vs. predicted values.

Iteration with "RW21" dataset

In [59]:
plot_results(e) 
[Figure: Actual vs. predicted (RW21)]

Iteration with "RW25" dataset

In [83]:
plot_results(e) 
[Figure: Actual vs. predicted (RW25)]

Iteration with "RW10" dataset

In [107]:
plot_results(e) 
[Figure: Actual vs. predicted (RW10)]

Iteration with "merged" dataset

In [35]:
plot_results(e) 
[Figure: Actual vs. predicted (merged)]

Summary¶

In [247]:
pd.DataFrame.from_dict(results)
Out[247]:
merged RW10 RW25 RW21
MAE 6.489112 15.831820 1.549011 1.009662
MSE 71.892082 992.635571 17.188447 6.530812
MAPE 45.330211 271.606398 13.596176 5.445381
RMSE 8.478920 31.506120 4.145895 2.555545

We demonstrated how TIM, with default mathematical settings, can predict the time left until battery discharge.

This use case can be further altered by bringing in additional data, e.g. ambient temperature, more load information, etc., while at the same time experimenting with reducing the sensory measurements in the dataset. This would benefit battery manufacturers, as eliminating sensors means cheaper production.

In this use case we worked with (almost) the same dataset as the use case demonstrating forecasting of battery temperature; we encourage you to check that one as well.