Predictive maintenance for Aircraft Engines (Classification)¶

Title: Predictive maintenance for Aircraft Engines
Author: Michal Bezak, Tangent Works
Industry: Manufacturing, Transportation
Area: Predictive maintenance
Type: Classification

Description¶

Predictive maintenance covers a variety of areas, including but not limited to:

  • prediction of failure,
  • root cause analysis,
  • failure detection,
  • failure type classification,
  • recommendation of mitigation or maintenance actions after failure.

To avoid downtimes caused by malfunctions of technical equipment, companies follow a maintenance schedule to discover and fix potential issues. Nevertheless, with more data available (from installed sensors), there are new possibilities to make the maintenance process better. With AI/ML it is possible to further optimize the maintenance schedule.

This has tangible financial implications; imagine two extreme situations:

  • Maintenance is planned later than needed - the technical equipment reaches the point of failure and disrupts operations; every minute of downtime can bring big losses and delay other parts of the process, not to mention the implications for people's health or lives.
  • Maintenance is done too often or too soon - the time and material spent are higher than the situation required, and, should a component be replaced completely, the capital invested into the machine/components is not utilized to its full potential.

There is zero tolerance for failure of aircraft engines, so they are checked rather sooner than later. This makes them a very good example for our sample Use case.

In this Use case we will solve the following problem: Is the engine going to fail within N cycles? From an ML perspective this can be framed as a binary classification problem.

| Business objective | Business value | KPI |
| --- | --- | --- |
| Financial: Maximize value of capital, minimize operational costs | Lower cost, capital utilization | - |
| Operations: Increase aircraft’s safety | Enhanced personnel/passengers safety | - |
In [3]:
import logging
import pandas as pd
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import json
import datetime

from sklearn.metrics import confusion_matrix, recall_score, precision_score

import tim_client

Credentials and logging

(Do not forget to fill in your credentials in the credentials.json file)

In [4]:
with open('credentials.json') as f:
    credentials_json = json.load(f)                     # loading the credentials from credentials.json

TIM_URL = 'https://timws.tangent.works/v4/api'          # URL to which the requests are sent

SAVE_JSON = False                                       # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/'                            # folder where the requests and responses are stored

LOGGING_LEVEL = 'INFO'
In [5]:
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
In [6]:
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)

api_client.save_json = SAVE_JSON
api_client.json_saving_folder_path = JSON_SAVING_FOLDER
[INFO] 2021-02-09 17:00:40,966 - tim_client.api_client:save_json:66 - Saving JSONs functionality has been disabled
[INFO] 2021-02-09 17:00:40,968 - tim_client.api_client:json_saving_folder_path:75 - JSON destination folder changed to logs
In [7]:
# results of each experiment iteration will be collected
results_last = dict()
results_all = dict()

Dataset¶

The data contain operational information about 100 aircraft engines, consolidated into a single table with columns for:

  • operational settings (3),
  • measurements from sensors (21),
  • cycle no.

The train part of the dataset contained information about when (at which cycle) each engine failed. The binary target variable is called window; it expresses whether a given timestamp is in the zone/window of failure (1) or not (0). The zone starts N steps ahead of the expected failure and ends at the point of failure.

The decision how to set the minimum value of N would in general be determined by factors such as:

  • time required to do maintenance,
  • average lead time until the maintenance can start (right resources are available, logistics etc.)

The bottom line is that there must be sufficient time left for the operations team to take action, i.e. to conduct maintenance of the respective engine within the given window.
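As an illustration, here is a minimal sketch of how such a window label could be derived from run-to-failure records. It assumes a raw frame with engine_id and cycle columns (as in the original per-engine files, before engine_id was dropped) and that each engine's last recorded cycle is its point of failure; the code is illustrative, not the script used to prepare the files above.

import pandas as pd

def add_failure_window(df, n_steps):
    # Label records that fall within the last n_steps cycles before failure.
    # Assumes each engine's maximum cycle is its point of failure (run-to-failure data).
    failure_cycle = df.groupby('engine_id')['cycle'].transform('max')
    df = df.copy()
    df['window'] = ((failure_cycle - df['cycle']) < n_steps).astype(int)
    return df

# Toy example: engine 1 fails at cycle 5, engine 2 at cycle 3
toy = pd.DataFrame({'engine_id': [1, 1, 1, 1, 1, 2, 2, 2],
                    'cycle':     [1, 2, 3, 4, 5, 1, 2, 3]})
add_failure_window(toy, n_steps=2)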

Sampling¶

Data are sampled on a daily basis.

Data¶

The original raw data files were consolidated into a single time series file; the train file is used for the in-sample interval only, the test file for the out-of-sample interval.

Structure of CSV file:

| Column name | Description | Type | Availability |
| --- | --- | --- | --- |
| Timestamp | Date | Timestamp column | |
| window | Binary label, denotes whether engine will fail in given (time) window | Target | t-1 |
| cycle | Operations cycle no. | Predictor | t+1 |
| setting1 ... setting3 | Operational settings | Predictor | t+1 |
| s1 ... s21 | Sensor measurements | Predictor | t+1 |

Due to the consolidation into a single file, timestamps were re-generated to avoid duplicates and to be continuous (they are not real timestamps from operations records).
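For context, below is a minimal sketch of how continuous daily timestamps could be generated when concatenating the per-engine records into one file; the start date and function name are assumptions for illustration, not the exact preprocessing that was used.

import pandas as pd

def assign_continuous_timestamps(df, start='2000-01-01'):
    # Replace any original time information with a synthetic, continuous daily index
    # spanning all concatenated records (one row per day, no duplicates).
    df = df.copy()
    df.insert(0, 'timestamp', pd.date_range(start=start, periods=len(df), freq='D'))
    return df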

In each experiment iteration, window was set to a different value to see which one would deliver the best results. We tried 7, 15 and 30 steps (days).

Please note that the engine_id column is not present in the file because it is an identifier, not a real measurement/setting.

Data situation¶

If we want TIM to do classification, the very last record of the target must be kept empty (NaN/None). TIM will use all available predictors to classify the given record. Furthermore, this situation is replicated to calculate results for all out-of-sample records during back-testing.
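A minimal sketch of putting data into this shape is shown below; it assumes a pandas DataFrame with a window target column (as in the dataset loaded later) and simply blanks the target of the most recent record before the request is sent.

import numpy as np

def blank_last_target(df, target_column='window'):
    # Keep the last record's target empty so TIM classifies it from the available predictors
    df = df.copy()
    df.loc[df.index[-1], target_column] = np.nan
    return df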

CSV files used in experiments can be downloaded here.

Source¶

Raw data files were acquired from Microsoft's server:

http://azuremlsamples.azureml.net/templatedata/PM_train.txt

http://azuremlsamples.azureml.net/templatedata/PM_test.txt

http://azuremlsamples.azureml.net/templatedata/PM_truth.txt

In [199]:
iteration_code = 30 # 7, 15, 30
In [200]:
fpath = 'data_w_'+str(iteration_code)+'.csv'
data = tim_client.load_dataset_from_csv_file( fpath , sep=',')

data.tail()
Out[200]:
timestamp window cycle setting1 setting2 setting3 s1 s2 s3 s4 ... s12 s13 s14 s15 s16 s17 s18 s19 s20 s21
33722 2102-08-29 1.0 194 0.0049 0.0000 100.0 518.67 643.24 1599.45 1415.79 ... 520.69 2388.00 8213.28 8.4715 0.03 394 2388 100.0 38.65 23.1974
33723 2102-08-30 1.0 195 -0.0011 -0.0001 100.0 518.67 643.22 1595.69 1422.05 ... 521.05 2388.09 8210.85 8.4512 0.03 395 2388 100.0 38.57 23.2771
33724 2102-08-31 1.0 196 -0.0006 -0.0003 100.0 518.67 643.44 1593.15 1406.82 ... 521.18 2388.04 8217.24 8.4569 0.03 395 2388 100.0 38.62 23.2051
33725 2102-09-01 1.0 197 -0.0038 0.0001 100.0 518.67 643.26 1594.99 1419.36 ... 521.33 2388.08 8220.48 8.4711 0.03 395 2388 100.0 38.66 23.2699
33726 2102-09-02 NaN 198 0.0013 0.0003 100.0 518.67 642.95 1601.62 1424.99 ... 521.07 2388.05 8214.64 8.4903 0.03 396 2388 100.0 38.70 23.1855

5 rows × 27 columns

In [201]:
data.shape
Out[201]:
(33727, 27)
In [202]:
timestamp_column = 'timestamp'
target_column = 'window'

Engine settings¶

Parameters that need to be set are:

  • Prediction horizon (Sample + 1) - because we want to classify the next value only.
  • Back-test length (which defines the out-of-sample interval) - can be set arbitrarily; here it is set to the exact size of the test dataset.
  • allowOffsets is set to False, so that features completely disregard temporal (lagged) dictionaries, because we want to treat each record independently.

We also ask the engine for additional output to see details of sub-models, so we define the extendedOutputConfiguration parameter as well.

In [203]:
backtest_length = 13096   # information taken from original raw data
In [204]:
configuration_backtest = {
    'usage': {                                 
        'predictionTo': { 
            'baseUnit': 'Sample',             
            'offset': 1
        }, 
        'backtestLength': backtest_length  
    },
    'allowOffsets': False,
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True,
    }
}

Experiment iteration(s)¶

We will run 3 different iterations (each with a different version of the dataset; the length of the window encoded in the data differs for each).

Also, we will evaluate results in two distinct ways:

  1. evaluate all points in out-of-sample interval,
  2. evaluate only the last data point per engine.

The cells below must be run for each iteration; results are collected in the results_all and results_last variables and rendered at the end of this notebook.

In [205]:
print('Current dataset', fpath)
Current dataset data_w_30.csv
In [206]:
#
# Beginning of experiment iteration cells
#

Proportion of positive values (1) for in-sample and out-of-sample intervals.

In [207]:
data.iloc[:-backtest_length][target_column].value_counts()   # in-sample
Out[207]:
0.0    17531
1.0     3100
Name: window, dtype: int64
In [208]:
data.iloc[-backtest_length:][target_column].value_counts()   # out-of-sample
Out[208]:
0.0    12764
1.0      331
Name: window, dtype: int64
In [209]:
data.iloc[-backtest_length:].tail()
Out[209]:
timestamp window cycle setting1 setting2 setting3 s1 s2 s3 s4 ... s12 s13 s14 s15 s16 s17 s18 s19 s20 s21
33722 2102-08-29 1.0 194 0.0049 0.0000 100.0 518.67 643.24 1599.45 1415.79 ... 520.69 2388.00 8213.28 8.4715 0.03 394 2388 100.0 38.65 23.1974
33723 2102-08-30 1.0 195 -0.0011 -0.0001 100.0 518.67 643.22 1595.69 1422.05 ... 521.05 2388.09 8210.85 8.4512 0.03 395 2388 100.0 38.57 23.2771
33724 2102-08-31 1.0 196 -0.0006 -0.0003 100.0 518.67 643.44 1593.15 1406.82 ... 521.18 2388.04 8217.24 8.4569 0.03 395 2388 100.0 38.62 23.2051
33725 2102-09-01 1.0 197 -0.0038 0.0001 100.0 518.67 643.26 1594.99 1419.36 ... 521.33 2388.08 8220.48 8.4711 0.03 395 2388 100.0 38.66 23.2699
33726 2102-09-02 NaN 198 0.0013 0.0003 100.0 518.67 642.95 1601.62 1424.99 ... 521.07 2388.05 8214.64 8.4903 0.03 396 2388 100.0 38.70 23.1855

5 rows × 27 columns

In [210]:
backtest = api_client.prediction_build_model_predict(data, configuration_backtest)     # running the RTInstantML forecasting using data and defined configuration
backtest.status                                                                        # status of the job
Out[210]:
'Finished'
In [211]:
backtest.result_explanations
Out[211]:
[]
In [212]:
out_of_sample_predictions = backtest.aggregated_predictions[1]['values']
In [213]:
out_of_sample_predictions.rename( columns = {'Prediction':target_column+'_pred'}, inplace=True)
In [214]:
out_of_sample_timestamps = out_of_sample_predictions.index.tolist()
In [215]:
evaluation_data = data.copy()

evaluation_data[ timestamp_column ] = pd.to_datetime(data[ timestamp_column ]).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data[ timestamp_column ].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index( timestamp_column,inplace=True)

evaluation_data = evaluation_data[ [ target_column,'cycle' ] ]
In [216]:
def encode_class( x ):
    # convert the predicted probability into a binary class using a 0.5 threshold
    if x < .5: return 0
    return 1
In [217]:
evaluation_data = evaluation_data.join( out_of_sample_predictions )

evaluation_data[ target_column+'_pred_p' ] =  evaluation_data[ target_column+'_pred' ]
evaluation_data[ target_column+'_pred' ] =  evaluation_data[ target_column+'_pred' ].apply( encode_class )

evaluation_data.head()
Out[217]:
window cycle window_pred window_pred_p
timestamp
2066-10-24 00:00:00+00:00 1.0 200 1 0.993112
2066-10-25 00:00:00+00:00 0.0 1 0 0.000000
2066-10-26 00:00:00+00:00 0.0 2 0 0.006201
2066-10-27 00:00:00+00:00 0.0 3 0 0.000000
2066-10-28 00:00:00+00:00 0.0 4 0 0.000000
In [218]:
# Run this cell for the last record evaluation only

# The raw data contains records for 100 engines, and the records for each engine ended at a certain cycle; thus, in our consolidated file, we can detect the last record of each engine by a sudden drop of the cycle value.

# Filter out evaluation data to keep only the last record for each engine.
# evaluation_data['cycle_diff'] = evaluation_data['cycle'].diff(-1)
# evaluation_data = evaluation_data[ evaluation_data['cycle_diff']>0 ]
# evaluation_data

No. of positive values in out-of-sample interval

In [219]:
evaluation_data[ evaluation_data[target_column]==1 ].shape[0]
Out[219]:
332

Insights - inspecting ML models¶

Simple and extended importances are available for you to see to what extent each predictor contributes to explaining the variance of the target variable.

In [220]:
simple_importances = backtest.predictors_importances['simpleImportances']
simple_importances = sorted(simple_importances, key = lambda i: i['importance'], reverse=True) 

simple_importances = pd.DataFrame.from_dict( simple_importances )
In [221]:
fig = go.Figure()

fig.add_trace(go.Bar( x = simple_importances['predictorName'],
                      y = simple_importances['importance'] )
             )

fig.update_layout(
        title='Simple importances'
)

fig.show()
In [222]:
extended_importances = backtest.predictors_importances['extendedImportances']
extended_importances = sorted(extended_importances, key = lambda i: i['importance'], reverse=True) 

extended_importances = pd.DataFrame.from_dict( extended_importances )

#extended_importances
In [223]:
fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[1]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[1]' ]['importance'] )
             )

fig.update_layout(
        title='Features generated from predictors used by model',
        height = 700
)

fig.show()

Evaluation of results¶

Results for out-of-sample interval.

Confusion matrix

In [224]:
TP = evaluation_data[ ( evaluation_data[ target_column+'_pred' ] == 1 ) & ( evaluation_data[target_column] == 1 ) ].shape[0]
FP = evaluation_data[ ( evaluation_data[ target_column+'_pred' ] == 1 ) & ( evaluation_data[target_column] == 0 ) ].shape[0]
TN = evaluation_data[ ( evaluation_data[ target_column+'_pred' ] == 0 ) & ( evaluation_data[target_column] == 0 ) ].shape[0]
FN = evaluation_data[ ( evaluation_data[ target_column+'_pred' ] == 0 ) & ( evaluation_data[target_column] == 1 ) ].shape[0]

print('True positive:', TP)
print('False positive:', FP)
print('True negative:', TN)
print('False negative:', FN)
True positive: 199
False positive: 50
True negative: 12714
False negative: 133
In [225]:
metrics_accuracy = evaluation_data[ evaluation_data[ target_column+'_pred' ] == evaluation_data[target_column]  ].shape[0] / evaluation_data.shape[0]
metrics_accuracy
Out[225]:
0.9860262675626146
In [226]:
cm = confusion_matrix( evaluation_data[ target_column ], evaluation_data[ target_column+'_pred' ]  )
In [227]:
cm = np.pad(cm, (1, 0), 'constant')   # pad the 2x2 confusion matrix to 3x3 with a leading row and column of zeros
cm[0][2] = 1                          # set the two empty corner cells to 1
cm[2][0] = 1

cm
Out[227]:
array([[    0,     0,     1],
       [    0, 12714,    50],
       [    1,   133,   199]])

Precision, Recall, Specificity, F1 score¶

Precision - proportion of positive data points that are correctly considered as positive, with respect to data points predicted as positive (both correctly and incorrectly).

Recall (true positive rate or Sensitivity) - proportion of positive data points that are correctly considered as positive, with respect to all positive data points.

Specificity (true negative rate) - proportion of negative data points that are correctly considered as negative, with respect to all negative data points.
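For reference, here is a minimal sketch computing these metrics directly from the confusion-matrix counts defined above (TP, FP, TN, FN); it should agree with the sklearn-based values in the cells that follow.

precision_direct   = TP / (TP + FP)                  # positive predictive value
recall_direct      = TP / (TP + FN)                  # sensitivity / true positive rate
specificity_direct = TN / (TN + FP)                  # true negative rate
f1_direct          = 2 * precision_direct * recall_direct / (precision_direct + recall_direct)

print(precision_direct, recall_direct, specificity_direct, f1_direct)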

In [228]:
metrics_precision = precision_score( evaluation_data[ target_column ] , evaluation_data[ target_column+'_pred' ]  )
metrics_precision
Out[228]:
0.7991967871485943
In [229]:
metrics_recall = recall_score( evaluation_data[ target_column ] ,  evaluation_data[ target_column+'_pred' ]  )
metrics_recall
Out[229]:
0.5993975903614458
In [230]:
metrics_specificity = TN / ( FP + TN )
metrics_specificity
Out[230]:
0.9960827326856785
In [231]:
metrics_f1 = 2 * (metrics_precision * metrics_recall) / (metrics_precision + metrics_recall)
metrics_f1
Out[231]:
0.6850258175559379
In [232]:
#
# End of experiment iteration cells
#

Summary¶

In this Use case we demonstrated that TIM can also be used for classification tasks. With default settings we quickly compared multiple scenarios. In the cells below, you can find a comparison between the various window lengths.

All records¶

In [233]:
#results_all[ iteration_code ] = { 'Window':iteration_code, 'Accuracy': metrics_accuracy, 'Precision': metrics_precision,'Recall':metrics_recall, 'F1-score':metrics_f1 }
In [234]:
results_for_all_test_records = pd.DataFrame.from_dict( results_all, orient='index' )
results_for_all_test_records
Out[234]:
Window Accuracy Precision Recall F1-score
7 7 0.999847 0.500000 1.000000 0.666667
15 15 0.997175 0.815789 0.508197 0.626263
30 30 0.986026 0.799197 0.599398 0.685026

Last record only¶

In [235]:
#results_last[ iteration_code ] = { 'Window':iteration_code, 'Accuracy':metrics_accuracy,'Precision':metrics_precision,'Recall':metrics_recall,'F1-score':metrics_f1 }
In [236]:
results_for_last_test_record = pd.DataFrame.from_dict( results_last, orient='index' )
results_for_last_test_record
Out[236]:
Window Accuracy Precision Recall F1-score
7 7 1.00 1.0 1.000000 1.000000
15 15 0.97 1.0 0.727273 0.842105
30 30 0.91 0.9 0.720000 0.800000