Predictive maintenance for Aircraft Engines (Forecasting)¶

Title: Predictive maintenance for Aircraft Engines
Author: Michal Bezak, Tangent Works
Industry: Manufacturing, Transportation
Area: Predictive maintenance
Type: Forecasting

Description¶

Predictive maintenance covers a variety of areas, including but not limited to:

  • prediction of failure,
  • root cause analysis,
  • failure detection,
  • failure type classification,
  • recommendation of mitigation or maintenance actions after failure.

To avoid downtime caused by the malfunction of technical equipment, companies follow a maintenance schedule to discover and fix potential issues. Nevertheless, with more data available (from installed sensors), there are new possibilities to improve the maintenance process. With AI/ML it is possible to further optimize the maintenance schedule.

This has tangible financial implications; imagine two extreme situations:

  • Maintenance planned for later than needed - the equipment reaches the point of failure and disrupts operations; every minute of downtime can bring big losses and delays to other parts of the process, not to mention the implications for people's health or lives.
  • Maintenance performed too often or too soon - the time and material spent cost more than the situation required, and if a component is replaced completely, the capital invested into the machine/components is not utilized to its full potential.

There is zero tolerance for failure of aircraft engines, so they are checked rather sooner than later. This makes them a very good example for our sample Use case.

In this Use case we will solve the following problem: how much useful life (time) remains for a given engine (until it fails)? This kind of problem is also known as predicting Remaining Useful Life (RUL). From an ML perspective it can be framed as a forecasting problem.

Business parameters¶

| Business objective | Business value | KPI |
| --- | --- | --- |
| Financial: Maximize value of capital, minimize operational costs | Lower cost, capital utilization | - |
| Operations: Increase aircraft’s safety | Enhanced personnel/passengers safety | - |
In [5]:
import logging
import pandas as pd
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import json
import datetime

from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

import tim_client

Credentials and logging

(Do not forget to fill in your credentials in the credentials.json file)

In [6]:
with open('credentials.json') as f:
    credentials_json = json.load(f)                     # loading the credentials from credentials.json

TIM_URL = 'https://timws.tangent.works/v4/api'          # URL to which the requests are sent

SAVE_JSON = False                                       # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/'                            # folder where the requests and responses are stored

LOGGING_LEVEL = 'INFO'
In [7]:
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
In [8]:
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)

api_client.save_json = SAVE_JSON
api_client.json_saving_folder_path = JSON_SAVING_FOLDER
[INFO] 2021-02-08 17:30:27,019 - tim_client.api_client:save_json:66 - Saving JSONs functionality has been disabled
[INFO] 2021-02-08 17:30:27,020 - tim_client.api_client:json_saving_folder_path:75 - JSON destination folder changed to logs

Dataset¶

The data contain operational information about 100 aircraft engines, consolidated into a single table with columns for:

  • operational settings (3),
  • measurements from sensors (21),
  • cycle no.

The train part of the dataset contains information about when (at which cycle) each engine failed; the RUL variable was created based on this.
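
For illustration, the RUL derivation can be sketched as follows (a minimal sketch on hypothetical data; the raw per-engine frame with an engine_id column mirrors the original raw files, not the consolidated file):

import pandas as pd

# Hypothetical raw per-engine records: each engine's last recorded cycle is its failure cycle.
raw = pd.DataFrame({
    'engine_id': [1, 1, 1, 2, 2],
    'cycle':     [1, 2, 3, 1, 2],
})

# RUL at a given cycle = number of cycles remaining until that engine's failure cycle.
raw['RUL'] = raw.groupby('engine_id')['cycle'].transform('max') - raw['cycle']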

Sampling¶

Data are sampled on a daily basis.

Data¶

The original raw data files were consolidated into a single time series file - the train file is used for the in-sample interval only, and the test file is used for the out-of-sample interval.

Structure of CSV file:

| Column name | Description | Type | Availability |
| --- | --- | --- | --- |
| timestamp | Date | Timestamp column | |
| RUL | Remaining useful life (in days) | Target | t-1 |
| cycle | Operations cycle no. | Predictor | t+1 |
| setting1 ... setting3 | Operational settings | Predictor | t+1 |
| s1 ... s21 | Sensor measurements | Predictor | t+1 |

Due to the consolidation into a single file, timestamps were re-generated to avoid duplicates and to be continuous (they are not real timestamps from operations records).

Please note that the engine_id column is not present in the file because it is an identifier, not a real measurement/setting.
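
A minimal sketch of such a consolidation (illustrative data and an arbitrary start date; as noted above, the generated timestamps are not real operational timestamps):

import pandas as pd

# Hypothetical per-engine records to be consolidated.
raw = pd.DataFrame({
    'engine_id': [1, 1, 1, 2, 2],
    'cycle':     [1, 2, 3, 1, 2],
})

# Concatenate engines in order and generate a continuous daily timestamp to avoid duplicates.
consolidated = raw.sort_values(['engine_id', 'cycle']).reset_index(drop=True)
consolidated['timestamp'] = pd.date_range('2000-01-01', periods=len(consolidated), freq='D')

# Drop the identifier column - it is not a real measurement/setting.
consolidated = consolidated.drop(columns=['engine_id'])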

Data situation¶

If we want TIM to forecast the target, the very last record of the target must be kept empty (NaN/None). TIM will then use all available predictors to predict the value for that record. Furthermore, this situation is replicated to calculate results for all out-of-sample records during back-testing.
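
For illustration, a minimal sketch of this preparation (assuming a pandas DataFrame data with the structure described above, as loaded later in this notebook):

import numpy as np

# Keep the very last record of the target empty so TIM forecasts it from the predictors.
data.loc[data.index[-1], 'RUL'] = np.nan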

CSV files used in experiments can be downloaded here.

Source¶

Raw data files were acquired from Microsoft's server:

http://azuremlsamples.azureml.net/templatedata/PM_train.txt

http://azuremlsamples.azureml.net/templatedata/PM_test.txt

http://azuremlsamples.azureml.net/templatedata/PM_truth.txt

In [9]:
fpath = 'data_rul.csv'
data = tim_client.load_dataset_from_csv_file( fpath , sep=',')

data.tail()
Out[9]:
timestamp RUL cycle setting1 setting2 setting3 s1 s2 s3 s4 ... s12 s13 s14 s15 s16 s17 s18 s19 s20 s21
33722 2102-08-29 24.0 194 0.0049 0.0000 100.0 518.67 643.24 1599.45 1415.79 ... 520.69 2388.00 8213.28 8.4715 0.03 394 2388 100.0 38.65 23.1974
33723 2102-08-30 23.0 195 -0.0011 -0.0001 100.0 518.67 643.22 1595.69 1422.05 ... 521.05 2388.09 8210.85 8.4512 0.03 395 2388 100.0 38.57 23.2771
33724 2102-08-31 22.0 196 -0.0006 -0.0003 100.0 518.67 643.44 1593.15 1406.82 ... 521.18 2388.04 8217.24 8.4569 0.03 395 2388 100.0 38.62 23.2051
33725 2102-09-01 21.0 197 -0.0038 0.0001 100.0 518.67 643.26 1594.99 1419.36 ... 521.33 2388.08 8220.48 8.4711 0.03 395 2388 100.0 38.66 23.2699
33726 2102-09-02 NaN 198 0.0013 0.0003 100.0 518.67 642.95 1601.62 1424.99 ... 521.07 2388.05 8214.64 8.4903 0.03 396 2388 100.0 38.70 23.1855

5 rows × 27 columns

In [10]:
data.shape
Out[10]:
(33727, 27)

Visualization of data.

In [13]:
fig = go.Figure()

fig.add_trace(go.Scatter( x = data.index,
                      y = data['RUL']
             ))

fig.update_layout(
        height = 700,
        width = 1200,
        title = 'RUL'
)

fig.show()
In [12]:
timestamp_column = 'timestamp'
target_column = 'RUL'

Engine settings¶

Parameters that need to be set are:

  • Prediction horizon (Sample + 1) - because we want to predict the next value only.
  • Back-test length (which defines the out-of-sample interval) - set explicitly to match the exact size of the test dataset.
  • Model quality - by setting it to 'Medium' we prevent TIM from relying on target lags (offsets).

We also ask the engine for additional data to see the details of sub-models, so we define the extendedOutputConfiguration parameter as well.

In [25]:
backtest_length = 13096
In [26]:
configuration_backtest = {
    'usage': {                                 
        'predictionTo': { 
            'baseUnit': 'Sample',             
            'offset': 1
        }, 
        'modelQuality':[ {'day':0,'quality':'Medium'} ],
        'backtestLength': backtest_length  
    },
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True,
    }
}

Experiment iteration(s)¶

We will evaluate the results from two perspectives:

  1. evaluate only the last data point per engine,
  2. evaluate all points in the out-of-sample interval.

Iteration 1¶

In [27]:
backtest = api_client.prediction_build_model_predict(data, configuration_backtest)     # running the RTInstantML forecasting using data and defined configuration
backtest.status                                                                        # status of the job
Out[27]:
'Finished'
In [28]:
backtest.result_explanations
Out[28]:
[]
In [54]:
out_of_sample_predictions = backtest.aggregated_predictions[1]['values']
out_of_sample_predictions.rename( columns = {'Prediction':target_column+'_pred'}, inplace=True)
In [55]:
out_of_sample_timestamps = out_of_sample_predictions.index.tolist()
In [56]:
evaluation_data = data.copy()

evaluation_data[ timestamp_column ] = pd.to_datetime(data[ timestamp_column ]).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data[ timestamp_column ].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index( timestamp_column,inplace=True)

evaluation_data = evaluation_data[ [ target_column,'cycle' ] ]

evaluation_data = evaluation_data.join( out_of_sample_predictions )

evaluation_data.head()
Out[56]:
RUL cycle RUL_pred
timestamp
2066-10-24 00:00:00+00:00 0.0 200 0.000
2066-10-25 00:00:00+00:00 142.0 1 210.700
2066-10-26 00:00:00+00:00 141.0 2 225.228
2066-10-27 00:00:00+00:00 140.0 3 217.445
2066-10-28 00:00:00+00:00 139.0 4 206.110
In [57]:
# Raw data contains records for 100 engines, and each engine's records end at the cycle at which it failed. In our consolidated file we can therefore detect the last record of each engine by a sudden drop in the cycle value.

# Filter the evaluation data to keep only the last record for each engine.
evaluation_data['cycle_diff'] = evaluation_data['cycle'].diff(-1)      # current minus next value; positive only at an engine's last record, where the next engine's cycle counter restarts
evaluation_data = evaluation_data[ evaluation_data['cycle_diff']>0 ]

evaluation_data.head()
Out[57]:
RUL cycle RUL_pred cycle_diff
timestamp
2066-10-24 00:00:00+00:00 0.0 200 0.0000 199.0
2066-11-24 00:00:00+00:00 112.0 31 181.0570 30.0
2067-01-12 00:00:00+00:00 98.0 49 139.5480 48.0
2067-05-18 00:00:00+00:00 69.0 126 64.9790 125.0
2067-09-01 00:00:00+00:00 82.0 106 89.7947 105.0
In [58]:
evaluation_data.shape
Out[58]:
(100, 4)

Insights - inspecting ML models¶

Simple and extended importances are available for you to see to what extent each predictor contributes to explaining the variance of the target variable.

In [59]:
simple_importances = backtest.predictors_importances['simpleImportances']
simple_importances = sorted(simple_importances, key = lambda i: i['importance'], reverse=True) 

simple_importances = pd.DataFrame.from_dict( simple_importances )
In [60]:
fig = go.Figure()

fig.add_trace(go.Bar( x = simple_importances['predictorName'],
                      y = simple_importances['importance'] )
             )

fig.update_layout(
        title='Simple importances'
)

fig.show()
In [61]:
extended_importances = backtest.predictors_importances['extendedImportances']
extended_importances = sorted(extended_importances, key = lambda i: i['importance'], reverse=True) 

extended_importances = pd.DataFrame.from_dict( extended_importances )

#extended_importances
In [62]:
fig = go.Figure()

fig.add_trace(go.Bar( x = extended_importances[ extended_importances['time'] == '[1]' ]['termName'],
                      y = extended_importances[ extended_importances['time'] == '[1]' ]['importance'] )
             )

fig.update_layout(
        title='Features generated from predictors used by model',
        height = 700
)

fig.show()

Evaluation of results¶

Results for out-of-sample interval.

In [64]:
fig = go.Figure()

fig.add_trace(go.Scatter( x = evaluation_data.index,
                      y = evaluation_data[target_column],
                      name='Actual'
             ))
fig.add_trace(go.Scatter( x = evaluation_data.index,
                      y = evaluation_data[target_column+'_pred'],
                      name='Predicted'
             ))

fig.update_layout(
        title = 'Predicted vs. Actual',
        height = 700,
        width = 1200
)

fig.show()
In [40]:
rmse_last = math.sqrt( mean_squared_error( evaluation_data[target_column], evaluation_data[target_column+'_pred']) )
rmse_last
Out[40]:
25.3202808752172
In [41]:
mae_last = mean_absolute_error( evaluation_data[target_column], evaluation_data[target_column+'_pred'])
mae_last
Out[41]:
19.709900600000005
In [42]:
mape_last = abs( ( evaluation_data[target_column] - evaluation_data[target_column+'_pred'] ) / evaluation_data[target_column] ).mean()
mape_last
Out[42]:
0.36884273224467423

Iteration 2¶

In [43]:
# backtest = api_client.prediction_build_model_predict(data, configuration_backtest)     # running the RTInstantML forecasting using data and defined configuration
# backtest.status                                                                        # status of the job
In [44]:
# backtest.result_explanations

In this iteration we will evaluate all values in the out-of-sample interval.

In [45]:
out_of_sample_predictions = backtest.aggregated_predictions[1]['values']
out_of_sample_predictions.rename( columns = {'Prediction':target_column+'_pred'}, inplace=True)
In [46]:
out_of_sample_timestamps = out_of_sample_predictions.index.tolist()
In [47]:
evaluation_data = data.copy()

evaluation_data[ timestamp_column ] = pd.to_datetime(data[ timestamp_column ]).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data[ timestamp_column ].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index( timestamp_column,inplace=True)

evaluation_data = evaluation_data[ [ target_column,'cycle' ] ]

evaluation_data = evaluation_data.join( out_of_sample_predictions )

evaluation_data.head()
Out[47]:
RUL cycle RUL_pred
timestamp
2066-10-24 00:00:00+00:00 0.0 200 0.000
2066-10-25 00:00:00+00:00 142.0 1 210.700
2066-10-26 00:00:00+00:00 141.0 2 225.228
2066-10-27 00:00:00+00:00 140.0 3 217.445
2066-10-28 00:00:00+00:00 139.0 4 206.110
In [48]:
evaluation_data.shape
Out[48]:
(13096, 3)

Insights - inspecting ML models¶

This iteration uses the very same models/settings as the previous one, so we will skip the visualization.

Evaluation of results¶

Results for out-of-sample interval.

In [49]:
fig = go.Figure()

fig.add_trace(go.Scatter( x = evaluation_data.index,
                      y = evaluation_data[target_column],
                      name='Actual'
             ))
fig.add_trace(go.Scatter( x = evaluation_data.index,
                      y = evaluation_data[target_column+'_pred'],
                      name='Predicted'
             ))

fig.update_layout(
        title = 'Predicted vs. Actual',
        height = 700,
        width = 1200
)

fig.show()
In [50]:
rmse_all = math.sqrt( mean_squared_error( evaluation_data[target_column], evaluation_data[target_column+'_pred']) )
rmse_all
Out[50]:
40.39381016475072
In [51]:
mae_all = mean_absolute_error( evaluation_data[target_column], evaluation_data[target_column+'_pred'])
mae_all
Out[51]:
30.787960507238854
In [52]:
mape_all = abs( ( evaluation_data[target_column] - evaluation_data[target_column+'_pred'] ) / evaluation_data[target_column] ).mean()
mape_all
Out[52]:
0.2475012954893914
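
To compare the two perspectives side by side, here is a minimal sketch using the metric variables computed in the two iterations above:

import pandas as pd

# Collect the metrics from both evaluation perspectives into one overview table.
comparison = pd.DataFrame(
    {
        'last point per engine': [rmse_last, mae_last, mape_last],
        'all out-of-sample points': [rmse_all, mae_all, mape_all],
    },
    index=['RMSE', 'MAE', 'MAPE'],
)
comparison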

Summary¶

In this Use case, we demonstrated how TIM can be used in predictive maintenance scenarios with little effort. Without further optimisation of math settings, TIM delivered its predictions of RUL in a fraction of the time.