Predicting price movement for currency pairs¶

Title: Predicting price movement for currency pairs
Author: Michal Bezak, Tangent Works
Industry: Finance
Area: Trading - Forex
Type: Forecasting / Classification

Description¶

Trading on financial markets is a very risky endeavor and, when proper risk management is not in place, losses are just around the corner. Constructing a profitable trading strategy requires many elements, and risk management and position sizing should certainly be in the mix. One of the essential building blocks is a mechanism for estimating price movement.

Each market has a different level of volatility and risk. One of the most liquid markets in the world is Forex, where currency pairs are traded, e.g. EURUSD, NOKEUR etc. Forex is traded 24 hours a day, 5 days a week, and allows traders to open and close positions at rapid speed.

Our use case demonstrates how TIM can support forecasting the direction of price movement on an hourly basis. TIM will forecast the price movement (direction) for the next hour. With this information, traders can be more confident when opening new positions. To evaluate business results we take a very simple approach: each position is liquidated completely after one hour.

Business parameters¶

Business objective: Make profit from trading on forex intra-day market
Value: Direct input into the decision-making process when opening positions - understanding the movement of price in the upcoming hour
KPI: Profit / Loss

Disclaimer: Please note that the sole purpose of this notebook is demonstration; it must not be taken as advice to be followed (e.g. to trade real money). We provide a full disclaimer at the very bottom of this page; read it carefully.

In [2]:
import logging
import pandas as pd
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import json
import pickle
import datetime

import tim_client

Credentials and logging

(Do not forget to fill in your credentials in the credentials.json file)

In [3]:
with open('credentials.json') as f:
    credentials_json = json.load(f)                     # loading the credentials from credentials.json

TIM_URL = 'https://timws.tangent.works/v4/api'          # URL to which the requests are sent

SAVE_JSON = False                                       # if True - JSON requests and responses are saved to JSON_SAVING_FOLDER
JSON_SAVING_FOLDER = 'logs/'                            # folder where the requests and responses are stored

LOGGING_LEVEL = 'INFO'
In [4]:
level = logging.getLevelName(LOGGING_LEVEL)
logging.basicConfig(level=level, format='[%(levelname)s] %(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(message)s')
logger = logging.getLogger(__name__)
In [5]:
credentials = tim_client.Credentials(credentials_json['license_key'], credentials_json['email'], credentials_json['password'], tim_url=TIM_URL)
api_client = tim_client.ApiClient(credentials)

api_client.save_json = SAVE_JSON
api_client.json_saving_folder_path = JSON_SAVING_FOLDER
[INFO] 2021-01-22 13:56:00,271 - tim_client.api_client:save_json:66 - Saving JSONs functionality has been disabled
[INFO] 2021-01-22 13:56:00,274 - tim_client.api_client:json_saving_folder_path:75 - JSON destination folder changed to logs

We will collect results for evaluation in "results".

In [6]:
results = { 'long': {}, 'short': {} }

Dataset¶

The source dataset contains values for the USDCHF currency pair (open, high, low, close, volume).

Two additional columns were added:

  • change - the change of the USDCHF closing price against its value at the previous timestamp,
  • direction - 1 if change is > 0, otherwise 0; direction is our target to be predicted.
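The two derived columns can be sketched in pandas as follows (an illustrative reconstruction, not the exact preprocessing used to build the dataset; the column name `c_usdchf` follows the data dictionary below):

```python
import pandas as pd

def add_target_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Derive 'change' and 'direction' from the closing price."""
    out = df.copy()
    out['change'] = out['c_usdchf'].diff()                 # close minus previous close
    out['direction'] = (out['change'] > 0).astype(float)   # 1 if price moved up, else 0
    return out

prices = pd.DataFrame({'c_usdchf': [0.9934, 0.9938, 0.9938, 0.9937]})
derived = add_target_columns(prices)
```

Note that the first row's change is undefined (NaN), which maps to direction 0 under this rule.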

Sampling and gaps¶

Data are sampled on an hourly basis and contain gaps because Forex is not traded during weekends.

Data¶

| Column name | Description | Type | Availability |
| --- | --- | --- | --- |
| timestamp | Timestamp on an hourly basis (GMT) | Timestamp column | - |
| direction | 1 if change is > 0, otherwise 0 | Target | t-1 |
| change | Change against the previous timestamp for c_usdchf | Predictor | t-1 |
| c_usdchf | Price at which trading closed in the given hour (USDCHF) | Predictor | t-1 |
| o_usdchf | Price at which the given hour opened (USDCHF) | Predictor | t-1 |
| h_usdchf | The highest price in the given interval (USDCHF) | Predictor | t-1 |
| l_usdchf | The lowest price in the given interval (USDCHF) | Predictor | t-1 |
| v_usdchf | Volume traded (USDCHF) | Predictor | t-1 |

Forecasting situations¶

TIM detects the forecasting situation from the current "shape" of the data, e.g. if the last target value is available at the 15:00 timestamp, it will start forecasting as of 16:00. It also takes that last 15:00 timestamp as the reference point against which the availability of each column in the dataset is determined - this rule is then followed in back-testing when calculating results for the out-of-sample interval.

In our case all values are aligned until t-1.
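The rule described above can be illustrated with a small sketch (an assumption about the described behaviour for illustration only, not TIM's actual code): the forecasting origin is the first timestamp after the last available target value.

```python
import numpy as np
import pandas as pd

# Toy dataset: the target ('direction') is missing for the most recent hour,
# so forecasting starts from that hour.
df = pd.DataFrame({
    'timestamp': pd.date_range('2019-12-12 12:00', periods=4, freq='h'),
    'direction': [1.0, 0.0, 1.0, np.nan],   # target available until t-1 only
})

last_target_ts = df.loc[df['direction'].notna(), 'timestamp'].max()
forecast_from = last_target_ts + pd.Timedelta(hours=1)
```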

We wanted to back-test all forecasting situations during the day (i.e. all 24 hours), so 24 versions of the dataset are available.

Package of CSV files used in experiments can be downloaded here.

Source¶

Raw data used in this demonstration were downloaded from INVESTING.COM website.

In [7]:
SITUATIONS = { h: 'data_'+str(h)+'_4B.csv' for h in range(0,24) }
  
SITUATIONS
Out[7]:
{0: 'data_0_4B.csv',
 1: 'data_1_4B.csv',
 2: 'data_2_4B.csv',
 3: 'data_3_4B.csv',
 4: 'data_4_4B.csv',
 5: 'data_5_4B.csv',
 6: 'data_6_4B.csv',
 7: 'data_7_4B.csv',
 8: 'data_8_4B.csv',
 9: 'data_9_4B.csv',
 10: 'data_10_4B.csv',
 11: 'data_11_4B.csv',
 12: 'data_12_4B.csv',
 13: 'data_13_4B.csv',
 14: 'data_14_4B.csv',
 15: 'data_15_4B.csv',
 16: 'data_16_4B.csv',
 17: 'data_17_4B.csv',
 18: 'data_18_4B.csv',
 19: 'data_19_4B.csv',
 20: 'data_20_4B.csv',
 21: 'data_21_4B.csv',
 22: 'data_22_4B.csv',
 23: 'data_23_4B.csv'}

We will run all cells below (until the Evaluation section) 24 times, to simulate results for each forecasting situation.

The cell below sets the current situation, i.e. the hour at which we simulate forecasting.

In [974]:
situation = 23

Read the dataset for the given situation.

In [975]:
data = tim_client.load_dataset_from_csv_file('data/'+SITUATIONS[ situation ], sep=',')
In [976]:
data.head()
Out[976]:
timestamp direction c_usdchf o_usdchf h_usdchf l_usdchf v_usdchf change
0 2016-02-08 03:00:00 1.0 0.9938 0.9932 0.9938 0.9929 0 0.0004
1 2016-02-08 04:00:00 0.0 0.9938 0.9938 0.9941 0.9932 0 0.0000
2 2016-02-08 05:00:00 0.0 0.9937 0.9938 0.9942 0.9934 0 -0.0001
3 2016-02-08 06:00:00 0.0 0.9936 0.9938 0.9942 0.9935 0 -0.0001
4 2016-02-08 07:00:00 1.0 0.9938 0.9936 0.9942 0.9936 0 0.0002
In [977]:
data.tail()
Out[977]:
timestamp direction c_usdchf o_usdchf h_usdchf l_usdchf v_usdchf change
24226 2019-12-12 18:00:00 1.0 0.98660 0.98610 0.98690 0.98555 1829 0.00040
24227 2019-12-12 19:00:00 0.0 0.98610 0.98645 0.98705 0.98595 2013 -0.00050
24228 2019-12-12 20:00:00 0.0 0.98580 0.98615 0.98730 0.98560 2234 -0.00030
24229 2019-12-12 21:00:00 0.0 0.98475 0.98600 0.98640 0.98460 2384 -0.00105
24230 2019-12-12 22:00:00 1.0 0.98510 0.98480 0.98535 0.98475 1012 0.00035
In [978]:
data.shape
Out[978]:
(24231, 8)

Visualisation of the closing price.

In [979]:
fig = plt.subplots.make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.02)  

fig.add_trace( go.Scatter( x = data.loc[:, "timestamp"], y=data.loc[:, "c_usdchf"], name = "USDCHF", line=dict(color='blue')), row=1, col=1) 

fig.update_layout(height=500, width=1000, title = 'Closing price: USDCHF')                           

fig.show()

Engine settings¶

The only parameters that need to be set are:

  • Prediction horizon (Sample + 1) - because we want to predict only the very next hour,
  • Interpolation (data imputation) - recommended due to weekend and other gaps,
  • Back-test length.

We also ask the engine for additional data, to see the details of sub-models, so we define extendedOutputConfiguration as well.

30% of data will be used for out-of-sample interval.

In [980]:
backtest_length = int( data.shape[0] * .3 )

backtest_length
Out[980]:
7269
In [981]:
configuration_backtest = {
    'usage': {                                 
        'predictionTo': { 
            'baseUnit': 'Sample',             
            'offset': 1                        # number of units we want to predict into the future
        },
        'backtestLength': backtest_length      # number of samples that are used for backtesting (note that these samples are excluded from model building period)
    },
    'extendedOutputConfiguration': {
        'returnExtendedImportances': True      # flag that specifies if the importances of features are returned in the response
    }
    ,'interpolation':{
        'maxLength': 48*2,
        'type': 'Linear'
    }
}
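The interpolation setting above can be approximated in pandas (a rough analogue for illustration, an assumption rather than TIM's internal behaviour): short gaps are filled linearly, and a limit caps how many consecutive missing samples get filled, mirroring maxLength = 48*2 hourly samples.

```python
import numpy as np
import pandas as pd

# A short gap inside the series is filled linearly; the `limit` argument
# bounds the number of consecutive NaNs filled (here 48*2, as in the
# configuration above).
s = pd.Series([1.0, np.nan, np.nan, 4.0])
filled = s.interpolate(method='linear', limit=48 * 2)
```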

Experiment iterations¶

For each situation we will run the experiment, iteration by iteration, get insights into the models, and collect results.

In [982]:
backtest = api_client.prediction_build_model_predict(data, configuration_backtest)     # running the RTInstantML forecasting using data and defined configuration
backtest.status                                                                        # status of the job
Out[982]:
'FinishedWithWarning'
In [983]:
backtest.result_explanations
Out[983]:
[{'index': 1,
  'message': 'Predictor direction has a value missing for timestamp 2016-02-12 23:00:00.'}]
In [984]:
def encode_predictions(x):
    return 1 if x > 0.5 else 0
In [985]:
out_of_sample_timestamps = backtest.aggregated_predictions[1]['values'].index.tolist()
out_of_sample_predictions = pd.DataFrame.from_dict( backtest.aggregated_predictions[1]['values'] )
In [986]:
out_of_sample_predictions['direction_pred'] = out_of_sample_predictions['Prediction'].apply( lambda x: encode_predictions(x) )
out_of_sample_predictions = out_of_sample_predictions[ ['direction_pred'] ]
In [987]:
evaluation_data = data.copy()

evaluation_data['timestamp'] = pd.to_datetime(data['timestamp']).dt.tz_localize('UTC')
evaluation_data = evaluation_data[ evaluation_data['timestamp'].isin( out_of_sample_timestamps ) ]

evaluation_data.set_index('timestamp',inplace=True)

evaluation_data = evaluation_data[ ['direction','change'] ]
In [988]:
evaluation_data = evaluation_data.join( out_of_sample_predictions )
In [989]:
evaluation_data = evaluation_data[ evaluation_data.index.hour == ( situation ) ]
In [990]:
evaluation_data.head()
Out[990]:
direction change direction_pred
2019-02-13 23:00:00+00:00 1.0 0.00030 0
2019-02-14 23:00:00+00:00 1.0 0.00005 0
2019-02-15 23:00:00+00:00 0.0 -0.00015 0
2019-02-17 23:00:00+00:00 0.0 -0.00015 0
2019-02-18 23:00:00+00:00 1.0 0.00060 0

No. of evaluated data points

In [991]:
evaluation_data.shape[0]
Out[991]:
219

Evaluating period from

In [992]:
evaluation_data.index[0]
Out[992]:
Timestamp('2019-02-13 23:00:00+0000', tz='UTC')

Evaluating period to

In [993]:
evaluation_data.index[-1]
Out[993]:
Timestamp('2019-12-11 23:00:00+0000', tz='UTC')

Evaluation period length (days)

In [994]:
trading_p = evaluation_data.index[-1] - evaluation_data.index[0]

trading_p.days
Out[994]:
301

Insights - inspecting ML models¶

TIM lets you see which predictors are considered important. Simple and extended importances show to what extent each predictor contributes to explaining the variance of the target variable.

In [995]:
simple_importances = backtest.predictors_importances['simpleImportances']
simple_importances = sorted(simple_importances, key = lambda i: i['importance'], reverse=True) 

extended_importances = backtest.predictors_importances['extendedImportances']
In [996]:
simple_importances
Out[996]:
[{'importance': 100.0, 'predictorName': 'direction'}]
In [997]:
extended_importances
Out[997]:
[{'time': '[1]',
  'type': 'TargetAndTargetTransformation',
  'termName': 'EMA_direction(t-1, w = 2)',
  'importance': 55.06},
 {'time': '[1]',
  'type': 'Interaction',
  'termName': 'EMA_direction(t-1, w = 2) & cos(2πt / 24.0 hours)',
  'importance': 44.94},
 {'time': '[1]',
  'type': 'TargetAndTargetTransformation',
  'termName': 'Intercept',
  'importance': 0.0}]

Evaluation of results¶

We are predicting the direction of price movement in the upcoming hour: up means 1, down (or no change, although this never happens) means 0. This is a binary classification problem, so to evaluate results we calculate the percentage of correctly predicted directions, for both 1 and 0, for each forecasting situation.
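The per-class hit rate can be sketched as follows (a minimal illustration of the metric, not the notebook's exact cells):

```python
import pandas as pd

def directional_accuracy(actual: pd.Series, predicted: pd.Series) -> dict:
    """Fraction of hours where the predicted direction matched the actual
    one, split by the actual class (1 = up, 0 = down)."""
    hits = actual == predicted
    return {1: hits[actual == 1].mean(), 0: hits[actual == 0].mean()}

actual = pd.Series([1, 0, 1, 0, 0])
predicted = pd.Series([1, 0, 0, 0, 1])
acc = directional_accuracy(actual, predicted)
```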

To understand the theoretical business impact of acting on the predictions, we calculate gains, losses and net P/L for both types of positions opened (long and short). Results are available for each forecasting situation, i.e. each predicted hour.

Hypothetical value of capital traded on each position.

In [998]:
POSITION_SIZE = 10**6

'{:,}'.format(POSITION_SIZE)
Out[998]:
'1,000,000'

Long positions¶

In [999]:
filter_gains = evaluation_data['direction_pred'] == evaluation_data['direction']
filter_losses = evaluation_data['direction_pred'] != evaluation_data['direction']
In [1000]:
direction_check = 1

filter_direction =  evaluation_data['direction'] == direction_check

gains = sum( evaluation_data[ filter_direction & filter_gains ]['change'] * POSITION_SIZE )
losses = sum( evaluation_data[ filter_direction & filter_losses ]['change'] * POSITION_SIZE )
pl = gains-losses
In [1001]:
print('Gains', '%.4f' % gains  )
print('Losses', '%.4f' % losses )
print('Profit/Loss', '%.4f' % pl )
Gains 2149.9991
Losses 17800.3907
Profit/Loss -15650.3916
In [1002]:
result = evaluation_data[ filter_direction ][ ['direction_pred'] ].value_counts()
result_len = evaluation_data[ filter_direction ].shape[0]

accuracy = 0
if direction_check in result: accuracy = result[ direction_check ].values[0] / result_len

print('Accuracy', '%.4f' % accuracy  )
Accuracy 0.0909
In [1003]:
results['long'][ str(situation) ] = {'gains': gains, 'losses': losses, 'pl': pl, 'accuracy': accuracy }

Short positions¶

In [1004]:
direction_check = 0

filter_direction =  evaluation_data['direction'] == direction_check

gains = -sum( evaluation_data[ filter_direction & filter_gains ]['change'] * POSITION_SIZE )
losses = -sum( evaluation_data[ filter_direction & filter_losses ]['change'] * POSITION_SIZE )
pl = gains-losses
In [1005]:
print('Gains', '%.4f' % gains  )
print('Losses', '%.4f' % losses )
print('Profit/Loss', '%.4f' % pl )
Gains 42099.6547
Losses 900.0301
Profit/Loss 41199.6245
In [1006]:
result = evaluation_data[ filter_direction ][ ['direction_pred'] ].value_counts()
result_len = evaluation_data[ filter_direction ].shape[0]

accuracy = 0
if direction_check in result: accuracy = result[ direction_check ].values[0] / result_len

print('Accuracy', '%.4f' % accuracy  )
Accuracy 0.9648
In [1007]:
results['short'][ str(situation) ] = {'gains': gains, 'losses': losses, 'pl': pl, 'accuracy': accuracy }
In [1008]:
# save results to file on hard drive
# backup_file = open('results5.3.pkl', 'wb')
# pickle.dump(results, backup_file)
# backup_file.close()

Summary¶

The following tables and charts depict the evaluation of the out-of-sample interval for each forecasting situation.

A few explanations before we dive in:

  • Long - opening a position while expecting the price to increase; if the price drops, losses are generated.
  • Short - opening a position while expecting the price to decrease; if the price goes up, losses are generated.
  • gains - money earned if the trader followed the predicted direction (and liquidated the next hour), e.g. if 1 was predicted and the price increased, the trader would earn money; the same holds if 0 was predicted and the price decreased.
  • losses - money lost if the trader followed the predicted direction (and liquidated the next hour) while the market moved in the opposite direction, e.g. if 1 was predicted and the price decreased, the trader would lose money; the same holds if 0 was predicted and the price increased.
  • pl - (profit/loss) is the sum of gains and losses, i.e. the net result. It is a gross theoretical value because spread, slippage and other relevant fees were not factored in; the calculation is based on pure price change, so additional steps are needed to calculate real P/L.
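The bookkeeping above can be condensed into one sketch (an illustrative simplification of the evaluation cells, using its own sign convention: losses are reported as positive amounts and net P/L is gains minus losses):

```python
import pandas as pd

POSITION_SIZE = 10**6   # hypothetical capital per position, as above

def gross_pl(change: pd.Series, predicted: pd.Series):
    """Predicted 1 opens a long position (earning the price change),
    predicted 0 opens a short (earning the negated change).
    Spread, slippage and fees are deliberately ignored."""
    per_hour = change * predicted.map({1: 1, 0: -1}) * POSITION_SIZE
    gains = per_hour[per_hour > 0].sum()
    losses = -per_hour[per_hour < 0].sum()   # reported as a positive number
    return gains, losses, gains - losses

change = pd.Series([0.0004, -0.0005, 0.0003])
predicted = pd.Series([1, 1, 0])
gains, losses, pl = gross_pl(change, predicted)
```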

Long positions¶

In [1009]:
results_df_long = pd.DataFrame.from_dict(results['long'], orient='index')
results_df_long
Out[1009]:
gains losses pl accuracy
0 4799.902439 19800.305366 -15000.402927 0.094118
1 2349.972725 32899.439335 -30549.466610 0.064516
2 3249.943256 47600.626945 -44350.683689 0.068966
3 5499.899387 30600.070953 -25100.171566 0.101852
4 3649.950027 31350.255012 -27700.304985 0.065421
5 4499.912262 33800.125122 -29300.212860 0.140625
6 3750.085831 24149.835110 -20399.749279 0.133333
7 9749.948979 26050.090790 -16300.141811 0.229358
8 13799.846173 47600.030899 -33800.184726 0.207207
9 18349.826336 48149.943352 -29800.117016 0.279570
10 40700.376034 35050.034523 5650.341511 0.495575
11 32749.831676 29699.325562 3050.506114 0.470085
12 28950.095176 25100.171566 3849.923610 0.490741
13 42899.668217 31349.599361 11550.068855 0.529412
14 28950.452804 47049.880028 -18099.427223 0.367347
15 35799.443722 49350.202084 -13550.758362 0.414414
16 20900.189876 57150.125504 -36249.935627 0.257426
17 8449.912071 67400.336266 -58950.424195 0.145455
18 12550.175190 48350.214959 -35800.039769 0.213592
19 7900.416851 35149.991512 -27249.574661 0.191304
20 3500.103951 62250.077724 -58749.973774 0.085271
21 13300.001621 44900.000095 -31599.998474 0.204724
22 6050.050259 28749.942780 -22699.892521 0.115702
23 2149.999142 17800.390721 -15650.391579 0.090909
In [1010]:
results_df_long['pl'].sum()
Out[1010]:
-566801.0115637623

Short positions¶

In [1011]:
results_df_short= pd.DataFrame.from_dict(results['short'], orient='index')
results_df_short
Out[1011]:
gains losses pl accuracy
0 39600.133896 1299.917698 38300.216198 0.954545
1 22600.233555 1349.806786 21250.426769 0.926316
2 38050.472736 2099.931240 35950.541496 0.920792
3 30900.239944 4250.049591 26650.190353 0.888889
4 28699.815273 5549.967289 23149.847984 0.851852
5 16950.249672 3150.045872 13800.203800 0.850575
6 21900.057793 2749.800682 19150.257111 0.858407
7 28500.258923 3750.026226 24750.232696 0.872727
8 43249.368668 13199.806213 30049.562454 0.761905
9 69150.209427 27599.990368 41550.219059 0.672000
10 34399.807453 30499.875546 3899.931907 0.538462
11 20650.446415 33749.818802 -13099.372387 0.494949
12 20849.823952 38049.757480 -17199.933529 0.453704
13 33449.947834 20050.168038 13399.779797 0.571429
14 67349.255085 31049.907207 36299.347877 0.618644
15 47200.381756 28250.217438 18950.164318 0.647619
16 76599.955558 27749.896050 48850.059509 0.773913
17 62149.822712 20200.431347 41949.391365 0.754717
18 48400.402069 13249.993324 35150.408745 0.831858
19 57549.893856 4450.023174 53099.870682 0.933333
20 44550.478459 3150.165081 41400.313378 0.891089
21 41900.098324 7600.247860 34299.850464 0.875969
22 37150.204182 1450.002193 35700.201988 0.947368
23 42099.654675 900.030136 41199.624538 0.964789
In [1012]:
results_df_short['pl'].sum()
Out[1012]:
648501.3365736774

The charts below show results for each forecasting situation.

In [1013]:
results_df_short.index = [ int(i) for i in results_df_short.index ]
results_df_short.sort_index(inplace=True)

results_df_long.index = [ int(i) for i in results_df_long.index ]
results_df_long.sort_index(inplace=True)
In [1014]:
fig = go.Figure() 

fig.add_trace( go.Bar( x = results_df_long.index, y=results_df_long['accuracy'].values, name = "Long", marker_color='blue') )

fig.add_trace( go.Bar( x = results_df_short.index, y=results_df_short['accuracy'].values, name = "Short", marker_color='green') )

fig.update_layout(height=500, width=1000, title='Accuracy')                           

fig.show()
In [1015]:
fig = go.Figure() 

fig.add_trace( go.Bar( x = results_df_long.index, y=results_df_long['pl'].values, name = "Long", marker_color='blue') )

fig.add_trace( go.Bar( x = results_df_short.index, y=results_df_short['pl'].values, name = "Short", marker_color='green') )

fig.update_layout( height=500, width=1000, title='P/L' )                           

fig.show()

Summary¶

We can conclude that TIM, with the given data and approach (i.e. one global model used for predictions over the out-of-sample interval), predicted downward price moves much better in most situations.

Back-testing vs. Production¶

In a production setup, however, this approach would not be followed, because TIM can quickly build a new model with every new prediction and thus would not rely on one pre-built model. This is extremely important for time series with very dynamic changes - as financial markets certainly are.

What's next?¶

With basic data and almost zero effort, we demonstrated that TIM, with default settings, has the potential to achieve quite remarkable results. Nevertheless, there is much room for improvement for anyone who wants to continue further, for instance by experimenting with other intervals, adding additional (explanatory) data, etc.

Disclaimer¶

Any content on this page should not be relied upon as advice or construed as providing recommendations of any kind. It is your responsibility to confirm and decide which trades to make. Past results are no indication of future performance. In no event should the content of this page be understood as an express or implied promise or guarantee. We are not responsible for any losses incurred as a result of using any of the information on this page. Information provided on this page is intended solely for informational purposes and is obtained from sources believed to be reliable. Information is in no way guaranteed. No guarantee of any kind is implied or possible where projections of future conditions are attempted.