Owners of PV plants, electricity traders and system regulators need accurate forecasts of production to optimize their maintenance, trading and regulation strategies. However, newly opened solar farms may have very short or no history, which makes it challenging to train individual models. In that case, the user can train one model on other solar farms from the portfolio that have a long history and use that model to predict the farms with short or no history. An important predictor in such an approach is the installed capacity of the solar farm: it is a time-invariant predictor specific to each farm and helps the model learn the correct scale for each farm.
This solution template demonstrates how to use TIM with a panel dataset. We will build one general model for farms in the same region. If you are more interested in modeling each farm individually or modeling the whole portfolio, check the solution templates: single asset solar production and portfolio solar production.
import requests
import pandas as pd
import numpy as np
import time
import datetime as dt
import json
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from tim import Tim
pd.set_option('mode.chained_assignment', None)
Credentials and login
(Do not forget to fill in your credentials in the credentials.json file)
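If the file does not exist yet, a minimal sketch like the one below (a hypothetical helper, not part of TIM) can create a template with the email and password keys that the login code expects; replace the placeholder values with your own credentials.
import json, os
# Hypothetical helper: create a credentials.json template with placeholder values if the file is missing
if not os.path.exists('credentials.json'):
    template_credentials = {'email': 'user@example.com', 'password': 'your-password'}
    with open('credentials.json', 'w') as f:
        json.dump(template_credentials, f, indent=2)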
with open('credentials.json') as f:
    tim_credentials = json.load(f) # loading the credentials from credentials.json
TIM_URL = 'https://tim-platform.tangent.works/api/v5' # URL to which the requests are sent
tim_client = Tim(email = tim_credentials['email'], password = tim_credentials['password'], endpoint = TIM_URL)
The dataset contains data for 17 solar farms. Data are available for the years 2019 and 2020. The target variable is solar production; the dataset also contains forecasts, issued on day zero, of the predictors PVOUT, TEMP, GHI and GTI, and we added the installed capacity of each farm.
Data are sampled hourly.
Column name | Description | Type | Availability |
---|---|---|---|
Profile | Id of each farm | Group key column | |
Date | Timestamp | Timestamp column | |
y | Solar production | Target | D+0 |
PVOUT | The amount of power generated per unit of the installed photovoltaic capacity | Predictor | D+1 |
TEMP | Temperature | Predictor | D+1 |
GHI | Global horizontal irradiance | Predictor | D+1 |
GTI | Global tilted irradiance | Predictor | D+1
Capacity | Installed capacity | Predictor | D+1 |
Our goal is to predict solar production for the next day. The data alignment of each farm indicates that all predictors are available throughout the prediction horizon.
The dataset contains anonymized solar production from solar farms in one region.
The CSV file used in this experiment can be downloaded here.
data = pd.read_csv('panel_data_solar.zip', sep=',') # loading data from panel_data_solar.zip
display(data.head()) # Quick look at the data
display(data.tail())
loc_cap_df = data.loc[:, ["Profile", "Capacity"]].drop_duplicates().reset_index(drop=True)
loc_cap_df # Capacity per location
Profile | Date | y | PVOUT | TEMP | GHI | GTI | Capacity | |
---|---|---|---|---|---|---|---|---|
0 | 1 | 2019-01-01T00:00:00.0 | 0.0 | 0.0 | -10.0 | 0 | 0.0 | 0.78 |
1 | 1 | 2019-01-01T01:00:00.0 | 0.0 | 0.0 | -9.6 | 0 | 0.0 | 0.78 |
2 | 1 | 2019-01-01T02:00:00.0 | 0.0 | 0.0 | -8.4 | 0 | 0.0 | 0.78 |
3 | 1 | 2019-01-01T03:00:00.0 | 0.0 | 0.0 | -8.0 | 0 | 0.0 | 0.78 |
4 | 1 | 2019-01-01T04:00:00.0 | 0.0 | 0.0 | -7.8 | 0 | 0.0 | 0.78 |
Profile | Date | y | PVOUT | TEMP | GHI | GTI | Capacity | |
---|---|---|---|---|---|---|---|---|
298243 | 17 | 2020-12-31T19:00:00.0 | NaN | 0.0 | -6.4 | 0 | 0.0 | 1.176 |
298244 | 17 | 2020-12-31T20:00:00.0 | NaN | 0.0 | -6.8 | 0 | 0.0 | 1.176 |
298245 | 17 | 2020-12-31T21:00:00.0 | NaN | 0.0 | -7.0 | 0 | 0.0 | 1.176 |
298246 | 17 | 2020-12-31T22:00:00.0 | NaN | 0.0 | -7.3 | 0 | 0.0 | 1.176 |
298247 | 17 | 2020-12-31T23:00:00.0 | NaN | 0.0 | -7.6 | 0 | 0.0 | 1.176 |
Profile | Capacity | |
---|---|---|
0 | 1 | 0.7800 |
1 | 2 | 1.1988 |
2 | 3 | 1.1988 |
3 | 4 | 1.1988 |
4 | 5 | 3.9600 |
5 | 6 | 3.9600 |
6 | 7 | 3.9600 |
7 | 8 | 1.1988 |
8 | 9 | 4.7700 |
9 | 10 | 1.1856 |
10 | 11 | 1.6560 |
11 | 12 | 1.4760 |
12 | 13 | 1.1760 |
13 | 14 | 1.1760 |
14 | 15 | 1.1760 |
15 | 16 | 1.1760 |
16 | 17 | 1.1760 |
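As a quick sanity check of the data alignment mentioned earlier, the optional sketch below (assuming the column layout shown in the previews above) verifies that all predictors remain available after the last observed target of each farm, i.e. throughout the day-ahead prediction horizon.
predictor_cols = ['PVOUT', 'TEMP', 'GHI', 'GTI', 'Capacity']
for profile, grp in data.groupby('Profile'):
    grp = grp.sort_values('Date')
    last_target = grp.loc[grp['y'].notna(), 'Date'].max() # last timestamp with an observed target
    horizon = grp[grp['Date'] > last_target] # rows beyond the last observation
    missing = horizon[predictor_cols].isna().any().any()
    print(f'Farm {profile}: {len(horizon)} horizon rows, predictors missing: {missing}')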
To work with panel data, we have to specify group keys when uploading the data. In our case, it is only the column Profile. Then we specify the format of the timestamp in the CSV file and the timestamp column, since in our dataset the timestamp is the second column, whereas by default the first column is assumed.
data_configuration = { # Upload dataset configuration
    'name': 'panelDataSolar',
    'groupKeys': ['Profile'],
    'timestampFormat': 'yyyy-mm-ddTHH:MM:SS.s',
    'timestampColumn': 2
}
upload_results = tim_client.upload_dataset(dataset = data, configuration = data_configuration) # Upload dataset via python client
dataset_id = upload_results.dataset['id'] # Read dataset id
version_id = upload_results.dataset['latestVersion']['id'] # Read dataset version id
print("Dataset id: ", dataset_id) # Display dataset id and dataset version id
print("Dataset version id: ", version_id)
Dataset id: bccad76d-85e1-4b24-9824-2a29c1b50c59 Dataset version id: b2e20d4a-36a9-4101-ab13-62490b177fd0
profile = 1
namesPred = data.columns[3:]
v_data = data[data.Profile == profile]
v_data = v_data.sort_values("Date")
fig = go.Figure(go.Scatter(x=v_data.Date, y=v_data.y, name='Solar production', line=dict(color='black')))
for p in namesPred:
    fig.add_trace(go.Scatter(x=v_data['Date'], y=v_data[p], name=p))
fig.update_layout(height=500, width=1000, title_text=f"Data visualization farm {profile}")
fig.show()
We will omit four farms (profiles 4, 7, 8 and 12) from the training data, for two reasons. First, we would like to demonstrate zero-history models: if data have short or no history, a model trained on similar datasets can be used for prediction. Second, a model does not have to be trained on all groups to be accurate. If the amount of data and generated features were to exceed the RAM available in the worker, TIM would drop the oldest observations and might switch off polynomial features. Training on only a subset shortens the model-building process and may improve accuracy.
We will set predictionTo to one day ahead. The year 2019 will be used as the in-sample period and the year 2020 as the out-of-sample period. We will simulate no-history models; therefore, we will not use offsets of the target at all. We will set modelQuality to Medium and offsetLimit to -48 to make sure the offsets of the predictors do not reach too deep into the past. When building the model, we will not specify outOfSampleRows, so less data is downloaded to the worker. Finally, we will train on only a subset of farms, specified under preprocessors in the CategoryFilter.
predictionTo = 1 # Prediction horizon 1 day
outOfSampleProfiles = [4, 7, 8, 12] # Profiles removed from training
inSampleProfiles = list(set([*range(1, 18)])-set(outOfSampleProfiles)) # Profiles on which the model is built
isFrom = "2019-01-01 00:00:00"
isTo = "2019-12-31 23:00:00"
job_configuration = { # Configuration
    "configuration": {
        "predictionTo": {"baseUnit": "Day","value": 1},
        "modelQuality": "Medium",
        "offsetLimit": {"value": -48}
    },
    "data": {
        "inSampleRows": [{"from": isFrom, "to": isTo}], # Define the in-sample period
        "preprocessors": [{
            "type": "CategoryFilter", # Train only on the selected profiles
            "value": {
                "column": "Profile",
                "categories": inSampleProfiles
            }
        }]
    }
}
build_model_results = tim_client.build_forecasting_model_and_execute(dataset_id, job_configuration, wait_to_finish = True)
build_job_id = build_model_results.metadata['id']
resultsTableBuild = build_model_results.table_result
resultsTableBuild
Profile | timestamp | date_from | time_from | target | forecast | forecast_type | relative_distance | model_index | samples_ahead | lower_bound | upper_bound | bin | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2019-01-16T02:00:00.000Z | 2019-01-15 | 23:00:00.0 | NaN | NaN | InSample | D+1 | NaN | 3 | NaN | NaN | S+1:S+24 |
1 | 1 | 2019-01-16T03:00:00.000Z | 2019-01-15 | 23:00:00.0 | 0.0 | 0.001872 | InSample | D+1 | 4.0 | 4 | -0.004618 | 0.007372 | S+1:S+24 |
2 | 1 | 2019-01-16T04:00:00.000Z | 2019-01-15 | 23:00:00.0 | 0.0 | 0.003080 | InSample | D+1 | 5.0 | 5 | -0.019896 | 0.027685 | S+1:S+24 |
3 | 1 | 2019-01-16T05:00:00.000Z | 2019-01-15 | 23:00:00.0 | 0.0 | 0.002110 | InSample | D+1 | 6.0 | 6 | -0.051457 | 0.055773 | S+1:S+24 |
4 | 1 | 2019-01-16T06:00:00.000Z | 2019-01-15 | 23:00:00.0 | 0.0 | 0.021404 | InSample | D+1 | 7.0 | 7 | -0.068703 | 0.109235 | S+1:S+24 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
109481 | 17 | 2020-12-31T19:00:00.000Z | 2020-12-30 | 23:00:00.0 | NaN | 0.001390 | Production | D+1 | 20.0 | 20 | -0.010594 | 0.012757 | S+1:S+24 |
109482 | 17 | 2020-12-31T20:00:00.000Z | 2020-12-30 | 23:00:00.0 | NaN | 0.002745 | Production | D+1 | 21.0 | 21 | -0.000515 | 0.003438 | S+1:S+24 |
109483 | 17 | 2020-12-31T21:00:00.000Z | 2020-12-30 | 23:00:00.0 | NaN | 0.002807 | Production | D+1 | 22.0 | 22 | -0.000073 | 0.003168 | S+1:S+24 |
109484 | 17 | 2020-12-31T22:00:00.000Z | 2020-12-30 | 23:00:00.0 | NaN | 0.003142 | Production | D+1 | 23.0 | 23 | 0.000282 | 0.003495 | S+1:S+24 |
109485 | 17 | 2020-12-31T23:00:00.000Z | 2020-12-30 | 23:00:00.0 | NaN | 0.002927 | Production | D+1 | 24.0 | 24 | 0.000022 | 0.003336 | S+1:S+24 |
109486 rows × 13 columns
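Before moving on to predictions, the in-sample fit can be inspected per farm directly from the results table; the optional sketch below (using only the columns shown above) computes the mean absolute error of the in-sample rows for each profile.
insample = resultsTableBuild[resultsTableBuild['forecast_type'] == 'InSample'].copy()
insample['abs_error'] = (insample['target'] - insample['forecast']).abs() # per-row absolute error
insample.groupby('Profile')['abs_error'].mean() # in-sample MAE per farm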
At the time of writing this notebook, the endpoint to register a predict job was not yet available in the Python client. Therefore, we will first authorize the user and call the API directly: we register the job through the API call and then execute the predict job with the Python client. The model used for prediction is the already trained one; when registering a predict job, we specify the id of the already executed build-model job in the URL path. Since the amount of data is large, we will split the farms into three batches.
# Authorization
auth_url = TIM_URL+'/auth/login'
auth_response = requests.request(method="POST", url=auth_url, json=tim_credentials).json()
auth_token = auth_response['token']
# Configuration of the predict job
resultsTablePredict = pd.DataFrame()
oosFrom = "2020-01-01 00:00:00"
oosTo = "2020-12-31 23:00:00"
accuracies = np.zeros((17, 3))
for farms in [[*range(1, 7)], [*range(7, 13)], [*range(13, 18)]]:
    predict_job_configuration = {
        "configuration": {
            "predictionTo": {"baseUnit": "Day","value": predictionTo}
        },
        "data": {
            "version": {"id": version_id},
            "outOfSampleRows": [{"from": oosFrom, "to": oosTo}],
            "preprocessors": [{
                "type": "CategoryFilter", # Filter the profiles of the current batch
                "value": {
                    "column": "Profile",
                    "categories": farms
                }
            }]
        }
    }
    # Registration of the predict job
    predict_url = TIM_URL+'/forecast-jobs/'+ build_job_id +'/predict'
    predict_register_response = requests.request(method="POST", url=predict_url, headers={'Authorization':'Bearer '+auth_token}, json=predict_job_configuration).json()
    predict_job_id = predict_register_response['id']
    # Execution of the job registered under predict_job_id and waiting for results
    predict_response = tim_client.execute_forecast(forecast_job_id = predict_job_id, wait_to_finish = True)
    resultsTablePredict = resultsTablePredict.append(predict_response.table_result, ignore_index = True)
    # Store accuracies
    for acc in predict_response.accuracies['groups']['bin']:
        accuracies[int(acc['groupKeys']) - 1, 0] = acc['outOfSample']['mae']
        accuracies[int(acc['groupKeys']) - 1, 1] = acc['outOfSample']['mape']
        accuracies[int(acc['groupKeys']) - 1, 2] = acc['outOfSample']['rmse']
# Preview results table
resultsTablePredict.head()
Profile | timestamp | date_from | time_from | target | forecast | forecast_type | relative_distance | model_index | samples_ahead | lower_bound | upper_bound | bin | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2020-01-01T00:00:00.000Z | 2019-12-31 | 23:00:00.0 | 0.0 | 0.002970 | OutOfSample | D+1 | 1 | 1 | 0.000033 | 0.003413 | S+1:S+24 |
1 | 1 | 2020-01-01T01:00:00.000Z | 2019-12-31 | 23:00:00.0 | 0.0 | 0.003048 | OutOfSample | D+1 | 2 | 2 | 0.000075 | 0.003534 | S+1:S+24 |
2 | 1 | 2020-01-01T02:00:00.000Z | 2019-12-31 | 23:00:00.0 | 0.0 | 0.003110 | OutOfSample | D+1 | 3 | 3 | -0.000141 | 0.003877 | S+1:S+24 |
3 | 1 | 2020-01-01T03:00:00.000Z | 2019-12-31 | 23:00:00.0 | 0.0 | 0.002963 | OutOfSample | D+1 | 4 | 4 | -0.003526 | 0.008464 | S+1:S+24 |
4 | 1 | 2020-01-01T04:00:00.000Z | 2019-12-31 | 23:00:00.0 | 0.0 | 0.001303 | OutOfSample | D+1 | 5 | 5 | -0.021674 | 0.025907 | S+1:S+24 |
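The accuracies returned by the API can also be cross-checked against the results table; a minimal sketch (assuming the accuracies array is ordered by profile, as filled in the loop above) recomputes the out-of-sample MAE per farm from the predictions themselves.
oos = resultsTablePredict[resultsTablePredict['forecast_type'] == 'OutOfSample'].copy()
oos['abs_error'] = (oos['target'] - oos['forecast']).abs()
print(oos.groupby('Profile')['abs_error'].mean()) # out-of-sample MAE per farm from the table
print(accuracies[:, 0]) # MAE per farm as reported by the API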
for i in outOfSampleProfiles:
    v_data = resultsTablePredict[resultsTablePredict['Profile'] == i].sort_values('timestamp')
    production = v_data['forecast_type'] == 'Production'
    fig = go.Figure(go.Scatter(x=v_data['timestamp'][production], y=v_data['forecast'][production], name='Prediction', line=dict(color='goldenrod')))
    outOfSample = (v_data['forecast_type'] == 'OutOfSample')
    fig.add_trace(go.Scatter(x=v_data['timestamp'], y=v_data['target'], name='Actuals', line=dict(color='black')))
    fig.add_trace(go.Scatter(x=v_data['timestamp'][outOfSample], y=v_data['forecast'][outOfSample], name=f'OutOfSample', line=dict(color='red')))
    fig.update_layout(height=500, width=1000, title_text=f"Results Profile {i}")
    fig.show()
for i in inSampleProfiles[:3]:
    t_data = resultsTableBuild[resultsTableBuild['Profile'] == i].sort_values('timestamp')
    v_data = resultsTablePredict[resultsTablePredict['Profile'] == i].sort_values('timestamp')
    production = v_data['forecast_type'] == 'Production'
    fig = go.Figure(go.Scatter(x=v_data['timestamp'][production], y=v_data['forecast'][production], name='Prediction', line=dict(color='goldenrod')))
    outOfSample = (v_data['forecast_type'] == 'OutOfSample')
    inSample = (t_data['forecast_type'] == 'InSample')
    ts = t_data['timestamp'].append(v_data['timestamp'], ignore_index = True)
    y = t_data['target'].append(v_data['target'], ignore_index = True)
    fig.add_trace(go.Scatter(x=ts, y=y, name='Actuals', line=dict(color='black')))
    fig.add_trace(go.Scatter(x=v_data['timestamp'][outOfSample], y=v_data['forecast'][outOfSample], name=f'OutOfSample', line=dict(color='red')))
    fig.add_trace(go.Scatter(x=t_data['timestamp'][inSample], y=t_data['forecast'][inSample], name=f'InSample', line=dict(color='green')))
    fig.update_layout(height=500, width=1000, title_text=f"Results Profile {i}")
    fig.show()
properties = build_model_results.model_result['model']['Model Zoo']['variableProperties']
properties_df = pd.DataFrame(properties).sort_values(by='importance',ascending=False)
properties_df['rel_importance'] = properties_df['importance']/properties_df['importance'].sum()
fig = px.bar(properties_df, x="name", y="rel_importance", color="name")
fig.update_layout(height=500, width=1000, title_text="Variable Importances")
fig.show()
def find_feature(sub_parts):
    dow_list = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
    features_list = []
    for c,s in enumerate(sub_parts):
        if s['type']=='β':
            sub_feature = ''
        elif s['type']=='TimeOffsets':
            sub_feature = s['predictor']+'(t'+str(s['offset'])+')'
        elif s['type']=='RestOfWeek':
            sub_feature = 'DoW(t'+str(s['offset'])+') <= '+dow_list[s['day']-1]
        elif s['type']=='Intercept':
            sub_feature = 'Intercept('+str(int(s['value']))+')'
        elif s['type']=='Cos':
            sub_feature = 'Cos('+str(int(s['period']))+';'+s['unit']+')'
        elif s['type']=='Sin':
            sub_feature = 'Sin('+str(int(s['period']))+';'+s['unit']+')'
        elif s['type']=='ExponentialMovingAverage':
            sub_feature = 'EMA_'+s['predictor']+'(t'+str(int(s['offset']))+'; w='+str(int(s['window']))+')'
        elif s['type']=='Identity':
            sub_feature = s['predictor']
        elif s['type']=='PiecewiseLinear':
            sub_feature = 'max(0;'+str(s['subtype'])+'*('+str(s['knot'])+'-'+s['predictor']+'(t'+str(s['offset'])+')))'
        elif s['type']=='SimpleMovingAverage':
            sub_feature = 'SMA_'+s['predictor']+'(t'+str(int(s['offset']))+'; w='+str(int(s['window']))+')'
        elif s['type']=='Fourier':
            sub_feature = 'Fourier('+str(s['period'])+')'
        elif s['type']=='Weekday':
            sub_feature = '_test_Weekday_'
        elif s['type']=='Month':
            sub_feature = '_test_Month_'
        elif s['type']=='PublicHoliday':
            sub_feature = '_test_publicholiday_'
        elif s['type']=='Trend':
            sub_feature = '_test_trend_'
        else:
            sub_feature = '_test_'
        if s['type']=='β':
            features_list.append(sub_feature)
            beta = s['value']
        else:
            features_list.append(' & '+sub_feature) if c>0 else features_list.append(sub_feature)
    feature_output = ''.join(str(e) for e in features_list)
    return feature_output,beta
features = []
for m in build_model_results.model_result['model']['Model Zoo']['models']:
    terms = m['terms']
    for count,t in enumerate(terms):
        f,b = find_feature(t['parts'])
        features.append([m['index'],count,f,t['importance'],b])
features_df = pd.DataFrame(features,columns=['Model','Term','Feature','importance','beta'])
fig = px.sunburst(features_df, path=['Model','Feature'], values='importance',color='Feature')
fig.update_layout(height=700,width=700,title_text='Feature Importances')
fig.show()
accuracies_df = pd.DataFrame(accuracies, columns=['mae', 'mape', 'rmse'])
accuracies_df['Profile'] = range(1,18)
accuracies_df['wmae'] = accuracies_df['mae'] / loc_cap_df['Capacity'] * 100 # MAE normalized by installed capacity, expressed in % of capacity
metric = 'wmae'
fig = px.bar(accuracies_df, x="Profile", y=metric, color="Profile")
fig.update_layout(height=500, width=900, title_text="Accuracies "+ metric)
fig.show()