# -*- coding: utf-8 -*-
"""
Created on Thu Nov 24 12:38:24 2022

@author: Potamitis Ilyas
"""

import pandas as pd # Python library for data analysis and data frame
import numpy as np # Numerical Python library for linear algebra and computations

# Visualisation libraries
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams.update({'figure.figsize': (12, 8)})  # default size for figures created below

#from datetime import datetime, date #Library to deal with datetime columns

# Location of the CSV file holding the raw readings.
FILE_PATH = 'd:/delivs.csv'

# Read the file, parse the day-first 'Date' strings into timestamps and
# promote that column to the row index.
df = pd.read_csv(FILE_PATH)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
df = df.set_index('Date')

# Subset of columns used by the plots and models further down.
ddf = df[[
    'CO2', 'TVOC', 'TEMP', 'HUM', 'WEIGHT', 'INCNTs', 'OUTCNTs',
]]

from matplotlib.dates import DateFormatter

# Stacked overview figure: one panel per sensor column plus a final panel
# showing the two counter columns, all sharing the same x axis.
Nlayer = 6
plt.rcParams.update({'font.size': 8})
fig, axarr = plt.subplots(Nlayer, sharex='col', gridspec_kw={'hspace': 0, 'wspace': 0})
cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
lines = ["-", "-", "-", "-", '-', '-']
columns = ['CO2', 'TVOC', 'TEMP', 'HUM', 'WEIGHT']
limit = [12000, 7000, 50, 90, 25, -3]
ylimit = [[0, 15000], [0, 10000], [8, 40], [30, 90], [20, 35]]
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
labels = ['ppm', 'ppb', r'$^\circ$C', 'RH (%)', 'Kgr']
date_form = DateFormatter("%m-%d")

# zip() stops at the shortest input (the five sensor columns), so the sixth
# axes and the sixth entry of `limit` are left for the counter panel below.
for i, (ax, column, threshold, y_range, unit) in enumerate(
        zip(axarr, columns, limit, ylimit, labels)):
    ax.plot(ddf[column], linewidth=1.0, color=cycle[i], linestyle=lines[i], label=column)
    ax.set_ylim(y_range)
    ax.legend(loc='upper right', prop={'size': 6})
    ax.axhline(y=threshold, color='r', linestyle='--')  # red dashed limit line
    ax.grid(linestyle='dashdot')
    ax.set_ylabel(unit, fontsize=8)
    ax.xaxis.set_major_formatter(date_form)

# Counter panel: give each series its own colour so the two curves can be
# told apart (they previously shared one colour and one line style).
count_ax = axarr[Nlayer - 1]
for offset, (column, name) in enumerate([('INCNTs', 'In count'), ('OUTCNTs', 'Out count')]):
    count_ax.plot(ddf[column], linewidth=1.0,
                  color=cycle[(len(columns) + offset) % len(cycle)],
                  linestyle=lines[4], label=name)
count_ax.grid(linestyle='dashdot')
count_ax.legend(loc='upper right', prop={'size': 6})

fig.savefig('beeseries.jpg', dpi=300)

# Correlation heat map of the selected columns. A new figure is opened
# explicitly: seaborn draws on the *current* axes, which would otherwise
# still be the last panel of the figure created before this point.
plt.figure()
sns.heatmap(ddf.corr(), annot=True)
plt.savefig("Plotting_Correlation_HeatMap.jpg", dpi=300)

# Some checks

print('Starting date of the data: ', ddf.index.min())
print('Ending date of the data: ', ddf.index.max())
# nunique() counts distinct index values, so repeated timestamps count once.
print('Total number of records in available historical data: ', ddf.index.nunique())
print(ddf.isnull().sum())  # missing values per column

# Box plots, one small figure per column, to inspect spread and skewness.
# Each figure is written to "box_<column>.jpg" and then displayed.
for sensor in ('CO2', 'TVOC', 'TEMP', 'HUM', 'WEIGHT'):
    plt.figure(figsize=(4, 4))
    sns.set_style("whitegrid")
    sns.boxplot(df[sensor])
    plt.savefig("box_" + sensor + ".jpg", dpi=300)
    plt.show()

# Summary statistics of the numeric columns. reset_index() turns the
# statistic names ('count', 'mean', ...) into an ordinary column named 'index'.
describe_num_df = ddf.describe(include=['int64', 'float64'])
describe_num_df.reset_index(inplace=True)

total_columns = describe_num_df.columns
# select_dtypes() is the public replacement for the private
# _get_numeric_data(); the textual 'index' column is not numeric, so it is
# excluded here and needs no explicit skip inside the loop.
num_col = describe_num_df.select_dtypes(include='number').columns
# The 'count' row is left out of the plots.
describe_num_df = describe_num_df[describe_num_df['index'] != 'count']

# One point plot per column: statistic name on x, its value on y.
for i in num_col:
    # factorplot was renamed to catplot; kind='point' reproduces the old
    # factorplot default.
    sns.catplot(x='index', y=i, data=describe_num_df, kind='point')
    plt.grid()
    plt.savefig(i + '.jpg', dpi=300)
    plt.show()


from prophet import Prophet

# Reload the raw file so that 'Date' is an ordinary column again, then put
# it into the layout Prophet expects: timestamp in 'ds', target in 'y'.
# Here the target is the 'HUM' column; the other columns are carried along.
FILE_PATH = 'd:/delivs.csv'
df = pd.read_csv(FILE_PATH)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

prophet_df = df[['Date', 'HUM', 'CO2', 'TVOC', 'TEMP', 'WEIGHT', 'INCNTs', 'OUTCNTs']].rename(
    columns={'Date': 'ds', 'HUM': 'y'})
# Printed explicitly: a bare expression is not displayed when run as a script.
print(prophet_df.head())

# train test split data
split_date = '4-December-2022'

# Logistic growth requires a 'cap' column on every frame given to the model.
# assign() returns a new frame, so nothing is written into a slice of
# prophet_df (the previous item assignment triggered SettingWithCopyWarning).
HUMIDITY_CAP = 83
df_train = prophet_df.loc[prophet_df.ds <= split_date].assign(cap=HUMIDITY_CAP)
df_test = prophet_df.loc[prophet_df.ds > split_date].assign(cap=HUMIDITY_CAP)

#model = Prophet(changepoint_prior_scale=0.01,seasonality_mode='multiplicative')
model = Prophet(weekly_seasonality=False, changepoint_range=1, changepoint_prior_scale=0.75,
                seasonality_mode='multiplicative', growth='logistic')
model.fit(df_train)  # fit training data to model

# In-sample fit and out-of-sample forecast, used by the plots and metrics below.
forecast_train = model.predict(df_train)
forecast_test = model.predict(df_test)

# Prediction plot: observed training/test values together with the fitted
# and forecast curves and the forecast uncertainty band.
# Optional overlay of the detected changepoints:
#   from prophet.plot import add_changepoints_to_plot
#   add_changepoints_to_plot(fig.gca(), model, forecast_train)

plt.close()

fig, ax = plt.subplots(figsize=(15, 6))

# Every artist carries its own label, so the legend no longer depends on the
# drawing order matching a separately maintained list of names.
ax.plot(df_train['ds'], df_train['y'], c='black', marker='o', markersize=2, ls='',
        label='y_train')
ax.plot(df_test['ds'], df_test['y'], c='r', marker='o', markersize=2, ls='',
        label='y_test')
ax.plot(forecast_train['ds'], forecast_train['yhat'], ls='-', color='cornflowerblue', alpha=0.7,
        label='prediction_train')
ax.plot(forecast_test['ds'], forecast_test['yhat'], ls='-', color='cornflowerblue', alpha=0.7,
        label='prediction_test')
ax.fill_between(forecast_test['ds'], forecast_test['yhat_lower'], forecast_test['yhat_upper'],
                color='b', alpha=0.2, label='uncertainty_interval_test')
ax.legend(bbox_to_anchor=(1.0, 1.05))
ax.set(title='Humidity inside the beehive', xlabel='Date', ylabel='RH (%)')
ax.grid()
plt.tight_layout()

# Save before show(): once an interactive window has been shown and closed
# the figure is gone, and a later savefig() writes an empty canvas.
fig.savefig("sensor_Prophet_simple_model_all_data.jpg", dpi=300)
plt.show()

# Prediction plot restricted to the test period: observed values, forecast
# curve and forecast uncertainty band.
plt.close()

fig, ax = plt.subplots(figsize=(15, 6))

# Labels are attached to the artists themselves rather than matched by position.
ax.plot(df_test['ds'], df_test['y'], c='r', marker='o', markersize=2, ls='',
        label='y_test')
ax.plot(forecast_test['ds'], forecast_test['yhat'], ls='-', color='cornflowerblue', alpha=0.7,
        label='prediction_test')
ax.fill_between(forecast_test['ds'], forecast_test['yhat_lower'], forecast_test['yhat_upper'],
                color='b', alpha=0.2, label='uncertainty_interval_test')
ax.legend(bbox_to_anchor=(1.0, 1.05))
ax.set(title='Humidity inside the beehive', xlabel='Date', ylabel='RH (%)')
ax.grid()
plt.tight_layout()

# Save before show(): after an interactive window is closed the figure no
# longer exists and savefig() would write an empty image.
fig.savefig("sensor_Prophet_simple_model_test_data.jpg", dpi=300)
plt.show()


from sklearn.metrics import r2_score

def MAPE(actual, forecast):
    """Return the mean absolute percentage error, expressed in percent.

    Both inputs are converted to NumPy arrays first, so values are paired by
    position (a pandas index, if any, is ignored).

    Parameters
    ----------
    actual : array-like
        Observed values; each one is used as the denominator of its error.
    forecast : array-like
        Predicted values, in the same order as ``actual``.

    Returns
    -------
    numpy.float64
        ``mean(100 * |actual - forecast| / |actual|)``. Entries where
        ``actual`` is zero produce ``inf``/``nan`` through NumPy's division.
    """
    actual = np.asarray(actual, dtype=float)
    forecast = np.asarray(forecast, dtype=float)
    # Vectorised form of the former element-by-element Python loop.
    return np.mean(100 * np.abs((actual - forecast) / actual))

def metrics(df_train, forecast_train, df_test, forecast_test):
    """Tabulate MAPE and R2 (both in percent) for the train and test splits.

    The observed values are read from the 'y' column of ``df_train`` /
    ``df_test`` and the predictions from the 'yhat' column of
    ``forecast_train`` / ``forecast_test``.

    Returns a DataFrame with rows 'MAPE' and 'R2' and columns 'Train' and
    'Test'.
    """
    splits = {
        'Train': (df_train['y'], forecast_train['yhat']),
        'Test': (df_test['y'], forecast_test['yhat']),
    }
    mape_row = {name: [MAPE(observed, predicted)]
                for name, (observed, predicted) in splits.items()}
    r2_row = {name: [100 * r2_score(y_true=observed, y_pred=predicted)]
              for name, (observed, predicted) in splits.items()}
    return pd.concat([
        pd.DataFrame(index=['MAPE'], data=mape_row),
        pd.DataFrame(index=['R2'], data=r2_row),
    ])


# Scores of the model fitted above. The table is printed explicitly because
# a bare expression is only echoed in an interactive console, not in a script.
print(metrics(df_train, forecast_train, df_test, forecast_test))


######## add regressors

# Second model: same target, with four sensor columns added as
# multiplicative extra regressors.
model = Prophet(changepoint_prior_scale=0.01, seasonality_mode='multiplicative', growth='logistic')
for regressor in ('TEMP', 'CO2', 'TVOC', 'WEIGHT'):
    model.add_regressor(regressor, prior_scale=0.5, mode='multiplicative')

model.fit(df_train)  # fit training data to model
forecast_train = model.predict(df_train)
forecast_test = model.predict(df_test)

print(metrics(df_train, forecast_train, df_test, forecast_test))


# Prediction plot for the regressor model, test period only: observed
# values, forecast curve and forecast uncertainty band.
plt.close()

fig, ax = plt.subplots(figsize=(15, 6))

# Labels are attached to the artists themselves rather than matched by position.
ax.plot(df_test['ds'], df_test['y'], c='r', marker='o', markersize=2, ls='',
        label='y_test')
ax.plot(forecast_test['ds'], forecast_test['yhat'], ls='-', color='cornflowerblue', alpha=0.7,
        label='prediction_test')
ax.fill_between(forecast_test['ds'], forecast_test['yhat_lower'], forecast_test['yhat_upper'],
                color='b', alpha=0.2, label='uncertainty_interval_test')
ax.legend(bbox_to_anchor=(1.0, 1.05))
ax.set(title='Humidity inside the beehive', xlabel='Date', ylabel='RH (%)')
ax.grid()
plt.tight_layout()

# Save before show(): after an interactive window is closed the figure no
# longer exists and savefig() would write an empty image.
fig.savefig("sensor_Prophet_additional_regressors_test_data.jpg", dpi=300)
plt.show()


# Component plot of the test forecast produced by the model.
fig2 = model.plot_components(forecast_test)
plt.show()

from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from prophet.plot import plot_cross_validation_metric

# Cross-validate the current model and plot the MAPE of the resulting
# forecasts (metric='mae' is a drop-in alternative for the plot).
cv_windows = {'initial': '30 days', 'period': '15 days', 'horizon': '2 days'}
df_cv = cross_validation(model, **cv_windows)
df_p = performance_metrics(df_cv)
plot = plot_cross_validation_metric(df_cv, metric='mape')
plt.savefig("cross_validation_data.jpg", dpi=300)


#################
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
import xgboost as xgb
from xgboost import plot_importance

# Default figure size for the plots drawn from here on.
sns.set(rc={'figure.figsize': (22, 8)})

def create_features(df, label=None):
    """
    Build the model input matrix from a frame indexed by timestamps.

    The index is copied into a 'date' column and the calendar fields
    'hour', 'dayofweek', 'quarter', 'month', 'dayofyear' and 'dayofmonth'
    are derived from it. These columns are written into ``df`` itself, so
    the caller's frame is modified in place.

    Returns the feature frame X (four calendar fields plus six sensor
    columns). When ``label`` is truthy, returns ``(X, y)`` with
    ``y = df[label]``.
    """
    df['date'] = df.index
    stamp = df['date'].dt

    # new column name -> attribute of the .dt accessor that supplies it
    calendar_fields = {
        'hour': 'hour',
        'dayofweek': 'dayofweek',
        'quarter': 'quarter',
        'month': 'month',
        'dayofyear': 'dayofyear',
        'dayofmonth': 'day',
    }
    for column_name, attribute in calendar_fields.items():
        df[column_name] = getattr(stamp, attribute)

    feature_names = ['hour', 'dayofweek', 'dayofyear', 'dayofmonth',
                     'CO2', 'TVOC', 'WEIGHT', 'INCNTs', 'OUTCNTs', 'TEMP']
    X = df[feature_names]
    if not label:
        return X
    return X, df[label]

# split data
df_hour = ddf
#split_date = '20-November-2022'
df_train = df_hour.loc[df_hour.index <= split_date].copy()
df_test = df_hour.loc[df_hour.index > split_date].copy()

# Feature matrices and the 'HUM' target for both splits.
X_train, y_train = create_features(df_train, label='HUM')
X_test, y_test = create_features(df_test, label='HUM')

# Let's plot the cross-correlation matrix
plt.figure(figsize=(20, 10))
sns.heatmap(X_train.corr().round(2), vmin=-1, vmax=1, center=0, annot=True, cmap='viridis')
plt.show()

reg = xgb.XGBRegressor(n_estimators=100, max_depth=6, colsample_bylevel=0.7, learning_rate=0.1)
# NOTE(review): the early-stopping monitor is the training set itself, and
# newer XGBoost releases expect early_stopping_rounds on the estimator
# constructor rather than on fit() - confirm against the installed version.
reg.fit(X_train, y_train,
        eval_set=[(X_train, y_train)],
        early_stopping_rounds=50,
        verbose=False)  # Change verbose to True if you want to see it train

_ = plot_importance(reg, height=0.9)
# The file name needs an image extension: matplotlib derives the output
# format from it and rejects the former '.savefig' suffix.
plt.savefig('XGBoost_plt.jpg', dpi=300)

# Predict once per split and reuse the arrays below.
train_pred = reg.predict(X_train)
test_pred = reg.predict(X_test)
df_test['Prediction'] = test_pred.clip(0)

# MAPE takes (actual, forecast); the observed values must come first because
# they form the denominator of the percentage error.
print(MAPE(y_train, train_pred))
print(MAPE(y_test, test_pred))
print(100 * r2_score(y_train, y_pred=train_pred))
print(100 * r2_score(y_test, y_pred=test_pred))

# plot expected vs actual
y_true = y_test
y_pred = df_test['Prediction']
# Draw on a new figure; otherwise the curves land on whichever axes happen
# to be current (the feature-importance chart drawn just before this point).
fig, ax = plt.subplots()
ax.plot(y_true, label='Actual')
ax.plot(y_pred, label='Predicted')
ax.grid(visible=True)
ax.legend()

# Save before show(): after an interactive window is closed the figure no
# longer exists and savefig() would write an empty image.
fig.savefig("sensor_XGBoost_test_data.jpg", dpi=300)
plt.show()

########
# some more error metrics
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print("mse = " + str(round(mse, 2)))
print("mae = " + str(round(mae, 2)))
    

