Stochastic experiments in stabilisation of money market benchmarks

The main input of this research is a stochastic model of a theoretical panel of contributors (banks) to a money market index. The model proved to be a useful environment for testing various index formulae, their characteristics and the trade-offs that arise when deciding on a particular benchmark design. It can also be used to evaluate indices that lack historical data, or to stress them against scenarios of adverse changes in market conditions or in panellists' behaviour. Hypothetical problems such as changes in the panel's composition and irregular daily contributions may strongly reduce the utility of the final benchmark for medium- and long-term loan contracts, especially with retail clients. Our focus is on several selected classes of benchmark formulae that are derived from the raw index and confine the drawbacks mentioned above, at the cost of quality as measured by other criteria (goodness of fit). The classes considered are: geometric time weights applied to the raw index, with different smoothing parameters and observation-window lengths; stabilisation of the raw index within bands; rolling-window rebalancing of volume weights; and, finally, geometric time weights applied to a log-volume-transformed index. The potential trade-offs in such benchmark stabilisation efforts are shown.

Published in Bezpieczny Bank (Safe Bank), No. 4 (73) 2018, pp. 44-61.
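As a minimal illustration of the first class of formulae (geometric time weights on the raw index), the sketch below shows the scheme in Python; the smoothing parameter lam and window length n are hypothetical placeholders, not the values studied in the paper.

import numpy as np

def geometric_time_weighted(raw_index, lam=0.9, n=10):
    # Weighted mean of the last n raw-index values with geometrically
    # decaying weights lam**k, where k=0 marks the most recent observation.
    window = np.asarray(raw_index, dtype=float)[-n:]
    k = np.arange(len(window))[::-1]        # oldest value gets the largest exponent
    w = lam ** k
    return float(np.sum(w * window) / np.sum(w))

# e.g. geometric_time_weighted([1.70, 1.72, 1.69, 1.75], lam=0.8)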

Additional files

optimalSetsComparedMAC_MAE

optimalSetsComparedSD_MAE

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 29 20:23:48 2019

@author: marcindec
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from workalendar.europe import Poland
import seaborn as sns
from scipy.stats import norm
from scipy.optimize import fsolve, minimize, basinhopping, brute, root
import os
from scipy.integrate import quad
from timeit import default_timer as timer
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
from matplotlib.ticker import MaxNLocator
from multiprocessing import Pool
from functools import partial
from datetime import datetime

path="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN"
os.chdir(path)


#%% Uploading previously prepared bases:

volbase = pd.read_hdf('full_base_vol_withVix.h5')                  # Volume base including VIX index
bondbase = pd.read_hdf('bondbase_basic.h5')                        # Basic info on fixings
bondbase_add = pd.read_hdf('bondbase_add_final5.h5')               # Extended info (OA, w, firstdate, prev ytm etc..)
fitNSSbase = pd.read_hdf('NSS_SLSQP__2try_tol5_1_3562.h5')         # All fitted NSS values
thebase=pd.read_hdf('thebaseApril.h5')                             # bond/day database with all fit, rich/cheap etc


# Technical bases needed for some functions to operate
operationsbase=pd.read_hdf('operationsbase.h5')                    # Base of all bonds operations ever (PL Min Fin)
couponsbase=pd.read_hdf('couponsbase.h5')                          # Coupon period dates base 




#%%
#l12=bondbase_add.index.unique()
#fitNSSbase2=fitNSSbase.set_index("date")
#thebase=bondbase_add.join(fitNSSbase2)
#thebase.dropna(subset=['b0'],inplace=True);


# %% USED FUNCTIONS


# Returning the table of all interest periods and cpn cash flows of a given bond
def cpn_period_table(bond):
    # 'OK'-series bonds are zero-coupon: the only period runs to maturity
    if 'OK' in bond:
        maturity=operationsbase.xs(bond, level=1, drop_level=False).mat_date[0]
        begining=operationsbase.xs(bond, level=1, drop_level=False).index[0][0]
        t=pd.DataFrame(data={'beg':[begining],'end':[maturity],'exdiv':[maturity],'cpnsetl':[maturity],'cpn_value':[0]})
    else:
        t=couponsbase.loc[bond].replace({'-': np.nan});
        t.dropna(inplace=True);
        # drop header fields: 'Kod ISIN' (ISIN code), 'Data wykupu' (redemption date), 'Kupon' (coupon)
        t.drop(index=['Kod ISIN', 'Data wykupu', 'Kupon'], inplace=True);
        t=t.tolist()
        t=np.reshape(t, (int(len(t)/5),5));
        t=pd.DataFrame(t, columns=['beg','end','exdiv','cpnsetl','cpn_value'])
    return t
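# Illustrative call (hypothetical ticker; assumes couponsbase/operationsbase
# are loaded): cpn_period_table('DS1023') returns one row per interest period
# with columns beg, end, exdiv, cpnsetl, cpn_value (coupon per 1000 PLN notional).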


# Returning accrued interest of a given bond on a given settlement day (per 1000PLN notional)
def acc_int(bond, date_settl):  
    
    if 'OK' in bond:
        accint=0;
    else:
    
        vv=cpn_period_table(bond)
        date_set=pd.Timestamp(date_settl)
    
        if (date_set>=vv['end'][len(vv)-1]) |  (date_set<=vv['beg'][0]):
            accint=0;
        else:
            # calculate the period number we are in (0,1,2, ... 10 etc) STARTING FROM ZERO!
            if vv['beg'][len(vv)-1]<date_set:
                curr_per_nr=len(vv)-1
            else:
                curr_per_nr = min(vv[vv['beg']>=date_set].index)-1
                
            # ex-dividend period !!!
            if (date_set>vv['exdiv'][curr_per_nr]) & (date_set<=vv['end'][curr_per_nr]):
                accint=0;     
            else:
                nr_of_days_period=(vv['end'][curr_per_nr]-vv['beg'][curr_per_nr]).days
                nr_of_days_accrued=(date_set-vv['beg'][curr_per_nr]).days
                accint=(nr_of_days_accrued/nr_of_days_period)*vv['cpn_value'][curr_per_nr]
                accint=round(accint,2) 
                
    return accint
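# Worked example of the accrual above: for a 365-day coupon period paying
# 50.00 per 1000 PLN notional, settlement 73 days into the period gives
# accint = (73/365)*50.00 = 10.00 (rounded to 2 decimals, as in the function).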


def acc_int2(bond, date_settl):  
    
    if 'OK' in bond:
        accint=0;
    else:
    
        vv=cpn_period_table(bond)
        #date_set=pd.Timestamp(date_settl)
        date_set=date_settl;
        
        if (date_set>=vv['end'][len(vv)-1]) |  (date_set<=vv['beg'][0]):
            accint=0;
        else:
            # calculate the period number we are in (0,1,2, ... 10 etc) STARTING FROM ZERO!
            if vv['beg'][len(vv)-1]<date_set:
                curr_per_nr=len(vv)-1
            else:
                curr_per_nr = min(vv[vv['beg']>=date_set].index)-1
                
            # ex-dividend period !!!
            if (date_set>vv['exdiv'][curr_per_nr]) & (date_set<=vv['end'][curr_per_nr]):
                accint=0;     
            else:
                nr_of_days_period=(vv['end'][curr_per_nr]-vv['beg'][curr_per_nr]).days
                nr_of_days_accrued=(date_set-vv['beg'][curr_per_nr]).days
                accint=(nr_of_days_accrued/nr_of_days_period)*vv['cpn_value'][curr_per_nr]
                accint=round(accint,2) 
                
    return accint


# Returning a table of CF and yr_frac for a bond from a given settlement day till maturity (per 1000PLN notional)
def cf_table(bond, date_settl):  
    vv=cpn_period_table(bond)
    date_set=pd.Timestamp(date_settl)
    cf_table=[];
    if (date_set>=vv['end'][len(vv)-1]) |  (date_set<=vv['beg'][0]):
        cf_table=[];
    else:
        # calculate the period number we are in (0,1,2, ... 10 etc) STARTING FROM ZERO!
        if vv['beg'][len(vv)-1]<date_set:
            curr_per_nr=len(vv)-1
        else:
            curr_per_nr = min(vv[vv['beg']>=date_set].index)-1
        
        # ex-dividend period: the current coupon is no longer receivable,
        # so enumeration of the remaining cash flows starts one period later
        if (date_set>vv['exdiv'][curr_per_nr]) & (date_set<=vv['end'][curr_per_nr]):
            first_per=curr_per_nr+1
        else:
            first_per=curr_per_nr

        for i in range(first_per, len(vv)):
            nr_of_days_since_settlement=(vv['end'][i]-date_set).days
            yr_frac=round(nr_of_days_since_settlement/365,6)
            if i==(len(vv)-1):
                cf=1000+vv['cpn_value'][i]   # redemption plus final coupon
            else:
                cf=vv['cpn_value'][i]
            cf_table.append([i, yr_frac, cf])
            
    return cf_table


# Settlement date after N days from trade date Polish calendar
def settle_day(N, trade_day):
    cal=Poland()
    date_trade=pd.Timestamp(trade_day)
    is_hol=cal.is_working_day(date_trade)
    settle_day=cal.add_working_days(date_trade, N)
    return settle_day, is_hol

def settle_day2(N, trade_day):
    cal=Poland()
    date_trade=trade_day
    is_hol=cal.is_working_day(date_trade)
    settle_day=cal.add_working_days(date_trade, N)
    return settle_day, is_hol




def NSS_RATES(T, b0, b1, b2, b3, t1, t2):
    yc_rate=b0+(b1+b2)*(1-np.exp(-T/t1))/(T/t1)-b2*np.exp(-T/t1)+b3*((1-np.exp(-T/t2))/(T/t2) - np.exp(-T/t2))
    fw_rate=b0+ b1*np.exp(-T/t1) +b2*(T/t1)*np.exp(-T/t1) +b3*(T/t2)*np.exp(-T/t2)
    slope=b1*((1-np.exp(-T/t1))/(T/t1))
    garb1=b2*((1-np.exp(-T/t1))/(T/t1)-np.exp(-T/t1))
    garb2=b3*((1-np.exp(-T/t2))/(T/t2)-np.exp(-T/t2))
    return yc_rate, fw_rate, slope, garb1, garb2
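# The zero rate above is the Svensson (NSS) curve written with the b1 and b2
# terms folded together; expanded, it reads
#   y(T) = b0 + b1*(1-exp(-T/t1))/(T/t1)
#             + b2*((1-exp(-T/t1))/(T/t1) - exp(-T/t1))
#             + b3*((1-exp(-T/t2))/(T/t2) - exp(-T/t2))
# and fw_rate is the instantaneous forward
#   f(T) = b0 + b1*exp(-T/t1) + b2*(T/t1)*exp(-T/t1) + b3*(T/t2)*exp(-T/t2).
# Example with hypothetical parameters:
#   NSS_RATES(5, 0.03, -0.005, 0.01, 0.01, 1, 1.85)[0]   # 5y zero rate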


def NSS_YC(b0, b1, b2, b3, t1, t2):
    # start just above zero: T=0 would divide by zero in the T/t1 terms
    horizon_line = np.linspace(0.01, 12, 120)
    # a single call returns all five series at once
    val1, val2, val3, val4, val5 = NSS_RATES(horizon_line, b0, b1, b2, b3, t1, t2)
    return val1, val2, val3, val4, val5


def PRC(bond, trade_date, acc_int, YTM): # returning a clean price of a bond given YTM [0,1]!! i.e 0.03 
    set_date = settle_day(2, trade_date)[0]
    tab=cf_table(bond, set_date)
    prc=-acc_int  
    
    if (len(tab)==1) & (tab[0][2]!=1000):
        prc=prc+tab[0][2]/(1+YTM*tab[0][1])
    else:
        for i in range(len(tab)):
            prc=prc+tab[i][2]/((1+YTM)**(tab[i][1]))    
    return prc
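# Discounting convention above: cash flows (per 1000 PLN notional) use annual
# compounding (1+YTM)**t, except when a single coupon-bearing cash flow
# remains (cf != 1000), which switches to simple money-market discounting
# 1/(1+YTM*t); zero-coupon bonds (final cf exactly 1000) stay on compounding.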


def YTM(bond, trade_date, acc_int, price): # returning the YTM of a bond given its clean price
    # price is given per 1000 PLN notional !!
    def f(x):
        pp=PRC(bond, trade_date, acc_int, x)-price
        return pp
    ytm=fsolve(f,0.05)
    return ytm
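# fsolve root-finds PRC(bond, trade_date, acc_int, x) == price from the 5%
# starting guess; it returns a length-1 ndarray, hence the np.asscalar(...)
# wrappers at the call sites below.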


def PRC_NSS(bond, trade_date, acc_int, NSS): # returning a clean price of a bond given NSS params of a YC 
    b0=NSS[0]; b1=NSS[1]; b2=NSS[2]; b3=NSS[3]; t1=NSS[4]; t2=NSS[5];
    set_date = settle_day(2, trade_date)[0]
    tab=cf_table(bond, set_date)
    prc=-acc_int  
    for i in range(len(tab)):
        prc=prc+tab[i][2]/((1+NSS_RATES(tab[i][1], b0, b1, b2, b3, t1, t2)[0])**(tab[i][1]))    
    return prc


def SENS(bond, trade_date, YTM_PRC): # returning a vector of Duration, ModifiedDuration and Convexity 
    # Handling of PRICE/YTM dual argument
    # if >10 then we treat it as PRICE, YTM otherwise
    # ytm in [0,1] format
    
    set_date = settle_day(2, trade_date)[0]
    AI=acc_int(bond, set_date)
    
    if YTM_PRC>10:
        ytm=YTM(bond, trade_date, AI, YTM_PRC)
        prc=YTM_PRC
    else:
        ytm=YTM_PRC
        prc=PRC(bond, trade_date, AI, YTM_PRC)
        
    tab=cf_table(bond, set_date)
    P=prc+AI
    
    D=0; C=0
    
    for i in range(len(tab)):
        D=D+(tab[i][1]*tab[i][2])/((1+ytm)**(tab[i][1]))
        C=C+((tab[i][1]+1)*tab[i][1]*tab[i][2])/((1+ytm)**(tab[i][1]+2))
    
    D=D/P; C=C/P;
    MD=-D/(1+ytm)
    return D, MD, C
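# D and C above are the usual cash-flow-weighted sums over the dirty price
# P = clean + accrued:
#   D = (1/P) * sum_i t_i * CF_i / (1+y)^t_i
#   C = (1/P) * sum_i t_i*(t_i+1) * CF_i / (1+y)^(t_i+2)
# Sign convention: MD is returned as -D/(1+y), i.e. negative; ObjFun1 below
# divides by -MD to recover a positive weight.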


def outstanding(bond, date):   # Outstanding amount for a given date and bond
    amnt=sum(operationsbase.loc[(pd.date_range('1994-02-17', date),bond), ('net_sale')]);
    return amnt

def outstanding2(bonds, date):   # Outstanding amounts for a list of bonds
    amnts=[]
    for bond in bonds:
        amnt=sum(operationsbase.loc[(pd.date_range('1994-02-17', date),bond), ('net_sale')]);
        amnts.append([bond, amnt])
    return amnts

def outstanding3(date, fix_nr):   # Outstanding amounts and weights for all bonds fixed on a given date and fix_nr
    list_of_bonds=bondbase.loc[(date,fix_nr)].index.tolist();
    aaam= outstanding2(list_of_bonds, date)
    tot_aaam=sum([aaam[i][1] for i in range(len(aaam))])
    for i in range(len(aaam)):
        aaam[i]=[aaam[i][0], aaam[i][1], aaam[i][1]/tot_aaam]; # with weights
    return aaam


# COMPARISON OF BONDSPOT'S and MY CALCULATIONS OF YTM
# Single fixing:
    
def tab_price_list_full(trade_date, fix_nr):
    tab=bondbase.loc[trade_date,fix_nr, :]
    tab=tab.reset_index()
    bond_list=tab['bond'].tolist()
    outstan=outstanding3(trade_date,fix_nr)
    outstan=pd.DataFrame(outstan)
    
    ytm=[]
    weights=[]
    duration=[]
    mdur=[]
    convexity=[]
    ai=[]
    yr_frac=[]
    
    for bond in bond_list:
        price=np.asscalar(tab[tab['bond']==bond].px_fix)*10;
        ytm.append(round(np.asscalar(YTM(bond, trade_date, acc_int(bond, settle_day(2, trade_date)[0]),price)*100),2))
        weights.append(round(np.asscalar( outstan[outstan[0]==bond][2]) ,4))
        duration.append(np.asscalar(SENS(bond, trade_date, price)[0]))
        mdur.append(np.asscalar(SENS(bond, trade_date, price)[1]))
        convexity.append(np.asscalar(SENS(bond, trade_date, price)[2]))
        ai.append(round(acc_int(bond, settle_day(2, trade_date)[0]) ,2))
        yr_frac.append(round((operationsbase.xs(bond, level=1, drop_level=False).mat_date[0]-pd.Timestamp(settle_day(2, trade_date)[0])).days/365,4))

    tab['ytm_my']=ytm
    tab['ytm_fix']=tab['ytm_fix'].astype(float)
    tab['diff']=tab.ytm_my-tab.ytm_fix
    tab['acc_int']=ai
    tab['weight_OA']=weights
    tab['D']=duration
    tab['MD']=mdur
    tab['C']=convexity
    tab['yr_frac']=yr_frac

    return tab


def tab_price_list_compact(trade_date, fix_nr):
    tab=bondbase.loc[trade_date,fix_nr, :]
    tab=tab.reset_index()
    bond_list=tab['bond'].tolist()
    outstan=outstanding3(trade_date,fix_nr)
    outstan=pd.DataFrame(outstan)
    
    ytm=[]
    weights=[]
    mdur=[]
    ai=[]
    
    for bond in bond_list:
        price=np.asscalar(tab[tab['bond']==bond].px_fix)*10;
        ytm.append(round(np.asscalar(YTM(bond, trade_date, acc_int(bond, settle_day(2, trade_date)[0]),price)*100),2))
        weights.append(round(np.asscalar( outstan[outstan[0]==bond][2]) ,4))
        mdur.append(np.asscalar(SENS(bond, trade_date, price)[1]))
        ai.append(round(acc_int(bond, settle_day(2, trade_date)[0]) ,2))

    tab['ytm_my']=ytm
    tab['ytm_fix']=tab['ytm_fix'].astype(float)
    tab['diff']=tab.ytm_my-tab.ytm_fix
    tab['acc_int']=ai
    tab['weight_OA']=weights
    tab['MD']=mdur
    tab.drop(columns=['fix_nr','ytm_fix','isin', 'px_bid','p_ask', 'ytm_bid', 'ytm_ask'], inplace=True);
    
    return tab


# All Data:
listaaa=bondbase.index.unique(level='date') # list of all unique dates in bondbase
l2=listaaa[3562:]
pd.set_option('display.max_columns', None)

def do_checks(l2, filename):
    baza=[]
    for dd in l2:
        xxx=tab_price_list_compact(dd, 2)
        baza.append(xxx)
        print(xxx)
    mm=pd.concat(baza)
    mm.to_excel(str(filename)+'.xlsx')
    #return baza

def do_checks_all(lista, filename):
    for i in range(14):
        do_checks(lista[i*178:(i+1)*178], filename+str(i))
        

#%% OBJECTIVE FUNCTIONS
   
# PRC_NSS(bond, trade_date, acc_int, NSS),      0.0287, -0.003, 0.01, 0.01, 1, 1.85
#maintab22=tab_price_list_full('2019-02-28', 2)       # create table with all possible info on bond prior to evaluating Objective Function  


def ObjFun1(params_nss, *params ):   
    # Version 1: weight = outstanding-amount share divided by modified duration, applied to squared price differences
     maintab=params[0]
     trade_date=maintab['date'][0]
     bondlist=maintab['bond'].tolist()
     value1=0

     for bond in bondlist:
         price_fix=np.asscalar(maintab[maintab['bond']==bond].px_fix)*10;
         acc_i=np.asscalar(maintab[maintab['bond']==bond].acc_int)
         price_nss=PRC_NSS(bond, trade_date, acc_i, params_nss)
         w=np.asscalar(maintab[maintab['bond']==bond].weight_OA) / (-np.asscalar(maintab[maintab['bond']==bond].MD))
         value1=value1+w*((price_nss-price_fix)**2)
         #print(price_fix, price_nss, w)
  
     return value1
   

     
def Opt1(x00, met1, params):   # met1: 'Nelder-Mead', 'SLSQP' or 'COBYLA'
    # bb=brute(ObjFun1, ((0.0001,0.01),(-0.09,0.09), (0.0001,0.01),(-0.09,0.09)), Ns=4, full_output=True)
    # x0=[0.0247, -0.003, 0.01, 0.01, 1, 1.85]
    # bb=minimize(ObjFun1, x0 ,method=met1, tol=1e-3)
    if (met1=='SLSQP')|(met1=='COBYLA'):
        const_nss= ({'type': 'ineq', 'fun': lambda x:  x[0]}, {'type': 'ineq', 'fun': lambda x:  x[0]+x[1]}, {'type': 'ineq', 'fun': lambda x:  x[4]}, {'type': 'ineq', 'fun': lambda x:  x[5]-x[4]}  )
        opt_nss_cobyla={'rhobeg': 1, 'maxiter': 3000, 'disp': False, 'catol': 0.0002}
        opt_nss_slsqp={'maxiter': 3000, 'disp': False}
        if met1=='SLSQP':
            opt_nss_this=opt_nss_slsqp
        else:
            opt_nss_this=opt_nss_cobyla
            
        # pass args as an explicit one-tuple so the whole DataFrame reaches
        # ObjFun1 intact, regardless of how scipy normalises non-tuple args
        bb=minimize(fun=ObjFun1, x0=x00, args=(params,), method=met1, constraints=const_nss, tol=1e-5, options=opt_nss_this)

    elif met1=='Nelder-Mead':
        bb=minimize(fun=ObjFun1, x0=x00, args=(params,), method=met1, tol=1e-3)
        
    return bb.x, bb.fun, bb.message, bb.success#, bb.nit
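# Illustrative call (hypothetical starting vector; maintab comes from
# tab_price_list_full):
#   x0 = [0.0287, -0.003, 0.01, 0.01, 1.0, 1.85]
#   params_opt, obj_val, msg, ok = Opt1(x0, 'SLSQP', maintab)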
    


def create_init_params(bond_table, density_per_param, betaAbsMax, spreadBetas12):
    # density_per_param=[5,5,5,5,5,5]
    ytm_fix_LONG=bond_table['ytm_fix'][bond_table.index[-1]]
    ytm_fix_SHORT=bond_table['ytm_fix'][0]
    beta0=ytm_fix_LONG/100
    beta1=(ytm_fix_SHORT-ytm_fix_LONG)/100
    beta2absMax=betaAbsMax
    beta3absMax=betaAbsMax
    tau1=1
    tau2=3
    tauMid=(tau2+tau1)/2
    
    beta0range=np.linspace(np.maximum(beta0-spreadBetas12, 0), beta0+spreadBetas12, density_per_param[0])   # floored at zero (np.maximum, not np.max)
    beta1range=np.linspace(beta1-spreadBetas12, beta1+spreadBetas12, density_per_param[1])
    beta2range=np.linspace(-beta2absMax, beta2absMax, density_per_param[2])
    beta3range=np.linspace(-beta3absMax, beta3absMax, density_per_param[3])
    tau1range=np.linspace(0.01, tauMid, density_per_param[4])
    tau2range=np.linspace(tauMid+0.01, 7, density_per_param[5])
    
    table_params=[]
    
    for b0 in beta0range:
        for b1 in beta1range:
            for b2 in beta2range:
                for b3 in beta3range:
                    for t1 in tau1range:
                        for t2 in tau2range:
                            table_params.append([b0,b1,b2,b3,t1,t2])
        
    # check feasibility ???
    
    return table_params
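# The grid size is the product of the densities: e.g. density_per_param
# [5,3,3,3,3,7] (the setting used in do_nss_fitting below) gives
# 5*3*3*3*3*7 = 2835 candidate starting vectors.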


def do_init_eval_old(bond_table, density_per_param, betaAbsMax, spreadBetas12):
    tableX=create_init_params(bond_table, density_per_param, betaAbsMax, spreadBetas12)    
    #timess=density_per_param[0]*density_per_param[1]*density_per_param[2]*density_per_param[3]*density_per_param[4]*density_per_param[5]
    #print(timess)
    
    OBV=[]
    
    for i in range(len(tableX)):
        ObjVal=ObjFun1([tableX[i][0],tableX[i][1],tableX[i][2],tableX[i][3],tableX[i][4],tableX[i][5]], bond_table)
        OBV.append(ObjVal)
    
    bbbbb=pd.DataFrame(tableX, OBV)
    bbbbb = bbbbb.reset_index();
    bbbbb.columns=['ObFuVal', 'b0', 'b1', 'b2', 'b3', 't1', 't2']
    bbbbb= bbbbb.sort_values(by=['ObFuVal'], ascending=True)
    
    return bbbbb.head(10)


def f2(t, btab):
    rr=ObjFun1(t, btab )
    return rr

# new version of the function - with multiprocessing
def do_init_eval(bond_table, density_per_param, betaAbsMax, spreadBetas12):
    tableX=create_init_params(bond_table, density_per_param, betaAbsMax, spreadBetas12)    
     
    func = partial(f2, btab=bond_table)
    
    p = Pool()
    obj_fu = p.map(func, tableX)
  
    bbbbb=pd.DataFrame(tableX, obj_fu)
    bbbbb = bbbbb.reset_index();
    bbbbb.columns=['ObFuVal', 'b0', 'b1', 'b2', 'b3', 't1', 't2']
    bbbbb= bbbbb.sort_values(by=['ObFuVal'], ascending=True)
    
    return bbbbb.head(10)
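# Note: multiprocessing.Pool re-imports this module in the workers; on
# platforms using the 'spawn' start method (Windows, newer macOS Pythons)
# the calling code should sit behind a main guard, e.g.
#   if __name__ == '__main__':
#       best = do_init_eval(maintab, [5,3,3,3,3,7], 0.03, 0.005)
# The pool is also left open here; p.close(); p.join() would release workers.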



def is_nss_feasible(params):
    if (params[0]>0) & (params[0]+params[1]>0) & (params[4]>0) & (params[5]>params[4]):
        return True
    else:
        return False
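# Feasibility mirrors the inequality constraints used in Opt1: b0>0, b0+b1>0,
# t1>0 and t2>t1; e.g. is_nss_feasible([0.03, -0.005, 0.01, 0.01, 1, 1.85]) -> True.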


def tab_price_list_full_nss(trade_date, fix_nr, params_nss):
    #tab=bondbase.loc[trade_date,fix_nr, :]
    #tab=tab.reset_index()
    tab=bondbase_add.loc[trade_date] 
    
    bond_list=tab['bond'].tolist()
    outstan=outstanding3(trade_date,fix_nr)
    outstan=pd.DataFrame(outstan)
    
    ytm=[]
    weights=[]
    duration=[]
    mdur=[]
    convexity=[]
    ai=[]
    yr_frac=[]
    ytm_nss=[]
    ytm_nss_in_bid_ask=[] 
    
    for bond in bond_list:
        price=np.asscalar(tab[tab['bond']==bond].px_fix)*10;
        ytm.append(round(np.asscalar(YTM(bond, trade_date, acc_int(bond, settle_day(2, trade_date)[0]),price)*100),2))
        weights.append(round(np.asscalar( outstan[outstan[0]==bond][2]) ,4))
        duration.append(np.asscalar(SENS(bond, trade_date, price)[0]))
        mdur.append(np.asscalar(SENS(bond, trade_date, price)[1]))
        convexity.append(np.asscalar(SENS(bond, trade_date, price)[2]))
        ai.append(round(acc_int(bond, settle_day(2, trade_date)[0]) ,2))
        yr_frac.append(round((operationsbase.xs(bond, level=1, drop_level=False).mat_date[0]-pd.Timestamp(settle_day(2, trade_date)[0])).days/365,4))
        
        pricenss=PRC_NSS(bond, trade_date, acc_int(bond, settle_day(2, trade_date)[0]), params_nss)
        ytmtech=round(np.asscalar(YTM(bond, trade_date, acc_int(bond, settle_day(2, trade_date)[0]),pricenss)*100),2)
        ytm_nss.append(ytmtech)
        ytm_nss_in_bid_ask.append(((ytmtech<=tab[tab['bond']==bond].ytm_bid.astype(float)) & (ytmtech>=tab[tab['bond']==bond].ytm_ask.astype(float))).bool())
 
    tab['ytm_my']=ytm
    tab['ytm_fix']=tab['ytm_fix'].astype(float)
    tab['diff']=tab.ytm_my-tab.ytm_fix
    tab['acc_int']=ai
    tab['weight_OA']=weights
    tab['D']=duration
    tab['MD']=mdur
    tab['C']=convexity
    tab['yr_frac']=yr_frac

    tab['ytm_nss']=ytm_nss
    tab['ytm_diff']=tab['ytm_nss']-tab['ytm_fix'].astype(float)
    tab['ytm_nss_in_BA']=ytm_nss_in_bid_ask
        
    ss=tab['ytm_diff'].astype(float).abs().mean()
    ss_max=tab['ytm_diff'].astype(float).abs().max()
    how_many_in_BA=tab[tab['ytm_nss_in_BA']==True].sum(axis=0).ytm_nss_in_BA
    prc_in_BA=how_many_in_BA/len(bond_list)


    return tab, ss, ss_max, how_many_in_BA, prc_in_BA




def do_nss_fitting(date_nr_start, date_nr_end, method, bestN, filename):
    listaaa=bondbase.index.unique(level='date') # list of all unique dates in bondbase
    l2=listaaa[date_nr_start:date_nr_end]
    #Mega_base=[]
    col_names=['date', 'b0', 'b1', 'b2', 'b3', 'tau1', 'tau2',  'ObFuVal', 'diff_avg', 'diff_max', 'prc_in_BA', 'time1', 'time2', 'time3', 'time4']
    mega_df=pd.DataFrame(columns=col_names)
    
    for date in l2: 
        # Creating table of all possible info about bonds fixed on THE given date
        start1=timer()
        maintab=tab_price_list_full(date, 2)
        end1=timer()
        print(date, end1-start1)
        
        # Creating 10 best starting vectors
        start2=timer()
        best_start_params=do_init_eval(maintab, [5,3,3,3,3,7], 0.03, 0.005)   # [5,3,3,3,3,7], 0.03, 0.005)
        end2=timer()
        print(date, end2-start2)
        
        # Fitting NSS params and 10 best starting vectors
        all_optimised_10=[]
        list_10_ObjFunVal=[]
        start3=timer()
        for j in range(bestN):
            print(j, '-th optimisation')
            v=best_start_params.iloc[j]
            x0best=[v[1], v[2], v[3], v[4], v[5], v[6]]
            current_best=Opt1(x0best, method, maintab)
            all_optimised_10.append([current_best[0], current_best[1], current_best[2], current_best[3]])
            list_10_ObjFunVal.append(current_best[1])
            
        index_min = list_10_ObjFunVal.index(min(list_10_ObjFunVal))
        vb=all_optimised_10[index_min][0]
        nss_params_best=[vb[0], vb[1], vb[2], vb[3], vb[4], vb[5]]
        nss_obj_val_best=all_optimised_10[index_min][1]
        end3=timer()
        print(date, end3-start3)
        
        # Calculating YTM diffs
        start4=timer()
        bond_list=maintab['bond'].tolist()
        ytm_nss=[]
        ytm_nss_in_bid_ask=[]
        
        for bond in bond_list:
            price=PRC_NSS(bond, date, acc_int(bond, settle_day(2, date)[0]), nss_params_best)
            ytmtech=round(np.asscalar(YTM(bond, date, acc_int(bond, settle_day(2, date)[0]),price)*100),2)
            ytm_nss.append(ytmtech)
            ytm_nss_in_bid_ask.append(((ytmtech<=maintab[maintab['bond']==bond].ytm_bid.astype(float)) & (ytmtech>=maintab[maintab['bond']==bond].ytm_ask.astype(float))).bool())
            
        maintab['ytm_nss']=ytm_nss
        maintab['ytm_diff']=maintab['ytm_nss']-maintab['ytm_fix'].astype(float)
        maintab['ytm_nss_in_BA']=ytm_nss_in_bid_ask
        
        ss=maintab['ytm_diff'].astype(float).abs().mean()
        ss_max=maintab['ytm_diff'].astype(float).abs().max()
        how_many_in_BA=maintab[maintab['ytm_nss_in_BA']==True].sum(axis=0).ytm_nss_in_BA
        prc_in_BA=how_many_in_BA/len(bond_list)
        
        end4=timer()
        print(date, end4-start4)
        
        print(date, vb[0], vb[1], vb[2], vb[3], vb[4], vb[5],  nss_obj_val_best, ss, ss_max, prc_in_BA, end1-start1, end2-start2, end3-start3, end4-start4 )
        
        #Mega_base.append([date, vb[0], vb[1], vb[2], vb[3], vb[4], vb[5],  nss_obj_val_best, ss, ss_max, prc_in_BA, end1-start1, end2-start2, end3-start3, end4-start4 ])
         
        row_df=pd.DataFrame([[date, vb[0], vb[1], vb[2], vb[3], vb[4], vb[5],  nss_obj_val_best, ss, ss_max, prc_in_BA, end1-start1, end2-start2, end3-start3, end4-start4]], columns=col_names )
        
        mega_df = pd.concat([mega_df,row_df])
        mega_df.to_hdf(str(filename)+str(date_nr_start)+'_'+str(date_nr_end)+'.h5', 'df')
        
    return mega_df
    
# cc=pd.DataFrame([['2019-02-06', 0.025370845832487512,-0.010642277941949396,-0.04456286905151759]], columns=['date','a', 'b', 'c'] )
# mega = pd.concat(Mega_base)
# mega.to_hdf(str(filename_root), 'df')
# baza_nss = pd.read_hdf('NSS1_1_11.h5')



# New exercises with multiprocessing
       
def f3(t, methodss, maintabss):
    rr=Opt1(t, methodss, maintabss)
    return rr

  
def do_nss_fitting_multi(date_nr_start, date_nr_end, method, bestN, filename):
    listaaa=bondbase.index.unique(level='date') # list of all unique dates in bondbase
    l2=listaaa[date_nr_start:date_nr_end]
    col_names=['date', 'b0', 'b1', 'b2', 'b3', 'tau1', 'tau2',  'ObFuVal', 'diff_avg', 'diff_max', 'prc_in_BA', 'time1', 'time2', 'time3', 'time4', 'OFV_improve', 'ytm_L', 'ytm_S']
    mega_df=pd.DataFrame(columns=col_names)
    ile_razem=date_nr_end-date_nr_start      # total number of dates to process
    ile_zrobione=0                            # number of dates processed so far
    
    for date in l2: 
        # Creating table of all possible info about bonds fixed on THE given date
        start1=timer()
        maintab=tab_price_list_full(date, 2)
        end1=timer()
        #print(date, end1-start1)
        
        # Creating 10 best starting vectors
        start2=timer()
        best_start_params=do_init_eval(maintab, [5,3,3,3,3,7], 0.03, 0.005)   # [5,3,3,3,3,7], 0.03, 0.005)
        end2=timer()
        #print(date, end2-start2)
        print(best_start_params)
        
        ObFuVal_bestguess=best_start_params.ObFuVal.tolist()[0]
        
        # Fitting NSS params and 10 best starting vectors
        all_optimised_10=[]
        list_10_ObjFunVal=[]
        
        start3=timer()
        
        ## MULTIPROCESSING START
        # Preparing list of lists/tuples as arguments for optimisation function
        Row_list =[] 
        for index, rows in best_start_params.iterrows(): 
            my_list =[rows.b0, rows.b1, rows.b2, rows.b3, rows.t1, rows.t2] 
            Row_list.append(my_list) 
        Row_list=Row_list[0:bestN]
        
        func = partial(f3, methodss=method, maintabss=maintab)        
        p = Pool()
        u = p.map(func, Row_list)
        ### MULTIPROCESSING END
        
        for j in range(bestN):
            all_optimised_10.append([u[j][0], u[j][1], u[j][2], u[j][3]])
            list_10_ObjFunVal.append(u[j][1])
        
        index_min = list_10_ObjFunVal.index(min(list_10_ObjFunVal))
        vb=all_optimised_10[index_min][0]
        nss_params_best=[vb[0], vb[1], vb[2], vb[3], vb[4], vb[5]]
        nss_obj_val_best=all_optimised_10[index_min][1]
        end3=timer()
        #print(date, end3-start3)
        #print(nss_params_best, nss_obj_val_best)
        
        # Calculating YTM diffs
        start4=timer()
        bond_list=maintab['bond'].tolist()
        ytm_nss=[]
        ytm_nss_in_bid_ask=[]
        
        for bond in bond_list:
            price=PRC_NSS(bond, date, acc_int(bond, settle_day(2, date)[0]), nss_params_best)
            ytmtech=round(np.asscalar(YTM(bond, date, acc_int(bond, settle_day(2, date)[0]),price)*100),2)
            ytm_nss.append(ytmtech)
            ytm_nss_in_bid_ask.append(((ytmtech<=maintab[maintab['bond']==bond].ytm_bid.astype(float)) & (ytmtech>=maintab[maintab['bond']==bond].ytm_ask.astype(float))).bool())
            
        maintab['ytm_nss']=ytm_nss
        maintab['ytm_diff']=maintab['ytm_nss']-maintab['ytm_my'].astype(float)
        maintab['ytm_nss_in_BA']=ytm_nss_in_bid_ask
        
        ss=maintab['ytm_diff'].astype(float).abs().mean()
        ss_max=maintab['ytm_diff'].astype(float).abs().max()
        how_many_in_BA=maintab[maintab['ytm_nss_in_BA']==True].sum(axis=0).ytm_nss_in_BA
        prc_in_BA=how_many_in_BA/len(bond_list)
        
        end4=timer()
        #print(date, end4-start4)
        
        print(date, vb[0], vb[1], vb[2], vb[3], vb[4], vb[5],  nss_obj_val_best, ss, ss_max, prc_in_BA, end1-start1, end2-start2, end3-start3, end4-start4 )
        
        ytm_L=maintab['ytm_fix'][maintab.index[-1]]
        ytm_S=maintab['ytm_fix'][0]
        
        impr=1-nss_obj_val_best/ObFuVal_bestguess
        row_df=pd.DataFrame([[date, vb[0], vb[1], vb[2], vb[3], vb[4], vb[5],  nss_obj_val_best, ss, ss_max, prc_in_BA, end1-start1, end2-start2, end3-start3, end4-start4, impr, ytm_L, ytm_S]], columns=col_names)
        
        mega_df = pd.concat([mega_df,row_df])
        mega_df.to_hdf(str(filename)+str(date_nr_start)+'_'+str(date_nr_end)+'.h5', 'df')
        
        ile_zrobione=ile_zrobione+1
        print('Working progress:  performed - '+str(ile_zrobione)+' of total: '+str(ile_razem))
        
    return mega_df


#%%
# Creating base with 0.5 - 11 years NSS fitted yields
# NSS_RATES(T, b0, b1, b2, b3, t1, t2):
# return yc_rate, fw_rate, slope, garb1, garb2

tenorNSSbase=fitNSSbase;
lista_yfrac_tenorow=np.linspace(0.5,11,22)     # 22 tenors, semiannually spaced from 0.5y to 11y

for i in range(len(lista_yfrac_tenorow)):
    tenorNSSbase['T'+str(lista_yfrac_tenorow[i])]=100*NSS_RATES(lista_yfrac_tenorow[i], tenorNSSbase['b0'], tenorNSSbase['b1'], tenorNSSbase['b2'], tenorNSSbase['b3'], tenorNSSbase['tau1'], tenorNSSbase['tau2'])[0]


tenorNSSbase['short_rate']=tenorNSSbase['b0']+ tenorNSSbase['b1']
tenorNSSbase['slope10_2']= tenorNSSbase['T10.0']-tenorNSSbase['T2.0']
tenorNSSbase['slope10_05']= tenorNSSbase['T10.0']-tenorNSSbase['T0.5']
tenorNSSbase['bfly10_3_05']= tenorNSSbase['T10.0']-2*tenorNSSbase['T3.0']+tenorNSSbase['T0.5']
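# As T -> 0 the NSS zero rate tends to b0 + b1, hence 'short_rate' above;
# with hypothetical b0=0.03 and b1=-0.005 the short end would sit at 2.5%.
# slope and bfly are plain tenor combinations in percentage points.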


# Graphing TS of a certain tenor NSS fitted yield
fig, ax = plt.subplots(figsize=(12,8))
plt.plot(tenorNSSbase['date'], tenorNSSbase['T0.5'])
plt.grid()
plt.title('Time series of a short (6m) NSS fitted yield (PL)')
plt.savefig('TS_05_rate.pdf', bbox_inches='tight')
plt.show()


# Graphing TS of a slope between two tenors of NSS fitted yields
fig, ax = plt.subplots(figsize=(12,8))
plt.plot(tenorNSSbase['date'], tenorNSSbase['T10.0']-tenorNSSbase['T2.0'])
plt.grid()
plt.title('Time series of the slope between 10y and 2y NSS fitted yields (PL)')
plt.savefig('TS_10_2_slope.pdf', bbox_inches='tight')
plt.show()


# Graphing TS of a butterfly between three tenors of NSS fitted yields
fig, ax = plt.subplots(figsize=(12,8))
plt.plot(tenorNSSbase['date'], tenorNSSbase['T10.0']-2*tenorNSSbase['T5.0']+tenorNSSbase['T0.5'])
plt.grid()
plt.title('Time series of the 10y - 5y - 0.5y butterfly of NSS fitted yields (PL)')
plt.savefig('TS_10_5_05_BF.pdf', bbox_inches='tight')
plt.show()


#%%
lista_nazw_tenorow=['T'+str(x) for x in lista_yfrac_tenorow ]

# Graphing boxplots for fitted NSS tenors, all data
fig, ax = plt.subplots(figsize=(12,8))
plt.boxplot(np.array(tenorNSSbase.iloc[:, 18:40]), labels=lista_yfrac_tenorow)
plt.grid()
plt.title('Stats for fitted YC by tenors and years')
plt.savefig('IntQ_YC.pdf', bbox_inches='tight')
plt.show()



# Graphing boxplots for fitted NSS tenors, year by year
yrs=[2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018]
for yr in yrs:
    fig, ax = plt.subplots(figsize=(12,8))
    plt.boxplot(np.array(tenorNSSbase.loc[tenorNSSbase.date.dt.year==yr, lista_nazw_tenorow]), labels=lista_yfrac_tenorow)
    plt.grid()
    plt.title('Stats for fitted YC by tenors in '+str(yr))
    plt.savefig('IntQ_YC'+str(yr)+'.pdf', bbox_inches='tight')
    plt.show()


#%% Graph - all average yield curves all years
    
avgYC_tenorNSSbase=tenorNSSbase.groupby(tenorNSSbase.date.dt.year).mean()
fig, ax = plt.subplots(figsize=(12,10))
for yr in yrs:
    series_to_plot=np.array(avgYC_tenorNSSbase.loc[avgYC_tenorNSSbase.index==yr, lista_nazw_tenorow]).T
    plt.plot(lista_yfrac_tenorow, series_to_plot)
    plt.text(lista_yfrac_tenorow[21]+0.1, series_to_plot[21], str(yr))

plt.grid()
plt.title('Stats for fitted YC by tenors - all data')
plt.savefig('AverageYCs.pdf', bbox_inches='tight')
plt.show()







#%%

t22=tenorNSSbase.groupby(tenorNSSbase.date.dt.year).mean()
t33=tenorNSSbase.groupby(tenorNSSbase.date.dt.year).quantile(0.25)
t44=tenorNSSbase.groupby(tenorNSSbase.date.dt.year).quantile(0.5)
t55=tenorNSSbase.groupby(tenorNSSbase.date.dt.year).quantile(0.75)

# Do xls with that
writer = pd.ExcelWriter('tenorNSSbaseMeanQartiles.xlsx', engine='xlsxwriter')
t22.to_excel(writer, sheet_name='Mean')
t33.to_excel(writer, sheet_name='Q1')
t44.to_excel(writer, sheet_name='Median')
t55.to_excel(writer, sheet_name='Q3')
writer.save()


#plt.boxplot(np.array(tenorNSSbase.iloc[:, 18:40]), labels=lista_yfrac_tenorow)
# plt.boxplot(np.array(tenorNSSbase.loc[tenorNSSbase.date.dt.year==2019, lista_nazw_tenorow]), labels=lista_yfrac_tenorow)


#%%
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
PCAcompo_base_ANN=[]

fig, ax = plt.subplots(figsize=(12,8))
principalComponents = pca.fit(np.array(tenorNSSbase.iloc[:, 18:40]))
plt.plot(lista_yfrac_tenorow, principalComponents.components_[0])
plt.plot(lista_yfrac_tenorow, principalComponents.components_[1])
plt.plot(lista_yfrac_tenorow, principalComponents.components_[2])
plt.grid()
plt.suptitle('PCA all data')
plt.title('PC1: '+str(round(principalComponents.explained_variance_ratio_[0],4))+'   PC2: '+str(round(principalComponents.explained_variance_ratio_[1],4))+ '   PC3: '+str(round(principalComponents.explained_variance_ratio_[2],4)) )

plt.savefig('PCA all data.pdf', bbox_inches='tight')
plt.legend(['PC1', 'PC2', 'PC3'])
plt.show()


# Loadings (eigenvectors scaled by component std): pca.components_ * np.sqrt(pca.explained_variance_)


PCAcompo_base_ANN.append(['all',principalComponents.components_[0], principalComponents.components_[1], principalComponents.components_[2], principalComponents.explained_variance_ratio_[0], principalComponents.explained_variance_ratio_[1], principalComponents.explained_variance_ratio_[2] ])



for yr in yrs:
    fig, ax = plt.subplots(figsize=(12,8))
    ff=np.array(tenorNSSbase.loc[tenorNSSbase.date.dt.year==yr, lista_nazw_tenorow])
    principalComponents = pca.fit(ff)
    #principalDf = pd.DataFrame(data = principalComponents.components_[0:3], columns = ['PC1', 'PC2', 'PC3'])
    plt.plot(lista_yfrac_tenorow, principalComponents.components_[0])
    plt.plot(lista_yfrac_tenorow, principalComponents.components_[1])
    plt.plot(lista_yfrac_tenorow, principalComponents.components_[2])
    
    #PCAcompo_base_ANN.append([yr,principalComponents.components_[0], principalComponents.components_[1], principalComponents.components_[2] ])
    PCAcompo_base_ANN.append([yr,principalComponents.components_[0], principalComponents.components_[1], principalComponents.components_[2], principalComponents.explained_variance_ratio_[0], principalComponents.explained_variance_ratio_[1], principalComponents.explained_variance_ratio_[2]])

    #plt.boxplot(np.array(tenorNSSbase.loc[tenorNSSbase.date.dt.year==yr, lista_nazw_tenorow]), labels=lista_yfrac_tenorow)
    plt.grid()
    plt.suptitle('PCA '+str(yr))
    plt.title('PC1: '+str(round(principalComponents.explained_variance_ratio_[0],4))+'   PC2: '+str(round(principalComponents.explained_variance_ratio_[1],4))+ '   PC3: '+str(round(principalComponents.explained_variance_ratio_[2],4)) )
    plt.savefig('PCA_'+str(yr)+'.pdf', bbox_inches='tight')
    plt.legend(['PC1', 'PC2', 'PC3'])
    plt.show()

# PCAcompo_base_ANN is a plain list (no .to_hdf); it is converted to a DataFrame (pcdf) in the next cell before export


#%%
# Tables with principal components year-by-year and variance explained ratios

pcdf = pd.DataFrame(data=PCAcompo_base_ANN)


pc1df=pcdf.drop(columns=[2,3,4,5,6])
pc1df.columns=['Period', 'PC1']
pc1=pc1df.PC1.apply(pd.Series)
pc1.columns=lista_nazw_tenorow
pc1_=pd.merge(pc1df, pc1, left_index = True, right_index = True)
pc1_=pc1_.iloc[:, [0,3,5,7,11,15,21]]
print(pc1_)


pc2df=pcdf.drop(columns=[1,3,4,5,6])
pc2df.columns=['Period', 'PC2']
pc2=pc2df.PC2.apply(pd.Series)
pc2.columns=lista_nazw_tenorow
pc2_=pd.merge(pc2df, pc2, left_index = True, right_index = True)
pc2_=pc2_.iloc[:, [0,3,5,7,11,15,21]]
print(pc2_)


pc3df=pcdf.drop(columns=[1,2,4,5,6])
pc3df.columns=['Period', 'PC3']
pc3=pc3df.PC3.apply(pd.Series)
pc3.columns=lista_nazw_tenorow
pc3_=pd.merge(pc3df, pc3, left_index = True, right_index = True)
pc3_=pc3_.iloc[:, [0,3,5,7,11,15,21]]
print(pc3_)


pcdf_var_exp=pcdf.drop(columns=[1,2,3])
pcdf_var_exp.columns=['Period', 'PC1','PC2','PC3']
print(pcdf_var_exp)


# Do xls with that
writer = pd.ExcelWriter('PCA.xlsx', engine='xlsxwriter')
pcdf_var_exp.to_excel(writer, sheet_name='pcdf_var_exp')
pc1_.to_excel(writer, sheet_name='pc1_')
pc2_.to_excel(writer, sheet_name='pc2_')
pc3_.to_excel(writer, sheet_name='pc3_')
pc1.to_excel(writer, sheet_name='pc1')
pc2.to_excel(writer, sheet_name='pc2')
pc3.to_excel(writer, sheet_name='pc3')
writer.save()








#%% Tables with volatilities:

# Daily percentage changes and rolling one-quarter (63 trading day)
# annualised volatilities for the 1y-11y tenors
vollist=[]
vollist_names=['vol_r'+str(i)+'_3m' for i in range(1,12)]
for i in range(1,12):
    r=tenorNSSbase['T'+str(i)+'.0'].pct_change()
    vollist.append(r.rolling(63).std() * np.sqrt(252))
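# sqrt(252) annualises the rolling daily standard deviation: a daily std of
# 1% maps to roughly 0.01*sqrt(252), i.e. about 15.9% annualised volatility.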

tenorNSSbase2=tenorNSSbase.copy()    # explicit copy: the vol columns added below should not mutate tenorNSSbase

fig, ax = plt.subplots(figsize=(15,8))
for i, j in zip(vollist, vollist_names):
    plt.plot(tenorNSSbase['date'],i)
    tenorNSSbase2[j]=i
plt.grid()
plt.title('All 3m volatilities for different tenors 1-11 yrs')
plt.legend(['1', '2', '3','4','5','6','7','8','9','10','11']) 
plt.savefig('AllVols.pdf', bbox_inches='tight')


t222=tenorNSSbase2.groupby(tenorNSSbase.date.dt.year).mean()
t332=tenorNSSbase2.groupby(tenorNSSbase.date.dt.year).std()
t442=tenorNSSbase2.groupby(tenorNSSbase.date.dt.year).quantile(0.5)


# Do xls with that
writer = pd.ExcelWriter('tenorNSSbaseVols.xlsx', engine='xlsxwriter')
t222.to_excel(writer, sheet_name='Mean')
t332.to_excel(writer, sheet_name='Std')
t442.to_excel(writer, sheet_name='Median')
writer.save()












#%%
fig, ax = plt.subplots(figsize=(12,10))
PCAcompo_base_ANN.pop(0)

for i in range(len(PCAcompo_base_ANN)):
    plt.plot(lista_yfrac_tenorow, PCAcompo_base_ANN[i][1], label=yrs[i], linewidth=(i+1)/2)   # (i+1)/2 so the first year is visible
    plt.text(lista_yfrac_tenorow[21]+0.1, PCAcompo_base_ANN[i][1][21], str(yrs[i]))
    plt.grid()
    plt.suptitle('PC1 yearly')
    #plt.title('PC1: '+str(round(principalComponents.explained_variance_ratio_[0],4))+'   PC2: '+str(round(principalComponents.explained_variance_ratio_[1],4))+ '   PC3: '+str(round(principalComponents.explained_variance_ratio_[2],4)) )
    plt.savefig('PCA_PC1.pdf', bbox_inches='tight')
    #plt.legend()
plt.show()



fig, ax = plt.subplots(figsize=(12,10))

for i in range(len(PCAcompo_base_ANN)):
    plt.plot(lista_yfrac_tenorow, PCAcompo_base_ANN[i][2], label=yrs[i], linewidth=(i+1)/2)
    plt.text(lista_yfrac_tenorow[21]+0.1, PCAcompo_base_ANN[i][2][21], str(yrs[i]))
    plt.grid()
    plt.suptitle('PC2 yearly')
    #plt.title('PC1: '+str(round(principalComponents.explained_variance_ratio_[0],4))+'   PC2: '+str(round(principalComponents.explained_variance_ratio_[1],4))+ '   PC3: '+str(round(principalComponents.explained_variance_ratio_[2],4)) )
    plt.savefig('PCA_PC2.pdf', bbox_inches='tight')
    #plt.legend()
plt.show()




fig, ax = plt.subplots(figsize=(12,10))

for i in range(len(PCAcompo_base_ANN)):
    plt.plot(lista_yfrac_tenorow, PCAcompo_base_ANN[i][3], label=yrs[i], linewidth=(i+1)/2)
    plt.text(lista_yfrac_tenorow[21]+0.1, PCAcompo_base_ANN[i][3][21], str(yrs[i]))
    plt.grid()
    plt.suptitle('PC3 yearly')
    #plt.title('PC1: '+str(round(principalComponents.explained_variance_ratio_[0],4))+'   PC2: '+str(round(principalComponents.explained_variance_ratio_[1],4))+ '   PC3: '+str(round(principalComponents.explained_variance_ratio_[2],4)) )
    plt.savefig('PCA_PC3_years.pdf', bbox_inches='tight')
    #plt.legend()
plt.show()








#%% How to calculate volatilities in BS style

#Loading Reuters data on swaptions 2014-2018(Jan)
path="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN"
os.chdir(path)

daneRTS=pd.read_table('PLNswaptionsRTS.csv',sep=';')
ffdate=pd.to_datetime(daneRTS['date'],  format='%d/%m/%Y')
daneRTS['date']=ffdate
daneRTS=daneRTS.iloc[::-1]

#Loading VIX data
Vixbase= pd.read_csv('vixcurrentcopy.csv', parse_dates=['Date'], dayfirst=False)
Vixbase.columns=['date', 'vix_open', 'vix_high', 'vix_low', 'vix_close']
#Vixbase.index=Vixbase['date']
Vixbase.drop(columns=[ 'vix_open', 'vix_high', 'vix_low'], inplace=True)
Vixbase.reset_index();
Vixbase=Vixbase[Vixbase['date']<='2019-02-28']
Vixbase.dropna(inplace=True)
# Mega base with volatilities and market vol data (bond yield volatilities and VIX)
tenorNSSbase3=pd.merge(tenorNSSbase2,  daneRTS  , left_on = 'date', right_on = 'date', how='left')
tenorNSSbase3=pd.merge(tenorNSSbase3,  Vixbase  , left_on = 'date', right_on = 'date', how='inner')




#%% Calculated vols of different tenors compared with mkt data from Reuters on Swaptions
fig, ax = plt.subplots(figsize=(15,8))
rr=tenorNSSbase['T10.0'].pct_change()
#pp=np.ma.average(tenorNSSbase['T10.0'], axis=1, weights=[0.1, 0.2, 0.2, 0.5])

#plt.plot(tenorNSSbase['date'],rr)
ss1m=rr.rolling(21).std() * np.sqrt(252) #monthly
ss1q=rr.rolling(126).std() * np.sqrt(252) # semi-annual window (126 trading days)
plt.plot(tenorNSSbase['date'],ss1m)
plt.plot(tenorNSSbase['date'],ss1q)
plt.plot(daneRTS['date'],daneRTS.PL6MX10YA/100)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3.vix_close/100)
plt.grid()
plt.title('10Y yield vol annualised')
plt.legend(['1M window','6M window', 'Swptn 6Mx10Y', 'VIX'])
plt.savefig('Vol10Y.pdf', bbox_inches='tight')











#%% NSS parameters - TimeSeries plots


fig, ax = plt.subplots(3,2,figsize=(12,16))
plt.subplot(321)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['b0'])
plt.title('Beta 0')
plt.grid(True)

plt.subplot(322)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['b1'])
plt.title('Beta 1')
plt.grid(True)

plt.subplot(323)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['b2'])
plt.title('Beta 2')
plt.grid(True)

plt.subplot(324)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['b3'])
plt.title('Beta 3')
plt.grid(True)

plt.subplot(325)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['tau1'])
plt.title('Tau 1')
plt.grid(True)

plt.subplot(326)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['tau2'])   # was tau1, plotted twice by mistake
plt.title('Tau 2')
plt.grid(True)

plt.savefig('NSSparamsTS.pdf', bbox_inches='tight')





#%% Optimisation performance and quality - TimeSeries plots


fig, ax = plt.subplots(3,2,figsize=(12,16))
plt.subplot(321)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['ObFuVal'])
plt.title('Objective function value')
plt.grid(True)

plt.subplot(322)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['diff_avg'])
plt.title('Avg diff (bps)')
plt.grid(True)

plt.subplot(323)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['diff_max'])
plt.title('Max diff (bps)')
plt.grid(True)

plt.subplot(324)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['prc_in_BA'])
plt.title('Share within BA sprd')
plt.grid(True)

plt.subplot(325)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['time2'])
plt.title('Calc time2')
plt.grid(True)

plt.subplot(326)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['time3'])
plt.title('Calc time3')
plt.grid(True)

plt.savefig('OptiQualTS.pdf', bbox_inches='tight')







#%% Level, slope, curvature  - TimeSeries plots


fig, ax = plt.subplots(3,1,figsize=(12,16))
plt.subplot(311)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['T10.0'])
plt.title('Level (10y)')
plt.grid(True)

plt.subplot(312)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['slope10_05'])
plt.title('Slope (10y-0.5y)')
plt.grid(True)

plt.subplot(313)
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['bfly10_3_05'])
plt.title('Curvature (10y-2x3y+0.5y)')
plt.grid(True)


plt.savefig('LvlSlopeCurTS.pdf', bbox_inches='tight')








#%%
#matplotlib.get_backend()

list_additional=['short_rate','slope10_2','slope10_05','bfly10_3_05', 'vix_close']
list_param_names=['b0','b1','b2','b3','tau1','tau2']
lista_nazw_tenorow_wybranych=['T0.5', 'T1.0', 'T2.0', 'T5.0', 'T7.0', 'T10.0']
vvix=['vix_close']

tenorNSSbase3CutOut=tenorNSSbase3[lista_nazw_tenorow_wybranych +  list_additional].copy()
corr = tenorNSSbase3CutOut.corr()
mask = np.zeros_like(corr, dtype=bool) # Generate a mask for the upper triangle
mask[np.triu_indices_from(mask)] = True
f, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(corr, mask=mask, cmap="YlGnBu", annot=True)
plt.savefig('HMap_tenors_add.pdf', bbox_inches='tight')


tenorNSSbase3CutOut=tenorNSSbase3[vollist_names].copy()
corr = tenorNSSbase3CutOut.corr()
mask = np.zeros_like(corr, dtype=bool) # Generate a mask for the upper triangle
mask[np.triu_indices_from(mask)] = True
f, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(corr, mask=mask, cmap="YlGnBu", annot=True)
plt.savefig('HMap_vols.pdf', bbox_inches='tight')


tenorNSSbase3CutOut=tenorNSSbase3[list_param_names].copy()
corr = tenorNSSbase3CutOut.corr()
mask = np.zeros_like(corr, dtype=bool) # Generate a mask for the upper triangle
mask[np.triu_indices_from(mask)] = True
f, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(corr, mask=mask, cmap="YlGnBu", annot=True)
plt.savefig('HMap_params.pdf', bbox_inches='tight')

f, ax = plt.subplots(figsize=(11, 9))
plt.plot(tenorNSSbase3['date'],tenorNSSbase3['vol_r10_3m'].rolling(252).corr(tenorNSSbase3['vix_close']))
plt.grid()






#%% CREATING BASE FOR EXPECTATIONS HYPOTHESIS TESTING 

def fwd(short_rate, long_rate, short_yr, long_yr ):
    fwd=((1+long_rate/100)**(long_yr)/((1+short_rate/100)**(short_yr)))**(1/(long_yr-short_yr))-1
    return fwd*100
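# Worked example: with a 1y rate of 2% and a 2y rate of 3%,
#   fwd(2, 3, 1, 2) = ((1.03**2 / 1.02)**(1/(2-1)) - 1)*100 = 4.0098...
# i.e. the 1y rate implied one year forward is about 4.01%.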



EH_base=tenorNSSbase3.set_index('date').copy()

# SHIFTED yields - these are rates from the FUTURE, i.e. T3.0_shifted_2Y means the 3y yield observed 2 years after the index date
lista_tenorow_years=['T1.0','T2.0','T3.0','T4.0','T5.0','T6.0','T7.0','T8.0','T9.0','T10.0', 'T11.0']
lista_tenorow_years_shifted_1Y=[x+'_shifted_1Y' for x in lista_tenorow_years[:10] ]

tol = pd.Timedelta('3 day')
EH_data=EH_base[lista_tenorow_years]

for i in range(1):
    techDF=pd.DataFrame(EH_base.shift(-365*(i+1), freq='D')[lista_tenorow_years[:10]])
    techDF.columns=lista_tenorow_years_shifted_1Y
    EH_data = pd.merge_asof(left=EH_data,right=techDF,right_index=True,left_index=True,direction='nearest',tolerance=tol)

EH_data=EH_data[:'2018-03-01']

lista_tenorow_years_fwd_start_in_1Y=['FWD_(' +x+ ')_T1.0_'+y for (x,y) in zip(lista_tenorow_years[0:10], lista_tenorow_years[1:11] )]

k=2
for i,j in zip(lista_tenorow_years_fwd_start_in_1Y, lista_tenorow_years[1:11] ):
    EH_data[i]=fwd(EH_data['T1.0'], EH_data[j],  1,  k)
    k=k+1
    
lista_tenorow_spread_to_1Y=['SPR_' +x+ '_to_T1.0' for x in lista_tenorow_years[0:10]]


for i,j in zip(lista_tenorow_spread_to_1Y, lista_tenorow_years[0:10] ):
    EH_data[i]=EH_data[j]-EH_data['T1.0']







#%%

fig, ax = plt.subplots(figsize=(12,8))
plt.plot(EH_data['FWD_(T3.0)_T1.0_T4.0']); 
plt.plot(EH_data['T3.0_shifted_1Y'], color='red')
#plt.plot(EH_data['T1.0'], color='green')
plt.grid()
#plt.title('5Y yield vol annualised ')
plt.legend([ 'FWD_(T3.0)_T1.0_T4.0', 'T3.0_shifted_1Y'])
plt.savefig('timeseriesFutFwd.pdf', bbox_inches='tight')


fig, ax = plt.subplots(figsize=(10,10))
plt.scatter(EH_data['FWD_(T3.0)_T1.0_T4.0'], EH_data['T3.0_shifted_1Y'], marker='+');
ax.set_xlabel('Forward yield (expected future yield)')
ax.set_ylabel('Real future yield') 
plt.grid()
#plt.title('5Y yield vol annualised ')
#plt.legend([ 'FWD_(T3.0)_T1.0_T4.0', 'T3.0_shifted_1Y'])
plt.savefig('scatterFutFwd.pdf', bbox_inches='tight')



#%%

EH_data=EH_data[lista_tenorow_years_shifted_1Y+lista_tenorow_years_fwd_start_in_1Y+lista_tenorow_spread_to_1Y]

# Preparing PANEL
lista_maturx3=np.tile(range(1,11),3)
lista_variables=np.repeat(['FutYield','CurrFwdYield','TenorSpread'],10)
EH_data=EH_data.T.set_index(lista_maturx3, append=False).T
EH_data=EH_data.T.set_index(lista_variables, append=True).T

EH_data2=EH_data.stack(0)

EH_data2.to_stata('EH_data.dta')





#%% CREATING BASE 2 FOR EXPECTATIONS HYPOTHESIS TESTING - FamaBliss type 1


EH_base=tenorNSSbase3.set_index('date').copy()

# SHIFTED yields - these are rates from the FUTURE, i.e. T3.0_shifted_2Y means the 3y yield observed 2 years after the index date
lista_tenorow_years=['T1.0','T2.0','T3.0','T4.0','T5.0','T6.0','T7.0','T8.0','T9.0','T10.0', 'T11.0']
lista_tenorow_years_shifted_1Y=[x+'_shifted_1Y' for x in lista_tenorow_years[:10] ]

tol = pd.Timedelta('3 day')
EH_data=EH_base[lista_tenorow_years]

lista_tenorow_years_fwd_start_in_1Y=['FWD_(' +x+ ')_T1.0_'+y for (x,y) in zip(lista_tenorow_years[0:10], lista_tenorow_years[1:11] )]
lista_tenorow_spread_to_1Y=['SPR_' +x+ '_to_T1.0' for x in lista_tenorow_years[0:10]]


for i in range(1):
    techDF=pd.DataFrame(EH_base.shift(-365*(i+1), freq='D')[lista_tenorow_years[:10]])
    techDF.columns=lista_tenorow_years_shifted_1Y
    EH_data = pd.merge_asof(left=EH_data,right=techDF,right_index=True,left_index=True,direction='nearest',tolerance=tol)

EH_data=EH_data[:'2018-03-01']


k=2 # Remade to allow for spread instead of level values
for i,j,z in zip(lista_tenorow_years_fwd_start_in_1Y, lista_tenorow_years[1:11], lista_tenorow_years[0:10] ):
    EH_data[i]=fwd(EH_data['T1.0'], EH_data[j],  1,  k)-EH_data[z]
    k=k+1
    

for i,j in zip(lista_tenorow_spread_to_1Y, lista_tenorow_years[0:10] ):
    EH_data[i]=EH_data[j]-EH_data['T1.0']


# Made to allow for spread instead of level values (spread between future yield realised in the market and current yield)
for i,j in zip(lista_tenorow_years_shifted_1Y, lista_tenorow_years[0:10] ):
    EH_data[i]=EH_data[i]-EH_data[j]


EH_data=EH_data[lista_tenorow_years_shifted_1Y+lista_tenorow_years_fwd_start_in_1Y+lista_tenorow_spread_to_1Y]

# Preparing PANEL
lista_maturx3=np.tile(range(1,11),3)
lista_variables=np.repeat(['FutYieldSpr','CurrFwdYieldSpr','TenorSpread'],10)
EH_data=EH_data.T.set_index(lista_maturx3, append=False).T
EH_data=EH_data.T.set_index(lista_variables, append=True).T

EH_data3=EH_data.stack(0)

EH_data3.to_stata('EH_data2.dta')










#%% FAMA BLISS

# Regression 1: realised (RS) versus forward-implied / expected (ES) yield spreads

tenorNSSbase4=tenorNSSbase3.set_index('date').copy()

# SHIFTED yields - these are rates from the FUTURE, i.e. T3.0_shifted_2Y means the 3y yield observed 2 years after the index date
lista_tenorow_years=['T1.0','T2.0','T3.0','T4.0','T5.0','T6.0','T7.0','T8.0','T9.0','T10.0']
lista_tenorow_years_shifted_1Y=[x+'_shifted_1Y' for x in lista_tenorow_years ]
lista_tenorow_years_shifted_2Y=[x+'_shifted_2Y' for x in lista_tenorow_years ]
lista_tenorow_years_shifted_3Y=[x+'_shifted_3Y' for x in lista_tenorow_years ]
lista_tenorow_years_shifted_4Y=[x+'_shifted_4Y' for x in lista_tenorow_years ]
lista_tenorow_years_shifted_5Y=[x+'_shifted_5Y' for x in lista_tenorow_years ]
lista_list=[lista_tenorow_years_shifted_1Y, lista_tenorow_years_shifted_2Y, lista_tenorow_years_shifted_3Y, lista_tenorow_years_shifted_4Y, lista_tenorow_years_shifted_5Y]

tol = pd.Timedelta('3 day')
famaBlissBase=tenorNSSbase4;

for i in range(5):
    techDF=pd.DataFrame(tenorNSSbase4.shift(-365*(i+1), freq='D')[lista_tenorow_years])
    techDF.columns=lista_list[i]
    famaBlissBase = pd.merge_asof(left=famaBlissBase,right=techDF,right_index=True,left_index=True,direction='nearest',tolerance=tol)



# Realised spread between 1Y yields observed at index date and in 1,2,3,4,5Y time from index date
famaBlissBase['RS_1Y_1Y']=famaBlissBase['T1.0_shifted_1Y']-famaBlissBase['T1.0']
famaBlissBase['RS_1Y_2Y']=famaBlissBase['T1.0_shifted_2Y']-famaBlissBase['T1.0']
famaBlissBase['RS_1Y_3Y']=famaBlissBase['T1.0_shifted_3Y']-famaBlissBase['T1.0']
famaBlissBase['RS_1Y_4Y']=famaBlissBase['T1.0_shifted_4Y']-famaBlissBase['T1.0']
famaBlissBase['RS_1Y_5Y']=famaBlissBase['T1.0_shifted_5Y']-famaBlissBase['T1.0']

# Expected spreads between 1Y yields observed at index date and in 1,2,3,4,5Y time from index date
#  i.e. Forward - Spot
famaBlissBase['ES_1Y_1Y']=fwd(famaBlissBase['T1.0'], famaBlissBase['T2.0'],  1,  2) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_2Y']=fwd(famaBlissBase['T2.0'], famaBlissBase['T3.0'],  2,  3) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_3Y']=fwd(famaBlissBase['T3.0'], famaBlissBase['T4.0'],  3,  4) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_4Y']=fwd(famaBlissBase['T4.0'], famaBlissBase['T5.0'],  4,  5) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_5Y']=fwd(famaBlissBase['T5.0'], famaBlissBase['T6.0'],  5,  6) - famaBlissBase['T1.0']
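# Under the expectations hypothesis the forward-implied spread (ES) should be
# an unbiased predictor of the realised spread (RS); the Fama-Bliss style
# regressions pair each RS_* column with its matching ES_* column by tenor
# and horizon.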

#fig, ax = plt.subplots(figsize=(10,8))
#plt.plot(famaBlissBase.index, famaBlissBase['RS_1Y_1Y'], label='real')
#plt.plot(famaBlissBase.index, famaBlissBase['ES_1Y_1Y'], label='expe')
#plt.legend()
#plt.grid()
#plt.title('Exp_vs_Real_1Y_1Y_all_yrs')
#plt.savefig('Exp_vs_Real_1Y_1Y_all_yrs.pdf', bbox_inches='tight')



# Realised spread between 2Y yields observed at index date and in 1,2,3,4,5Y time from index date
famaBlissBase['RS_2Y_1Y']=famaBlissBase['T2.0_shifted_1Y']-famaBlissBase['T2.0']
famaBlissBase['RS_2Y_2Y']=famaBlissBase['T2.0_shifted_2Y']-famaBlissBase['T2.0']
famaBlissBase['RS_2Y_3Y']=famaBlissBase['T2.0_shifted_3Y']-famaBlissBase['T2.0']
famaBlissBase['RS_2Y_4Y']=famaBlissBase['T2.0_shifted_4Y']-famaBlissBase['T2.0']
famaBlissBase['RS_2Y_5Y']=famaBlissBase['T2.0_shifted_5Y']-famaBlissBase['T2.0']

# Expected spreads between 2Y yields observed at index date and in 1,2,3,4,5Y time from index date
#  i.e. Forward - Spot
famaBlissBase['ES_2Y_1Y']=fwd(famaBlissBase['T1.0'], famaBlissBase['T3.0'],  1,  3) - famaBlissBase['T2.0']
famaBlissBase['ES_2Y_2Y']=fwd(famaBlissBase['T2.0'], famaBlissBase['T4.0'],  2,  4) - famaBlissBase['T2.0']
famaBlissBase['ES_2Y_3Y']=fwd(famaBlissBase['T3.0'], famaBlissBase['T5.0'],  3,  5) - famaBlissBase['T2.0']
famaBlissBase['ES_2Y_4Y']=fwd(famaBlissBase['T4.0'], famaBlissBase['T6.0'],  4,  6) - famaBlissBase['T2.0']
famaBlissBase['ES_2Y_5Y']=fwd(famaBlissBase['T5.0'], famaBlissBase['T7.0'],  5,  7) - famaBlissBase['T2.0']

#fig, ax = plt.subplots(figsize=(10,8))
#plt.plot(famaBlissBase.index, famaBlissBase['RS_2Y_1Y'], label='real')
#plt.plot(famaBlissBase.index, famaBlissBase['ES_2Y_1Y'], label='expe')
#plt.legend()
#plt.grid()
#plt.title('Exp_vs_Real_2Y_1Y_all_yrs')
#plt.savefig('Exp_vs_Real_2Y_1Y_all_yrs.pdf', bbox_inches='tight')
#
#fig, ax = plt.subplots(figsize=(10,8))
#plt.plot(famaBlissBase.index, famaBlissBase['RS_2Y_2Y'], label='real')
#plt.plot(famaBlissBase.index, famaBlissBase['ES_2Y_2Y'], label='expe')
#plt.legend()
#plt.grid()
#plt.title('Exp_vs_Real_2Y_2Y_all_yrs')
#plt.savefig('Exp_vs_Real_2Y_2Y_all_yrs.pdf', bbox_inches='tight')
#
#fig, ax = plt.subplots(figsize=(10,8))
#plt.plot(famaBlissBase.index, famaBlissBase['RS_2Y_3Y'], label='real')
#plt.plot(famaBlissBase.index, famaBlissBase['ES_2Y_3Y'], label='expe')
#plt.legend()
#plt.grid()
#plt.title('Exp_vs_Real_2Y_3Y_all_yrs')
#plt.savefig('Exp_vs_Real_2Y_3Y_all_yrs.pdf', bbox_inches='tight')





# Realised spreads: 3Y yield observed 1,2,3,4,5Y after the index date minus the 3Y yield at the index date
famaBlissBase['RS_3Y_1Y']=famaBlissBase['T3.0_shifted_1Y']-famaBlissBase['T3.0']
famaBlissBase['RS_3Y_2Y']=famaBlissBase['T3.0_shifted_2Y']-famaBlissBase['T3.0']
famaBlissBase['RS_3Y_3Y']=famaBlissBase['T3.0_shifted_3Y']-famaBlissBase['T3.0']
famaBlissBase['RS_3Y_4Y']=famaBlissBase['T3.0_shifted_4Y']-famaBlissBase['T3.0']
famaBlissBase['RS_3Y_5Y']=famaBlissBase['T3.0_shifted_5Y']-famaBlissBase['T3.0']

# Expected spreads: forward 3Y yield starting 1,2,3,4,5Y ahead minus the 3Y yield at the index date,
#  i.e. Forward - Spot
famaBlissBase['ES_3Y_1Y']=fwd(famaBlissBase['T1.0'], famaBlissBase['T4.0'],  1,  4) - famaBlissBase['T3.0']
famaBlissBase['ES_3Y_2Y']=fwd(famaBlissBase['T2.0'], famaBlissBase['T5.0'],  2,  5) - famaBlissBase['T3.0']
famaBlissBase['ES_3Y_3Y']=fwd(famaBlissBase['T3.0'], famaBlissBase['T6.0'],  3,  6) - famaBlissBase['T3.0']
famaBlissBase['ES_3Y_4Y']=fwd(famaBlissBase['T4.0'], famaBlissBase['T7.0'],  4,  7) - famaBlissBase['T3.0']
famaBlissBase['ES_3Y_5Y']=fwd(famaBlissBase['T5.0'], famaBlissBase['T8.0'],  5,  8) - famaBlissBase['T3.0']


# Realised spreads: 4Y yield observed 1,2,3,4,5Y after the index date minus the 4Y yield at the index date
famaBlissBase['RS_4Y_1Y']=famaBlissBase['T4.0_shifted_1Y']-famaBlissBase['T4.0']
famaBlissBase['RS_4Y_2Y']=famaBlissBase['T4.0_shifted_2Y']-famaBlissBase['T4.0']
famaBlissBase['RS_4Y_3Y']=famaBlissBase['T4.0_shifted_3Y']-famaBlissBase['T4.0']
famaBlissBase['RS_4Y_4Y']=famaBlissBase['T4.0_shifted_4Y']-famaBlissBase['T4.0']
famaBlissBase['RS_4Y_5Y']=famaBlissBase['T4.0_shifted_5Y']-famaBlissBase['T4.0']

# Expected spreads: forward 4Y yield starting 1,2,3,4,5Y ahead minus the 4Y yield at the index date,
#  i.e. Forward - Spot
famaBlissBase['ES_4Y_1Y']=fwd(famaBlissBase['T1.0'], famaBlissBase['T5.0'],  1,  5) - famaBlissBase['T4.0']
famaBlissBase['ES_4Y_2Y']=fwd(famaBlissBase['T2.0'], famaBlissBase['T6.0'],  2,  6) - famaBlissBase['T4.0']
famaBlissBase['ES_4Y_3Y']=fwd(famaBlissBase['T3.0'], famaBlissBase['T7.0'],  3,  7) - famaBlissBase['T4.0']
famaBlissBase['ES_4Y_4Y']=fwd(famaBlissBase['T4.0'], famaBlissBase['T8.0'],  4,  8) - famaBlissBase['T4.0']
famaBlissBase['ES_4Y_5Y']=fwd(famaBlissBase['T5.0'], famaBlissBase['T9.0'],  5,  9) - famaBlissBase['T4.0']


# Realised spreads: 5Y yield observed 1,2,3,4,5Y after the index date minus the 5Y yield at the index date
famaBlissBase['RS_5Y_1Y']=famaBlissBase['T5.0_shifted_1Y']-famaBlissBase['T5.0']
famaBlissBase['RS_5Y_2Y']=famaBlissBase['T5.0_shifted_2Y']-famaBlissBase['T5.0']
famaBlissBase['RS_5Y_3Y']=famaBlissBase['T5.0_shifted_3Y']-famaBlissBase['T5.0']
famaBlissBase['RS_5Y_4Y']=famaBlissBase['T5.0_shifted_4Y']-famaBlissBase['T5.0']
famaBlissBase['RS_5Y_5Y']=famaBlissBase['T5.0_shifted_5Y']-famaBlissBase['T5.0']

# Expected spreads: forward 5Y yield starting 1,2,3,4,5Y ahead minus the 5Y yield at the index date,
#  i.e. Forward - Spot
famaBlissBase['ES_5Y_1Y']=fwd(famaBlissBase['T1.0'], famaBlissBase['T6.0'],  1,  6) - famaBlissBase['T5.0']
famaBlissBase['ES_5Y_2Y']=fwd(famaBlissBase['T2.0'], famaBlissBase['T7.0'],  2,  7) - famaBlissBase['T5.0']
famaBlissBase['ES_5Y_3Y']=fwd(famaBlissBase['T3.0'], famaBlissBase['T8.0'],  3,  8) - famaBlissBase['T5.0']
famaBlissBase['ES_5Y_4Y']=fwd(famaBlissBase['T4.0'], famaBlissBase['T9.0'],  4,  9) - famaBlissBase['T5.0']
famaBlissBase['ES_5Y_5Y']=fwd(famaBlissBase['T5.0'], famaBlissBase['T10.0'],  5,  10) - famaBlissBase['T5.0']





#%%
# http://www.statsmodels.org/dev/stats.html
import statsmodels.api as sm # import statsmodels 
import statsmodels.stats as st
from statsmodels.tsa.stattools import adfuller
from pandas.plotting import autocorrelation_plot

def signif(coef, se):
    """Significance stars for a coefficient given its (robust) standard error,
    using two-sided normal critical values:
    |z| > 2.58 -> '***' (1%), |z| > 1.96 -> '**' (5%), |z| > 1.65 -> '*' (10%).
    e.g. signif(0.05, 0.01) -> '***' since |z| = 5 > 2.58."""
    zscore = abs(coef / se)
    if zscore > 2.58:
        stars = '***'
    elif zscore > 1.96:
        stars = '**'
    elif zscore > 1.65:
        stars = '*'
    else:
        stars = '-'
    return stars


# Reproducing the first part of Table 3 in Fama & Bliss (1987) for Polish government bonds

# ES_kY_nY / RS_kY_nY: yield tenor k = 1..5Y, forecast horizon n = 1..5Y; the sample for
# horizon n ends n years before the end of the data, so that realised values exist
ends = ['2018-03-02', '2017-03-01', '2016-03-01', '2015-03-01', '2014-03-01']
listX, listY, listEnd = [], [], []
for k in range(1, 6):
    for n, end in zip(range(1, 6), ends):
        listX.append('ES_%dY_%dY' % (k, n))
        listY.append('RS_%dY_%dY' % (k, n))
        listEnd.append(end)

table3=[]

for i,j, t in zip(listX, listY, listEnd):
    X = famaBlissBase[i][:t] ## X input variables (or independent variables)
    y = famaBlissBase[j][:t] ## Y output/dependent variable
    X = sm.add_constant(X) ## let's add an intercept (beta_0) to our model
    model = sm.OLS(y, X).fit() ## sm.OLS(output, input)     #predictions = model.predict(X)

    ##   Sandwich Robust Covariances -> new S.E.
    # getting new StdErrors of the parameters that are robust to heteroscedasticity and autocorrelation in the errors
    se_adj=np.sqrt(np.diag(st.sandwich_covariance.cov_hac(model)))    # or st.sandwich_covariance.cov_white_simple(model)
    #model.resid.plot.kde(bw_method=0.3)
    #famaBlissBase[j][:t].plot.kde(); famaBlissBase[i][:t].plot.kde()

    # p-value of the Augmented Dickey-Fuller test for a unit root
    #print(adfuller(famaBlissBase[i][:t])[1])
    #print(adfuller(famaBlissBase[j][:t])[1])
    
    #fig, ax = plt.subplots(figsize=(10,8))
    #autocorrelation_plot(model.resid)
    
    table3.append([
        j, i, t,
        round(model.params[0], 4), round(se_adj[0], 4), signif(model.params[0], se_adj[0]),
        round(model.params[1], 4), round(se_adj[1], 4), signif(model.params[1], se_adj[1]),
        round(model.rsquared, 4),
        round(model.resid.autocorr(1*252), 2), round(model.resid.autocorr(2*252), 2),
        round(model.resid.autocorr(3*252), 2), round(model.resid.autocorr(4*252), 2),
        round(model.resid.autocorr(5*252), 2),
    ])

tab3=pd.DataFrame(data=table3, columns=['Y', 'X', 'TS end','a1', 'se_adj(a1)','a_snf', 'b1', 'se_adj(b1)','b_snf' ,'r^2', 'resid_auto_1Y', 'resid_auto_2Y', 'resid_auto_3Y', 'resid_auto_4Y', 'resid_auto_5Y'])
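
# An equivalent route to HAC-robust standard errors directly in statsmodels (a sketch;
# the maxlags value below is a hypothetical choice, cov_hac's default differs):
#
# model_hac = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 252})
# print(model_hac.bse)   # Newey-West standard errors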







#%% Regression 2 FAMA BLISS




# Realised 1Y-horizon returns, in excess of the 1Y yield, on bonds of 2..11Y initial maturity
#  (columns labelled by remaining maturity at the horizon, 1..10Y)
famaBlissBase['HR_1Y_1Y']=fwd(famaBlissBase['T1.0_shifted_1Y'], famaBlissBase['T2.0'],  1,  2) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_2Y']=fwd(famaBlissBase['T2.0_shifted_1Y'], famaBlissBase['T3.0'],  2,  3) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_3Y']=fwd(famaBlissBase['T3.0_shifted_1Y'], famaBlissBase['T4.0'],  3,  4) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_4Y']=fwd(famaBlissBase['T4.0_shifted_1Y'], famaBlissBase['T5.0'],  4,  5) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_5Y']=fwd(famaBlissBase['T5.0_shifted_1Y'], famaBlissBase['T6.0'],  5,  6) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_6Y']=fwd(famaBlissBase['T6.0_shifted_1Y'], famaBlissBase['T7.0'],  6,  7) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_7Y']=fwd(famaBlissBase['T7.0_shifted_1Y'], famaBlissBase['T8.0'],  7,  8) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_8Y']=fwd(famaBlissBase['T8.0_shifted_1Y'], famaBlissBase['T9.0'],  8,  9) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_9Y']=fwd(famaBlissBase['T9.0_shifted_1Y'], famaBlissBase['T10.0'], 9,  10) - famaBlissBase['T1.0']
famaBlissBase['HR_1Y_10Y']=fwd(famaBlissBase['T10.0_shifted_1Y'], famaBlissBase['T11.0'], 10,  11) - famaBlissBase['T1.0']
# Expected spreads of the 1Y rate starting 6..10Y ahead, i.e. Forward - Spot
famaBlissBase['ES_1Y_6Y']=fwd(famaBlissBase['T6.0'], famaBlissBase['T7.0'],  6,  7) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_7Y']=fwd(famaBlissBase['T7.0'], famaBlissBase['T8.0'],  7,  8) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_8Y']=fwd(famaBlissBase['T8.0'], famaBlissBase['T9.0'],  8,  9) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_9Y']=fwd(famaBlissBase['T9.0'], famaBlissBase['T10.0'],  9,  10) - famaBlissBase['T1.0']
famaBlissBase['ES_1Y_10Y']=fwd(famaBlissBase['T10.0'], famaBlissBase['T11.0'], 10,  11) - famaBlissBase['T1.0']


# Realised 2Y-horizon returns, in excess of the 2Y yield, on bonds of 3..11Y initial maturity (labels 1..9Y)
fbb_spot=famaBlissBase['T2.0']
famaBlissBase['HR_2Y_1Y']=fwd(famaBlissBase['T1.0_shifted_2Y'], famaBlissBase['T3.0'],  1,  3) - fbb_spot
famaBlissBase['HR_2Y_2Y']=fwd(famaBlissBase['T2.0_shifted_2Y'], famaBlissBase['T4.0'],  2,  4) - fbb_spot
famaBlissBase['HR_2Y_3Y']=fwd(famaBlissBase['T3.0_shifted_2Y'], famaBlissBase['T5.0'],  3,  5) - fbb_spot
famaBlissBase['HR_2Y_4Y']=fwd(famaBlissBase['T4.0_shifted_2Y'], famaBlissBase['T6.0'],  4,  6) - fbb_spot
famaBlissBase['HR_2Y_5Y']=fwd(famaBlissBase['T5.0_shifted_2Y'], famaBlissBase['T7.0'],  5,  7) - fbb_spot
famaBlissBase['HR_2Y_6Y']=fwd(famaBlissBase['T6.0_shifted_2Y'], famaBlissBase['T8.0'],  6,  8) - fbb_spot
famaBlissBase['HR_2Y_7Y']=fwd(famaBlissBase['T7.0_shifted_2Y'], famaBlissBase['T9.0'],  7,  9) - fbb_spot
famaBlissBase['HR_2Y_8Y']=fwd(famaBlissBase['T8.0_shifted_2Y'], famaBlissBase['T10.0'], 8,  10) - fbb_spot
famaBlissBase['HR_2Y_9Y']=fwd(famaBlissBase['T9.0_shifted_2Y'], famaBlissBase['T11.0'], 9,  11) - fbb_spot
# Expected spreads of the 2Y rate starting 6..9Y ahead, i.e. Forward - Spot
famaBlissBase['ES_2Y_6Y']=fwd(famaBlissBase['T6.0'], famaBlissBase['T8.0'],  6,  8) - fbb_spot
famaBlissBase['ES_2Y_7Y']=fwd(famaBlissBase['T7.0'], famaBlissBase['T9.0'],  7,  9) - fbb_spot
famaBlissBase['ES_2Y_8Y']=fwd(famaBlissBase['T8.0'], famaBlissBase['T10.0'],  8,  10) - fbb_spot
famaBlissBase['ES_2Y_9Y']=fwd(famaBlissBase['T9.0'], famaBlissBase['T11.0'],  9,  11) - fbb_spot


# Realised 3Y-horizon returns, in excess of the 3Y yield, on bonds of 4..11Y initial maturity (labels 1..8Y)
fbb_spot=famaBlissBase['T3.0']
famaBlissBase['HR_3Y_1Y']=fwd(famaBlissBase['T1.0_shifted_3Y'], famaBlissBase['T4.0'],  1,  4) - fbb_spot
famaBlissBase['HR_3Y_2Y']=fwd(famaBlissBase['T2.0_shifted_3Y'], famaBlissBase['T5.0'],  2,  5) - fbb_spot
famaBlissBase['HR_3Y_3Y']=fwd(famaBlissBase['T3.0_shifted_3Y'], famaBlissBase['T6.0'],  3,  6) - fbb_spot
famaBlissBase['HR_3Y_4Y']=fwd(famaBlissBase['T4.0_shifted_3Y'], famaBlissBase['T7.0'],  4,  7) - fbb_spot
famaBlissBase['HR_3Y_5Y']=fwd(famaBlissBase['T5.0_shifted_3Y'], famaBlissBase['T8.0'],  5,  8) - fbb_spot
famaBlissBase['HR_3Y_6Y']=fwd(famaBlissBase['T6.0_shifted_3Y'], famaBlissBase['T9.0'],  6,  9) - fbb_spot
famaBlissBase['HR_3Y_7Y']=fwd(famaBlissBase['T7.0_shifted_3Y'], famaBlissBase['T10.0'], 7,  10) - fbb_spot
famaBlissBase['HR_3Y_8Y']=fwd(famaBlissBase['T8.0_shifted_3Y'], famaBlissBase['T11.0'], 8,  11) - fbb_spot
# Expected spreads of the 3Y rate starting 6..8Y ahead, i.e. Forward - Spot
famaBlissBase['ES_3Y_6Y']=fwd(famaBlissBase['T6.0'], famaBlissBase['T9.0'],  6,  9) - fbb_spot
famaBlissBase['ES_3Y_7Y']=fwd(famaBlissBase['T7.0'], famaBlissBase['T10.0'],  7,  10) - fbb_spot
famaBlissBase['ES_3Y_8Y']=fwd(famaBlissBase['T8.0'], famaBlissBase['T11.0'],  8,  11) - fbb_spot



# Realised 4Y-horizon returns, in excess of the 4Y yield, on bonds of 5..11Y initial maturity (labels 1..7Y)
fbb_spot=famaBlissBase['T4.0']
famaBlissBase['HR_4Y_1Y']=fwd(famaBlissBase['T1.0_shifted_4Y'], famaBlissBase['T5.0'],  1,  5) - fbb_spot
famaBlissBase['HR_4Y_2Y']=fwd(famaBlissBase['T2.0_shifted_4Y'], famaBlissBase['T6.0'],  2,  6) - fbb_spot
famaBlissBase['HR_4Y_3Y']=fwd(famaBlissBase['T3.0_shifted_4Y'], famaBlissBase['T7.0'],  3,  7) - fbb_spot
famaBlissBase['HR_4Y_4Y']=fwd(famaBlissBase['T4.0_shifted_4Y'], famaBlissBase['T8.0'],  4,  8) - fbb_spot
famaBlissBase['HR_4Y_5Y']=fwd(famaBlissBase['T5.0_shifted_4Y'], famaBlissBase['T9.0'],  5,  9) - fbb_spot
famaBlissBase['HR_4Y_6Y']=fwd(famaBlissBase['T6.0_shifted_4Y'], famaBlissBase['T10.0'], 6, 10) - fbb_spot
famaBlissBase['HR_4Y_7Y']=fwd(famaBlissBase['T7.0_shifted_4Y'], famaBlissBase['T11.0'], 7, 11) - fbb_spot
# Expected spreads of the 4Y rate starting 6..7Y ahead, i.e. Forward - Spot
famaBlissBase['ES_4Y_6Y']=fwd(famaBlissBase['T6.0'], famaBlissBase['T10.0'],  6,  10) - fbb_spot
famaBlissBase['ES_4Y_7Y']=fwd(famaBlissBase['T7.0'], famaBlissBase['T11.0'],  7,  11) - fbb_spot


# Realised 5Y-horizon returns, in excess of the 5Y yield, on bonds of 6..11Y initial maturity (labels 1..6Y)
fbb_spot=famaBlissBase['T5.0']
famaBlissBase['HR_5Y_1Y']=fwd(famaBlissBase['T1.0_shifted_5Y'], famaBlissBase['T6.0'],  1,  6) - fbb_spot
famaBlissBase['HR_5Y_2Y']=fwd(famaBlissBase['T2.0_shifted_5Y'], famaBlissBase['T7.0'],  2,  7) - fbb_spot
famaBlissBase['HR_5Y_3Y']=fwd(famaBlissBase['T3.0_shifted_5Y'], famaBlissBase['T8.0'],  3,  8) - fbb_spot
famaBlissBase['HR_5Y_4Y']=fwd(famaBlissBase['T4.0_shifted_5Y'], famaBlissBase['T9.0'],  4,  9) - fbb_spot
famaBlissBase['HR_5Y_5Y']=fwd(famaBlissBase['T5.0_shifted_5Y'], famaBlissBase['T10.0'],  5, 10) - fbb_spot
famaBlissBase['HR_5Y_6Y']=fwd(famaBlissBase['T6.0_shifted_5Y'], famaBlissBase['T11.0'], 6, 11) - fbb_spot
# Expected spreads of the 5Y rate starting 6Y ahead, i.e. Forward - Spot
famaBlissBase['ES_5Y_6Y']=fwd(famaBlissBase['T6.0'], famaBlissBase['T11.0'],  6,  11) - fbb_spot




# Reproducing the second part of Table 3 in Fama & Bliss (1987) for Polish government bonds

# ES_hY_nY / HR_hY_nY: horizon h = 1..5Y, label n capped so that h + n <= 11 (the longest
# available tenor); the sample for horizon h ends h years before the end of the data
ends = ['2018-03-02', '2017-03-01', '2016-03-01', '2015-03-01', '2014-03-01']
listX, listY, listEnd = [], [], []
for h, end in zip(range(1, 6), ends):
    for n in range(1, 12 - h):
        listX.append('ES_%dY_%dY' % (h, n))
        listY.append('HR_%dY_%dY' % (h, n))
        listEnd.append(end)

table3=[]

for i,j, t in zip(listX, listY, listEnd):
    X = famaBlissBase[i][:t] ## X input variables (or independent variables)
    y = famaBlissBase[j][:t] ## Y output/dependent variable
    X = sm.add_constant(X) ## let's add an intercept (beta_0) to our model
    model = sm.OLS(y, X).fit() ## sm.OLS(output, input)     #predictions = model.predict(X)

    ##   Sandwich Robust Covariances -> new S.E.
    # getting new StdErrors of the parameters that are robust to heteroscedasticity and autocorrelation in the errors
    se_adj=np.sqrt(np.diag(st.sandwich_covariance.cov_hac(model)))    # or st.sandwich_covariance.cov_white_simple(model)
    #model.resid.plot.kde(bw_method=0.3)
    #famaBlissBase[j][:t].plot.kde(); famaBlissBase[i][:t].plot.kde()

    # p-value of the Augmented Dickey-Fuller test for a unit root
    #print(adfuller(famaBlissBase[i][:t])[1])
    #print(adfuller(famaBlissBase[j][:t])[1])
    
    #fig, ax = plt.subplots(figsize=(10,8))
    #autocorrelation_plot(model.resid)
    
    table3.append([
        j, i, t,
        round(model.params[0], 4), round(se_adj[0], 4), signif(model.params[0], se_adj[0]),
        round(model.params[1], 4), round(se_adj[1], 4), signif(model.params[1], se_adj[1]),
        round(model.rsquared, 4),
        round(model.resid.autocorr(1*252), 2), round(model.resid.autocorr(2*252), 2),
        round(model.resid.autocorr(3*252), 2), round(model.resid.autocorr(4*252), 2),
        round(model.resid.autocorr(5*252), 2),
    ])

tab4=pd.DataFrame(data=table3, columns=['Y', 'X', 'TS end','a2', 'se_adj(a2)','a_snf', 'b2', 'se_adj(b2)','b_snf' ,'r^2', 'resid_auto_1Y', 'resid_auto_2Y', 'resid_auto_3Y', 'resid_auto_4Y', 'resid_auto_5Y'])





writer = pd.ExcelWriter('FamaBlissTable3.xlsx', engine='xlsxwriter')
tab3.to_excel(writer, sheet_name='Regression1')
tab4.to_excel(writer, sheet_name='Regression2')
writer.save()
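# note: in pandas >= 1.5 ExcelWriter.save() is deprecated; use writer.close() instead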









#%%
X = famaBlissBase['ES_2Y_1Y'][:'2017-03-01'] ## X input variables (or independent variables)
y = famaBlissBase['RS_2Y_1Y'][:'2017-03-01'] ## Y output/dependent variable
X = sm.add_constant(X) ## let's add an intercept (beta_0) to our model
model = sm.OLS(y, X).fit() ## sm.OLS(output, input)
predictions = model.predict(X)
print(model.summary())
print(np.sqrt(np.diag(st.sandwich_covariance.cov_hac(model))))
#model.resid.plot.kde(bw_method=0.3)

famaBlissBase['RS_2Y_1Y'][:'2017-03-01'].plot.kde(); famaBlissBase['ES_2Y_1Y'][:'2017-03-01'].plot.kde()

# p-value of the Augmented Dickey-Fuller test for a unit root
print(adfuller(famaBlissBase['ES_2Y_1Y'][:'2017-03-01'])[1])
print(adfuller(famaBlissBase['RS_2Y_1Y'][:'2017-03-01'])[1])


#%%
X = famaBlissBase['ES_5Y_1Y'][:'2014-03-01'] ## X input variables (or independent variables)
y = famaBlissBase['RS_5Y_1Y'][:'2014-03-01'] ## Y output/dependent variable
X = sm.add_constant(X) ## let's add an intercept (beta_0) to our model
model = sm.OLS(y, X).fit() ## sm.OLS(output, input)
predictions = model.predict(X)
print(model.summary())
print(np.sqrt(np.diag(st.sandwich_covariance.cov_hac(model))))
# model.resid.plot.kde(bw_method=0.3)

famaBlissBase['RS_5Y_1Y'][:'2014-03-01'].plot.kde(); famaBlissBase['ES_5Y_1Y'][:'2014-03-01'].plot.kde()

print(adfuller(famaBlissBase['ES_5Y_1Y'][:'2014-03-01'])[1])
print(adfuller(famaBlissBase['RS_5Y_1Y'][:'2014-03-01'])[1])


#fig, ax = plt.subplots(5,3,figsize=(14,16))
#plt.suptitle('Exp_vs_Real_1Y_2Y', x=0.45, y=1.03, horizontalalignment='left', verticalalignment='top', fontsize = 15)
#
#yrs=['2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']
#
#for i, num in zip(yrs, range(len(yrs))):
#    plt.subplot(5, 3, num+1)
#    plt.plot(famaBlissBase[i].index, famaBlissBase['RS_1Y_2Y'][i], label='real')
#    plt.plot(famaBlissBase[i].index, famaBlissBase['ES_1Y_2Y'][i], label='expe')
#    plt.title(i)
#    plt.tight_layout()
#    plt.grid()
#
#plt.legend()
#plt.savefig('Exp_vs_Real_1Y_2Y.pdf', bbox_inches='tight')
#plt.show()



#%%









#%%

# LAGGED yields - these are rates from the PAST, i.e. T3.0_lagged_2Y is the 3Y yield observed
# 2 years before the index date
# the interesting case is the one where the tenor of the past yield exceeds the lag period,
#    i.e. the yield's original interest period is still running when viewed from the index date
    
lista_tenorow_years_lagged_1Y=[x+'_lagged_1Y' for x in lista_tenorow_years ]
lista_tenorow_years_lagged_2Y=[x+'_lagged_2Y' for x in lista_tenorow_years ]
lista_tenorow_years_lagged_3Y=[x+'_lagged_3Y' for x in lista_tenorow_years ]
lista_tenorow_years_lagged_4Y=[x+'_lagged_4Y' for x in lista_tenorow_years ]
lista_tenorow_years_lagged_5Y=[x+'_lagged_5Y' for x in lista_tenorow_years ]
lista_list_lag=[lista_tenorow_years_lagged_1Y, lista_tenorow_years_lagged_2Y, lista_tenorow_years_lagged_3Y, lista_tenorow_years_lagged_4Y, lista_tenorow_years_lagged_5Y]


for i in range(5):
    techDF=pd.DataFrame(tenorNSSbase4.shift(+365*(i+1), freq='D')[lista_tenorow_years])
    techDF.columns=lista_list_lag[i]
    famaBlissBase = pd.merge_asof(left=famaBlissBase,right=techDF,right_index=True,left_index=True,direction='nearest',tolerance=tol)
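
# A sketch of the selection implied by the note above (hypothetical helper, assuming
# integer tenors T1.0..T11.0: keep only lagged columns whose tenor exceeds the lag):
#
# valid_lagged = ['T%d.0_lagged_%dY' % (t, l)
#                 for l in range(1, 6)
#                 for t in range(l + 1, 12)]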













## Sandbox:
#bondbase_add4.to_hdf('bondbase_add_final5.h5', 'df') 
#lista_nazw_tenorow_shifted=['T'+str(x) for x in lista_yfrac_tenorow ]









#%%
from pandas.plotting import autocorrelation_plot, scatter_matrix
f, ax = plt.subplots(figsize=(11, 9))
autocorrelation_plot(tenorNSSbase3['vix_close'].dropna())



#f, ax = plt.subplots(figsize=(11, 9))
scatter_matrix(tenorNSSbase3[list_param_names], alpha=0.2, figsize=(16, 16), diagonal='kde')

#%%
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(np.array(tenorNSSbase3['vix_close'].dropna()))
plt.show()


plot_acf(np.array(tenorNSSbase3['slope10_05'].dropna()))
plt.show()




#%%

import statsmodels.api as sm

#tttt=tenorNSSbase3.loc[:,['T2.0', 'vol_r10_3m']].dropna()

tttt=tenorNSSbase3[list_additional+ lista_nazw_tenorow_wybranych].copy().dropna()
y = tttt['T10.0']
X = tttt.drop(columns=['T10.0'])
X = sm.add_constant(X)
#y = tenorNSSbase3['vol_r10_3m']

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()
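
# With many correlated tenor regressors on the right-hand side a collinearity check may
# be informative (a sketch; variance_inflation_factor is part of statsmodels):
#
# from statsmodels.stats.outliers_influence import variance_inflation_factor
# vifs = pd.Series([variance_inflation_factor(X.values, k) for k in range(X.shape[1])],
#                  index=X.columns)
# print(vifs.sort_values(ascending=False))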

















#%%
fig, ax = plt.subplots(figsize=(12,8))
rr=tenorNSSbase['T5.0'].pct_change()
#plt.plot(tenorNSSbase['date'],rr)
ss1m=rr.rolling(21).std() * np.sqrt(252)   # ~1-month window (21 trading days), annualised
ss1q=rr.rolling(126).std() * np.sqrt(252)  # ~6-month window (126 trading days), annualised
#plt.plot(tenorNSSbase['date'],ss1m)
plt.plot(tenorNSSbase['date'],ss1q)
plt.plot(daneRTS['date'],daneRTS.PL6MX5YA/100)
plt.grid()
plt.title('5Y yield vol annualised ')
plt.legend([ '6M window', 'Swptn 6Mx5Y'])
plt.savefig('Vol5Y.pdf', bbox_inches='tight')


#%%
fig, ax = plt.subplots(figsize=(12,8))
rr=tenorNSSbase['T1.0'].pct_change()
#plt.plot(tenorNSSbase['date'],rr)
ss1m=rr.rolling(21).std() * np.sqrt(252)   # ~1-month window (21 trading days), annualised
ss1q=rr.rolling(126).std() * np.sqrt(252)  # ~6-month window (126 trading days), annualised
#plt.plot(tenorNSSbase['date'],ss1m)
plt.plot(tenorNSSbase['date'],ss1q)
plt.plot(daneRTS['date'],daneRTS.PL6MX1YA/100)
plt.grid()
plt.title('1Y yield vol annualised ')
plt.legend(['6M window', 'Swptn 6Mx1Y'])
plt.savefig('Vol1Y.pdf', bbox_inches='tight')




#%%
#cor_mat1 = np.corrcoef(np.array(tenorNSSbase.loc[tenorNSSbase.date.dt.year==2006, lista_nazw_tenorow]).T)

#ff=np.array(tenorNSSbase.loc[tenorNSSbase.date.dt.year==2017, lista_nazw_tenorow])

#tenorNSSbase.date=='2019-02-19'
#tenorNSSbase.iloc[tenorNSSbase.loc[tenorNSSbase.date=='2019-02-19'], 18:40]
#tenorNSSbase.loc[tenorNSSbase.date.year=='2019']
#tenorNSSbase.loc[tenorNSSbase.date.dt.year==2019]
#tenorNSSbase.loc[tenorNSSbase.date.dt.year==2019, lista_nazw_tenorow]

#%% GRAPHS


TT= pd.read_hdf('bondbase_add_final4.h5')

fig, ax = plt.subplots(figsize=(8,12))

xx=TT.yr_frac_from_0TD.tolist()
yy=TT.yr_frac.tolist()
ww=TT.weight_OA.tolist()

ax.set_yticks([0, 1, 3, 6, 12], minor=False)
#ax.set_yticklabels(['a','b','c','d','e'])
#ax.set_yticks([0.3, 0.55, 0.7], minor=True)
ax.yaxis.grid(True, which='major')
#ax.yaxis.grid(True, which='minor')

ax.set_xticks([0, 2,4,6,8,10,12,14], minor=False)
ax.set_xticklabels(['2005','2007','2009','2011','2013', '2015', '2017', '2019'])
ax.xaxis.grid(True, which='major')

plt.scatter(xx, yy, marker='o', s=[x * 100 for x in ww])
#plt.grid()

plt.savefig('testing.pdf', bbox_inches='tight')
plt.show()




#%%

fig, ax = plt.subplots(figsize=(10,12))

xx=TT.yr_frac_from_0TD.tolist()
yy=TT.yr_frac.tolist()
ww=TT.weight_OA.tolist()
ytm=TT.ytm_fix.tolist()

ax.set_yticks([0, 1, 3, 6, 12], minor=False)
ax.yaxis.grid(True, which='major')
ax.set_xticks([0, 2,4,6,8,10,12,14], minor=False)
ax.set_xticklabels(['2005','2007','2009','2011','2013', '2015', '2017', '2019'])
ax.xaxis.grid(True, which='major')

plt.scatter(xx, yy, marker='o', s=[x * 100 for x in ww], c=ytm, cmap='jet')
cbar= plt.colorbar(shrink=0.5)
cbar.set_label("YTM", labelpad=+1)

plt.savefig('testing2.pdf', bbox_inches='tight')
plt.show()


#%% GRAPHS
# This import registers the 3D projection, but is otherwise unused.

# choose dates to generate yield curve points
dates=['2005-01-03', '2007-01-02', '2009-01-02', '2011-01-03', '2013-01-02', '2015-01-02', '2017-01-02', '2019-01-02']
d_yc=pd.DataFrame()
for dd in dates:
    ddd=TT.xs(slice(dd, dd), level='date')
    d_yc = pd.concat([d_yc,ddd])

xx_YC=d_yc.yr_frac_from_0TD.tolist()
yy_YC=d_yc.yr_frac.tolist()
ytm_YC=d_yc.ytm_fix.tolist()

ytm=TT.ytm_fix.tolist()


fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(111, projection='3d')

ax.scatter(xx, yy, ytm, c='b', marker='.', alpha=0.15)
ax.scatter(xx_YC, yy_YC, ytm_YC, c='r', marker='o', alpha=0.95, s=50)

ax.set_xticks([0, 2,4,6,8,10,12,14], minor=False)
ax.set_xticklabels(['2005','2007','2009','2011','2013', '2015', '2017', '2019'])


ax.set_xlabel('Year')
ax.set_ylabel('Tenor')
ax.set_zlabel('YTM')

plt.savefig('testing3.pdf', bbox_inches='tight')
plt.show()



#%% Drawing yield curves for a certain date
pd.options.mode.chained_assignment = None 

def draw_YC(data):
    
    path1="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN/YieldCurveGraphsNSSworks"
    path2="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN"
    os.chdir(path2)
    
#    TTx= pd.read_hdf('bondbase_add_final4.h5')
#    TT=TTx.xs(slice(data, data), level='date')
    TT=bondbase_add.loc[data]
    #tbb11=tab_price_list_full_nss(data, 2, [0.0496,	0.466,	0.257,	0.0185,	0.00756, 5,51])
    
    
    fig, ax = plt.subplots(figsize=(14,8))
    
    ytm=TT.ytm_fix.tolist()
    xx=TT.yr_frac.tolist()
    ww=TT.weight_OA.tolist()
    
    ytm_ask=TT.ytm_ask.tolist()
    ytm_bid=TT.ytm_bid.tolist()
    
    plt.scatter(xx, ytm, marker='o', s=[x * 300 for x in ww])
    plt.scatter(xx, ytm_bid, marker='_', c='r', s=8)
    plt.scatter(xx, ytm_ask, marker='_', c='r', s=8)
    
    #TT=TT.reset_index()
    bond_list=TT['bond'].tolist()
    
    for i, txt in enumerate(bond_list):
        ax.annotate(txt, (xx[i]+0.3, ytm[i]), fontsize=8)
    
    samplesNSS_SLSQP = pd.read_hdf('NSS_SLSQP__2try_tol5_1_3562.h5')
    vec2=samplesNSS_SLSQP.loc[samplesNSS_SLSQP['date']==data]
    
    #   bondbase_add
    xxx=np.linspace(0.35,12,500)
    yyy_nssq=[NSS_RATES(xxxff, float(vec2.b0), float(vec2.b1), float(vec2.b2), float(vec2.b3), float(vec2.tau1), float(vec2.tau2))[0]*100 for xxxff in xxx]
    plt.scatter(xxx, yyy_nssq, marker='.', c='g', s=2)
    # 0.0496,	0.466,	0.257,	0.0185,	0.00756, 5,51

    tbb=tab_price_list_full_nss(data, 2, [float(vec2.b0), float(vec2.b1), float(vec2.b2), float(vec2.b3), float(vec2.tau1), float(vec2.tau2)])
    ytm_nss=tbb[0].ytm_nss.tolist()
    plt.scatter(xx, ytm_nss, marker='o', facecolors='none', edgecolors='k', s=40)
    
    plt.title('Yield curve as of '+str(data)+'    ObFuVal: '+str(round(float(vec2.ObFuVal),4))+',  Diff_avg:  '+str(round(float(vec2.diff_avg),4))+' pct points')
    plt.grid()
    
    os.chdir(path1)
    plt.savefig('YC_'+str(data)+'.pdf', bbox_inches='tight')
    os.chdir(path2)
    
    plt.show()

draw_YC('2005-03-02')

#%%
os.chdir("/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN")
samplesNSS_SLSQP = pd.read_hdf('NSS_SLSQP__2try_tol5_1_3562.h5')
list_of_dates33=samplesNSS_SLSQP.date.unique()


for dcc in list_of_dates33:
    draw_YC(pd.DatetimeIndex([dcc])[0])

#%% Drawing


# Draw all available ytm/prc points for a certain bond, from the beginning of the database until maturity or the last day of the database
    

def draw_TS_YTM(bond):
    
    path1="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN/BondTimeSeriesGraphs"
    path2="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN"
    os.chdir(path2)
    
    TTx= pd.read_hdf('bondbase_add_final4.h5')
    
    TT=TTx.xs(slice(bond, bond), level='bond')
    
    fig, ax = plt.subplots(figsize=(14,10))
    
    ytm=TT.ytm_fix.tolist()
    TT['new_yr_0td']=TT['yr_frac_from_0TD']+2004.9068493150684932  # decimal-year offset of the sample start (2004-11-26)
    xx=TT.new_yr_0td.tolist()
    
    ytm_ask=TT.ytm_ask.tolist()
    ytm_bid=TT.ytm_bid.tolist()
    
    #ax.set_yticks([0, 1, 3, 6, 12], minor=False)
    #ax.set_yticklabels(['a','b','c','d','e'])
    #ax.set_yticks([0.3, 0.55, 0.7], minor=True)
    #ax.yaxis.grid(True, which='major')
    #ax.yaxis.grid(True, which='minor')
    
    #ax.set_xticks([0, 2,4,6,8,10,12,14], minor=False)
    #ax.set_xticklabels(['2005','2007','2009','2011','2013', '2015', '2017', '2019'])
    #ax.xaxis.grid(True, which='major')
    # s=[x * 300 for x in ww]
    
    plt.scatter(xx, ytm, marker='o', s=20)
    plt.scatter(xx, ytm_bid, marker='_', c='r', s=8)
    plt.scatter(xx, ytm_ask, marker='_', c='r', s=8)
    
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    
    plt.title('Bond: '+str(bond)+' yields time series')
    plt.grid()
    
    os.chdir(path1)
    plt.savefig('TS_YTM_'+str(bond)+'.pdf', bbox_inches='tight')
    os.chdir(path2)
    
    plt.show()



#%%
os.chdir("/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN")
TTx= pd.read_hdf('bondbase_add_final4.h5')
liss33=TTx.index.unique(level='bond')

#%%
for bondb in liss33:
    draw_TS_YTM(bondb)

#%%
    


def draw_TS_BA_YTM(bond):
    
    path1="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN/BondBidAskGraphs"
    path2="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN"
    os.chdir(path2)
    
    TTx= pd.read_hdf('bondbase_add_final4.h5')
    
    TT=TTx.xs(slice(bond, bond), level='bond')
    
    fig, ax = plt.subplots(figsize=(14,10))
    
    TT['new_yr_0td']=TT['yr_frac_from_0TD']+2004.9068493150684932  # decimal-year offset of the sample start (2004-11-26)
    xx=TT.new_yr_0td.tolist()
    
    TT['BAspr']=TT.ytm_bid - TT.ytm_ask
    ytm_BAS=TT.BAspr.tolist()

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    
    plt.scatter(xx, ytm_BAS, marker='o', s=20)
#    plt.scatter(xx, ytm_bid, marker='_', c='r', s=8)
#    plt.scatter(xx, ytm_ask, marker='_', c='r', s=8)
    
    plt.title('Bond: '+str(bond)+' yield bid-ask spreads time series')
    plt.grid()
    
    os.chdir(path1)
    plt.savefig('TS_YTM_BA_'+str(bond)+'.pdf', bbox_inches='tight')
    os.chdir(path2)
    
    plt.show()




#%% Drawing all bid-ask spreads for all bonds
os.chdir("/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN")
TTx= pd.read_hdf('bondbase_add_final5.h5')
liss33=TTx.index.unique(level='bond')
#%%

for bondb in liss33:
    draw_TS_BA_YTM(bondb)



#%%

bonds2YRlist=['OK0805','OK0406','OK0806','OK1206', 'OK0407','OK0807', 'OK1207','OK0408', 'OK0808', 'OK1208', 'OK0709', 'OK0710', 'OK0711', 'OK0112','OK0712', 'OK1012', 'OK0113', 'OK0713', 'OK0114', 'OK0714','OK0715', 'OK0116', 'OK0716', 'OK0717', 'OK1018','OK0419', 'OK0720', 'OK0521']
bonds5YRlist=['PS1005', 'PS0206', 'PS0506', 'PS1106', 'PS0507', 'PS0608', 'PS0605', 'PS0310', 'PS0511', 'PS0412', 'PS0413',  'PS0414', 'PS0415', 'PS0416', 'PS1016', 'PS0417', 'PS0418', 'PS0718',  'PS0719','PS0420', 'PS0421', 'PS0721', 'PS0422', 'PS0123',  'PS0424']
bonds10YRlist= ['DS0509', 'DS1109', 'DS1110', 'DS1013','DS1015','DS1017', 'DS1019', 'DS1020', 'DS1021', 'DS1023', 'DS0725', 'DS0726', 'DS0727']
bonds20YRlist= ['WS0922', 'WS0429','WS0428']
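# Polish Treasury series grouped by original-maturity segment: OK = 2Y zero-coupon, PS = 5Y, DS = 10Y, WS = 20Y+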

        

#%% Draw bid-ask spreads for a list of bonds

def draw_TS_BA_YTM_all(bondlist, filename):
    
    path1="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN/BondBidAskGraphs"
    path2="/Users/marcindec/PycharmProjects/YieldCurveNSS_PLN"
    os.chdir(path2)
    
    TT= pd.read_hdf('bondbase_add_final4.h5')
    
    TT=TT.loc[pd.IndexSlice[:,:,bondlist],:]
    
    fig, ax = plt.subplots(figsize=(14,10))
    
    TT['new_yr_0td']=TT['yr_frac_from_0TD']+2004.9068493150684932
    TT['BAspr']=TT.ytm_bid - TT.ytm_ask
    
    ytm_BAS=TT.BAspr.tolist()       
    xx=TT.yr_frac.tolist()
    
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    
    plt.scatter(xx, ytm_BAS, marker='o', s=20)

    
    plt.title('Bid-ask spreads for selected bonds')
    plt.grid()
    
    os.chdir(path1)
    plt.savefig('TS_YTM_BA_all'+str(filename)+'.pdf', bbox_inches='tight')
    os.chdir(path2)
    
    plt.show()
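
# Example usage with the maturity buckets defined above (the filename suffixes are arbitrary tags):
# draw_TS_BA_YTM_all(bonds2YRlist, '2Y')
# draw_TS_BA_YTM_all(bonds5YRlist, '5Y')
# draw_TS_BA_YTM_all(bonds10YRlist, '10Y')
# draw_TS_BA_YTM_all(bonds20YRlist, '20Y')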

#%% Adding to bondbase
    

#bondbase_add4= pd.read_hdf('bondbase_add_final4.h5')
#bondbase_add4 = bondbase_add4.reset_index();
#bondbase_add4=bondbase_add4.set_index(['date']);
#
#lkj2=[]
#for i in range(len(bondbase_add4)):
#    lkj2.append(cpn_period_table(bondbase_add4['bond'][i]).beg[0])
#
#bondbase_add4['first_issue']=lkj2
#bondbase_add4['yr_frac_first']=(bondbase_add4.index - bondbase_add4['first_issue']).dt.days/365
#
#lkj3=[]
#for i in range(len(bondbase_add4)):
#    lkj3.append(outstanding(bondbase_add4['bond'][i], pd.Timestamp(bondbase_add4.index[i])))
#
#bondbase_add4['outstand_amnt']=lkj3
##bondbase_add4.to_hdf('bondbase_add_final5.h5', 'df') 


#%%

#bondbase_add4['prev_ytm_fix'] = bondbase_add4.groupby('bond')['ytm_fix'].shift()
#bondbase_add4['ytm_fix_chng']=bondbase_add4['prev_ytm_fix']-bondbase_add4['ytm_fix']
#bondbase_add4.to_hdf('bondbase_add_final5.h5', 'df') 

#start_date='2004-11-26'
#end_date='2005-12-30'
#bondd='PS0506'
#mask = (bondbase_add4.index > start_date) & (bondbase_add4.index <= end_date) & (bondbase_add4['bond']==bondd)
#df33 = bondbase_add4.loc[mask]






    
#writer = pd.ExcelWriter('bondbase_add.xlsx', engine='xlsxwriter')
#TT.to_excel(writer, sheet_name='weights')
#writer.save()