View
224
Download
0
Category
Preview:
Citation preview
8/8/2019 piyush sachdeva - Homework 5
1/48
Lucas Triana
Part 1
Homework 5
# Load the bond trade data (comma-separated file with a header row).
bonddata <- read.table(
  "http://www.stat.cmu.edu/~cschafer/MSCF/bonddata.txt",
  sep = ",", header = TRUE
)

# Build the modelling data frame from the columns of interest. with() looks the
# column names up inside bonddata, so attach(bonddata) is not needed; cbind()
# is kept so the columns are combined exactly as before.
newdata <- with(bonddata, as.data.frame(cbind(
  weight, current_coupon, time_to_maturity, reporting_delay, trade_size,
  curve_based_price, received_time_diff_last1, trade_price_last1,
  trade_size_last1, curve_based_price_last1, is_callable, trade_type,
  trade_type_last1, trade_price
)))

# Remove influential observation 1457.
newdata <- as.data.frame(newdata[-1457, ])

# Initial histograms of weight and time_to_maturity.
hist(newdata$weight, main = "Weight")
hist(newdata$time_to_maturity, main = "Time to maturity")
8/8/2019 piyush sachdeva - Homework 5
2/48
hist(newdata$trade_size, main="Trade size")
hist(newdata$trade_size_last1, main="Trade size last 1")
8/8/2019 piyush sachdeva - Homework 5
3/48
# Log transformation of the previous variables (to spread the data more evenly).
log_weight <- log(newdata$weight)
hist(log_weight, main = "log Weight")

# NOTE(review): log_time_to_maturity and log_trade_size are used when the
# transformed data frame is assembled, but their definitions do not appear in
# this listing. They are reconstructed here by analogy with log_weight --
# confirm against the original script.
log_time_to_maturity <- log(newdata$time_to_maturity)
hist(log_time_to_maturity, main = "log Time to maturity")
log_trade_size <- log(newdata$trade_size)
hist(log_trade_size, main = "log Trade size")
8/8/2019 piyush sachdeva - Homework 5
4/48
8/8/2019 piyush sachdeva - Homework 5
5/48
log_trade_size_last1 <- log(newdata$trade_size_last1)
hist(log_trade_size_last1, main = "log Trade size last 1")

# Turn reporting_delay and received_time_diff_last1 into categorical variables
# by binning each one at the break points given below.
categorical_reporting_delay <- as.factor(
  cut(newdata$reporting_delay, c(-Inf, 2, 10, 100, Inf))
)
categorical_received_time_diff_last1 <- as.factor(
  cut(newdata$received_time_diff_last1, c(-Inf, 500, 75000, 4000000, Inf))
)

# gam() used for the fits below comes from mgcv.
library(mgcv)
## Loading required package: nlme## This is mgcv 1.8-3. For overview type 'help("mgcv-package")'.
8/8/2019 piyush sachdeva - Homework 5
6/48
#assembling the data frame with the transformed variables attach(newdata)
## The following objects are masked from bonddata:#### current_coupon, curve_based_price, curve_based_price_last1,## is_callable, received_time_diff_last1, reporting_delay,
## time_to_maturity, trade_price, trade_price_last1, trade_size,## trade_size_last1, trade_type, trade_type_last1, weight
# Assemble the data frame of transformed variables. with(newdata, ...) finds
# the untransformed columns inside newdata, while the log_* and categorical_*
# vectors are taken from the workspace. cbind() produces a numeric matrix, so
# the factors enter as their integer level codes.
transformeddata <- with(newdata, as.data.frame(cbind(
  log_weight, current_coupon, log_time_to_maturity,
  categorical_reporting_delay, log_trade_size, curve_based_price,
  categorical_received_time_diff_last1, trade_price_last1,
  log_trade_size_last1, curve_based_price_last1, is_callable, trade_type,
  trade_type_last1, trade_price
)))

# Every column is numeric after cbind(), so the categorical variables are
# explicitly converted back to factors.
factor_columns <- c(
  "categorical_reporting_delay", "categorical_received_time_diff_last1",
  "is_callable", "trade_type", "trade_type_last1"
)
for (column in factor_columns) {
  transformeddata[[column]] <- factor(transformeddata[[column]])
}
8/8/2019 piyush sachdeva - Homework 5
7/48
# Fit the GAM: smooth terms s() for the continuous predictors, plain
# (parametric) terms for the factors.
holdgam <- gam(
  trade_price ~ s(log_weight) + s(current_coupon) + s(log_time_to_maturity) +
    categorical_reporting_delay + s(log_trade_size) + s(curve_based_price) +
    categorical_received_time_diff_last1 + s(trade_price_last1) +
    s(log_trade_size_last1) + s(curve_based_price_last1) + is_callable +
    trade_type + trade_type_last1,
  data = transformeddata
)
summary(holdgam)
#### Family: gaussian## Link function: identity#### Formula:## trade_price ~ s(log_weight) + s(current_coupon) + s(log_time_to_maturity) +## categorical_reporting_delay + s(log_trade_size) + s(curve_based_price) +## categorical_received_time_diff_last1 + s(trade_price_last1) +## s(log_trade_size_last1) + s(curve_based_price_last1) + is_callable +## trade_type + trade_type_last1
#### Parametric coefficients:## Estimate Std. Error t value## (Intercept) 105.56558 0.22310 473.181## categorical_reporting_delay2 -0.23967 0.09514 -2.519## categorical_reporting_delay3 -0.29058 0.09462 -3.071## categorical_reporting_delay4 -0.42087 0.12867 -3.271## categorical_received_time_diff_last12 -0.33503 0.20980 -1.597## categorical_received_time_diff_last13 -0.55544 0.27039 -2.054## categorical_received_time_diff_last14 -1.47683 0.45408 -3.252## is_callable1 -0.18380 0.12913 -1.423## trade_type3 1.55252 0.09667 16.060## trade_type4 0.73781 0.09016 8.184
## trade_type_last13 -0.94159 0.09679 -9.728## trade_type_last14 -0.42791 0.09327 -4.588## Pr(>|t|)## (Intercept) < 2e-16 ***## categorical_reporting_delay2 0.01187 *## categorical_reporting_delay3 0.00217 **## categorical_reporting_delay4 0.00110 **## categorical_received_time_diff_last12 0.11048## categorical_received_time_diff_last13 0.04012 *## categorical_received_time_diff_last14 0.00117 **## is_callable1 0.15483## trade_type3 < 2e-16 ***## trade_type4 5.63e-16 ***
## trade_type_last13 < 2e-16 ***## trade_type_last14 4.83e-06 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Approximate significance of smooth terms:## edf Ref.df F p-value## s(log_weight) 1.000 1.000 4.562 0.03284 *## s(current_coupon) 2.435 3.108 2.403 0.06374 .
8/8/2019 piyush sachdeva - Homework 5
8/48
## s(log_time_to_maturity) 5.231 6.444 1.751 0.09992 .## s(log_trade_size) 1.681 2.115 12.755 2.28e-06 ***## s(curve_based_price) 8.510 8.936 19.838 < 2e-16 ***## s(trade_price_last1) 4.846 6.217 278.604 < 2e-16 ***## s(log_trade_size_last1) 4.744 5.758 3.180 0.00497 **## s(curve_based_price_last1) 8.948 8.994 6.757 1.47e-09 ***
## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### R-sq.(adj) = 0.985 Deviance explained = 98.5%## GCV = 2.0643 Scale est. = 2.0014 n = 1619
# Difference in predicted trade_price between bonds of trade type 4 and trade
# type 3. The coefficients are looked up by name rather than by position
# (previously elements 10 and 9), so the result stays correct if the set or
# order of parametric terms changes.
gam_coefficients <- coef(holdgam)
type3vs4pricedif <- (gam_coefficients["trade_type4"] -
  gam_coefficients["trade_type3"])

# Plot of fitted relationships between continuous predictors and the response.
plot(holdgam, pages = 4, scale = 0, scheme = 1)
8/8/2019 piyush sachdeva - Homework 5
9/48
8/8/2019 piyush sachdeva - Homework 5
10/48
#residuals vs fitted values dev.off()
8/8/2019 piyush sachdeva - Homework 5
11/48
## null device## 1
plot(as.numeric(holdgam$fitted.values), as.numeric(holdgam$residuals),pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals" ,cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=4,lty=2)print("Analyzing the fitted values vs. the residuals one can argue that there is nostrong evidence of uneven variance across the data. Also it seems reasonable toassume that the data are spread around zero.")
## [1] "Analyzing the fitted values vs. the residuals one can argue that there is nostrong evidence of uneven variance across the data. Also it seems reasonable to assume that the data are spread around zero."
#Actual response vs fitted values plot(as.numeric(holdgam$fitted.values), as.numeric(transformeddata$trade_price),
pch=16,xlab="Fitted Values", ylab="response",main="Fitted values vs. response" ,cex.axis=1.3,cex.lab=1.3)
abline(a=0,b=1,lwd=2,col=4,lty=1)
print("Analyzing the fitted values vs. the response one can argue that the model is reasonably good in its predicting power given that the data are scattered around the perfect agreement line here plotted. This introduces some worries as the prediction istoo good which might indicate possible overffiting or spurious relationships betweenthe regressors and the response.")
## [1] "Analyzing the fitted values vs. the response one can argue that the model isreasonably good in its predicting power given that the data are scattered around theperfect agreement line here plotted. This introduces some worries as the prediction is too good which might indicate possible overffiting or spurious relationships between the regressors and the response."
#qq plot qqnorm(as.numeric(holdgam$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")qqline(as.numeric(holdgam$residuals))print("There is clear evidence of heavy tails on both the lower and upper end of theplot, perhaps modifying the normality assumption would be advisable.")
## [1] "There is clear evidence of heavy tails on both the lower and upper end of theplot, perhaps modifying the normality assumption would be advisable."
# Fit the linear model: the same predictors as the GAM, but every continuous
# predictor enters as a plain linear term (no s() smooths). It is fitted with
# gam() like the smooth model, whose $aic it is compared against further down.
holdlinear <- gam(
  trade_price ~ log_weight + current_coupon + log_time_to_maturity +
    categorical_reporting_delay + log_trade_size + curve_based_price +
    categorical_received_time_diff_last1 + trade_price_last1 +
    log_trade_size_last1 + curve_based_price_last1 + is_callable +
    trade_type + trade_type_last1,
  data = transformeddata
)
summary(holdlinear)
#### Family: gaussian## Link function: identity#### Formula:## trade_price ~ log_weight + current_coupon + log_time_to_maturity +
8/8/2019 piyush sachdeva - Homework 5
12/48
## categorical_reporting_delay + log_trade_size + curve_based_price +## categorical_received_time_diff_last1 + trade_price_last1 +## log_trade_size_last1 + curve_based_price_last1 + is_callable +## trade_type + trade_type_last1#### Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)## (Intercept) 1.24374 0.50380 2.469 0.013664## log_weight 0.12614 0.04974 2.536 0.011304## current_coupon 0.03085 0.02633 1.172 0.241572## log_time_to_maturity 0.03324 0.03924 0.847 0.397070## categorical_reporting_delay2 -0.18564 0.09816 -1.891 0.058791## categorical_reporting_delay3 -0.26398 0.09763 -2.704 0.006924## categorical_reporting_delay4 -0.40421 0.13214 -3.059 0.002258## log_trade_size 0.11996 0.02313 5.187 2.41e-07## curve_based_price 0.42527 0.03433 12.388 < 2e-16## categorical_received_time_diff_last12 -0.35606 0.21590 -1.649 0.099316## categorical_received_time_diff_last13 -0.63120 0.27888 -2.263 0.023748## categorical_received_time_diff_last14 -1.61833 0.46655 -3.469 0.000537## trade_price_last1 0.69842 0.01683 41.500 < 2e-16## log_trade_size_last1 -0.08395 0.02273 -3.693 0.000229## curve_based_price_last1 -0.13939 0.03589 -3.883 0.000107## is_callable1 -0.03317 0.11815 -0.281 0.778937## trade_type3 1.62292 0.09957 16.299 < 2e-16## trade_type4 0.81764 0.09287 8.804 < 2e-16## trade_type_last13 -0.89904 0.09972 -9.016 < 2e-16## trade_type_last14 -0.37862 0.09602 -3.943 8.39e-05#### (Intercept) *## log_weight *## current_coupon## log_time_to_maturity## categorical_reporting_delay2 .## categorical_reporting_delay3 **## categorical_reporting_delay4 **## log_trade_size ***## curve_based_price ***## categorical_received_time_diff_last12 .## categorical_received_time_diff_last13 *## categorical_received_time_diff_last14 ***## trade_price_last1 ***## log_trade_size_last1 ***## curve_based_price_last1 ***## is_callable1## trade_type3 ***## trade_type4 ***## trade_type_last13 ***## trade_type_last14 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1###### R-sq.(adj) = 0.984 Deviance explained = 98.4%## GCV = 2.2 Scale est. = 2.1728 n = 1619
8/8/2019 piyush sachdeva - Homework 5
13/48
plot(as.numeric(holdlinear$fitted.values), as.numeric(holdlinear$residuals),pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals" ,cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=3,lty=2)print("Analyzing the fitted values vs. the residuals one can argue that there is no strong evidence of uneven variance across the data. Also it seems reasonable to assume
that the data are spread around zero.")## [1] "Analyzing the fitted values vs. the residuals one can argue that there is nostrong evidence of uneven variance across the data. Also it seems reasonable to assume that the data are spread around zero."
# Actual response vs fitted values for the linear model. This section
# diagnoses holdlinear, so its fitted values are plotted (the earlier version
# re-plotted the GAM's fitted values here).
plot(as.numeric(holdlinear$fitted.values),
  as.numeric(transformeddata$trade_price),
  pch = 16, xlab = "Fitted Values", ylab = "response",
  main = "Fitted values vs. response", cex.axis = 1.3, cex.lab = 1.3
)
abline(a=0,b=1,lwd=2,col=3,lty=1)print("Analyzing the fitted values vs. the response one can argue that the model is reasonably good in its predicting power given that the data are scattered around the perfect agreement line here plotted. This introduces some worries as the prediction is
too good which might indicate possible overffiting or spurious relationships betweenthe regressors and the response.")
## [1] "Analyzing the fitted values vs. the response one can argue that the model isreasonably good in its predicting power given that the data are scattered around theperfect agreement line here plotted. This introduces some worries as the prediction is too good which might indicate possible overffiting or spurious relationships between the regressors and the response."
# QQ plot of the linear model's residuals (the earlier version re-used the
# GAM's residuals in this section).
qqnorm(as.numeric(holdlinear$residuals),
  cex.axis = 1.3, cex.lab = 1.3, pch = 4, main = "QQ PLot"
)
qqline(as.numeric(holdlinear$residuals))
print("There is clear evidence of heavy tails on both the lower and upper end of the plot, perhaps modifying the normality assumption would be advisable.")
## [1] "There is clear evidence of heavy tails on both the lower and upper end of the plot, perhaps modifying the normality assumption would be advisable."
#AIC comparison holdgam$aic
## [1] 5768.452
holdlinear$aic
## [1] 5872.798
print("Analyzing diagnostic plots for both models and their AIC values (which are dis
tribution dependant and given that the qq plots are showing little reliability on thenormality assumption), one could argue that there is no justification in adding morecomplexity in the model given that the simple linear relationships seem to explain asmuch variability as in the complex model.")
## [1] "Analyzing diagnostic plots for both models and their AIC values (which are distribution dependent and given that the qq plots are showing little reliability on the normality assumption), one could argue that there is no justification in adding mor
8/8/2019 piyush sachdeva - Homework 5
14/48
e complexity in the model given that the simple linear relationships seem to explainas much variability as in the complex model."
8/8/2019 piyush sachdeva - Homework 5
15/48
Lucas Triana
Part 2
Homework 5
# Market symbols whose monthly volatilities are computed further down.
market_symbols <- read.table(
  "http://www.stat.cmu.edu/~cschafer/MSCF/Project/ChallengeSymbols2015.txt"
)

# Including the following predictors:
#   ^GSPC  S&P500
#   ^VIX   CBOE Volatility Index
#   ^VXN   CBOE NASDAQ Volatility Index
#   ^VXO   CBOE S&P 100 Volatility Index
my_symbols <- read.table("mySymbols.txt")

library(quantmod)

market_symbols <- as.vector(market_symbols$V1)
my_symbols <- as.vector(my_symbols$V1)

# Vectors that will store the volatility for the two months, one entry per
# market symbol. They were previously sized with length(data), but `data` is
# only created after the volatilities have been computed.
VOLMONTH1 <- numeric(length(market_symbols))
VOLMONTH2 <- numeric(length(market_symbols))
#vectors that will store the the price values and returns
returns=data.frame(matrix(ncol = length(my_symbols)*4, nrow =
length(market_symbols)))
pricePerasset=data.frame(matrix(ncol = length(my_symbols)*4, nrow =
length(market_symbols)))
#loop that renames the columns for the data frames needed in the regression
j=1
k=1
for (i in 1:length(my_symbols)*4)
{
colname=sprintf("%s_weekly return", my_symbols[k])name=sprintf("%s%d", "X", j)
names(returns)[names(returns)==name]
8/8/2019 piyush sachdeva - Homework 5
16/48
names(returns)[names(returns)==name]
8/8/2019 piyush sachdeva - Homework 5
17/48
#price last month close
pricePerasset[length(market_symbols)-j,6]=asset$VIX.Adjusted[nrow(asset)-21-j] #
21 trading days
#price last quarter close
pricePerasset[length(market_symbols)-j,7]=asset$VIX.Adjusted[nrow(asset)-63-j]
#63 trading days
#price last year closepricePerasset[length(market_symbols)-j,8]=asset$VIX.Adjusted[nrow(asset)-252-j]
#252 trading days
#weekly returns
returns[length(market_symbols)-
j,5]=weeklyReturn(asset,type="log")[length(weeklyReturn(asset,type="log"))-j]#monthly returns
returns[length(market_symbols)-
j,6]=monthlyReturn(asset,type="log")[length(monthlyReturn(asset,type="log"))-j]
#quarterly returns
returns[length(market_symbols)-
j,7]=quarterlyReturn(asset,type="log")[length(quarterlyReturn(asset,type="log"))-j]#yearly returns
returns[length(market_symbols)-
j,8]=quarterlyReturn(asset,type="log")[length(quarterlyReturn(asset,type="log"))-j]
}
# ^VXN data: history from 7560 days before today up to 30 days before today.
asset <- getSymbols(my_symbols[3],
  from = Sys.Date() - 7560, to = Sys.Date() - 30, auto.assign = FALSE
)
print(my_symbols[3])

# The return series do not depend on the loop index, so each is computed once
# instead of twice per iteration.
weekly_returns <- weeklyReturn(asset, type = "log")
monthly_returns <- monthlyReturn(asset, type = "log")
quarterly_returns <- quarterlyReturn(asset, type = "log")
n_prices <- nrow(asset)
n_targets <- length(market_symbols)

# Fill columns 9-12 of pricePerasset and returns from the last row upwards:
# offset j steps one observation/period further back and writes to row
# n_targets - j.
for (j in seq_len(n_targets) - 1) {
  target_row <- n_targets - j

  # price last week close (5 trading days)
  pricePerasset[target_row, 9] <- asset$VXN.Adjusted[n_prices - 5 - j]
  # price last month close (21 trading days)
  pricePerasset[target_row, 10] <- asset$VXN.Adjusted[n_prices - 21 - j]
  # price last quarter close (63 trading days)
  pricePerasset[target_row, 11] <- asset$VXN.Adjusted[n_prices - 63 - j]
  # price last year close (252 trading days)
  pricePerasset[target_row, 12] <- asset$VXN.Adjusted[n_prices - 252 - j]

  # weekly returns
  returns[target_row, 9] <- weekly_returns[length(weekly_returns) - j]
  # monthly returns
  returns[target_row, 10] <- monthly_returns[length(monthly_returns) - j]
  # quarterly returns
  returns[target_row, 11] <- quarterly_returns[length(quarterly_returns) - j]
  # yearly returns
  # NOTE(review): this column is labelled "yearly" but is filled from the
  # quarterly series, so columns 11 and 12 receive the same values. Left
  # unchanged on purpose: a yearly series over this date range would likely
  # have fewer periods than the loop indexes -- confirm the intended
  # definition before changing it.
  returns[target_row, 12] <- quarterly_returns[length(quarterly_returns) - j]
}
# ^VXO data: history from 7560 days before today up to 30 days before today.
asset <- getSymbols(my_symbols[4],
  from = Sys.Date() - 7560, to = Sys.Date() - 30, auto.assign = FALSE
)
print(my_symbols[4])

# The return series do not depend on the loop index, so each is computed once
# instead of twice per iteration.
weekly_returns <- weeklyReturn(asset, type = "log")
monthly_returns <- monthlyReturn(asset, type = "log")
quarterly_returns <- quarterlyReturn(asset, type = "log")
n_prices <- nrow(asset)
n_targets <- length(market_symbols)

# Fill columns 13-16 of pricePerasset and returns from the last row upwards:
# offset j steps one observation/period further back and writes to row
# n_targets - j.
for (j in seq_len(n_targets) - 1) {
  target_row <- n_targets - j

  # price last week close (5 trading days)
  pricePerasset[target_row, 13] <- asset$VXO.Adjusted[n_prices - 5 - j]
  # price last month close (21 trading days)
  pricePerasset[target_row, 14] <- asset$VXO.Adjusted[n_prices - 21 - j]
  # price last quarter close (63 trading days)
  pricePerasset[target_row, 15] <- asset$VXO.Adjusted[n_prices - 63 - j]
  # price last year close (252 trading days)
  pricePerasset[target_row, 16] <- asset$VXO.Adjusted[n_prices - 252 - j]

  # weekly returns
  returns[target_row, 13] <- weekly_returns[length(weekly_returns) - j]
  # monthly returns
  returns[target_row, 14] <- monthly_returns[length(monthly_returns) - j]
  # quarterly returns
  returns[target_row, 15] <- quarterly_returns[length(quarterly_returns) - j]
  # yearly returns
  # NOTE(review): this column is labelled "yearly" but is filled from the
  # quarterly series, so columns 15 and 16 receive the same values. Left
  # unchanged on purpose: a yearly series over this date range would likely
  # have fewer periods than the loop indexes -- confirm the intended
  # definition before changing it.
  returns[target_row, 16] <- quarterly_returns[length(quarterly_returns) - j]
}
# Computes the daily log returns and the volatility of each market symbol for
# two months: the window from 60 to 30 days ago (VOLMONTH1) and the window from
# 32 days ago to today (VOLMONTH2). Volatility is the root mean square of the
# daily log returns in the window.
VOLMONTH1 <- numeric(length(market_symbols))
VOLMONTH2 <- numeric(length(market_symbols))
for (i in seq_along(market_symbols)) {
  month1 <- getSymbols(market_symbols[i],
    from = Sys.Date() - 60, to = Sys.Date() - 30, auto.assign = FALSE
  )
  DAILYRETURNS <- dailyReturn(month1, type = "log")
  volatilitymonth1 <- sqrt(sum(DAILYRETURNS^2) / length(DAILYRETURNS))
  VOLMONTH1[i] <- volatilitymonth1

  month2 <- getSymbols(market_symbols[i],
    from = Sys.Date() - 32, to = Sys.Date(), auto.assign = FALSE
  )
  DAILYRETURNS2 <- dailyReturn(month2, type = "log")
  volatilitymonth2 <- sqrt(sum(DAILYRETURNS2^2) / length(DAILYRETURNS2))
  VOLMONTH2[i] <- volatilitymonth2
}

# Combine predictors and both volatility vectors into one data frame, then
# drop every row that contains a missing value.
data <- data.frame(cbind(
  returns, pricePerasset,
  as.data.frame(VOLMONTH1), as.data.frame(VOLMONTH2)
))
noNAdata <- na.omit(data) # removing N/A data
8/8/2019 piyush sachdeva - Homework 5
20/48
8/8/2019 piyush sachdeva - Homework 5
21/48
8/8/2019 piyush sachdeva - Homework 5
22/48
8/8/2019 piyush sachdeva - Homework 5
23/48
8/8/2019 piyush sachdeva - Homework 5
24/48
8/8/2019 piyush sachdeva - Homework 5
25/48
8/8/2019 piyush sachdeva - Homework 5
26/48
8/8/2019 piyush sachdeva - Homework 5
27/48
8/8/2019 piyush sachdeva - Homework 5
28/48
8/8/2019 piyush sachdeva - Homework 5
29/48
8/8/2019 piyush sachdeva - Homework 5
30/48
# Histograms of every predictor and the response, to see whether any
# transformations are needed.
for (i in seq_len(ncol(noNAdata))) {
  hist(as.matrix(noNAdata[i]), main = colnames(noNAdata)[i], xlab = i)
}

# Columns that, judging from the histograms, might need a transformation.
coltotransform <- c(3, 4, 17, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33)
skew <- numeric(length(coltotransform))

# Columns whose skewness appears strongly positive or negative are candidates
# for a transformation. The original note lists log(x) (if all data are
# positive), x^2 or e^x depending on the skew; only the log transformation is
# applied below.
library(moments)
counter <- 0
for (i in seq_along(coltotransform)) {
  column <- coltotransform[i]
  skew[i] <- skewness(as.matrix(noNAdata[column]), na.rm = FALSE)

  # NOTE(review): the comparison operator was lost in the listing
  # ("min(...)0"); "> 0" is assumed because log() is only defined for
  # strictly positive data -- confirm.
  if (min(noNAdata[column]) > 0) {
    temp <- noNAdata[column]
    noNAdata[column] <- log(noNAdata[column])
    # Title and axis label use the transformed column's own index; indexing
    # colnames() with the loop counter would name the wrong column.
    hist(as.matrix(noNAdata[column]),
      main = colnames(noNAdata)[column], xlab = column
    )
    hist(as.matrix(temp),
      col = 3, lwd = 2, lty = 2,
      main = colnames(noNAdata)[column], xlab = column, add = TRUE
    )
    counter <- counter + 1
    print(counter)
  }
}
8/8/2019 piyush sachdeva - Homework 5
32/48
8/8/2019 piyush sachdeva - Homework 5
33/48
8/8/2019 piyush sachdeva - Homework 5
34/48
8/8/2019 piyush sachdeva - Homework 5
35/48
8/8/2019 piyush sachdeva - Homework 5
36/48
8/8/2019 piyush sachdeva - Homework 5
37/48
# Box-Cox transformation search for the response.
# NOTE(review): boxcox() is the MASS function (car's own variant is spelled
# boxCox) -- confirm that MASS is on the search path when this runs.
library(car)
BC_Transformation <- boxcox(VOLMONTH2 ~ ., data = noNAdata)

# Optimal lambda: the grid value ($x) with the largest log-likelihood ($y).
# which.max() avoids an == comparison on floating-point values and always
# yields exactly one lambda, even if the maximum is tied.
lambda <- BC_Transformation$x[which.max(BC_Transformation$y)]
8/8/2019 piyush sachdeva - Homework 5
38/48
# Stepwise simple linear analysis. Every model call below passes
# data = noNAdata explicitly, so nothing is attached (the previous attach()
# call referred to an object, no.na.data, that is never created).
fullmod <- lm(VOLMONTH2 ~ ., data = noNAdata) # full model
summary(fullmod)

# Stepwise selection in both directions, starting from the full model.
linearmod <- step(fullmod, direction = "both")
summary(linearmod)Call:lm(formula = VOLMONTH2 ~ X.GSPC_weekly.return + X.VXN_weekly.return +
X.VXN_monthly.return + X.VXO_monthly.return + X.GSPC_monthly.price +X.VIX_quarterly.price + X.VXN_yearly.price + X.VXO_monthly.price +VOLMONTH1, data = noNAdata)
Residuals:Min 1Q Median 3Q Max
-0.007720 -0.002627 0.000164 0.001879 0.009041
Coefficients:Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.550e-01 5.348e-02 2.898 0.00564 **X.GSPC_weekly.return -1.538e-01 8.126e-02 -1.893 0.06439 .X.VXN_weekly.return -1.418e-02 9.224e-03 -1.538 0.13068X.VXN_monthly.return -1.194e-02 7.371e-03 -1.620 0.11170X.VXO_monthly.return 1.667e-02 6.542e-03 2.548 0.01409 *X.GSPC_monthly.price -5.809e-05 2.196e-05 -2.645 0.01102 *X.VIX_quarterly.price 7.347e-03 3.705e-03 1.983 0.05308 .X.VXN_yearly.price 3.345e-03 7.823e-04 4.275 9.02e-05 ***X.VXO_monthly.price -1.412e-02 6.311e-03 -2.237 0.02997 *VOLMONTH1 1.249e-02 9.805e-04 12.736 < 2e-16 ***---Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.003814 on 48 degrees of freedomMultiple R-squared: 0.8028, Adjusted R-squared: 0.7658F-statistic: 21.71 on 9 and 48 DF, p-value: 4.576e-14
#Influential observation search
cookd=as.numeric(cooks.distance(linearmod))
plot(cookd,xlab="Observation",ylab="Cook's Distance",main="Cook's Distance")
lines(c(1,length(cookd)),c(4/length(cookd),4/length(cookd)),lwd=2,col=3,lty=2)
-2 -1 0 1 2
-140
-60
log-Likelih
ood
95%
8/8/2019 piyush sachdeva - Homework 5
39/48
#residuals vs fitted values
plot(as.numeric(linearmod$fitted.values),as.numeric(linearmod$residuals),
pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals",cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=3,lty=2)
#residuals over time
plot(linearmod$residuals, xlab="Time",ylab="Residuals", main="Evolution of residuals"
,cex.axis=1.3,cex.lab=1.3,pch=16)
abline(h = 0,lwd=2,col=3,lty=2)
0 10 20 30 40 50 600.00
0.20
Cook's Distance
Observation
Cook'sDis
tance
0.005 0.020-0.0
05
Fitted values vs. Residuals
Fitted Values
es
uas
8/8/2019 piyush sachdeva - Homework 5
40/48
#Actual response vs fitted values
plot(as.numeric(linearmod$fitted.values),as.numeric(noNAdata$VOLMONTH2),
pch=16,xlab="Fitted Values", ylab="response",main="Fitted values vs. response",cex.axis=1.3,cex.lab=1.3)
abline(a=0,b=1,lwd=2,col=3,lty=1)
#qq plot
qqnorm(as.numeric(finalmod$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(finalmod$residuals))
0 20 40 60-0.005
Evolution of residuals
Time
esu
as
0.005 0.0200.0
1
Fitted values vs. response
Fitted Values
res
ponse
8/8/2019 piyush sachdeva - Homework 5
41/48
#fit gam
library(mgcv)
attributes(noNAdata)
# The data frame has few rows relative to its number of predictors (noNAdata is
# 58 x 34), so only the variables retained by the stepwise linear model fitted
# earlier are included in the GAM.
# GAM for VOLMONTH2: six predictors enter through s() smooths, the other three
# (X.VXN_weekly.return, X.VXO_monthly.return, X.VXN_yearly.price) enter as
# plain linear terms.
holdgam <- gam(
  VOLMONTH2 ~ s(X.GSPC_weekly.return) + (X.VXN_weekly.return) +
    s(X.VXN_monthly.return) + (X.VXO_monthly.return) +
    s(X.GSPC_monthly.price) + s(X.VIX_quarterly.price) +
    (X.VXN_yearly.price) + s(X.VXO_monthly.price) + s(VOLMONTH1),
  data = noNAdata
)
summary(holdgam)Family: gaussianLink function: identity
Formula:VOLMONTH2 ~ s(X.GSPC_weekly.return) + (X.VXN_weekly.return) +
s(X.VXN_monthly.return) + (X.VXO_monthly.return) + s(X.GSPC_monthly.price) +
s(X.VIX_quarterly.price) + (X.VXN_yearly.price) + s(X.VXO_monthly.price)+
s(VOLMONTH1)
Parametric coefficients:Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.0255803 0.0110534 -2.314 0.025176 *
X.VXN_weekly.return -0.0076158 0.0091063 -0.836 0.407304X.VXO_monthly.return 0.0147098 0.0061787 2.381 0.021479 *X.VXN_yearly.price 0.0028573 0.0007523 3.798 0.000426 ***---Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:edf Ref.df F p-value
s(X.GSPC_weekly.return) 1.000 1.000 1.922 0.1722s(X.VXN_monthly.return) 1.000 1.000 1.691 0.1999s(X.GSPC_monthly.price) 1.000 1.000 5.514 0.0231 *
-2 -1 0 1 2-0.005
QQ PLot
Theoretical Quantilesampe
u
an
es
8/8/2019 piyush sachdeva - Homework 5
42/48
s(X.VIX_quarterly.price) 1.000 1.000 3.825 0.0565 .s(X.VXO_monthly.price) 1.905 2.342 3.399 0.0355 *s(VOLMONTH1) 2.128 2.616 73.487
8/8/2019 piyush sachdeva - Homework 5
43/48
#residuals vs fitted values
# Close the active graphics device before drawing the diagnostics. dev.off()
# raises an error when only the null device (number 1) is open, so it is
# called only when a real device is active.
if (dev.cur() > 1) {
  dev.off()
}
plot(as.numeric(holdgam$fitted.values), as.numeric(holdgam$residuals),
  pch = 16, xlab = "Fitted Values", ylab = "Residuals",
  main = "Fitted values vs. Residuals", cex.axis = 1.3, cex.lab = 1.3
)
abline(h = 0, lwd = 2, col = 4, lty = 2)

# Actual response vs fitted values, with the line of perfect agreement.
plot(as.numeric(holdgam$fitted.values), as.numeric(noNAdata$VOLMONTH2),
  pch = 16, xlab = "Fitted Values", ylab = "response",
  main = "Fitted values vs. response", cex.axis = 1.3, cex.lab = 1.3
)
abline(a = 0, b = 1, lwd = 2, col = 4, lty = 1)
2.4 3.0-0.010
X.VXO_monthly.pris(X.VXO_monthly.price,1.9
-5.5 -3.5-0.02
0.01
VOLMONTH1s(VOLMONTH1,2.13)
0.005 0.020 0.035-0.006
Fitted values vs. Residuals
Fitted Values
es
uas
8/8/2019 piyush sachdeva - Homework 5
44/48
#qq plot
qqnorm(as.numeric(holdgam$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(holdgam$residuals))
#fitting PPR
# Projection pursuit regression on the same nine predictors, with two terms
# and the "gcvspline" smoother.
holdppr <- ppr(
  VOLMONTH2 ~ (X.GSPC_weekly.return) + (X.VXN_weekly.return) +
    (X.VXN_monthly.return) + (X.VXO_monthly.return) +
    (X.GSPC_monthly.price) + (X.VIX_quarterly.price) +
    (X.VXN_yearly.price) + (X.VXO_monthly.price) + (VOLMONTH1),
  nterms = 2, data = noNAdata, sm.method = "gcvspline"
)
summary(holdppr)Call:ppr(formula = VOLMONTH2 ~ (X.GSPC_weekly.return) + (X.VXN_weekly.return) +
(X.VXN_monthly.return) + (X.VXO_monthly.return) + (X.GSPC_monthly.price)+
(X.VIX_quarterly.price) + (X.VXN_yearly.price) + (X.VXO_monthly.price) +(VOLMONTH1), data = noNAdata, nterms = 2, sm.method = "gcvspline")
Goodness of fit:
0.005 0.020 0.0350.01
Fitted values vs. response
Fitted Values
respon
se
-2 -1 0 1 2-0.006
QQ PLot
Theoretical Quantilesam
pe
uan
es
8/8/2019 piyush sachdeva - Homework 5
45/48
8/8/2019 piyush sachdeva - Homework 5
46/48
#qq plot
qqnorm(as.numeric(holdppr$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(holdppr$residuals))
#fit NNET
library(nnet)
# Neural network with 4 hidden units on the same nine predictors.
# linout = TRUE requests a linear output unit, which a continuous response
# such as VOLMONTH2 needs. The argument was previously misspelled "lineout",
# which nnet() silently ignored, leaving the default logistic output unit.
holdnnet <- nnet(
  VOLMONTH2 ~ (X.GSPC_weekly.return) + (X.VXN_weekly.return) +
    (X.VXN_monthly.return) + (X.VXO_monthly.return) +
    (X.GSPC_monthly.price) + (X.VIX_quarterly.price) +
    (X.VXN_yearly.price) + (X.VXO_monthly.price) + (VOLMONTH1),
  data = noNAdata, size = 4, linout = TRUE, decay = 0.001, maxit = 2000
)
summary(holdnnet)
## Output below was recorded from the run made with the misspelled argument
## (no linear output unit); re-run to refresh it.
## a 9-4-1 network with 45 weights
## options were - decay=0.001
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1 i8->h1 i9->h1
## 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2 i8->h2 i9->h20.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3 i8->h3 i9->h30.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
0.01 0.030.01
Fitted values vs. response
Fitted Values
respon
se
-2 -1 0 1 2-0.003
QQ PLot
Theoretical Quantilesam
pe
uan
es
8/8/2019 piyush sachdeva - Homework 5
47/48
b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4 i8->h4 i9->h40.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
b->o h1->o h2->o h3->o h4->o-0.81 -0.81 -0.81 -0.81 -0.81
#residuals vs fitted values
plot(as.numeric(holdnnet$fitted.values),as.numeric(holdnnet$residuals),
pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals",cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=2,lty=2)
#Actual response vs fitted values
plot(as.numeric(holdnnet$fitted.values),as.numeric(noNAdata$VOLMONTH2),
pch=16,xlab="Fitted Values", ylab="response",main="Fitted values vs. response"
,cex.axis=1.3,cex.lab=1.3)
abline(a=0,b=1,lwd=2,col=2,lty=1)
# QQ plot of the neural network's residuals (the earlier version re-plotted
# the PPR model's residuals in this section).
qqnorm(as.numeric(holdnnet$residuals),
  cex.axis = 1.3, cex.lab = 1.3, pch = 16, main = "QQ PLot"
)
qqline(as.numeric(holdnnet$residuals))
0.01723805 0.01723830-0.0
1
Fitted values vs. Residuals
Fitted Values
es
uas
0.01723805 0.01723830
0.01
Fitted values vs. response
Fitted Values
response
8/8/2019 piyush sachdeva - Homework 5
48/48
save(finalmod, file="LTRIANAL_models.Robj")
-2 -1 0 1 2-0.003
QQ PLot
Theoretical Quantilesampe
u
an
es
Recommended