Using mgwrsar package

The MGWRSAR function requires a dataframe and a matrix of coordinates, and eventually a spatial weight matrix if model include spatiale dependence. The package includes a simulated data example which is used for this vignette.

library(mgwrsar)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
## loading data example
data(mydata)
coords=as.matrix(mydata[,c("x","y")])
## Creating a spatial weight matrix (sparce dgCMatrix) of 8 nearest neighbors with 0 in diagonal
W=kernel_matW(H=4,kernels='rectangle',coord_i=coords,NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T)

Estimation

Estimation of a GWR with a gauss kernel without parallel computation:


### without parallel computing with distance computation for all points
  ptm1<-proc.time()
  model_GWR0<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.13, Model = 'GWR',control=list(SE=T,get_ts=TRUE))
  (proc.time()-ptm1)[3]
#> elapsed 
#>   0.617
  
  summary_mgwrsar(model_GWR0)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords, 
#>     fixed_vars = NULL, kernels = c("gauss"), H = 0.13, Model = "GWR", 
#>     control = list(SE = T, get_ts = TRUE))
#> Model: GWR 
#> Kernels function: gauss 
#> Kernels adaptive: NO 
#> Kernels type: GD 
#> Bandwidth: 0.13 
#> Computation time: 0.615 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept       X1       X2      X3
#> Min.     -2.63740  0.16880 -0.37306 -1.7588
#> 1st Qu.  -1.46100  0.38590  1.52729 -1.3070
#> Median   -1.08812  0.51409  1.94806 -1.0194
#> Mean     -0.97597  0.48879  1.94778 -1.0289
#> 3rd Qu.  -0.62307  0.59454  2.58871 -0.7439
#> Max.      1.06100  0.79570  3.37755 -0.3083
#> Effective degrees of freedom: 955.0674 
#> AICc: 1560.364 
#> Residual sum of squares: 64.0684 
#> RMSE: 0.2531174


### with parallel computing with distance computation for all points
ptm1<-proc.time()
model_GWR0_parallel<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(SE=T,ncore=2,doMC=TRUE))
(proc.time()-ptm1)[3]
#> elapsed 
#>   0.531

# W<-spdep::mat2listw(W)
# 
# gp2mM <- lagsarlm(Y_gwr ~ X1+X2+X3,data=mydata, lW, method="Matrix")
# 
# mdo<-s2sls(Y_gwr ~ X1+X2+X3,data=mydata,Produc,W)
# 
# mm<-lagmess(Y_gwr ~ X1+X2+X3,data=mydata, lW)

## how to speed up computation using rough kernels (0 weights for nearest neighbors > 300th position)
ptm1<-proc.time()
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(SE=TRUE,NN=300))
(proc.time()-ptm1)[3]
#> elapsed 
#>   0.347

summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords, 
#>     fixed_vars = NULL, kernels = c("gauss"), H = 0.03, Model = "GWR", 
#>     control = list(SE = TRUE, NN = 300))
#> Model: GWR 
#> Kernels function: gauss 
#> Kernels adaptive: NO 
#> Kernels type: GD 
#> Bandwidth: 0.03 
#> Computation time: 0.343 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: YES, 300 neighbors / 1000 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -3.793217  0.016931 -0.765588 -2.0329
#> 1st Qu. -1.480176  0.351177  1.095174 -1.2921
#> Median  -0.835614  0.542020  1.631293 -1.0081
#> Mean    -0.830668  0.500057  1.674643 -1.0011
#> 3rd Qu. -0.236283  0.637046  2.453404 -0.7241
#> Max.     2.102565  0.942520  4.035453  0.0457
#> Effective degrees of freedom: 572.4888 
#> AICc: 1179.575 
#> Residual sum of squares: 1.797923 
#> RMSE: 0.04240192

## how to speed up computation using rough kernels ( 0 weights for nearest neighbors > 300th position) 
TP=find_TP(formula = 'Y_gwr~X1+X2+X3', data =mydata,coords=coords,K=10,type='residuals')
# only 90 targets points are used
length(TP)
#> [1] 31

## how to speed up computation using Target points
ptm1<-proc.time()
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(SE=TRUE,TP=TP))
(proc.time()-ptm1)[3]
#> elapsed 
#>   0.182

## how to speed up computation using rough kernels ( 0 weights for nearest neighbors > 300th position)  and Target points

ptm1<-proc.time()
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE,NN=300,TP=TP))
(proc.time()-ptm1)[3]
#> elapsed 
#>   0.174

## how to speed up computation using adapative kernels 
ptm1<-proc.time()
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('bisq'),H=20, Model = 'GWR',control=list(SE=TRUE,adaptive=TRUE))
(proc.time()-ptm1)[3]
#> elapsed 
#>   0.222

## how to speed up computation using adapative kernels and Target points
  ptm1<-proc.time()
  model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('bisq'),H=20, Model = 'GWR',control=list(SE=TRUE,adaptive=TRUE,TP=TP))
  (proc.time()-ptm1)[3]
#> elapsed 
#>    0.17


library(microbenchmark)
res=microbenchmark(MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE)),MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE,NN=300,TP=TP)),times=2)
#> Warning in microbenchmark(MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, :
#> less accurate nanosecond times to avoid potential integer overflows
res
#> Unit: milliseconds
#>                                                                                                                                                                                          expr
#>                     MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords,      fixed_vars = NULL, kernels = c("gauss"), H = 0.1, Model = "GWR",      control = list(SE = TRUE))
#>  MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords,      fixed_vars = NULL, kernels = c("gauss"), H = 0.1, Model = "GWR",      control = list(SE = TRUE, NN = 300, TP = TP))
#>       min       lq     mean   median       uq      max neval cld
#>  616.7437 616.7437 616.8783 616.8783 617.0129 617.0129     2  a 
#>  170.9253 170.9253 171.1031 171.1031 171.2810 171.2810     2   b

res=microbenchmark(MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE)),MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('bisq'),H=20, Model = 'GWR',control=list(SE=TRUE,adaptive=T,TP=TP)),times=2)
res
#> Unit: milliseconds
#>                                                                                                                                                                                            expr
#>                       MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords,      fixed_vars = NULL, kernels = c("gauss"), H = 0.1, Model = "GWR",      control = list(SE = TRUE))
#>  MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords,      fixed_vars = NULL, kernels = c("bisq"), H = 20, Model = "GWR",      control = list(SE = TRUE, adaptive = T, TP = TP))
#>       min       lq     mean   median       uq      max neval cld
#>  478.2233 478.2233 620.7698 620.7698 763.3162 763.3162     2   a
#>  170.0988 170.0988 175.2822 175.2822 180.4655 180.4655     2   a

Summary and plot of mgwrsar object using leaflet:

summary_mgwrsar(model_GWR0)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords, 
#>     fixed_vars = NULL, kernels = c("gauss"), H = 0.13, Model = "GWR", 
#>     control = list(SE = T, get_ts = TRUE))
#> Model: GWR 
#> Kernels function: gauss 
#> Kernels adaptive: NO 
#> Kernels type: GD 
#> Bandwidth: 0.13 
#> Computation time: 0.615 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept       X1       X2      X3
#> Min.     -2.63740  0.16880 -0.37306 -1.7588
#> 1st Qu.  -1.46100  0.38590  1.52729 -1.3070
#> Median   -1.08812  0.51409  1.94806 -1.0194
#> Mean     -0.97597  0.48879  1.94778 -1.0289
#> 3rd Qu.  -0.62307  0.59454  2.58871 -0.7439
#> Max.      1.06100  0.79570  3.37755 -0.3083
#> Effective degrees of freedom: 955.0674 
#> AICc: 1560.364 
#> Residual sum of squares: 64.0684 
#> RMSE: 0.2531174
plot_mgwrsar(model_GWR0,type='B_coef',var='X2')

plot_mgwrsar(model_GWR0,type='t_coef',var='X2')

Plot of the effects of spatially varying coefficients:

plot_effect(model_GWR0,title='Effects')

Estimation of a GWR with spgwr package:

library(spgwr)
mydataSP=mydata
coordinates(mydataSP)=c('x','y')
ptm1<-proc.time()
model_spgwr <- gwr(Y_gwr~X1+X2+X3, data=mydataSP, bandwidth=0.13,hatmatrix=TRUE)
(proc.time()-ptm1)[3]

head(model_spgwr$SDF@data$gwr.e)
model_spgwr

all(abs(model_GWR0$residuals-model_spgwr$SDF@data$gwr.e)<0.00000000001)

[1] -0.1585824 -0.1861144 -0.4450174 -0.1767796 -0.2014330
[6]  0.2670349
Call:
gwr(formula = Y_gwr ~ X1 + X2 + X3, data = mydataSP, bandwidth = 0.13, 
    hatmatrix = TRUE)
Kernel function: gwr.Gauss 
Fixed bandwidth: 0.13 
Summary of GWR coefficient estimates at data points:
                 Min.  1st Qu.   Median  3rd Qu.     Max.  Global
X.Intercept. -2.63740 -1.46100 -1.08812 -0.62307  1.06100 -1.4845
X1            0.16880  0.38590  0.51409  0.59454  0.79570  0.5000
X2           -0.37306  1.52729  1.94806  2.58871  3.37755  2.5481
X3           -1.75876 -1.30704 -1.01941 -0.74386 -0.30828 -1.0762
Number of data points: 1000 
Effective number of parameters (residual: 2traceS - traceS'S): 62.85371    
Effective degrees of freedom (residual: 2traceS - traceS'S): 937.1463 
Sigma (residual: 2traceS - traceS'S): 0.2614678 
Effective number of parameters (model: traceS): 44.93259 
Effective degrees of freedom (model: traceS): 955.0674 
Sigma (model: traceS): 0.2590031 
Sigma (ML): 0.2531174 
AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 186.462 
AIC (GWR p. 96, eq. 4.22): 135.0056 
Residual sum of squares: 64.0684 
Quasi-global R2: 0.9813492 
[1] TRUE

Estimation of a MGWR with stationary intercept using an adapative gaussian kernel (based on 20 nearest neighbors):


model_MGWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars='X3',kernels=c('gauss'),H=20, Model = 'MGWR',control=list(SE=T,adaptive=T))
summary_mgwrsar(model_MGWR)
#> Call:
#> MGWRSAR(formula = "Y_mgwr~X1+X2+X3", data = mydata, coords = coords, 
#>     fixed_vars = "X3", kernels = c("gauss"), H = 20, Model = "MGWR", 
#>     control = list(SE = T, adaptive = T))
#> Model: MGWR 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 0.717 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: X3 
#> -1.00471 
#> Varying parameters: Intercept X1 X2 
#>         Intercept       X1      X2
#> Min.     -2.81276  0.11756 -0.4245
#> 1st Qu.  -1.52541  0.36076  1.3433
#> Median   -0.99545  0.53148  1.7245
#> Mean     -0.86301  0.49545  1.7449
#> 3rd Qu.  -0.38685  0.61695  2.4882
#> Max.      1.22304  0.84347  3.3954
#> Effective degrees of freedom: 932.5146 
#> AICc: 998.574 
#> Residual sum of squares: 18.81683 
#> RMSE: 0.1371744

Estimation of a mgwrsar(0,kc,kv) model with stationary intercept and stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coords=coords, fixed_vars='X2',kernels=c('gauss'),H=20, Model = 'MGWRSAR_0_kc_kv',control=list(SE=F,adaptive=T,W=W))
summary_mgwrsar(mgwrsar_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coords, fixed_vars = "X2", kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_0_kc_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_0_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 0.609 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: X2 lambda 
#> 0.870593 0.4120387 
#> Varying parameters: Intercept X1 X3 
#>         Intercept        X1      X3
#> Min.    -1.254418  0.029588 -1.2702
#> 1st Qu. -0.444425  0.366633 -1.0810
#> Median  -0.033372  0.540500 -1.0026
#> Mean     0.065257  0.498703 -1.0016
#> 3rd Qu.  0.445573  0.627798 -0.9243
#> Max.     2.099437  0.873585 -0.6744
#> Residual sum of squares: 137.9687 
#> RMSE: 0.3714414

Estimation of a mgwrsar(0,0,kv) model with stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_0_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'MGWRSAR_0_0_kv',control=list(SE=F,adaptive=T,W=W))
summary_mgwrsar(mgwrsar_0_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coords, fixed_vars = NULL, kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_0_0_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_0_0_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 0.618 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: lambda 
#> 0.3676871 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -3.763567  0.080428 -2.209824 -1.2308
#> 1st Qu. -1.219268  0.357217  0.706989 -1.0647
#> Median  -0.618269  0.536381  1.425772 -0.9997
#> Mean    -0.513883  0.499074  1.398492 -1.0014
#> 3rd Qu. -0.017472  0.627690  2.638912 -0.9437
#> Max.     2.108716  0.889871  4.369973 -0.6752
#> Residual sum of squares: 151.7064 
#> RMSE: 0.3894951

Estimation of a mgwrsar(1,0,kv) model with all parameter spatially varying using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_1_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(SE=F,adaptive=T,W=W))
summary_mgwrsar(mgwrsar_1_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata, 
#>     coords = coords, fixed_vars = NULL, kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_1_0_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_1_0_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 0.54 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3  
#>         Intercept       X1       X2       X3  lambda
#> Min.     -5.55249  0.11207 -1.20584 -1.86027 -0.2375
#> 1st Qu.  -1.47283  0.36956  0.64156 -1.24644  0.1831
#> Median   -0.75406  0.54369  1.72877 -0.99875  0.3845
#> Mean     -0.79631  0.50721  1.75803 -1.00349  0.3568
#> 3rd Qu.  -0.15439  0.63505  2.86983 -0.71468  0.5382
#> Max.      2.06110  0.95748  5.14466 -0.27205  0.8808
#> Residual sum of squares: 868.1985 
#> RMSE: 0.9317717

Estimation of a mgwrsar(1,kc,kv) model with stationary X1 using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_1_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coords=coords, fixed_vars='X2',kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_kc_kv',control=list(SE=F,adaptive=T,W=W))
summary_mgwrsar(mgwrsar_1_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coords, fixed_vars = "X2", kernels = c("gauss"), 
#>     H = 20, Model = "MGWRSAR_1_kc_kv", control = list(SE = F, 
#>         adaptive = T, W = W))
#> Model: MGWRSAR_1_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 1.364 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: X2 
#> 2.459628 
#> Varying parameters: Intercept X1 X3  
#>         Intercept        X1        X3  lambda
#> Min.    -2.785597  0.064134 -1.373242 -0.6412
#> 1st Qu. -0.783444  0.371202 -1.092831  0.0467
#> Median  -0.454995  0.534294 -1.006845  0.3538
#> Mean    -0.435127  0.501815 -1.009762  0.3553
#> 3rd Qu. -0.111683  0.633466 -0.933401  0.8027
#> Max.     2.392965  0.910200 -0.617579  0.9900
#> Residual sum of squares: 6519.053 
#> RMSE: 2.553244

Estimation of a mgwrsar(1,kc,0) model with stationary X1 using an adapative gaussian kernel (based on 20 nearest neighbors):

mgwrsar_1_kc_0<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coords=coords, fixed_vars=c('Intercept','X1','X2','X3'),kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_kc_0',control=list(SE=F,adaptive=T,W=W))
summary_mgwrsar(mgwrsar_1_kc_0)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coords, fixed_vars = c("Intercept", "X1", "X2", 
#>         "X3"), kernels = c("gauss"), H = 20, Model = "MGWRSAR_1_kc_0", 
#>     control = list(SE = F, adaptive = T, W = W))
#> Model: MGWRSAR_1_kc_0 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 20 
#> Computation time: 1.307 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: Intercept X1 X2 X3 
#> -0.8205521 0.509378 1.69077 -1.035463 
#> Varying parameters: 
#>          lambda
#> Min.    -0.9137
#> 1st Qu.  0.2291
#> Median   0.6009
#> Mean     0.4822
#> 3rd Qu.  0.9900
#> Max.     0.9995
#> Residual sum of squares: 2736.795 
#> RMSE: 1.654326

Prediction

In this example, we use an in-sample of size 800 for model estimation and an out-sample of size 200 for prediction. Note that for model with spatial autocorrelation you must provide a global weight matrix of size 1000, ordered by in-sample then out-sample locations.

For GWR and MGWR_1_0_kv, where all coefficients vary in space, the predictions are carried out using the corresponding model estimate with the out-sample location as target points, so the estimated coefficients are not used: only the call and the data of the estimated model are used. For mixed model that have some constant coefficients (MGWR, MGXWR_0_0_kv, MGXWR_1_kc_kv, MGXWR_1_kc_0), the estimated coefficients are extrapolated using a weighting matrix. By default, the weighting matrix for prediction is based on the expected weights of outsample data if they were had been added to insample data to estimate the corresponding MGWRSAR (see Geniaux 2022 for further detail). The user can also choose to extrapolate the model coefficients using a shepperd kernel with custom number of neighbours or using the same kernel and bandwidth as the estimated model.

Prediction of GWR model using sheppard kernel with 8 neighbors:

length_out=800
index_in=sample(1:1000,length_out)
index_out=(1:1000)[-index_in]

model_GWR_insample<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata[index_in,],coords=coords[index_in,], fixed_vars=NULL,kernels=c('gauss'),H=8, Model = 'GWR',control=list(adaptive=T))
summary_mgwrsar(model_GWR_insample)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata[index_in, ], 
#>     coords = coords[index_in, ], fixed_vars = NULL, kernels = c("gauss"), 
#>     H = 8, Model = "GWR", control = list(adaptive = T))
#> Model: GWR 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 8 
#> Computation time: 0.315 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -3.431046  0.057831 -0.613649 -1.9259
#> 1st Qu. -1.481228  0.347417  1.190307 -1.2686
#> Median  -0.888871  0.545087  1.748743 -1.0012
#> Mean    -0.822152  0.499287  1.704531 -1.0095
#> 3rd Qu. -0.272541  0.628847  2.489687 -0.7335
#> Max.     1.585348  0.897832  3.579135 -0.1883
#> Residual sum of squares: 5.988363 
#> RMSE: 0.08651852

newdata=mydata[index_out,]
newdata_coords=coords[index_out,]
newdata$Y_mgwrsar_1_0_kv=0

Y_pred=predict_mgwrsar(model_GWR_insample, newdata=newdata, newdata_coords=newdata_coords)
head(Y_pred)
#> [1] 0.4664548 2.3094268 4.5104714 1.8733030 3.9355001 2.9333140
head(mydata$Y_gwr[index_out])
#> [1] 0.3224649 2.0656554 4.5648040 1.9159209 3.8348415 2.9941564
sqrt(mean((mydata$Y_gwr[index_out]-Y_pred)^2)) # RMSE
#> [1] 0.13121

model_MGWR_insample<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata[index_in,],coords=coords[index_in,], fixed_vars='X3',kernels=c('gauss'),H=8, Model = 'MGWR',control=list(adaptive=T))
summary_mgwrsar(model_MGWR_insample)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata[index_in, ], 
#>     coords = coords[index_in, ], fixed_vars = "X3", kernels = c("gauss"), 
#>     H = 8, Model = "MGWR", control = list(adaptive = T))
#> Model: MGWR 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 8 
#> Computation time: 0.511 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Constant parameters: X3 
#> -1.006481 
#> Varying parameters: Intercept X1 X2 
#>         Intercept        X1      X2
#> Min.    -3.936929  0.060461 -0.6137
#> 1st Qu. -1.498654  0.346186  1.0902
#> Median  -0.948519  0.541705  1.7289
#> Mean    -0.855135  0.498004  1.7338
#> 3rd Qu. -0.235306  0.644812  2.6282
#> Max.     1.640384  0.897495  3.7508
#> Residual sum of squares: 36.93015 
#> RMSE: 0.214855

newdata=mydata[index_out,]
newdata_coords=coords[index_out,]
newdata$Y_mgwrsar_1_0_kv=0

## Prediction with method_pred='tWtp' 
Y_pred=predict_mgwrsar(model_MGWR_insample, newdata=newdata, newdata_coords=newdata_coords)
#> Warnings: method_pred=='TP' is not implemented for  Model in ('MGWR','MGWRSAR_1_kc_0','MGWRSAR_1_kc_kv','MGWRSAR_0_kc_kv','MGWRSAR_0_0_kv'): automatic swith to method_pred='tWtp_model'
head(Y_pred)
#> [1] 0.4585569 2.7000676 4.7308344 1.7981054 4.0253231 3.0013471
head(mydata$Y_gwr[index_out])
#> [1] 0.3224649 2.0656554 4.5648040 1.9159209 3.8348415 2.9941564
sqrt(mean((mydata$Y_gwr[index_out]-Y_pred)^2)) # RMSE
#> [1] 0.2826883

Prediction of MGWRSAR_1_0_kv model using sheppard kernel with 4 neighbors and Best Linear Unbiased Predictor (BLUP) :


length_out=800
index_in=sample(1:1000,length_out)
index_out=(1:1000)[-index_in]

### Global Spatial Weight matrix W should be ordered by in_sample (S) then out_sample
W_in_out=kernel_matW(H=4,kernels='rectangle',coord_i=rbind(coords[index_in,],coords[index_out,]),NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T)
W_in=W_in_out[1:length(index_in),1:length(index_in)]
W_in=mgwrsar::normW(W_in)
newdata=mydata[index_out,]
newdata_coords=coords[index_out,]


### SAR Model
model_SAR_insample<-MGWRSAR(formula = 'Y_sar~X1+X2+X3', data = mydata[index_in,],coords=coords[index_in,], fixed_vars=NULL,kernels=NULL,H=11, Model = 'SAR',control=list(W=W_in))
model_SAR_insample$RMSE
#> [1] 0.1303956
summary_mgwrsar(model_SAR_insample)
#> Call:
#> MGWRSAR(formula = "Y_sar~X1+X2+X3", data = mydata[index_in, ], 
#>     coords = coords[index_in, ], fixed_vars = NULL, kernels = NULL, 
#>     H = 11, Model = "SAR", control = list(W = W_in))
#> Model: SAR 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: 
#> Kernels adaptive: NO 
#> Kernels type: GD 
#> Bandwidth: 11 
#> Computation time: 0.001 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel:  
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Constant parameters: Intercept X1 X2 X3 lambda 
#> 0.1515706 0.5040549 1.153973 -1.012715 0.3147484 
#> Effective degrees of freedom: 795 
#> Residual sum of squares: 13.6024 
#> RMSE: 0.1303956

## without Best Linear Unbiased Predictor
# prediction YTC
Y_pred=predict_mgwrsar(model_SAR_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='YTC')
head(Y_pred)
#> [1] 2.027912 3.731384 5.910859 2.949735 4.971490 5.277246
RMSE_YTC=sqrt(mean((mydata$Y_sar[index_out]-Y_pred)^2))
RMSE_YTC
#> [1] 0.08441952

## Using Best Linear Unbiased Predictor
Y_pred=predict_mgwrsar(model_SAR_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='BPN')
head(Y_pred)
#> [1] 2.015560 3.680499 5.923186 2.923057 4.940062 5.313581
RMSE_BPN=sqrt(mean((mydata$Y_sar[index_out]-Y_pred)^2))
RMSE_BPN
#> [1] 0.06039921


#### MGWRSAR_1_0_kv
model_MGWRSAR_1_0_kv_insample<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata[index_in,],coords=coords[index_in,], fixed_vars=NULL,kernels=c('gauss'),H=11, Model = 'MGWRSAR_1_0_kv',control=list(W=W_in,adaptive=TRUE,isgcv=F))
model_MGWRSAR_1_0_kv_insample$RMSE
#> [1] 0.708789
summary_mgwrsar(model_MGWRSAR_1_0_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata[index_in, 
#>     ], coords = coords[index_in, ], fixed_vars = NULL, kernels = c("gauss"), 
#>     H = 11, Model = "MGWRSAR_1_0_kv", control = list(W = W_in, 
#>         adaptive = TRUE, isgcv = F))
#> Model: MGWRSAR_1_0_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 11 
#> Computation time: 0.472 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Varying parameters: Intercept X1 X2 X3  
#>          Intercept         X1         X2         X3  lambda
#> Min.    -12.479607   0.077377  -1.241909  -1.984773 -0.4086
#> 1st Qu.  -1.793509   0.356913   0.973790  -1.258653  0.1056
#> Median   -0.874141   0.547879   1.882220  -1.014936  0.2745
#> Mean     -1.075548   0.515188   2.245951  -0.997859  0.2801
#> 3rd Qu.  -0.145199   0.658945   3.482105  -0.688769  0.4424
#> Max.      2.143243   1.040762  12.859764  -0.202572  0.8068
#> Residual sum of squares: 401.9055 
#> RMSE: 0.708789



# prediction with method_pred='tWtp'
Y_pred=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W_in_out,type='BPN',method_pred='tWtp')
head(Y_pred)
#> [1] 0.5246521 3.4805124 5.6597370 2.6974158 6.5664039 4.4513093
RMSE_tWtp_BPN=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred)^2))
RMSE_tWtp_BPN
#> [1] 0.5200496


## Using Best Linear Unbiased Predictor with method_pred='model' 
Y_pred=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W,type='BPN',method_pred='model' )
head(Y_pred)
#> [1] 0.9431835 2.4151135 5.1652828 2.1911860 6.2417757 4.9202018
RMSE_model_BPN=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred)^2))
RMSE_model_BPN
#> [1] 0.9717156

## Using Best Linear Unbiased Predictor with method_pred='sheppard' 
W_out=kernel_matW(H=4,kernels='rectangle',coord_i=coords[index_out,],NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T)
Y_pred=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W,type='BPN',method_pred='sheppard',k_extra=8)
head(Y_pred)
#> [1] 1.318562 2.380605 5.191412 2.228458 6.211813 4.734835
RMSE_sheppard_BPN=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred)^2))
RMSE_sheppard_BPN
#> [1] 0.9695924

Prediction of MGWRSAR_1_kc_kv model using sheppard kernel with 4 neighbors and Best Linear Unbiased Predictor (BLUP) :


length_out=800
index_in=sample(1:1000,length_out)
index_out=(1:1000)[-index_in]

### Global Spatial Weight matrix W should be ordered by in_ sample (S) then out_sample
W=kernel_matW(H=4,kernels='rectangle',coord_i=rbind(coords[index_in,],coords[index_out,]),NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T)

W_in=W[index_in,index_in]
W_in=mgwrsar::normW(W_in)

model_MGWRSAR_1_kc_kv_insample<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata[index_in,],coords=coords[index_in,], fixed_vars='X3',kernels=c('gauss'),H=11, Model = 'MGWRSAR_1_kc_kv',control=list(W=W_in,adaptive=TRUE,isgcv=F))
model_MGWRSAR_1_kc_kv_insample$RMSE
#> [1] 0.3502106
summary_mgwrsar(model_MGWRSAR_1_kc_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata[index_in, 
#>     ], coords = coords[index_in, ], fixed_vars = "X3", kernels = c("gauss"), 
#>     H = 11, Model = "MGWRSAR_1_kc_kv", control = list(W = W_in, 
#>         adaptive = TRUE, isgcv = F))
#> Model: MGWRSAR_1_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss 
#> Kernels adaptive: YES 
#> Kernels type: GD 
#> Bandwidth: 11 
#> Computation time: 1.165 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO 
#> Use of Target Points: NO 
#> Number of data points: 800 
#> Constant parameters: X3 
#> -1.042863 
#> Varying parameters: Intercept X1 X2  
#>         Intercept        X1        X2  lambda
#> Min.    -10.72384   0.08488  -0.77331 -0.2800
#> 1st Qu.  -1.70402   0.35135   2.04683 -0.0410
#> Median   -0.78341   0.53193   3.01252 -0.0047
#> Mean     -0.99026   0.50042   3.42218 -0.0116
#> 3rd Qu.   0.06569   0.63682   5.18079  0.0268
#> Max.      1.62136   0.98688  12.43667  0.1318
#> Residual sum of squares: 98.11797 
#> RMSE: 0.3502106

## without Best Linear Unbiased Predictor
newdata=mydata[index_out,]
newdata_coords=coords[index_out,]
newdata$Y_mgwrsar_1_kc_kv=0

Y_pred=predict_mgwrsar(model_MGWRSAR_1_kc_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W,type='YTC')
#> Warnings: method_pred=='TP' is not implemented for  Model in ('MGWR','MGWRSAR_1_kc_0','MGWRSAR_1_kc_kv','MGWRSAR_0_kc_kv','MGWRSAR_0_0_kv'): automatic swith to method_pred='tWtp_model'
head(Y_pred)
#> [1] 0.6663112 3.6869125 5.8678272 2.6071807 7.9223965 4.2593302
RMSE_YTC=sqrt(mean((mydata$Y_mgwrsar_1_kc_kv[index_out]-Y_pred)^2))
RMSE_YTC
#> [1] 0.4522916

## Using Best Linear Unbiased Predictor
Y_pred=predict_mgwrsar(model_MGWRSAR_1_kc_kv_insample, newdata=newdata, newdata_coords=newdata_coords,W=W,type='BPN')#,method_pred='sheppard')
#> Warnings: method_pred=='TP' is not implemented for  Model in ('MGWR','MGWRSAR_1_kc_0','MGWRSAR_1_kc_kv','MGWRSAR_0_kc_kv','MGWRSAR_0_0_kv'): automatic swith to method_pred='tWtp_model'
head(Y_pred)
#> [1] 0.667970 3.849695 5.867610 2.607104 7.938369 4.259173
RMSE_BPN=sqrt(mean((mydata$Y_mgwrsar_1_kc_kv[index_out]-Y_pred)^2))
RMSE_BPN
#> [1] 0.4636561

Optimal bandwidths search

In the following example, we show how to find optimal bandwidth by hand.


######################
#### Finding bandwidth by hand
#####################
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(isgcv=TRUE,adaptive=FALSE))
summary_mgwrsar(model_GWR)
 
  myCV<-function(H){
    model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H, Model = 'GWR',control=list(isgcv=TRUE,adaptive=FALSE))
    cat('... Try h=',H,' ')
    CV=model_GWR$RMSE
    CV
  }

resCV=optimize(myCV,lower=0.01,upper=0.4)

resCV

## model with optimal bandwith with gaussian kernel

model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss'),H=resCV$minimum, Model = 'GWR',control=list(isgcv=F,adaptive=F))
summary_mgwrsar(model_GWR)

Call:
MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, 
    fixed_vars = NULL, kernels = c("gauss"), H = 0.03, Model = "GWR", 
    control = list(isgcv = TRUE, adaptive = FALSE))
Model: GWR 
Kernels function: gauss 
Kernels adaptive: NO 
Kernels type: GD 
Bandwidth: 0.03 
Computation time: 0.319 
Use of parallel computing: FALSE  ncore = 1 
Use of rough kernel: NO 
Use of Target Points: NO 
Number of data points: 1000 
Varying parameters: Intercept X1 X2 X3 
        Intercept        X1        X2      X3
Min.    -4.396980  0.016334 -0.745746 -2.0461
1st Qu. -1.483364  0.350953  1.087729 -1.2907
Median  -0.827288  0.543547  1.637012 -1.0072
Mean    -0.827471  0.499977  1.673425 -1.0023
3rd Qu. -0.211782  0.636800  2.446022 -0.7158
Max.     2.077957  0.950236  4.189594  0.0364
Residual sum of squares: 8.096586 
RMSE: 0.08998103 
... Try h= 0.1589667  ... Try h= 0.2510333  ... Try h= 0.1020665  ...     Try h= 0.06690023  ... Try h= 0.04516628  ... Try h= 0.03173396  ...     Try h= 0.01560834  ... Try h= 0.02557452  ... Try h= 0.03239781  ...     Try h= 0.03102658  ... Try h= 0.02894408  ... Try h= 0.03069048  ...     Try h= 0.0307406  ... Try h= 0.03078129  ... Try h= 0.0307406  $minimum
[1] 0.0307406

$objective
[1] 0.0899334

Call:
MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, 
    fixed_vars = NULL, kernels = c("gauss"), H = resCV$minimum, 
    Model = "GWR", control = list(isgcv = F, adaptive = F))
Model: GWR 
Kernels function: gauss 
Kernels adaptive: NO 
Kernels type: GD 
Bandwidth: 0.0307406 
Computation time: 0.316 
Use of parallel computing: FALSE  ncore = 1 
Use of rough kernel: NO 
Use of Target Points: NO 
Number of data points: 1000 
Varying parameters: Intercept X1 X2 X3 
        Intercept        X1        X2      X3
Min.    -3.775736  0.017713 -0.751288 -2.0367
1st Qu. -1.490655  0.351921  1.104662 -1.2919
Median  -0.830822  0.541941  1.629693 -1.0085
Mean    -0.831229  0.500069  1.675354 -1.0013
3rd Qu. -0.235924  0.637217  2.450903 -0.7251
Max.     2.079699  0.941899  3.938823  0.0426
Residual sum of squares: 1.900295 
RMSE: 0.04359237

mgwrsar package provides a wrapper that allows to find optimal bandwidth for a selection of model and kernel types. It is designed only for spatial kernel (type=‘GD’). It stores results for all models and kernels using an optimal bandwidth (leave-one out CV criteria) and provides both results with and without leave-one-out sampling. It also allows to find an optimal spatial weight matrix for a given kernel when the model includes spatial dependence.

Example of automatic search of optimal bandwidth for GWR and MGWR (stationnary intercept) models using either an adaptive gaussian kernel or adapative bisquare kernel or gaussian kernel, with at least 8 neigbhors for adaptive kernel and at least a bandwidth of 0.03 for gaussian kernel:

mytab<-bandwidths_mgwrsar(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=c('Intercept','X1'),Models=c('GWR','MGWR'),candidates_Kernels=c('bisq','gauss'),control=list(NN=300,adaptive=TRUE),control_search=list())

names(mytab)
names(mytab[['GWR_bisq_adaptive']])

mytab[['GWR_bisq_adaptive']]$config_model
mytab[['GWR_bisq_adaptive']]$CV
summary(mytab[['GWR_bisq_adaptive']]$model$Betav)

mybestmodel=mytab[['GWR_gauss_adaptive']]$model

#####  GWR  #####
#####  bisq  adaptive= TRUE  #####

########
Search bandwidth stage
########

 ....kernel = bisq adaptive  objective =  0.098356  minimum =  21 

#####  gauss  adaptive= TRUE  #####

########
Search bandwidth stage
########

 ....kernel = gauss adaptive  objective =  0.09718914  minimum =  5 

#####  MGWR  #####
#####  bisq  adaptive= TRUE  #####

########
Search bandwidth stage
########

 .....
 ...try larger suppport

 ........
 Border solution !!! Try to increase NNkernel = bisq adaptive      objective =  0.4496839  minimum =  94 

#####  gauss  adaptive= TRUE  #####

########
Search bandwidth stage
########

 ....kernel = gauss adaptive  objective =  0.4523508  minimum =  16 

[1] "GWR_bisq_adaptive"   "GWR_gauss_adaptive" 
[3] "MGWR_bisq_adaptive"  "MGWR_gauss_adaptive"
[1] "config_model" "CV"           "SSR"          "model"       
$Model
[1] "GWR"

$kernels
[1] "bisq"

$adaptive
[1] TRUE

$H
[1] 21

$kernel_w
[1] "rectangle"

$h_w
NULL

[1] 0.098356
   Intercept             X1               X2         
 Min.   :-3.6889   Min.   :0.0165   Min.   :-0.8843  
 1st Qu.:-1.4614   1st Qu.:0.3526   1st Qu.: 1.0836  
 Median :-0.8098   Median :0.5425   Median : 1.6525  
 Mean   :-0.8239   Mean   :0.4992   Mean   : 1.6717  
 3rd Qu.:-0.2008   3rd Qu.:0.6358   3rd Qu.: 2.4624  
 Max.   : 2.1706   Max.   :0.9191   Max.   : 3.7134  
       X3          
 Min.   :-2.12121  
 1st Qu.:-1.29786  
 Median :-1.00958  
 Mean   :-1.00068  
 3rd Qu.:-0.71484  
 Max.   :-0.03991

Example of automatic search of optimal bandwidth for MGWRSAR_0_kc_kv (stationnary intercept) model using an adaptive gaussian kernel + Automatic search of optimal spatial weight matrix for SAR part of the model using either a bisquare or adpative bisquare kernel, not run.

mytab2<-bandwidths_mgwrsar(formula='Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,Models=c('MGWRSAR_0_0_kv'),candidates_Kernels=c('bisq'),control=list(adaptive=TRUE,NN=500),control_search=list(search_W=TRUE,kernel_w='rectangle',adaptive_W=TRUE))

names(mytab2)
mytab2[['MGWRSAR_0_0_kv_bisq_adaptive']]$config_model
mytab2[['MGWRSAR_0_0_kv_bisq_adaptive']]$CV
mytab2[['MGWRSAR_0_0_kv_bisq_adaptive']]$model$Betac
summary(mytab2[['MGWRSAR_0_0_kv_bisq_adaptive']]$model$Betav)

#####  MGWRSAR_0_0_kv  #####
#####  bisq  adaptive= TRUE  #####

########
Search W stage
########
 .........
 .....
 W : kernel = rectangle adaptive : objective =  0.226692  minimum =  16    


########
Search bandwidth stage
########

 .....kernel = bisq adaptive  objective =  0.1016582  minimum =  22 

[1] "MGWRSAR_0_0_kv_bisq_adaptive"
$Model
[1] "MGWRSAR_0_0_kv"

$kernels
[1] "bisq"

$adaptive
[1] TRUE

$H
[1] 22

$kernel_w
[1] "rectangle"

$h_w
NULL

[1] 0.1016582
   lambda 
0.0419462 
   Intercept             X1                X2         
 Min.   :-3.7090   Min.   :0.02378   Min.   :-0.9346  
 1st Qu.:-1.5217   1st Qu.:0.35387   1st Qu.: 1.0550  
 Median :-0.8644   Median :0.54467   Median : 1.5860  
 Mean   :-0.8622   Mean   :0.50028   Mean   : 1.6214  
 3rd Qu.:-0.2208   3rd Qu.:0.63670   3rd Qu.: 2.3904  
 Max.   : 2.1615   Max.   :0.91634   Max.   : 3.6352  
       X3          
 Min.   :-2.12126  
 1st Qu.:-1.29790  
 Median :-1.01142  
 Mean   :-1.00342  
 3rd Qu.:-0.71371  
 Max.   :-0.05576

General kernel Product functions

The package provides additional functions that allow to estimate locally linear model with other dimensions than space. Using the control variable ‘type’, it is possible to add time in the kernel and a limited set of other variables (continuous or categorical). If several dimensions are involved in the kernel, a general product kernel is used and you need to provide a list of bandwidths and a list of kernel types. For time kernel, it uses asymetric kernel, eventually with a decay. For categorical variable, it uses the propositions of Li and Racine (2010); see also np package. Optimization of bandwidths has to be done by yourself.

Note that when time or other additional variables are used for kernels, then two small bandwidths could lead to empty local subsamples. We recommend to use gauss and gauss_adapt kernels that suffering less this issue.

## space + time kernel
mytime_index=sort(rep(1:500,2))
mytime_index[1:150]
#>   [1]  1  1  2  2  3  3  4  4  5  5  6  6  7  7  8  8  9  9 10 10 11 11 12 12 13
#>  [26] 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23 24 24 25 25
#>  [51] 26 26 27 27 28 28 29 29 30 30 31 31 32 32 33 33 34 34 35 35 36 36 37 37 38
#>  [76] 38 39 39 40 40 41 41 42 42 43 43 44 44 45 45 46 46 47 47 48 48 49 49 50 50
#> [101] 51 51 52 52 53 53 54 54 55 55 56 56 57 57 58 58 59 59 60 60 61 61 62 62 63
#> [126] 63 64 64 65 65 66 66 67 67 68 68 69 69 70 70 71 71 72 72 73 73 74 74 75 75
W=kernel_matW(H=8,kernels='rectangle',coord_i=coords,NN=10,adaptive=TRUE,diagnull=TRUE,rowNorm=T)

  
model_MGWRSART_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coords=coords, fixed_vars=c('Intercept'),kernels=c('gauss','gauss'),H=c(50,50), Model = 'MGWRSAR_0_kc_kv',control=list(Z=mytime_index,W=W,adaptive=c(TRUE,TRUE),Type='GDT'))

summary_mgwrsar(model_MGWRSART_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata, 
#>     coords = coords, fixed_vars = c("Intercept"), kernels = c("gauss", 
#>         "gauss"), H = c(50, 50), Model = "MGWRSAR_0_kc_kv", control = list(Z = mytime_index, 
#>         W = W, adaptive = c(TRUE, TRUE), Type = "GDT"))
#> Model: MGWRSAR_0_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss gauss 
#> Kernels adaptive: YES YES 
#> Kernels type: GDT 
#> Bandwidth: 50 50 
#> Computation time: 0.854 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: Intercept lambda 
#> -0.4460577 0.2657842 
#> Varying parameters: X1 X2 X3 
#>                 X1         X2      X3
#> Min.     0.0034374 -0.8162198 -2.6362
#> 1st Qu.  0.3715497  1.4244343 -1.3558
#> Median   0.4585510  2.1506913 -1.1208
#> Mean     0.4571081  2.0970690 -1.1146
#> 3rd Qu.  0.5556844  2.8302322 -0.8747
#> Max.     0.9022376  5.0097052  0.4869
#> Residual sum of squares: 107.8131 
#> RMSE: 0.3283491


### space  + continuous variable kernel
model_GWRX<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coords=coords, fixed_vars=NULL,kernels=c('gauss','gauss'),H=c(120,120), Model = 'GWR',control=list(Z=mydata$X2,Type='GDX',adaptive=c(T,T)))
summary_mgwrsar(model_GWRX)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coords = coords, 
#>     fixed_vars = NULL, kernels = c("gauss", "gauss"), H = c(120, 
#>         120), Model = "GWR", control = list(Z = mydata$X2, Type = "GDX", 
#>         adaptive = c(T, T)))
#> Model: GWR 
#> Kernels function: gauss gauss 
#> Kernels adaptive: YES YES 
#> Kernels type: GDX 
#> Bandwidth: 120 120 
#> Computation time: 0.613 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept       X1       X2      X3
#> Min.     -6.45292  0.15257 -0.84387 -1.8089
#> 1st Qu.  -2.30342  0.32481  1.00002 -1.2329
#> Median   -1.13830  0.45537  1.91297 -0.9922
#> Mean     -1.30274  0.46953  2.06651 -1.0200
#> 3rd Qu.  -0.03019  0.62710  3.05773 -0.7868
#> Max.      1.54070  0.73745  6.44482 -0.2804
#> Residual sum of squares: 202.5684 
#> RMSE: 0.4500759


### space + categorical kernel (Li and Racine 2010)
Z=1+as.numeric(mydata$X2>quantile(mydata$X2,0.9))*2+as.numeric(mydata$X2<=quantile(mydata$X2,0.1))
table(Z)
#> Z
#>   1   2   3 
#> 800 100 100

model_MGWRSARC_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X3', data = mydata,coords=coords, fixed_vars=c('Intercept'),kernels=c('gauss','li','li','li'),H=c(120,0.1,0.9,0.9), Model = 'MGWRSAR_0_kc_kv',control=list(Z=Z,W=W,Type='GDC',adaptive=c(T,F,F,F)))
summary_mgwrsar(model_MGWRSARC_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X3", data = mydata, coords = coords, 
#>     fixed_vars = c("Intercept"), kernels = c("gauss", "li", "li", 
#>         "li"), H = c(120, 0.1, 0.9, 0.9), Model = "MGWRSAR_0_kc_kv", 
#>     control = list(Z = Z, W = W, Type = "GDC", adaptive = c(T, 
#>         F, F, F)))
#> Model: MGWRSAR_0_kc_kv 
#> Method for spatial autocorrelation: 2SLS 
#> Kernels function: gauss li li li 
#> Kernels adaptive: YES NO NO NO 
#> Kernels type: GDC 
#> Bandwidth: 120 0.1 0.9 0.9 
#> Computation time: 0.617 
#> Use of parallel computing: FALSE  ncore = 1 
#> Use of rough kernel: NO NO NO NO 
#> Use of Target Points: NO 
#> Number of data points: 1000 
#> Constant parameters: Intercept lambda 
#> 1.149469 0.3528613 
#> Varying parameters: X1 X3 
#>              X1      X3
#> Min.    0.13802 -1.3391
#> 1st Qu. 0.34306 -1.2704
#> Median  0.45967 -1.2161
#> Mean    0.43625 -1.1279
#> 3rd Qu. 0.53071 -1.0092
#> Max.    0.65289 -0.6043
#> Residual sum of squares: 1067.886 
#> RMSE: 1.033386

Examples of basic uses of mgwrsar package

Ghislain Geniaux

2023-11-30

Introduction

Using mgwrsar package

Estimation

Prediction

Optimal bandwidths search

General kernel Product functions