We try to investigate the distribution of prices among the different alpine huts in dolomities; We collected manually the data from the SAT website where we found a detailed list of prices for each of the hut handled by them.
#install.packages("rgdal")
#install.packages("spdep")
#install.packages("boot")
library(rgdal)
library(spdep)
library(boot)
library(ggplot2)
## Warning in readOGR("data/R/dolomiti"): Z-dimension discarded
tab_10 <- c('#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf')
plot(trentino, main="Trentino Dolomities Area")
plot(dolomiti_area, add=TRUE, col=tab_10)
# preprocessing of prices data
dolomiti_area_prices$elevation <- as.numeric(dolomiti_area_prices$elevation)
dolomiti_area_prices$posto_ripo <- as.numeric(dolomiti_area_prices$posto_ripo)
dolomiti_area_prices$posto_lett <- as.numeric(dolomiti_area_prices$posto_lett)
dolomiti_area_prices$posto_le_1 <- as.numeric(dolomiti_area_prices$posto_le_1)
dolomiti_area_prices$mezza_pens <- as.numeric(dolomiti_area_prices$mezza_pens)
dolomiti_area_prices$mezza_pe_1 <- as.numeric(dolomiti_area_prices$mezza_pe_1)
dolomiti_area_prices$capacity <- as.numeric(dolomiti_area_prices$capacity)
dolomiti_area_prices$categoria <- as.factor(dolomiti_area_prices$categoria)
#summary information
summary(dolomiti_area_prices[, c('elevation', 'posto_ripo', 'posto_lett', 'posto_le_1', 'mezza_pens', 'mezza_pe_1', 'capacity')])
## Object of class SpatialPointsDataFrame
## Coordinates:
## min max
## coords.x1 10.56253 11.83902
## coords.x2 45.81038 46.51461
## Is projected: FALSE
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Number of points: 28
## Data attributes:
## elevation posto_ripo posto_lett posto_le_1 mezza_pens
## Min. : 611 Min. :5 Min. :15.00 Min. :30.00 Min. :42.00
## 1st Qu.:1996 1st Qu.:5 1st Qu.:16.00 1st Qu.:32.00 1st Qu.:44.75
## Median :2277 Median :5 Median :17.00 Median :34.00 Median :46.00
## Mean :2153 Mean :5 Mean :16.46 Mean :32.93 Mean :46.14
## 3rd Qu.:2456 3rd Qu.:5 3rd Qu.:17.00 3rd Qu.:34.00 3rd Qu.:48.00
## Max. :3535 Max. :5 Max. :19.00 Max. :38.00 Max. :56.00
## NA's :3
## mezza_pe_1 capacity
## Min. :53.00 Min. : 16.00
## 1st Qu.:55.00 1st Qu.: 30.00
## Median :56.00 Median : 56.00
## Mean :56.79 Mean : 55.36
## 3rd Qu.:58.00 3rd Qu.: 70.00
## Max. :67.00 Max. :130.00
## NA's :3
Explore statistical properties of alpine huts prices
library("ggpubr")
# 1. Create a box plot (bp)
ele_box <- ggplot(
data=dolomiti_area_prices@data,
aes(y = dolomiti_area_prices@data[, "elevation"])) +
geom_boxplot(
color=tab_10[1], fill=tab_10[1], alpha=0.2,
aes(y = dolomiti_area_prices@data[, "elevation"],)
)+
labs(y='Elevation')
cap_box <- ggplot(
data=dolomiti_area_prices@data,
aes(y = dolomiti_area_prices@data[, "capacity"])) +
geom_boxplot(
color=tab_10[2], fill=tab_10[2], alpha=0.2,
aes(y = dolomiti_area_prices@data[, "capacity"])
)+
labs(y='Capacity')
let_soci_box <- ggplot(
data=dolomiti_area_prices@data,
aes(y = dolomiti_area_prices@data[, "posto_lett"])) +
geom_boxplot(
color=tab_10[3], fill=tab_10[3], alpha=0.2,
aes(y = dolomiti_area_prices@data[, "posto_lett"])
)+
labs(y='Posto letto soci')
let_non_soci_box <- ggplot(
data=dolomiti_area_prices@data,
aes(y = dolomiti_area_prices@data[, "posto_le_1"])) +
geom_boxplot(
color=tab_10[5], fill=tab_10[5], alpha=0.2,
aes(y = dolomiti_area_prices@data[, "posto_le_1"])
)+
labs(y='Posto letto non soci')
pens_soci_box <- ggplot(
data=dolomiti_area_prices@data,
aes(y = dolomiti_area_prices@data[, "mezza_pens"])) +
geom_boxplot(
color=tab_10[6], fill=tab_10[6], alpha=0.2,
aes(y = dolomiti_area_prices@data[, "mezza_pens"])
)+
labs(y='Mezza pensione soci')
pens_non_soci_box <- ggplot(
data=dolomiti_area_prices@data,
aes(y = dolomiti_area_prices@data[, "mezza_pe_1"])) +
geom_boxplot(
color=tab_10[7], fill=tab_10[7], alpha=0.2,
aes(y = dolomiti_area_prices@data[, "mezza_pe_1"])
)+
labs(y='Mezza pensione non soci')
par(mar=(c(0,0,0,0)))
figure <- ggarrange(
ele_box, cap_box,
let_soci_box, let_non_soci_box,
pens_soci_box, pens_non_soci_box,
labels = c(
"Elevation", "Capacity",
"Posto Letto Soci", "Posto Letto Non Soci",
"Pensione Soci", "Pensione Non Soci"),
ncol = 2, nrow = 3
)
figure
# sat huts
plot(trentino, main="Trentino Dolomities Area Huts")
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(dolomiti_area_prices, add=TRUE, col='blue', cex=.8, pch=10)
## Neighbours ### k-Nearest neighbours
Definition: The k-nearest neighbours criterion implies that two spatial units are considered as neighbours if their distance is equal, or less than equal, to the minimum possible distance that can be found amongst all the observations.This definition of neighbourhood ensures that each spatial unit has exactly the same number k of neighbours.
knn_1 <- knn2nb(
knearneigh(dolomiti_area_prices, k=1)
)
plot(trentino, border="grey", main='huts with k neighbor = 1')
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(knn_1, dolomiti_area_prices, add=TRUE)
knn_2 <- knn2nb(
knearneigh(dolomiti_area_prices, k=2)
)
plot(trentino, border="grey", main='huts with k neighbor = 2')
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(knn_2, dolomiti_area_prices, add=TRUE)
knn_5 <- knn2nb(
knearneigh(dolomiti_area_prices, k=5)
)
plot(trentino, border="grey", main='huts with k neighbor = 5')
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(knn_5, dolomiti_area_prices, add=TRUE)
Definition: Critical cut-off neighbourhood criterion implies that two spatial units are considered as neighbours if their distance is equal, or less than equal, to a certain fixed distance which represents a critical cut-off. This threshold distance should not be smaller than the minimum value that ensures that each spatial unit has at least one neighbour.
cut_off_min <- max(
unlist(nbdists(knn_1, dolomiti_area_prices, longlat = T))
)
# different cut of experiments
cut_off_minimum = dnearneigh(dolomiti_area_prices, 0, cut_off_min, longlat=T)
print('Summary neighbour stats:')
## [1] "Summary neighbour stats:"
cut_off_minimum
## Neighbour list object:
## Number of regions: 28
## Number of nonzero links: 156
## Percentage nonzero weights: 19.89796
## Average number of links: 5.571429
plot(trentino, main= paste('neighbour minimum cut off', round(cut_off_min,2)))
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(cut_off_minimum,
dolomiti_area_prices,
add = TRUE,
col = "red")
cut_off_20 = dnearneigh(dolomiti_area_prices, 0, cut_off_min + (cut_off_min * .2), longlat=T)
# different cut of experiments
print('Summary neighbour stats:')
## [1] "Summary neighbour stats:"
cut_off_20
## Neighbour list object:
## Number of regions: 28
## Number of nonzero links: 210
## Percentage nonzero weights: 26.78571
## Average number of links: 7.5
plot(trentino, main= paste('neighbour minimum cut off', round(cut_off_min + (cut_off_min * .2),2)))
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(cut_off_20,
dolomiti_area_prices,
add = TRUE,
col = "red")
cut_off_50 = dnearneigh(dolomiti_area_prices, 0, cut_off_min + (cut_off_min * .5), longlat=T)
## Warning in dnearneigh(dolomiti_area_prices, 0, cut_off_min + (cut_off_min * :
## dnearneigh: longlat argument overrides object
# different cut of experiments
print('Summary neighbour stats:')
## [1] "Summary neighbour stats:"
cut_off_50
## Neighbour list object:
## Number of regions: 28
## Number of nonzero links: 296
## Percentage nonzero weights: 37.7551
## Average number of links: 10.57143
plot(trentino, main= paste('neighbour minimum cut off', round(cut_off_min + (cut_off_min * .5),2)))
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(cut_off_50,
dolomiti_area_prices,
add = TRUE,
col = "red")
Let us define \(N(i)\) as the set of all neighbours (however defined) of spatial unit \(i\). The form of a binary spatial weight matrix \(W\) of generic element \(w_{ij}\)
\[ \begin{equation} w_{ij}=\begin{cases} 1, & \text{if $j \in N(i)$}.\\ 0, & \text{otherwise}. \end{cases} \end \]
It is often useful to spatial weight matrices that are row-standardized, in the sense that each row sums up to 1.
Once the neighbourhood relationships between observations have been defined, the spatial weights matrix can be created. In our case we will calculate a standardised spatial weight matrix for each list of critical and k-nearest neighbours created earlier.
#To create a row-standardized spatial weights matrix for each critical cut-off neighbours
knn_1.weight_matrix <- nb2listw(knn_1,style="W")
knn_2.weight_matrix <- nb2listw(knn_2,style="W")
knn_5.weight_matrix <- nb2listw(knn_5,style="W")
cut_off_minimum.weight_matrix <- nb2listw(cut_off_minimum,style="W")
cut_off_20.weight_matrix <- nb2listw(cut_off_20,style="W")
cut_off_50.weight_matrix <- nb2listw(cut_off_50,style="W")
The most popular measure of (global) spatial autocorrelation of a variable is the Moran’s I index (Moran 1950), which is given by
$$ I = {{i=1}^{n} {j=1}^{n} w_{ij}} {{i=1}^{n} {j=1}^{n} (x_{i} - {x}) (x_{j} - {x})} {{i=1}^{n}(x{i} - {x}^{2})}
$$
and essentially account for the association between x and its spatial lag. Where the Spatia leg represents the average of the \(x\) values for \(i\)’s neighbours.
Its values range from (basically) -1 (negative association) and +1 (positive association). Values close to zero indicate absence of spatial autocorrelation.
data_quantiles <- dolomiti_area_prices@data$mezza_pe_1
quantile_breaks <- quantile(data_quantiles)
plot(trentino, main='Quantile breaks of Mezza Pensione - Non soci')
points(dolomiti_area_prices, col = "black", pch = 20, cex=3)
points(dolomiti_area_prices, col=tab_10[findInterval(data_quantiles, quantile_breaks, all.inside=TRUE)], pch = 20, cex=2.5)
data_quantiles <- dolomiti_area_prices@data$posto_le_1
quantile_breaks <- quantile(data_quantiles, na.rm=T)
plot(trentino, main='Quantile breaks of Posto Letto - Non soci')
points(dolomiti_area_prices, col = "black", pch = 20, cex=3)
points(dolomiti_area_prices, col=tab_10[findInterval(data_quantiles, quantile_breaks, all.inside=TRUE)], pch = 20, cex=2.5)
Between the two it seems more intersting to analyze the price of the “Mezza Pensione - Non Soci” which includes foods.
# we try to combine the cost with an indicator of the pensione and posto letto non soci.
var_to_analyze <- dolomiti_area_prices$mezza_pe_1
var_to_analyze <- dolomiti_area_prices$mezza_pe_1 + dolomiti_area_prices$posto_le_1
spaces <- print(paste(replicate(50, "-"), collapse = ""))
## [1] "--------------------------------------------------"
#perform the global Morans I test
moran.test(var_to_analyze, knn_1.weight_matrix, randomisation=FALSE); spaces
##
## Moran I test under normality
##
## data: var_to_analyze
## weights: knn_1.weight_matrix
##
## Moran I statistic standard deviate = 2.3125, p-value = 0.01038
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.50755375 -0.03703704 0.05546095
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, knn_2.weight_matrix, randomisation=FALSE); spaces
##
## Moran I test under normality
##
## data: var_to_analyze
## weights: knn_2.weight_matrix
##
## Moran I statistic standard deviate = 2.8459, p-value = 0.002214
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.43608367 -0.03703704 0.02763756
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, knn_5.weight_matrix, randomisation=FALSE); spaces
##
## Moran I test under normality
##
## data: var_to_analyze
## weights: knn_5.weight_matrix
##
## Moran I statistic standard deviate = 2.5495, p-value = 0.005394
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.212957583 -0.037037037 0.009615304
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, cut_off_minimum.weight_matrix, randomisation=FALSE); spaces
##
## Moran I test under normality
##
## data: var_to_analyze
## weights: cut_off_minimum.weight_matrix
##
## Moran I statistic standard deviate = 2.6129, p-value = 0.004488
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.24560270 -0.03703704 0.01170068
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, cut_off_20.weight_matrix, randomisation=FALSE) ; spaces
##
## Moran I test under normality
##
## data: var_to_analyze
## weights: cut_off_20.weight_matrix
##
## Moran I statistic standard deviate = 2.5575, p-value = 0.005272
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.18995395 -0.03703704 0.00787767
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, cut_off_50.weight_matrix, randomisation=FALSE) ; spaces
##
## Moran I test under normality
##
## data: var_to_analyze
## weights: cut_off_50.weight_matrix
##
## Moran I statistic standard deviate = 2.9162, p-value = 0.001771
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.167284857 -0.037037037 0.004908902
## [1] "--------------------------------------------------"
# under the assumption of randomisation
print('Under Randomisation Assumptions')
## [1] "Under Randomisation Assumptions"
moran.test(var_to_analyze, knn_1.weight_matrix, randomisation=TRUE); spaces
##
## Moran I test under randomisation
##
## data: var_to_analyze
## weights: knn_1.weight_matrix
##
## Moran I statistic standard deviate = 2.557, p-value = 0.005279
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.50755375 -0.03703704 0.04536124
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, knn_2.weight_matrix, randomisation=TRUE); spaces
##
## Moran I test under randomisation
##
## data: var_to_analyze
## weights: knn_2.weight_matrix
##
## Moran I statistic standard deviate = 3.1462, p-value = 0.0008271
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.43608367 -0.03703704 0.02261387
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, knn_5.weight_matrix, randomisation=TRUE); spaces
##
## Moran I test under randomisation
##
## data: var_to_analyze
## weights: knn_5.weight_matrix
##
## Moran I statistic standard deviate = 2.8122, p-value = 0.00246
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.212957583 -0.037037037 0.007902364
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, cut_off_minimum.weight_matrix, randomisation=TRUE); spaces
##
## Moran I test under randomisation
##
## data: var_to_analyze
## weights: cut_off_minimum.weight_matrix
##
## Moran I statistic standard deviate = 2.8889, p-value = 0.001933
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.245602703 -0.037037037 0.009572089
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, cut_off_20.weight_matrix, randomisation=TRUE); spaces
##
## Moran I test under randomisation
##
## data: var_to_analyze
## weights: cut_off_20.weight_matrix
##
## Moran I statistic standard deviate = 2.8259, p-value = 0.002358
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.189953951 -0.037037037 0.006452278
## [1] "--------------------------------------------------"
moran.test(var_to_analyze, cut_off_50.weight_matrix, randomisation=TRUE); spaces
##
## Moran I test under randomisation
##
## data: var_to_analyze
## weights: cut_off_50.weight_matrix
##
## Moran I statistic standard deviate = 3.2245, p-value = 0.0006309
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.167284857 -0.037037037 0.004015067
## [1] "--------------------------------------------------"
#perform the global Morans I test with MonteCarlo simulation
moran.mc(var_to_analyze, knn_1.weight_matrix, nsim = 999); spaces
##
## Monte-Carlo simulation of Moran I
##
## data: var_to_analyze
## weights: knn_1.weight_matrix
## number of simulations + 1: 1000
##
## statistic = 0.50755, observed rank = 999, p-value = 0.001
## alternative hypothesis: greater
## [1] "--------------------------------------------------"
moran.mc(var_to_analyze, knn_2.weight_matrix, nsim = 999); spaces
##
## Monte-Carlo simulation of Moran I
##
## data: var_to_analyze
## weights: knn_2.weight_matrix
## number of simulations + 1: 1000
##
## statistic = 0.43608, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
## [1] "--------------------------------------------------"
moran.mc(var_to_analyze, knn_5.weight_matrix, nsim = 999); spaces
##
## Monte-Carlo simulation of Moran I
##
## data: var_to_analyze
## weights: knn_5.weight_matrix
## number of simulations + 1: 1000
##
## statistic = 0.21296, observed rank = 986, p-value = 0.014
## alternative hypothesis: greater
## [1] "--------------------------------------------------"
moran.mc(var_to_analyze, cut_off_minimum.weight_matrix, nsim = 999); spaces
##
## Monte-Carlo simulation of Moran I
##
## data: var_to_analyze
## weights: cut_off_minimum.weight_matrix
## number of simulations + 1: 1000
##
## statistic = 0.2456, observed rank = 997, p-value = 0.003
## alternative hypothesis: greater
## [1] "--------------------------------------------------"
moran.mc(var_to_analyze, cut_off_20.weight_matrix, nsim = 999); spaces
##
## Monte-Carlo simulation of Moran I
##
## data: var_to_analyze
## weights: cut_off_20.weight_matrix
## number of simulations + 1: 1000
##
## statistic = 0.18995, observed rank = 991, p-value = 0.009
## alternative hypothesis: greater
## [1] "--------------------------------------------------"
moran.mc(var_to_analyze, cut_off_50.weight_matrix, nsim = 999); spaces
##
## Monte-Carlo simulation of Moran I
##
## data: var_to_analyze
## weights: cut_off_50.weight_matrix
## number of simulations + 1: 1000
##
## statistic = 0.16728, observed rank = 988, p-value = 0.012
## alternative hypothesis: greater
## [1] "--------------------------------------------------"
The values are coherent across the three approaches, it seems that there is a positive global spatial autocorrelation when we use a metric for computing the neighbours very small as \(k=1\) or \(k=2\) reaching a Moran I statistic = \(.45\), \(.37\).
The analysis turned out to be positive, there is spatial correlation in the prices among alpine huts, there could be many reasons for that for instance, since it is very high for close neighbour, a possible explation could be related to the condition of the trail for providing resources and food to the huts or many others possibilities.
Another possibility is not to involve any kind of spatial prospective but trying to understand if the prices are really related to the elevation or not. To do this we perform a simple linear regression, trying to understand trough the parameters the impact of the elevation into the prices.
intersting_cols <- c("elevation", "capacity", "posto_le_1")
lm_elevation <- lm(posto_le_1 ~ ., dolomiti_area_prices[,intersting_cols])
summary(lm_elevation)
##
## Call:
## lm(formula = posto_le_1 ~ ., data = dolomiti_area_prices[, intersting_cols])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.41423 -0.34017 -0.00501 0.37763 1.10768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.455e+02 4.811e+01 3.025 0.00669 **
## elevation 3.370e-03 3.584e-04 9.403 8.82e-09 ***
## capacity 1.308e-03 5.004e-03 0.261 0.79646
## coords.x1 -7.971e-01 3.516e-01 -2.267 0.03464 *
## coords.x2 -2.403e+00 1.097e+00 -2.190 0.04058 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.655 on 20 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8819, Adjusted R-squared: 0.8583
## F-statistic: 37.33 on 4 and 20 DF, p-value: 5.188e-09
As expected also therelevation seems very important for predicting the prices of the alpine huts.