Alpine huts price analysis

We try to investigate the distribution of prices among the different alpine huts in dolomities; We collected manually the data from the SAT website where we found a detailed list of prices for each of the hut handled by them.

#install.packages("rgdal")
#install.packages("spdep")
#install.packages("boot")

library(rgdal)
library(spdep)
library(boot)
library(ggplot2)

Visualize the data

## Warning in readOGR("data/R/dolomiti"): Z-dimension discarded

tab_10 <- c('#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf')

plot(trentino, main="Trentino Dolomities Area")
plot(dolomiti_area, add=TRUE, col=tab_10)

Explore the huts prices and details.

# preprocessing of prices data
dolomiti_area_prices$elevation <- as.numeric(dolomiti_area_prices$elevation)
dolomiti_area_prices$posto_ripo <- as.numeric(dolomiti_area_prices$posto_ripo)
dolomiti_area_prices$posto_lett <- as.numeric(dolomiti_area_prices$posto_lett)
dolomiti_area_prices$posto_le_1 <- as.numeric(dolomiti_area_prices$posto_le_1)
dolomiti_area_prices$mezza_pens <- as.numeric(dolomiti_area_prices$mezza_pens)
dolomiti_area_prices$mezza_pe_1 <- as.numeric(dolomiti_area_prices$mezza_pe_1)
dolomiti_area_prices$capacity <- as.numeric(dolomiti_area_prices$capacity)
dolomiti_area_prices$categoria <- as.factor(dolomiti_area_prices$categoria)

#summary information
summary(dolomiti_area_prices[, c('elevation', 'posto_ripo', 'posto_lett', 'posto_le_1', 'mezza_pens', 'mezza_pe_1', 'capacity')])

## Object of class SpatialPointsDataFrame
## Coordinates:
##                min      max
## coords.x1 10.56253 11.83902
## coords.x2 45.81038 46.51461
## Is projected: FALSE 
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Number of points: 28
## Data attributes:
##    elevation      posto_ripo   posto_lett      posto_le_1      mezza_pens   
##  Min.   : 611   Min.   :5    Min.   :15.00   Min.   :30.00   Min.   :42.00  
##  1st Qu.:1996   1st Qu.:5    1st Qu.:16.00   1st Qu.:32.00   1st Qu.:44.75  
##  Median :2277   Median :5    Median :17.00   Median :34.00   Median :46.00  
##  Mean   :2153   Mean   :5    Mean   :16.46   Mean   :32.93   Mean   :46.14  
##  3rd Qu.:2456   3rd Qu.:5    3rd Qu.:17.00   3rd Qu.:34.00   3rd Qu.:48.00  
##  Max.   :3535   Max.   :5    Max.   :19.00   Max.   :38.00   Max.   :56.00  
##                 NA's   :3                                                   
##    mezza_pe_1       capacity     
##  Min.   :53.00   Min.   : 16.00  
##  1st Qu.:55.00   1st Qu.: 30.00  
##  Median :56.00   Median : 56.00  
##  Mean   :56.79   Mean   : 55.36  
##  3rd Qu.:58.00   3rd Qu.: 70.00  
##  Max.   :67.00   Max.   :130.00  
##                  NA's   :3

Explore statistical properties of alpine huts prices

library("ggpubr")

# 1. Create a box plot (bp)
ele_box <- ggplot(
  data=dolomiti_area_prices@data, 
  aes(y = dolomiti_area_prices@data[, "elevation"])) +
  geom_boxplot(
    color=tab_10[1], fill=tab_10[1], alpha=0.2,
    aes(y = dolomiti_area_prices@data[, "elevation"],)
    )+
  labs(y='Elevation')

cap_box <- ggplot(
  data=dolomiti_area_prices@data, 
  aes(y = dolomiti_area_prices@data[, "capacity"])) +
  geom_boxplot(
    color=tab_10[2], fill=tab_10[2], alpha=0.2,
    aes(y = dolomiti_area_prices@data[, "capacity"])
    )+
  labs(y='Capacity')


let_soci_box <- ggplot(
  data=dolomiti_area_prices@data, 
  aes(y = dolomiti_area_prices@data[, "posto_lett"])) +
  geom_boxplot(
    color=tab_10[3], fill=tab_10[3], alpha=0.2,
    aes(y = dolomiti_area_prices@data[, "posto_lett"])
    )+
  labs(y='Posto letto soci')

let_non_soci_box <- ggplot(
  data=dolomiti_area_prices@data, 
  aes(y = dolomiti_area_prices@data[, "posto_le_1"])) +
  geom_boxplot(
    color=tab_10[5], fill=tab_10[5], alpha=0.2,
    aes(y = dolomiti_area_prices@data[, "posto_le_1"])
    )+
  labs(y='Posto letto non soci')

pens_soci_box <- ggplot(
  data=dolomiti_area_prices@data, 
  aes(y = dolomiti_area_prices@data[, "mezza_pens"])) +
  geom_boxplot(
    color=tab_10[6], fill=tab_10[6], alpha=0.2,
    aes(y = dolomiti_area_prices@data[, "mezza_pens"])
    )+
  labs(y='Mezza pensione soci')

pens_non_soci_box <- ggplot(
  data=dolomiti_area_prices@data, 
  aes(y = dolomiti_area_prices@data[, "mezza_pe_1"])) +
  geom_boxplot(
    color=tab_10[7], fill=tab_10[7], alpha=0.2,
    aes(y = dolomiti_area_prices@data[, "mezza_pe_1"])
    )+
  labs(y='Mezza pensione non soci')

par(mar=(c(0,0,0,0)))
figure <- ggarrange(
  ele_box, cap_box, 
  let_soci_box, let_non_soci_box,
  pens_soci_box, pens_non_soci_box,
  labels = c(
    "Elevation", "Capacity", 
    "Posto Letto Soci", "Posto Letto Non Soci",
    "Pensione Soci", "Pensione Non Soci"),
  ncol = 2, nrow = 3
)

figure

# sat huts
plot(trentino, main="Trentino Dolomities Area Huts")
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(dolomiti_area_prices, add=TRUE, col='blue', cex=.8, pch=10)

## Neighbours ### k-Nearest neighbours

Definition: The k-nearest neighbours criterion implies that two spatial units are considered as neighbours if their distance is equal, or less than equal, to the minimum possible distance that can be found amongst all the observations.This definition of neighbourhood ensures that each spatial unit has exactly the same number k of neighbours.

knn_1 <- knn2nb(
    knearneigh(dolomiti_area_prices, k=1)
  )

plot(trentino, border="grey", main='huts with k neighbor = 1')
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(knn_1, dolomiti_area_prices, add=TRUE)

knn_2 <- knn2nb(
    knearneigh(dolomiti_area_prices, k=2)
  )

plot(trentino, border="grey", main='huts with k neighbor = 2')
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(knn_2, dolomiti_area_prices, add=TRUE)

knn_5 <- knn2nb(
    knearneigh(dolomiti_area_prices, k=5)
  )

plot(trentino, border="grey", main='huts with k neighbor = 5')
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(knn_5, dolomiti_area_prices, add=TRUE)

Critical cut-off neighbourhood

Definition: Critical cut-off neighbourhood criterion implies that two spatial units are considered as neighbours if their distance is equal, or less than equal, to a certain fixed distance which represents a critical cut-off. This threshold distance should not be smaller than the minimum value that ensures that each spatial unit has at least one neighbour.

cut_off_min <- max(
    unlist(nbdists(knn_1, dolomiti_area_prices, longlat = T))
  ) 


# different cut of experiments
cut_off_minimum = dnearneigh(dolomiti_area_prices, 0, cut_off_min, longlat=T)
print('Summary neighbour stats:')

## [1] "Summary neighbour stats:"

cut_off_minimum

## Neighbour list object:
## Number of regions: 28 
## Number of nonzero links: 156 
## Percentage nonzero weights: 19.89796 
## Average number of links: 5.571429

plot(trentino, main= paste('neighbour minimum cut off', round(cut_off_min,2)))
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(cut_off_minimum, 
     dolomiti_area_prices, 
     add = TRUE, 
     col = "red")

cut_off_20 = dnearneigh(dolomiti_area_prices, 0, cut_off_min + (cut_off_min * .2), longlat=T)
# different cut of experiments
print('Summary neighbour stats:')

## [1] "Summary neighbour stats:"

cut_off_20

## Neighbour list object:
## Number of regions: 28 
## Number of nonzero links: 210 
## Percentage nonzero weights: 26.78571 
## Average number of links: 7.5

plot(trentino, main= paste('neighbour minimum cut off', round(cut_off_min + (cut_off_min * .2),2)))
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(cut_off_20, 
     dolomiti_area_prices, 
     add = TRUE, 
     col = "red")

cut_off_50 = dnearneigh(dolomiti_area_prices, 0, cut_off_min + (cut_off_min * .5), longlat=T)

## Warning in dnearneigh(dolomiti_area_prices, 0, cut_off_min + (cut_off_min * :
## dnearneigh: longlat argument overrides object

# different cut of experiments
print('Summary neighbour stats:')

## [1] "Summary neighbour stats:"

cut_off_50

## Neighbour list object:
## Number of regions: 28 
## Number of nonzero links: 296 
## Percentage nonzero weights: 37.7551 
## Average number of links: 10.57143

plot(trentino, main= paste('neighbour minimum cut off', round(cut_off_min + (cut_off_min * .5),2)))
plot(dolomiti_area, add=TRUE, col=tab_10)
plot(cut_off_50, 
     dolomiti_area_prices, 
     add = TRUE, 
     col = "red")

Defining matrices of spatial weights

Let us define $N(i)$ as the set of all neighbours (however defined) of spatial unit $i$. The form of a binary spatial weight matrix $W$ of generic element $w_{ij}$

\[ \begin{equation} w_{ij}=\begin{cases} 1, & \text{if $j \in N(i)$}.\\ 0, & \text{otherwise}. \end{cases} \end \]

It is often useful to spatial weight matrices that are row-standardized, in the sense that each row sums up to 1.

Once the neighbourhood relationships between observations have been defined, the spatial weights matrix can be created. In our case we will calculate a standardised spatial weight matrix for each list of critical and k-nearest neighbours created earlier.

#To create a row-standardized spatial weights matrix for each critical cut-off neighbours 

knn_1.weight_matrix <- nb2listw(knn_1,style="W")
knn_2.weight_matrix <- nb2listw(knn_2,style="W")
knn_5.weight_matrix <- nb2listw(knn_5,style="W")
cut_off_minimum.weight_matrix <- nb2listw(cut_off_minimum,style="W")
cut_off_20.weight_matrix <- nb2listw(cut_off_20,style="W")
cut_off_50.weight_matrix <- nb2listw(cut_off_50,style="W")

The most popular measure of (global) spatial autocorrelation of a variable is the Moran’s I index (Moran 1950), which is given by

$$ I = {{i=1}^{n} {j=1}^{n} w_{ij}} {{i=1}^{n} {j=1}^{n} (x_{i} - {x}) (x_{j} - {x})} {{i=1}^{n}(x{i} - {x}^{2})}

and essentially account for the association between x and its spatial lag. Where the Spatia leg represents the average of the $x$ values for $i$’s neighbours.

Its values range from (basically) -1 (negative association) and +1 (positive association). Values close to zero indicate absence of spatial autocorrelation.

data_quantiles <- dolomiti_area_prices@data$mezza_pe_1
quantile_breaks <- quantile(data_quantiles)

plot(trentino, main='Quantile breaks of Mezza Pensione - Non soci')
points(dolomiti_area_prices,  col = "black", pch = 20, cex=3)
points(dolomiti_area_prices, col=tab_10[findInterval(data_quantiles, quantile_breaks, all.inside=TRUE)], pch = 20, cex=2.5)

data_quantiles <- dolomiti_area_prices@data$posto_le_1
quantile_breaks <- quantile(data_quantiles, na.rm=T)

plot(trentino, main='Quantile breaks of Posto Letto - Non soci')
points(dolomiti_area_prices,  col = "black", pch = 20, cex=3)
points(dolomiti_area_prices, col=tab_10[findInterval(data_quantiles, quantile_breaks, all.inside=TRUE)], pch = 20, cex=2.5)

Between the two it seems more intersting to analyze the price of the “Mezza Pensione - Non Soci” which includes foods.

# we try to combine the cost with an indicator of the pensione and posto letto non soci.
var_to_analyze <- dolomiti_area_prices$mezza_pe_1
var_to_analyze <- dolomiti_area_prices$mezza_pe_1 + dolomiti_area_prices$posto_le_1

spaces <-  print(paste(replicate(50, "-"), collapse = ""))

## [1] "--------------------------------------------------"

#perform the global Morans I test 
moran.test(var_to_analyze, knn_1.weight_matrix, randomisation=FALSE); spaces

## 
##  Moran I test under normality
## 
## data:  var_to_analyze  
## weights: knn_1.weight_matrix    
## 
## Moran I statistic standard deviate = 2.3125, p-value = 0.01038
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.50755375       -0.03703704        0.05546095

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, knn_2.weight_matrix, randomisation=FALSE); spaces

## 
##  Moran I test under normality
## 
## data:  var_to_analyze  
## weights: knn_2.weight_matrix    
## 
## Moran I statistic standard deviate = 2.8459, p-value = 0.002214
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.43608367       -0.03703704        0.02763756

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, knn_5.weight_matrix, randomisation=FALSE); spaces

## 
##  Moran I test under normality
## 
## data:  var_to_analyze  
## weights: knn_5.weight_matrix    
## 
## Moran I statistic standard deviate = 2.5495, p-value = 0.005394
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.212957583      -0.037037037       0.009615304

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, cut_off_minimum.weight_matrix, randomisation=FALSE); spaces

## 
##  Moran I test under normality
## 
## data:  var_to_analyze  
## weights: cut_off_minimum.weight_matrix    
## 
## Moran I statistic standard deviate = 2.6129, p-value = 0.004488
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.24560270       -0.03703704        0.01170068

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, cut_off_20.weight_matrix, randomisation=FALSE) ; spaces

## 
##  Moran I test under normality
## 
## data:  var_to_analyze  
## weights: cut_off_20.weight_matrix    
## 
## Moran I statistic standard deviate = 2.5575, p-value = 0.005272
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.18995395       -0.03703704        0.00787767

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, cut_off_50.weight_matrix, randomisation=FALSE) ; spaces

## 
##  Moran I test under normality
## 
## data:  var_to_analyze  
## weights: cut_off_50.weight_matrix    
## 
## Moran I statistic standard deviate = 2.9162, p-value = 0.001771
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.167284857      -0.037037037       0.004908902

## [1] "--------------------------------------------------"

# under the assumption of randomisation
print('Under Randomisation Assumptions')

## [1] "Under Randomisation Assumptions"

moran.test(var_to_analyze, knn_1.weight_matrix, randomisation=TRUE); spaces

## 
##  Moran I test under randomisation
## 
## data:  var_to_analyze  
## weights: knn_1.weight_matrix    
## 
## Moran I statistic standard deviate = 2.557, p-value = 0.005279
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.50755375       -0.03703704        0.04536124

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, knn_2.weight_matrix, randomisation=TRUE); spaces

## 
##  Moran I test under randomisation
## 
## data:  var_to_analyze  
## weights: knn_2.weight_matrix    
## 
## Moran I statistic standard deviate = 3.1462, p-value = 0.0008271
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.43608367       -0.03703704        0.02261387

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, knn_5.weight_matrix, randomisation=TRUE); spaces

## 
##  Moran I test under randomisation
## 
## data:  var_to_analyze  
## weights: knn_5.weight_matrix    
## 
## Moran I statistic standard deviate = 2.8122, p-value = 0.00246
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.212957583      -0.037037037       0.007902364

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, cut_off_minimum.weight_matrix, randomisation=TRUE); spaces

## 
##  Moran I test under randomisation
## 
## data:  var_to_analyze  
## weights: cut_off_minimum.weight_matrix    
## 
## Moran I statistic standard deviate = 2.8889, p-value = 0.001933
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.245602703      -0.037037037       0.009572089

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, cut_off_20.weight_matrix, randomisation=TRUE); spaces

## 
##  Moran I test under randomisation
## 
## data:  var_to_analyze  
## weights: cut_off_20.weight_matrix    
## 
## Moran I statistic standard deviate = 2.8259, p-value = 0.002358
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.189953951      -0.037037037       0.006452278

## [1] "--------------------------------------------------"

moran.test(var_to_analyze, cut_off_50.weight_matrix, randomisation=TRUE); spaces

## 
##  Moran I test under randomisation
## 
## data:  var_to_analyze  
## weights: cut_off_50.weight_matrix    
## 
## Moran I statistic standard deviate = 3.2245, p-value = 0.0006309
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.167284857      -0.037037037       0.004015067

## [1] "--------------------------------------------------"

#perform the global Morans I test with MonteCarlo simulation
moran.mc(var_to_analyze, knn_1.weight_matrix, nsim = 999); spaces

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  var_to_analyze 
## weights: knn_1.weight_matrix  
## number of simulations + 1: 1000 
## 
## statistic = 0.50755, observed rank = 999, p-value = 0.001
## alternative hypothesis: greater

## [1] "--------------------------------------------------"

moran.mc(var_to_analyze, knn_2.weight_matrix, nsim = 999); spaces

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  var_to_analyze 
## weights: knn_2.weight_matrix  
## number of simulations + 1: 1000 
## 
## statistic = 0.43608, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater

## [1] "--------------------------------------------------"

moran.mc(var_to_analyze, knn_5.weight_matrix, nsim = 999); spaces

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  var_to_analyze 
## weights: knn_5.weight_matrix  
## number of simulations + 1: 1000 
## 
## statistic = 0.21296, observed rank = 986, p-value = 0.014
## alternative hypothesis: greater

## [1] "--------------------------------------------------"

moran.mc(var_to_analyze, cut_off_minimum.weight_matrix, nsim = 999); spaces

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  var_to_analyze 
## weights: cut_off_minimum.weight_matrix  
## number of simulations + 1: 1000 
## 
## statistic = 0.2456, observed rank = 997, p-value = 0.003
## alternative hypothesis: greater

## [1] "--------------------------------------------------"

moran.mc(var_to_analyze, cut_off_20.weight_matrix, nsim = 999); spaces

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  var_to_analyze 
## weights: cut_off_20.weight_matrix  
## number of simulations + 1: 1000 
## 
## statistic = 0.18995, observed rank = 991, p-value = 0.009
## alternative hypothesis: greater

## [1] "--------------------------------------------------"

moran.mc(var_to_analyze, cut_off_50.weight_matrix, nsim = 999); spaces

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  var_to_analyze 
## weights: cut_off_50.weight_matrix  
## number of simulations + 1: 1000 
## 
## statistic = 0.16728, observed rank = 988, p-value = 0.012
## alternative hypothesis: greater

## [1] "--------------------------------------------------"

The values are coherent across the three approaches, it seems that there is a positive global spatial autocorrelation when we use a metric for computing the neighbours very small as $k=1$ or $k=2$ reaching a Moran I statistic = $.45$, $.37$.

The analysis turned out to be positive, there is spatial correlation in the prices among alpine huts, there could be many reasons for that for instance, since it is very high for close neighbour, a possible explation could be related to the condition of the trail for providing resources and food to the huts or many others possibilities.

Prices vs Elevation

Another possibility is not to involve any kind of spatial prospective but trying to understand if the prices are really related to the elevation or not. To do this we perform a simple linear regression, trying to understand trough the parameters the impact of the elevation into the prices.

intersting_cols <- c("elevation",  "capacity", "posto_le_1")

lm_elevation <- lm(posto_le_1 ~ ., dolomiti_area_prices[,intersting_cols])
summary(lm_elevation)

## 
## Call:
## lm(formula = posto_le_1 ~ ., data = dolomiti_area_prices[, intersting_cols])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.41423 -0.34017 -0.00501  0.37763  1.10768 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.455e+02  4.811e+01   3.025  0.00669 ** 
## elevation    3.370e-03  3.584e-04   9.403 8.82e-09 ***
## capacity     1.308e-03  5.004e-03   0.261  0.79646    
## coords.x1   -7.971e-01  3.516e-01  -2.267  0.03464 *  
## coords.x2   -2.403e+00  1.097e+00  -2.190  0.04058 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.655 on 20 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8819, Adjusted R-squared:  0.8583 
## F-statistic: 37.33 on 4 and 20 DF,  p-value: 5.188e-09

As expected also therelevation seems very important for predicting the prices of the alpine huts.

Alpine huts price analysis

Gabriele

Alpine huts price analysis

Visualize the data

Explore the huts prices and details.

Critical cut-off neighbourhood

Defining matrices of spatial weights

Prices vs Elevation