# Load necessary libraries
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(agriutilities) #install first
library(corrplot)
## corrplot 0.92 loaded
# Load additional libraries for performance analysis
library(PerformanceAnalytics)
## Cargando paquete requerido: xts
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Adjuntando el paquete: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Adjuntando el paquete: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
library(psych)
##
## Adjuntando el paquete: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Lilliefors test for normality
library(nortest)
# Data loading
# Read the wine dataset from CSV; read.csv() treats the first row as the
# header by default.
wine_data <- read.csv("wine.csv")
# Preview the first rows of the dataset
head(wine_data)
## Alcohol Malic Ash Alcalinity Magnesium Phenols Flavanoids Nonflavanoid
## 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28
## 2 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26
## 3 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30
## 4 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24
## 5 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39
## 6 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34
## Proanthocyanins Intensity Hue OD280 Proline
## 1 2.29 5.64 1.04 3.92 1065
## 2 1.28 4.38 1.05 3.40 1050
## 3 2.81 5.68 1.03 3.17 1185
## 4 2.18 7.80 0.86 3.45 1480
## 5 1.82 4.32 1.04 2.93 735
## 6 1.97 6.75 1.05 2.85 1450
# List the column names of the dataset
names(wine_data)
## [1] "Alcohol" "Malic" "Ash" "Alcalinity"
## [5] "Magnesium" "Phenols" "Flavanoids" "Nonflavanoid"
## [9] "Proanthocyanins" "Intensity" "Hue" "OD280"
## [13] "Proline"
# Put the columns of wine_data on the search path so they can be referenced
# by bare name.
# NOTE(review): attach() is generally discouraged; prefer with() or `data =`
# and drop this call once nothing references bare column names.
attach(wine_data)
# Correlation matrix of all columns, drawn as an upper-triangle corrplot with
# variables ordered by hierarchical clustering and labels rotated 45 degrees.
corrplot(
  cor(wine_data),
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)
De acuerdo con este análisis, se aprecian algunas correlaciones
interesantes; por ejemplo, la correlación de OD280 con Phenols y
Flavanoids, así como la de Proanthocyanins con Phenols y Flavanoids.
También se evidencia que algunas variables no están correlacionadas,
como, por ejemplo, Hue y Malic, entre otras.
# Scatter-plot matrix of every pair of columns
pairs(wine_data)
# Correlation chart from PerformanceAnalytics with histograms enabled and
# solid points (pch = 19)
chart.Correlation(wine_data, pch = 19, histogram = TRUE)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
# Shapiro-Wilk test for normality of Alcohol.
# with() evaluates the call inside wine_data, so the column is found without
# relying on attach(); the reported data name is still "Alcohol".
with(wine_data, shapiro.test(Alcohol))
##
## Shapiro-Wilk normality test
##
## data: Alcohol
## W = 0.9818, p-value = 0.02005
# Histogram of Alcohol for a visual normality check; with() resolves the
# column from wine_data instead of relying on attach().
with(wine_data, hist(Alcohol))
# Pairwise panels (psych) using Pearson correlations, with density curves and
# smoothers switched on and correlation ellipses switched off.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
pairs.panels(
  wine_data,
  method = "pearson",
  density = TRUE,
  ellipses = FALSE,
  smooth = TRUE
)
# Histogram of Alcohol shown next to the Lilliefors test below
with(wine_data, hist(Alcohol))
# Lilliefors (Kolmogorov-Smirnov) normality test from nortest; with() finds
# the column in wine_data without depending on attach() and keeps the
# reported data name as "Alcohol".
with(wine_data, lillie.test(Alcohol))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: Alcohol
## D = 0.067924, p-value = 0.04405
# Multiple regression models
# Intercept-only (null) model for Alcohol
RLM_vacio <- lm(formula = Alcohol ~ 1, data = wine_data)
# Coefficient table and residual summary of the null model
summary(RLM_vacio)
##
## Call:
## lm(formula = Alcohol ~ 1, data = wine_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97062 -0.63812 0.04938 0.67688 1.82938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.00062 0.06085 213.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8118 on 177 degrees of freedom
# Full model: Alcohol regressed on every other column of wine_data
RLM_Completo <- lm(formula = Alcohol ~ ., data = wine_data)
# Coefficient table and fit statistics of the full model
summary(RLM_Completo)
##
## Call:
## lm(formula = Alcohol ~ ., data = wine_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.45178 -0.30646 -0.02275 0.33197 1.54411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.107e+01 5.964e-01 18.565 < 2e-16 ***
## Malic 1.316e-01 4.528e-02 2.907 0.00415 **
## Ash 1.379e-01 2.169e-01 0.636 0.52582
## Alcalinity -3.779e-02 1.781e-02 -2.122 0.03536 *
## Magnesium 4.248e-06 3.359e-03 0.001 0.99899
## Phenols 5.211e-02 1.340e-01 0.389 0.69779
## Flavanoids 9.129e-03 1.069e-01 0.085 0.93208
## Nonflavanoid -2.077e-01 4.336e-01 -0.479 0.63251
## Proanthocyanins -1.525e-01 9.823e-02 -1.553 0.12246
## Intensity 1.630e-01 2.745e-02 5.940 1.64e-08 ***
## Hue 2.167e-01 2.811e-01 0.771 0.44182
## OD280 1.608e-01 1.097e-01 1.466 0.14458
## Proline 1.016e-03 2.000e-04 5.080 1.01e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5361 on 165 degrees of freedom
## Multiple R-squared: 0.5936, Adjusted R-squared: 0.564
## F-statistic: 20.08 on 12 and 165 DF, p-value: < 2.2e-16
# AIC of the full model.
# NOTE: the AIC values printed in step() traces are on a different scale
# (they differ from AIC() by an additive constant for this dataset), so
# compare AIC() results only with other AIC() results.
AIC(RLM_Completo)
## [1] 297.67
Es claro que el modelo es demasiado extenso; veamos entonces lo que hace R con la selección forward.
# Stepwise model selection approaches
# Forward selection: start from the null model and add terms, searching
# between the null model (lower) and the full model (upper).
RLM_Forward <- step(
  RLM_vacio,
  direction = "forward",
  scope = list(lower = RLM_vacio, upper = RLM_Completo)
)
## Start: AIC=-73.22
## Alcohol ~ 1
##
## Df Sum of Sq RSS AIC
## + Proline 1 48.339 68.315 -166.461
## + Intensity 1 34.823 81.831 -134.328
## + Alcalinity 1 11.227 105.427 -89.231
## + Phenols 1 9.750 106.904 -86.753
## + Magnesium 1 8.554 108.100 -84.774
## + Flavanoids 1 6.542 110.112 -81.491
## + Ash 1 5.220 111.434 -79.367
## + Nonflavanoid 1 2.836 113.818 -75.599
## + Proanthocyanins 1 2.180 114.474 -74.575
## <none> 116.654 -73.218
## + Malic 1 1.039 115.615 -72.811
## + OD280 1 0.611 116.044 -72.152
## + Hue 1 0.602 116.052 -72.139
##
## Step: AIC=-166.46
## Alcohol ~ Proline
##
## Df Sum of Sq RSS AIC
## + Intensity 1 15.2375 53.078 -209.38
## + Hue 1 6.1936 62.122 -181.38
## + Malic 1 5.7560 62.559 -180.13
## + OD280 1 2.1513 66.164 -170.16
## + Flavanoids 1 1.0204 67.295 -167.14
## <none> 68.315 -166.46
## + Proanthocyanins 1 0.7563 67.559 -166.44
## + Ash 1 0.5610 67.754 -165.93
## + Nonflavanoid 1 0.2560 68.059 -165.13
## + Phenols 1 0.1544 68.161 -164.86
## + Alcalinity 1 0.1025 68.213 -164.73
## + Magnesium 1 0.0427 68.273 -164.57
##
## Step: AIC=-209.38
## Alcohol ~ Proline + Intensity
##
## Df Sum of Sq RSS AIC
## + Malic 1 1.36256 51.715 -212.01
## + Alcalinity 1 1.12964 51.948 -211.21
## + OD280 1 1.02097 52.057 -210.84
## <none> 53.078 -209.38
## + Phenols 1 0.40638 52.672 -208.75
## + Flavanoids 1 0.35311 52.725 -208.57
## + Nonflavanoid 1 0.29314 52.785 -208.37
## + Proanthocyanins 1 0.09458 52.983 -207.70
## + Magnesium 1 0.01749 53.060 -207.44
## + Hue 1 0.00241 53.076 -207.39
## + Ash 1 0.00216 53.076 -207.39
##
## Step: AIC=-212.01
## Alcohol ~ Proline + Intensity + Malic
##
## Df Sum of Sq RSS AIC
## + Alcalinity 1 1.68641 50.029 -215.91
## + OD280 1 1.53387 50.182 -215.37
## + Flavanoids 1 0.90662 50.809 -213.16
## + Phenols 1 0.82660 50.889 -212.88
## + Nonflavanoid 1 0.58076 51.135 -212.02
## <none> 51.715 -212.01
## + Hue 1 0.42070 51.295 -211.47
## + Ash 1 0.05641 51.659 -210.21
## + Proanthocyanins 1 0.02455 51.691 -210.10
## + Magnesium 1 0.01569 51.700 -210.07
##
## Step: AIC=-215.91
## Alcohol ~ Proline + Intensity + Malic + Alcalinity
##
## Df Sum of Sq RSS AIC
## + OD280 1 1.41984 48.609 -219.04
## + Flavanoids 1 0.76507 49.264 -216.66
## + Phenols 1 0.71512 49.314 -216.48
## <none> 50.029 -215.91
## + Ash 1 0.43639 49.593 -215.47
## + Hue 1 0.38945 49.640 -215.31
## + Nonflavanoid 1 0.26263 49.766 -214.85
## + Proanthocyanins 1 0.02950 50.000 -214.02
## + Magnesium 1 0.00000 50.029 -213.91
##
## Step: AIC=-219.04
## Alcohol ~ Proline + Intensity + Malic + Alcalinity + OD280
##
## Df Sum of Sq RSS AIC
## + Proanthocyanins 1 0.65830 47.951 -219.47
## <none> 48.609 -219.04
## + Ash 1 0.24762 48.362 -217.95
## + Hue 1 0.16350 48.446 -217.64
## + Phenols 1 0.01282 48.596 -217.09
## + Nonflavanoid 1 0.00859 48.601 -217.07
## + Flavanoids 1 0.00234 48.607 -217.05
## + Magnesium 1 0.00042 48.609 -217.04
##
## Step: AIC=-219.47
## Alcohol ~ Proline + Intensity + Malic + Alcalinity + OD280 +
## Proanthocyanins
##
## Df Sum of Sq RSS AIC
## <none> 47.951 -219.47
## + Hue 1 0.213074 47.738 -218.26
## + Ash 1 0.179158 47.772 -218.13
## + Flavanoids 1 0.178790 47.772 -218.13
## + Phenols 1 0.166484 47.784 -218.09
## + Nonflavanoid 1 0.032445 47.918 -217.59
## + Magnesium 1 0.024580 47.926 -217.56
# Coefficient table and fit statistics of the forward-selected model
summary(RLM_Forward)
##
## Call:
## lm(formula = Alcohol ~ Proline + Intensity + Malic + Alcalinity +
## OD280 + Proanthocyanins, data = wine_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.50233 -0.34225 0.00116 0.33005 1.69364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.3332831 0.3943623 28.738 < 2e-16 ***
## Proline 0.0011358 0.0001708 6.651 3.76e-10 ***
## Intensity 0.1585200 0.0231627 6.844 1.32e-10 ***
## Malic 0.1143127 0.0397878 2.873 0.00458 **
## Alcalinity -0.0324405 0.0137533 -2.359 0.01947 *
## OD280 0.2254528 0.0834109 2.703 0.00757 **
## Proanthocyanins -0.1296362 0.0846088 -1.532 0.12732
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5295 on 171 degrees of freedom
## Multiple R-squared: 0.5889, Adjusted R-squared: 0.5745
## F-statistic: 40.83 on 6 and 171 DF, p-value: < 2.2e-16
# AIC of the forward-selected model (comparable with other AIC() results)
AIC(RLM_Forward)
## [1] 287.6761
# Backward elimination: start from the full model and drop terms, searching
# between the null model (lower) and the full model (upper).
RLM_Backward <- step(
  RLM_Completo,
  direction = "backward",
  scope = list(lower = RLM_vacio, upper = RLM_Completo)
)
## Start: AIC=-209.47
## Alcohol ~ Malic + Ash + Alcalinity + Magnesium + Phenols + Flavanoids +
## Nonflavanoid + Proanthocyanins + Intensity + Hue + OD280 +
## Proline
##
## Df Sum of Sq RSS AIC
## - Magnesium 1 0.0000 47.413 -211.47
## - Flavanoids 1 0.0021 47.416 -211.46
## - Phenols 1 0.0435 47.457 -211.31
## - Nonflavanoid 1 0.0660 47.479 -211.22
## - Ash 1 0.1161 47.530 -211.04
## - Hue 1 0.1708 47.584 -210.83
## <none> 47.413 -209.47
## - OD280 1 0.6175 48.031 -209.17
## - Proanthocyanins 1 0.6926 48.106 -208.89
## - Alcalinity 1 1.2936 48.707 -206.68
## - Malic 1 2.4284 49.842 -202.58
## - Proline 1 7.4167 54.830 -185.60
## - Intensity 1 10.1393 57.553 -176.98
##
## Step: AIC=-211.47
## Alcohol ~ Malic + Ash + Alcalinity + Phenols + Flavanoids + Nonflavanoid +
## Proanthocyanins + Intensity + Hue + OD280 + Proline
##
## Df Sum of Sq RSS AIC
## - Flavanoids 1 0.0021 47.416 -213.46
## - Phenols 1 0.0436 47.457 -213.31
## - Nonflavanoid 1 0.0719 47.485 -213.20
## - Ash 1 0.1264 47.540 -213.00
## - Hue 1 0.1712 47.585 -212.83
## <none> 47.413 -211.47
## - OD280 1 0.6277 48.041 -211.13
## - Proanthocyanins 1 0.7194 48.133 -210.79
## - Alcalinity 1 1.2976 48.711 -208.67
## - Malic 1 2.4293 49.843 -204.58
## - Proline 1 7.6970 55.110 -186.69
## - Intensity 1 10.1400 57.553 -178.97
##
## Step: AIC=-213.46
## Alcohol ~ Malic + Ash + Alcalinity + Phenols + Nonflavanoid +
## Proanthocyanins + Intensity + Hue + OD280 + Proline
##
## Df Sum of Sq RSS AIC
## - Phenols 1 0.0806 47.496 -215.16
## - Nonflavanoid 1 0.0818 47.497 -215.16
## - Ash 1 0.1407 47.556 -214.94
## - Hue 1 0.1823 47.598 -214.78
## <none> 47.416 -213.46
## - OD280 1 0.7335 48.149 -212.73
## - Proanthocyanins 1 0.7625 48.178 -212.62
## - Alcalinity 1 1.3364 48.752 -210.52
## - Malic 1 2.4381 49.854 -206.54
## - Proline 1 7.7137 55.129 -188.63
## - Intensity 1 10.1415 57.557 -180.96
##
## Step: AIC=-215.16
## Alcohol ~ Malic + Ash + Alcalinity + Nonflavanoid + Proanthocyanins +
## Intensity + Hue + OD280 + Proline
##
## Df Sum of Sq RSS AIC
## - Nonflavanoid 1 0.0960 47.592 -216.80
## - Ash 1 0.1828 47.679 -216.48
## - Hue 1 0.2087 47.705 -216.38
## <none> 47.496 -215.16
## - Proanthocyanins 1 0.6820 48.178 -214.62
## - OD280 1 1.2365 48.733 -212.59
## - Alcalinity 1 1.4343 48.931 -211.87
## - Malic 1 2.3885 49.885 -208.43
## - Proline 1 7.9424 55.439 -189.64
## - Intensity 1 10.8271 58.323 -180.61
##
## Step: AIC=-216.8
## Alcohol ~ Malic + Ash + Alcalinity + Proanthocyanins + Intensity +
## Hue + OD280 + Proline
##
## Df Sum of Sq RSS AIC
## - Ash 1 0.1456 47.738 -218.26
## - Hue 1 0.1795 47.772 -218.13
## <none> 47.592 -216.80
## - Proanthocyanins 1 0.6379 48.230 -216.43
## - Alcalinity 1 1.4924 49.085 -213.31
## - OD280 1 1.6328 49.225 -212.80
## - Malic 1 2.3115 49.904 -210.36
## - Proline 1 8.3587 55.951 -190.00
## - Intensity 1 10.8041 58.396 -182.39
##
## Step: AIC=-218.26
## Alcohol ~ Malic + Alcalinity + Proanthocyanins + Intensity +
## Hue + OD280 + Proline
##
## Df Sum of Sq RSS AIC
## - Hue 1 0.2131 47.951 -219.47
## <none> 47.738 -218.26
## - Proanthocyanins 1 0.7079 48.446 -217.64
## - Alcalinity 1 1.5453 49.283 -214.59
## - OD280 1 1.8304 49.568 -213.56
## - Malic 1 2.4795 50.217 -211.25
## - Proline 1 10.5966 58.334 -184.57
## - Intensity 1 11.7108 59.449 -181.21
##
## Step: AIC=-219.47
## Alcohol ~ Malic + Alcalinity + Proanthocyanins + Intensity +
## OD280 + Proline
##
## Df Sum of Sq RSS AIC
## <none> 47.951 -219.47
## - Proanthocyanins 1 0.6583 48.609 -219.04
## - Alcalinity 1 1.5601 49.511 -215.77
## - OD280 1 2.0486 50.000 -214.02
## - Malic 1 2.3147 50.266 -213.07
## - Proline 1 12.4058 60.357 -180.51
## - Intensity 1 13.1338 61.085 -178.38
# Coefficient table and fit statistics of the backward-selected model
summary(RLM_Backward)
##
## Call:
## lm(formula = Alcohol ~ Malic + Alcalinity + Proanthocyanins +
## Intensity + OD280 + Proline, data = wine_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.50233 -0.34225 0.00116 0.33005 1.69364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.3332831 0.3943623 28.738 < 2e-16 ***
## Malic 0.1143127 0.0397878 2.873 0.00458 **
## Alcalinity -0.0324405 0.0137533 -2.359 0.01947 *
## Proanthocyanins -0.1296362 0.0846088 -1.532 0.12732
## Intensity 0.1585200 0.0231627 6.844 1.32e-10 ***
## OD280 0.2254528 0.0834109 2.703 0.00757 **
## Proline 0.0011358 0.0001708 6.651 3.76e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5295 on 171 degrees of freedom
## Multiple R-squared: 0.5889, Adjusted R-squared: 0.5745
## F-statistic: 40.83 on 6 and 171 DF, p-value: < 2.2e-16
# AIC of the backward-selected model (comparable with other AIC() results)
AIC(RLM_Backward)
## [1] 287.6761
# Bidirectional stepwise selection: start from the null model and allow terms
# to be added or dropped at each step, between the null and full models.
RLM_Stepwise <- step(
  RLM_vacio,
  direction = "both",
  scope = list(lower = RLM_vacio, upper = RLM_Completo)
)
## Start: AIC=-73.22
## Alcohol ~ 1
##
## Df Sum of Sq RSS AIC
## + Proline 1 48.339 68.315 -166.461
## + Intensity 1 34.823 81.831 -134.328
## + Alcalinity 1 11.227 105.427 -89.231
## + Phenols 1 9.750 106.904 -86.753
## + Magnesium 1 8.554 108.100 -84.774
## + Flavanoids 1 6.542 110.112 -81.491
## + Ash 1 5.220 111.434 -79.367
## + Nonflavanoid 1 2.836 113.818 -75.599
## + Proanthocyanins 1 2.180 114.474 -74.575
## <none> 116.654 -73.218
## + Malic 1 1.039 115.615 -72.811
## + OD280 1 0.611 116.044 -72.152
## + Hue 1 0.602 116.052 -72.139
##
## Step: AIC=-166.46
## Alcohol ~ Proline
##
## Df Sum of Sq RSS AIC
## + Intensity 1 15.237 53.078 -209.384
## + Hue 1 6.194 62.122 -181.378
## + Malic 1 5.756 62.559 -180.128
## + OD280 1 2.151 66.164 -170.157
## + Flavanoids 1 1.020 67.295 -167.140
## <none> 68.315 -166.461
## + Proanthocyanins 1 0.756 67.559 -166.443
## + Ash 1 0.561 67.754 -165.929
## + Nonflavanoid 1 0.256 68.059 -165.129
## + Phenols 1 0.154 68.161 -164.864
## + Alcalinity 1 0.103 68.213 -164.729
## + Magnesium 1 0.043 68.273 -164.573
## - Proline 1 48.339 116.654 -73.218
##
## Step: AIC=-209.38
## Alcohol ~ Proline + Intensity
##
## Df Sum of Sq RSS AIC
## + Malic 1 1.3626 51.715 -212.01
## + Alcalinity 1 1.1296 51.948 -211.21
## + OD280 1 1.0210 52.057 -210.84
## <none> 53.078 -209.38
## + Phenols 1 0.4064 52.672 -208.75
## + Flavanoids 1 0.3531 52.725 -208.57
## + Nonflavanoid 1 0.2931 52.785 -208.37
## + Proanthocyanins 1 0.0946 52.983 -207.70
## + Magnesium 1 0.0175 53.060 -207.44
## + Hue 1 0.0024 53.076 -207.39
## + Ash 1 0.0022 53.076 -207.39
## - Intensity 1 15.2375 68.315 -166.46
## - Proline 1 28.7532 81.831 -134.33
##
## Step: AIC=-212.01
## Alcohol ~ Proline + Intensity + Malic
##
## Df Sum of Sq RSS AIC
## + Alcalinity 1 1.6864 50.029 -215.91
## + OD280 1 1.5339 50.182 -215.37
## + Flavanoids 1 0.9066 50.809 -213.16
## + Phenols 1 0.8266 50.889 -212.88
## + Nonflavanoid 1 0.5808 51.135 -212.02
## <none> 51.715 -212.01
## + Hue 1 0.4207 51.295 -211.47
## + Ash 1 0.0564 51.659 -210.21
## + Proanthocyanins 1 0.0246 51.691 -210.10
## + Magnesium 1 0.0157 51.700 -210.07
## - Malic 1 1.3626 53.078 -209.38
## - Intensity 1 10.8441 62.559 -180.13
## - Proline 1 29.9001 81.616 -132.80
##
## Step: AIC=-215.91
## Alcohol ~ Proline + Intensity + Malic + Alcalinity
##
## Df Sum of Sq RSS AIC
## + OD280 1 1.4198 48.609 -219.04
## + Flavanoids 1 0.7651 49.264 -216.66
## + Phenols 1 0.7151 49.314 -216.48
## <none> 50.029 -215.91
## + Ash 1 0.4364 49.593 -215.47
## + Hue 1 0.3895 49.640 -215.31
## + Nonflavanoid 1 0.2626 49.766 -214.85
## + Proanthocyanins 1 0.0295 50.000 -214.02
## + Magnesium 1 0.0000 50.029 -213.91
## - Alcalinity 1 1.6864 51.715 -212.01
## - Malic 1 1.9193 51.948 -211.21
## - Intensity 1 11.7206 61.750 -180.45
## - Proline 1 19.4861 69.515 -159.36
##
## Step: AIC=-219.04
## Alcohol ~ Proline + Intensity + Malic + Alcalinity + OD280
##
## Df Sum of Sq RSS AIC
## + Proanthocyanins 1 0.6583 47.951 -219.47
## <none> 48.609 -219.04
## + Ash 1 0.2476 48.362 -217.95
## + Hue 1 0.1635 48.446 -217.64
## + Phenols 1 0.0128 48.596 -217.09
## + Nonflavanoid 1 0.0086 48.601 -217.07
## + Flavanoids 1 0.0023 48.607 -217.05
## + Magnesium 1 0.0004 48.609 -217.04
## - OD280 1 1.4198 50.029 -215.91
## - Alcalinity 1 1.5724 50.182 -215.37
## - Malic 1 2.4554 51.065 -212.27
## - Proline 1 12.0473 60.656 -181.63
## - Intensity 1 12.4994 61.109 -180.31
##
## Step: AIC=-219.47
## Alcohol ~ Proline + Intensity + Malic + Alcalinity + OD280 +
## Proanthocyanins
##
## Df Sum of Sq RSS AIC
## <none> 47.951 -219.47
## - Proanthocyanins 1 0.6583 48.609 -219.04
## + Hue 1 0.2131 47.738 -218.26
## + Ash 1 0.1792 47.772 -218.13
## + Flavanoids 1 0.1788 47.772 -218.13
## + Phenols 1 0.1665 47.784 -218.09
## + Nonflavanoid 1 0.0324 47.918 -217.59
## + Magnesium 1 0.0246 47.926 -217.56
## - Alcalinity 1 1.5601 49.511 -215.77
## - OD280 1 2.0486 50.000 -214.02
## - Malic 1 2.3147 50.266 -213.07
## - Proline 1 12.4058 60.357 -180.51
## - Intensity 1 13.1338 61.085 -178.38
# Coefficient table and fit statistics of the bidirectional stepwise model
summary(RLM_Stepwise)
##
## Call:
## lm(formula = Alcohol ~ Proline + Intensity + Malic + Alcalinity +
## OD280 + Proanthocyanins, data = wine_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.50233 -0.34225 0.00116 0.33005 1.69364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.3332831 0.3943623 28.738 < 2e-16 ***
## Proline 0.0011358 0.0001708 6.651 3.76e-10 ***
## Intensity 0.1585200 0.0231627 6.844 1.32e-10 ***
## Malic 0.1143127 0.0397878 2.873 0.00458 **
## Alcalinity -0.0324405 0.0137533 -2.359 0.01947 *
## OD280 0.2254528 0.0834109 2.703 0.00757 **
## Proanthocyanins -0.1296362 0.0846088 -1.532 0.12732
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5295 on 171 degrees of freedom
## Multiple R-squared: 0.5889, Adjusted R-squared: 0.5745
## F-statistic: 40.83 on 6 and 171 DF, p-value: < 2.2e-16
# Hand-picked model (author note: John Leal): Alcohol explained by Proline,
# Intensity, Malic and OD280 only.
# data = wine_data is passed explicitly so the fit does not depend on
# attach() and is specified the same way as the other models in this file.
RLM_propio1 <- lm(
  Alcohol ~ Proline + Intensity + Malic + OD280,
  data = wine_data
)
# Coefficient table and fit statistics of the hand-picked model
summary(RLM_propio1)
##
## Call:
## lm(formula = Alcohol ~ Proline + Intensity + Malic + OD280)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4030 -0.3367 -0.0018 0.3764 1.5213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.062e+01 2.650e-01 40.088 < 2e-16 ***
## Proline 1.263e-03 1.613e-04 7.827 4.77e-13 ***
## Intensity 1.477e-01 2.309e-02 6.397 1.43e-09 ***
## Malic 1.012e-01 3.979e-02 2.543 0.0119 *
## OD280 1.731e-01 7.526e-02 2.300 0.0227 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5386 on 173 degrees of freedom
## Multiple R-squared: 0.5698, Adjusted R-squared: 0.5599
## F-statistic: 57.29 on 4 and 173 DF, p-value: < 2.2e-16
# AIC of the hand-picked model (comparable with other AIC() results)
AIC(RLM_propio1)
## [1] 291.7699
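Es un modelo que tiene un buen comportamiento, más sencillo que los que proponen los algoritmos de R y con buenos resultados para la evaluación: aunque su AIC (291.77) es algo mayor que el del modelo seleccionado por step() (287.68), todos sus coeficientes son significativos al 5 %.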