# Setup: make sure every package this analysis uses is installed and attached,
# then fix session options and the random seed.

# 1) Ensure a CRAN mirror is set for non-interactive renders.
#    "@CRAN@" is R's placeholder for "no mirror chosen yet"; installing with it
#    in a non-interactive session fails because R cannot prompt for a mirror.
repos <- getOption("repos")
if (is.null(repos) || is.na(repos["CRAN"]) || repos["CRAN"] == "@CRAN@") {
  # Set only the CRAN entry so any other configured repositories are kept.
  repos["CRAN"] <- "https://cloud.r-project.org"
  options(repos = repos)
}

# 2) Declare the packages you use directly.
required_pkgs <- c(
  "readxl", "writexl", "dplyr", "ggplot2", "earth", "plotmo", "janitor"
)

# 3) Resolve all recursive dependencies (so 'cellranger' and friends are
#    included), then install whatever is not already in the library.
ap <- utils::available.packages() # uses the CRAN mirror set above
deps <- tools::package_dependencies(required_pkgs, db = ap, recursive = TRUE)
deps <- unique(unlist(deps, use.names = FALSE))
need <- setdiff(c(required_pkgs, deps), rownames(utils::installed.packages()))
if (length(need) > 0) {
  utils::install.packages(need)
}

# 4) Load only the top-level packages you actually use.
invisible(lapply(required_pkgs, library, character.only = TRUE))

# Silence dplyr's summarise() grouping message and fix the RNG seed so the
# results are reproducible.
options(dplyr.summarise.inform = FALSE)
set.seed(1234)

## Data: Excel Input (and a sample generator)
# This analysis expects an Excel file at `data/housing_sample.xlsx`. If it is
# missing, we generate a realistic sample so you can run end-to-end immediately.
Min. 1st Qu. Median Mean 3rd Qu. Max.
262434 564941 668166 659502 738206 1067306
2 Exploratory plots
Show code
library(ggplot2)

# Scatter plot of sale price against square footage, coloured by
# neighborhood, with one loess trend line per neighborhood (no
# confidence band).
ggplot(
  data = housing,
  mapping = aes(x = sqft, y = sale_price, color = neighborhood)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(title = "Price vs Sqft by Neighborhood")
Show code
# Box plots of sale price for each condition level. The fill colour repeats
# the x-axis grouping, so the legend is hidden as redundant.
ggplot(
  data = housing,
  mapping = aes(x = condition, y = sale_price, fill = condition)
) +
  geom_boxplot(alpha = 0.7) +
  labs(title = "Price Distribution by Condition") +
  theme(legend.position = "none")
3 Fit MARS (earth)
We fit a MARS model with the earth package, using 10-fold cross-validation for model selection.