8.14 Data Template
library(janitor) # Cleanup: clean_names().
library(rattle) # Dataset: weatherAUS.
library(magrittr) # Pipelines: %<>%.
library(randomForest) # Imputation: na.roughfix().
library(dplyr) # Wrangling: rename_all().
# Name of the dataset to process.  The dataset is fetched by name so the
# template can be pointed at other data by changing only this string.
dsname <- "weatherAUS"
ds <- get(dsname)

# Remember the number of observations and report the dimensions.
nobs <- nrow(ds)
dim(ds)

# Keep the original column names, then clean the names of the dataset.
vnames <- names(ds)
ds %<>% clean_names(numerals="right")

# vnames maps each cleaned name (element name) to its original name (value).
names(vnames) <- names(ds)
names(ds)

# Start from every column as a candidate variable.  Prepending the target,
# removing duplicates and reversing leaves the target as the last element.
vars <- names(ds)
target <- "rain_tomorrow"
vars <- c(target, vars) %>% unique() %>% rev()

# Pass the levels of every factor column through normVarNames().
# vapply() always yields a logical vector, so which() also works when ds
# has no columns; sapply() would return an empty list there and which()
# would fail on it.
for (v in which(vapply(ds, is.factor, logical(1)))) {
  levels(ds[[v]]) %<>% normVarNames()
}
# Columns that play a role other than model input: the identifiers and the
# risk variable.  Together they form the set of variables to ignore.
id <- c("date", "location")
risk <- "risk_mm"
ignore <- c(risk, id)

# Remove the ignored columns from the variables in use; the inputs are what
# is left once the target is removed as well.
vars %<>% setdiff(ignore)
inputs <- vars %>% setdiff(target)

# Model formula: the target against all other variables.
form <- formula(paste(target, "~ ."))

# Optional imputation of missing values, left disabled.
# ds[vars] %<>% na.roughfix()
# Proportions of the observations used for training, tuning and testing.
# Only the first two are used to size a sample; the test set is whatever
# remains after the training and tuning rows have been drawn.
SPLIT <- c(0.7, 0.15, 0.15)
stopifnot(
  length(SPLIT) == 3,
  all(SPLIT >= 0),
  abs(sum(SPLIT) - 1) < 1e-8
)

# Fix the random seed so the same partition is produced on every run.
set.seed(42)

# Explicit whole-number sample sizes (sample() would otherwise truncate the
# fractional sizes silently).
n.tr <- floor(SPLIT[1] * nobs)
n.tu <- floor(SPLIT[2] * nobs)

# Training rows: a random draw from all row indices.
tr <- sample.int(nobs, n.tr)

# Tuning rows: a random draw from the rows not used for training.  The
# draw is made over positions of `rest` because sample(x, k) interprets a
# length-one numeric x as 1:x and could then return rows already in tr.
rest <- setdiff(seq_len(nobs), tr)
tu <- rest[sample.int(length(rest), n.tu)]

# Testing rows: everything that is in neither tr nor tu.
te <- setdiff(rest, tu)
# Target values for the training, tuning and testing rows.
# `target` holds the column name as a string.  It is unquoted with !! so
# that pull() receives the string itself; a bare pull(target) would first
# look for a column literally named "target" and only then fall back to
# the variable, which picks the wrong column on datasets that have one.
target.tr <- ds %>% slice(tr) %>% pull(!!target)
target.tu <- ds %>% slice(tu) %>% pull(!!target)
target.te <- ds %>% slice(te) %>% pull(!!target)

# Risk values for the same three partitions.  These are only created when
# a risk variable has been named (risk is not NULL).
if (!is.null(risk)) {
  risk.tr <- ds %>% slice(tr) %>% pull(!!risk)
  risk.tu <- ds %>% slice(tu) %>% pull(!!risk)
  risk.te <- ds %>% slice(te) %>% pull(!!risk)
}
Your donation will support ongoing availability and give you access to the PDF version of this book. Desktop Survival Guides include Data Science, GNU/Linux, and MLHub. Books available on Amazon include Data Mining with Rattle and Essentials of Data Science. Popular open source software includes rattle, wajig, and mlhub. Hosted by Togaware, a pioneer of free and open source software since 1984. Copyright © 1995-2022 Graham.Williams@togaware.com Creative Commons Attribution-ShareAlike 4.0