Course Outline
-
segmentLearnosity
-
segmentCKCode
-
ckcode-chapter-b2-quantifying-error
list Items Test Book
Book
ckcode ⌲ chapter-b2-quantifying-error
require(coursekata)
empty_model <- lm(Thumb ~ NULL, data = Fingers)
Fingers <- Fingers %>% mutate(
Predict = predict(empty_model),
Resid = resid(empty_model)
)
# try running this code
# then modify it to sum up the absolute values of the residuals
abs(Fingers$Resid)
sum(abs(Fingers$Resid))
ex() %>% {
check_function(., "sum") %>% check_result() %>% check_equal()
}
CK Code: B2_Code_Quantifying_01
require(coursekata)
empty_model <- lm(Thumb ~ NULL, data = Fingers)
Fingers <- Fingers %>% mutate(
Predict = predict(empty_model),
Resid = resid(empty_model)
)
# try running this code
# then modify it to sum up the squared residuals
Fingers$Resid^2
sum(Fingers$Resid^2)
ex() %>% {
check_function(., "sum") %>% check_result() %>% check_equal()
}
CK Code: B2_Code_Quantifying_02
require(coursekata)
# this sets the model at a particular value
model <- 60.1
# this calculates the sum of squares from the model
sum((Fingers$Thumb - model)^2)
# this sets the model at a particular value
model <- 60.1
# this calculates the sum of squares from the model
sum((Fingers$Thumb - model)^2)
ex() %>% check_error()
CK Code: B2_Code_Sum_01
require(coursekata)
# create an empty model of Thumb length from Fingers
empty_model <-
# analyze the model with supernova() to get the SS
supernova()
empty_model <- lm(Thumb ~ NULL, data = Fingers)
supernova(empty_model)
ex() %>% {
check_object(., "empty_model") %>% check_equal()
check_output_expr(., "supernova(empty_model)")
}
CK Code: B2_Code_Sum_02
require(coursekata)
empty_model <- lm(Thumb ~ NULL, data = Fingers)
# this creates the empty_model
empty_model <- lm(Thumb ~ NULL, data = Fingers)
# calculate the variance of Thumb from the Fingers data frame
var()
# use supernova() on the empty_model to calculate variance
supernova()
var(Fingers$Thumb)
supernova(empty_model)
ex() %>% check_function("var") %>% check_result() %>% check_equal()
ex() %>% check_function("supernova") %>% check_result() %>% check_equal()
CK Code: B2_Code_Variance_01
require(coursekata)
empty_model <- lm(Thumb ~ NULL, data = Fingers)
# calculate the standard deviation of Thumb from Fingers with sd()
# calculate the standard deviation with sqrt() and var()
# calculate the standard deviation with favstats()
sd(Fingers$Thumb)
sqrt(var(Fingers$Thumb))
favstats(~Thumb, data = Fingers)
ex() %>% {
check_function(., "sd") %>% check_result() %>% check_equal()
check_function(., "sqrt") %>% check_result() %>% check_equal()
check_function(., "favstats") %>% check_result() %>% check_equal()
}
CK Code: B2_Code_Standard_01
require(coursekata)
MindsetMatters$PoundsLost <- MindsetMatters$Wt - MindsetMatters$Wt2
# create an empty model of PoundsLost from MindsetMatters
empty_model <-
# find SS, var, and sd
# there are multiple correct solutions
empty_model <- lm(PoundsLost ~ NULL, data = MindsetMatters)
sum(resid(empty_model)^2)
var(MindsetMatters$PoundsLost)
sd(MindsetMatters$PoundsLost)
ex() %>% {
check_object(., "empty_model") %>% check_equal()
check_output(., 556.7)
check_output(., 7.52)
check_output(., 2.74)
}
CK Code: B2_Code_Standard_02
require(coursekata)
# z-score if distribution 1 were true
(37000 - 35000)/1000
# z-score if distribution 2 were true
(37000 - 35000)/1000
(37000 - 35000)/5000
ex() %>% {
check_output_expr(., "(37000 - 35000)/1000")
check_output_expr(., "(37000 - 35000)/5000", missing_msg="Did you divide by the sd of the second distribution?")
}
CK Code: B2_Code_Z_01
require(coursekata)
# this saves the mean and standard deviation of Thumb
mean <- mean(Fingers$Thumb)
sd <- sd(Fingers$Thumb)
# write code to calculate the z-score for a 65.1 mm Thumb
mean <- mean(Fingers$Thumb)
sd <- sd(Fingers$Thumb)
(65.1 - mean) / sd
ex() %>% {
check_output_expr(., "(65.1 - mean) / sd")
}
CK Code: B2_Code_Using_01
require(coursekata)
# do not remove setup code here even though sample_1 isn't used!
# it is important to call set.seed(1) and rnorm both times so that the tally matches the figures in the text
set.seed(1)
sample_1 <- data.frame(
outcome = rnorm(500, 100, 4),
explanatory = rep(1, 100)
)
sample_2 <- data.frame(
outcome = rnorm(500, 100, 12),
explanatory = rep(2, 100)
)
sample_2$greater_than_110 <- sample_2$outcome > 110
# get a tally of greater_than_110 in sample_2
tally()
# get the tally in proportions
tally()
tally(~greater_than_110, data = sample_2)
tally(~greater_than_110, data = sample_2, format = "proportion")
ex() %>% {
check_function(., "tally", index = 1) %>% check_result() %>% check_equal()
check_function(., "tally", index = 2) %>% check_result() %>% check_equal()
}
CK Code: B2_Code_Modeling_01
require(coursekata)
# modify this to create a boolean variable that records whether a Thumb is greater than 65.1
Fingers$GreaterThan65.1 <-
# modify this to find the proportion of GreaterThan65.1
tally(~ , data = , format = "proportion")
Fingers$GreaterThan65.1 <- Fingers$Thumb > 65.1
tally(~GreaterThan65.1, data = Fingers, format = "proportion")
ex() %>% {
check_object(., "Fingers") %>% check_column("GreaterThan65.1") %>% check_equal()
check_function(., "tally") %>% check_result() %>% check_equal()
}
CK Code: B2_Code_Modeling_02
require(coursekata)
set.seed(1)
# this resamples numbers -3 to 3, 1000 times
var1 <- resample(-3:3, 1000)
# put var1 into a data frame called somedata
somedata <-
var1 <- resample(-3:3, 1000)
somedata <- data.frame(var1)
ex() %>% check_object("somedata") %>% check_equal()
CK Code: B2_Code_ErrorNormal_01
require(coursekata)
set.seed(2)
somedata <- data.frame(var1 = resample(-3:3, 1000))
# Create a histogram of var1 from somedata
# Create a histogram of var1 from somedata
gf_histogram(~ var1, data = somedata)
ex() %>% check_or(.,
check_function(., "gf_histogram") %>% {
check_arg(., "object") %>% check_equal()
check_arg(., "data") %>% check_equal()
},
override_solution(., "gf_histogram(somedata, ~var1)") %>%
check_function("gf_histogram") %>% {
check_arg(., "object") %>% check_equal()
check_arg(., "gformula") %>% check_equal()
},
override_solution(., "gf_histogram(~somedata$var1)") %>%
check_function("gf_histogram") %>%
check_arg("object") %>%
check_equal()
)
CK Code: B2_Code_ErrorNormal_02
require(coursekata)
set.seed(2)
somedata <- 1:10 %>%
purrr::set_names(paste0("var", 1:10)) %>%
purrr::map_df(~resample(-3:3, 1000))
# write code to print out a few lines of somedata
# write code to create individual histograms
# the first one (var1) has been written for you
gf_histogram(~ var1, fill = "red", data = somedata)
# write code to print out a few lines of somedata
head(somedata)
# write code to create individual histograms
# the first one (var1) has been written for you
gf_histogram(~var1, fill = "red", data = somedata)
# colors, if any, do not affect scoring
gf_histogram(~var2, data = somedata)
gf_histogram(~var3, data = somedata)
gf_histogram(~var4, data = somedata)
gf_histogram(~var5, data = somedata)
gf_histogram(~var6, data = somedata)
gf_histogram(~var7, data = somedata)
gf_histogram(~var8, data = somedata)
gf_histogram(~var9, data = somedata)
gf_histogram(~var10, data = somedata)
ex() %>% {
check_function(., "head") %>% check_arg("x") %>% check_equal()
check_function(., "gf_histogram", index = 1) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 2) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 3) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 4) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 5) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 6) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 7) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 8) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 9) %>% check_arg("object") %>% check_equal()
check_function(., "gf_histogram", index = 10) %>% check_arg("object") %>% check_equal()
}
CK Code: B2_Code_ErrorNormal_03
require(coursekata)
set.seed(2)
somedata <- 1:10 %>%
purrr::set_names(paste0("var", 1:10)) %>%
purrr::map_df(~resample(-3:3, 1000))
# run this code
summary(somedata)
summary(somedata)
ex() %>% check_function("summary") %>% check_result() %>% check_equal()
success_msg("Fantastic! On to the next challenge")
CK Code: B2_Code_ErrorNormal_04
require(coursekata)
set.seed(2)
somedata <- 1:10 %>%
purrr::set_names(paste0("var", 1:10)) %>%
purrr::map_df(~resample(-3:3, 1000))
# save the total of the 10 variables to the total column in somedata
# (HINT: The sum function won't work in this case)
somedata$total <-
# this will make a density histogram of the totals
gf_dhistogram(~total, data = somedata, fill = "black")
# save the total of the 10 variables to the total column
somedata$total <- somedata$var1 + somedata$var2 + somedata$var3 + somedata$var4 + somedata$var5 + somedata$var6 + somedata$var7 + somedata$var8 + somedata$var9 + somedata$var10
# this will make a density histogram of the totals
gf_dhistogram(~total, data = somedata)
ex() %>% {
check_object(., "somedata") %>%
check_column("total") %>%
check_equal()
check_or(.,
check_function(., "gf_dhistogram") %>% {
check_arg(., "object") %>% check_equal()
check_arg(., "data") %>% check_equal()
},
override_solution(., "somedata$total <- rowSums(somedata); gf_dhistogram(somedata, ~total)") %>%
check_function("gf_dhistogram") %>% {
# check_arg(., "object") %>% check_equal() # weird bug here...
check_arg(., "gformula") %>% check_equal()
},
override_solution(., "somedata$total <- rowSums(somedata); gf_dhistogram(~somedata$total)") %>%
check_function("gf_dhistogram") %>%
check_arg("object") %>%
check_equal()
)
}
CK Code: B2_Code_ErrorNormal_05
require(coursekata)
set.seed(2)
somedata <- 1:10 %>%
purrr::set_names(paste0("var", 1:10)) %>%
purrr::map_df(~resample(-3:3, 1000)) %>%
mutate(total = var1 + var2 + var3 + var4 + var5 + var6 + var7 + var8 + var9 + var10)
# try changing the bin number to be smaller than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 30)
# try changing the bin number to be larger than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 30)
# try changing the bin number to be smaller than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 10)
# try changing the bin number to be larger than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 75)
ex() %>% check_error()
CK Code: B2_Code_ErrorNormal_06
require(coursekata)
# try running this code
xpnorm(65.1, mean(Fingers$Thumb), sd(Fingers$Thumb))
xpnorm(65.1, mean(Fingers$Thumb), sd(Fingers$Thumb))
ex() %>% check_error()
CK Code: B2_Code_UsingNormal_01
require(coursekata)
simulate_scores <- function(game, n, mean, sd) {
scores <- rnorm(n, mean, sd)
z <- (scores - mean) / sd
interval <- ifelse(z > 0, trunc(1 + z), trunc(z - 1))
data.frame(game = game, scores = scores, z = z, interval = interval, zone = abs(interval))
}
compare_score_distributions <- function(sd = 3500, mean = 35000, n = 1000, ..., .seed = 5) {
set.seed(.seed)
kargle <- simulate_scores("Kargle", 1000, 35000, 5000)
bargle <- simulate_scores("Bargle", 1000, 35000, 1000)
zargle <- simulate_scores("Zargle", n, mean, sd)
games <- vctrs::vec_c(kargle, bargle, zargle)
# combine all zones > 3 into a single "outside 3" zone
games$zone <- ifelse(games$zone > 3, "outside 3", games$zone)
# convert the proportions to cumulative proportions for all except "outside 3"
props <- data.frame(tally(zone ~ game, data = games, format = "proportion"))
props <- purrr::map_dfr(split(props, props$game), function(x) {
x$Freq <- c(cumsum(x$Freq[1:3]), x$Freq[4])
x
})
# re-format the table to be wide (one column per game)
zone_table <- tidyr::pivot_wider(props, names_from = game, values_from = Freq)
gf_histogram(~scores, fill = ~zone, data = games, bins = 160, alpha = .8) %>%
gf_facet_grid(game ~ .) %>%
print()
data.frame(zone_table)
}
# change the standard deviation to whatever you'd like it to be
# try to break the empirical rule!
compare_score_distributions(sd = 3500, mean = 35000, n = 1000)
ex() %>% check_error()
CK Code: B2_Code_Empirical_01
require(coursekata)
# run your code here
CK Code: B2_Code_Review2_01
require(coursekata)
# run your code here
CK Code: B2_Code_Review2_02
require(coursekata)
# run your code here
CK Code: B2_Code_Review2_03