CourseKata - ckcode-chapter-b2-quantifying-error

Items Test Book

Book

ckcode ⌲ chapter-b2-quantifying-error

require(coursekata)

empty_model <- lm(Thumb ~ NULL, data = Fingers)
Fingers <- Fingers %>% mutate(
    Predict = predict(empty_model),
    Resid = resid(empty_model)
)


# try running this code
# then modify it to sum up the absolute values of the residuals
abs(Fingers$Resid)


sum(abs(Fingers$Resid))

ex() %>% {

    check_function(., "sum") %>% check_result() %>% check_equal()
}

CK Code: B2_Code_Quantifying_01

require(coursekata)
empty_model <- lm(Thumb ~ NULL, data = Fingers)
Fingers <- Fingers %>% mutate(
    Predict = predict(empty_model),
    Resid = resid(empty_model)
)


# try running this code
# then modify it to sum up the squared residuals
Fingers$Resid^2

sum(Fingers$Resid^2)

ex() %>% {
    check_function(., "sum") %>% check_result() %>% check_equal()
}

CK Code: B2_Code_Quantifying_02

require(coursekata)

# this sets the model at a particular value
model <- 60.1

# this calculates the sum of squares from the model
sum((Fingers$Thumb - model)^2)

# this sets the model at a particular value
model <- 60.1

# this calculates the sum of squares from the model
sum((Fingers$Thumb - model)^2)

ex() %>% check_error()

CK Code: B2_Code_Sum_01

require(coursekata)

# create an empty model of Thumb length from Fingers
empty_model <-

# analyze the model with supernova() to get the SS
supernova()

empty_model <- lm(Thumb ~ NULL, data = Fingers)
supernova(empty_model)

ex() %>% {
    check_object(., "empty_model") %>% check_equal()
    check_output_expr(., "supernova(empty_model)")
}

CK Code: B2_Code_Sum_02

require(coursekata)
empty_model <- lm(Thumb ~ NULL, data = Fingers)

# this creates the empty_model
empty_model <- lm(Thumb ~ NULL, data = Fingers)

# calculate the variance of Thumb from the Fingers data frame
var()

# use supernova() on the empty_model to calculate variance
supernova()

var(Fingers$Thumb)
supernova(empty_model)

ex() %>% check_function("var") %>% check_result() %>% check_equal()
ex() %>% check_function("supernova") %>% check_result() %>% check_equal()

CK Code: B2_Code_Variance_01

require(coursekata)
empty_model <- lm(Thumb ~ NULL, data = Fingers)

# calculate the standard deviation of Thumb from Fingers with sd()

# calculate the standard deviation with sqrt() and var()

# calculate the standard deviation with favstats()

sd(Fingers$Thumb)
sqrt(var(Fingers$Thumb))
favstats(~Thumb, data = Fingers)

ex() %>% {
    check_function(., "sd") %>% check_result() %>% check_equal()
    check_function(., "sqrt") %>% check_result() %>% check_equal()
    check_function(., "favstats") %>% check_result() %>% check_equal()
}

CK Code: B2_Code_Standard_01

require(coursekata)
MindsetMatters$PoundsLost <- MindsetMatters$Wt - MindsetMatters$Wt2

# create an empty model of PoundsLost from MindsetMatters
empty_model <-

# find SS, var, and sd
# there are multiple correct solutions

empty_model <- lm(PoundsLost ~ NULL, data = MindsetMatters)
sum(resid(empty_model)^2)
var(MindsetMatters$PoundsLost)
sd(MindsetMatters$PoundsLost)

ex() %>% {
    check_object(., "empty_model") %>% check_equal()
    check_output(., 556.7)
    check_output(., 7.52)
    check_output(., 2.74)
}

CK Code: B2_Code_Standard_02

require(coursekata)

# z-score if distribution 1 were true
(37000 - 35000)/1000

# z-score if distribution 2 were true

(37000 - 35000)/1000
(37000 - 35000)/5000

ex() %>% {
    check_output_expr(., "(37000 - 35000)/1000")
    check_output_expr(., "(37000 - 35000)/5000", missing_msg="Did you divide by the sd of the second distribution?")
}

CK Code: B2_Code_Z_01

require(coursekata)

# this saves the mean and standard deviation of Thumb
mean <- mean(Fingers$Thumb)
sd <- sd(Fingers$Thumb)

# write code to calculate the z-score for a 65.1 mm Thumb

mean <- mean(Fingers$Thumb)
sd <- sd(Fingers$Thumb)
(65.1 - mean) / sd

ex() %>% {
    check_output_expr(., "(65.1 - mean) / sd")
}

CK Code: B2_Code_Using_01

require(coursekata)

# do not remove setup code here even though sample_1 isn't used!
# it is important to call set.seed(1) and rnorm both times so that the tally matches the figures in the text
set.seed(1)
sample_1 <- data.frame(
    outcome = rnorm(500, 100, 4),
    explanatory = rep(1, 100)
)
sample_2 <- data.frame(
    outcome = rnorm(500, 100, 12),
    explanatory = rep(2, 100)
)
sample_2$greater_than_110 <- sample_2$outcome > 110

# get a tally of greater_than_110 in sample_2
tally()

# get the tally in proportions
tally()

tally(~greater_than_110, data = sample_2)
tally(~greater_than_110, data = sample_2, format = "proportion")

ex() %>% {
    check_function(., "tally", index = 1) %>% check_result() %>% check_equal()
    check_function(., "tally", index = 2) %>% check_result() %>% check_equal()
}

CK Code: B2_Code_Modeling_01

require(coursekata)

# modify this to create a boolean variable that records whether a Thumb is greater than 65.1
Fingers$GreaterThan65.1 <-

# modify this to find the proportion of GreaterThan65.1
tally(~ , data = , format = "proportion")

Fingers$GreaterThan65.1 <- Fingers$Thumb > 65.1
tally(~GreaterThan65.1, data = Fingers, format = "proportion")

ex() %>% {
    check_object(., "Fingers") %>% check_column("GreaterThan65.1") %>% check_equal()
    check_function(., "tally") %>% check_result() %>% check_equal()
}

CK Code: B2_Code_Modeling_02

require(coursekata)
set.seed(1)

# this resamples numbers -3 to 3, 1000 times
var1 <- resample(-3:3, 1000)

# put var1 into a data frame called somedata
somedata <-

var1 <- resample(-3:3, 1000)
somedata <- data.frame(var1)

ex() %>% check_object("somedata") %>% check_equal()

CK Code: B2_Code_ErrorNormal_01

require(coursekata)

set.seed(2)
somedata <- data.frame(var1 = resample(-3:3, 1000))

# Create a histogram of var1 from somedata

# Create a histogram of var1 from somedata
gf_histogram(~ var1, data = somedata)

ex() %>% check_or(.,
  check_function(., "gf_histogram") %>% {
    check_arg(., "object") %>% check_equal()
    check_arg(., "data") %>% check_equal()
  },
  override_solution(., "gf_histogram(somedata, ~var1)") %>%
    check_function("gf_histogram") %>% {
      check_arg(., "object") %>% check_equal()
      check_arg(., "gformula") %>% check_equal()
    },
  override_solution(., "gf_histogram(~somedata$var1)") %>%
    check_function("gf_histogram") %>%
    check_arg("object") %>%
    check_equal()
)

CK Code: B2_Code_ErrorNormal_02

require(coursekata)

set.seed(2)
somedata <- 1:10 %>%
  purrr::set_names(paste0("var", 1:10)) %>%
  purrr::map_df(~resample(-3:3, 1000))

# write code to print out a few lines of somedata

# write code to create individual histograms
# the first one (var1) has been written for you
gf_histogram(~ var1, fill = "red", data = somedata)

# write code to print out a few lines of somedata
head(somedata)

# write code to create individual histograms
# the first one (var1) has been written for you
gf_histogram(~var1, fill = "red", data = somedata)

# colors, if any, do not affect scoring
gf_histogram(~var2, data = somedata)
gf_histogram(~var3, data = somedata)
gf_histogram(~var4, data = somedata)
gf_histogram(~var5, data = somedata)
gf_histogram(~var6, data = somedata)
gf_histogram(~var7, data = somedata)
gf_histogram(~var8, data = somedata)
gf_histogram(~var9, data = somedata)
gf_histogram(~var10, data = somedata)

ex() %>% {
  check_function(., "head") %>% check_arg("x") %>% check_equal()
  check_function(., "gf_histogram", index = 1) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 2) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 3) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 4) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 5) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 6) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 7) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 8) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 9) %>% check_arg("object") %>% check_equal()
  check_function(., "gf_histogram", index = 10) %>% check_arg("object") %>% check_equal()
}

CK Code: B2_Code_ErrorNormal_03

require(coursekata)

set.seed(2)
somedata <- 1:10 %>%
    purrr::set_names(paste0("var", 1:10)) %>%
    purrr::map_df(~resample(-3:3, 1000))

# run this code
summary(somedata)

summary(somedata)

ex() %>% check_function("summary") %>% check_result() %>% check_equal()
success_msg("Fantastic! On to the next challenge")

CK Code: B2_Code_ErrorNormal_04

require(coursekata)

set.seed(2)
somedata <- 1:10 %>%
  purrr::set_names(paste0("var", 1:10)) %>%
  purrr::map_df(~resample(-3:3, 1000))

# save the total of the 10 variables to the total column in somedata
# (HINT: The sum function won't work in this case)
somedata$total <-

# this will make a density histogram of the totals
gf_dhistogram(~total, data = somedata, fill = "black")

# save the total of the 10 variables to the total column
somedata$total <- somedata$var1 + somedata$var2 + somedata$var3 + somedata$var4 + somedata$var5 + somedata$var6 + somedata$var7 + somedata$var8 + somedata$var9 + somedata$var10

# this will make a density histogram of the totals
gf_dhistogram(~total, data = somedata)

ex() %>% {
  check_object(., "somedata") %>%
    check_column("total") %>%
    check_equal()
  check_or(.,
    check_function(., "gf_dhistogram") %>% {
      check_arg(., "object") %>% check_equal()
      check_arg(., "data") %>% check_equal()
    },
    override_solution(., "somedata$total <- rowSums(somedata); gf_dhistogram(somedata, ~total)") %>%
      check_function("gf_dhistogram") %>% {
        # check_arg(., "object") %>% check_equal() # weird bug here...
        check_arg(., "gformula") %>% check_equal()
      },
    override_solution(., "somedata$total <- rowSums(somedata); gf_dhistogram(~somedata$total)") %>%
      check_function("gf_dhistogram") %>%
      check_arg("object") %>%
      check_equal()
  )
}

CK Code: B2_Code_ErrorNormal_05

require(coursekata)

set.seed(2)
somedata <- 1:10 %>%
  purrr::set_names(paste0("var", 1:10)) %>%
  purrr::map_df(~resample(-3:3, 1000)) %>%
  mutate(total = var1 + var2 + var3 + var4 + var5 + var6 + var7 + var8 + var9 + var10)

# try changing the bin number to be smaller than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 30)

# try changing the bin number to be larger than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 30)

# try changing the bin number to be smaller than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 10)

# try changing the bin number to be larger than 30
gf_dhistogram(~ total, data = somedata, fill = "black", bins = 75)

ex() %>% check_error()

CK Code: B2_Code_ErrorNormal_06

require(coursekata)

# try running this code
xpnorm(65.1, mean(Fingers$Thumb), sd(Fingers$Thumb))

xpnorm(65.1, mean(Fingers$Thumb), sd(Fingers$Thumb))ex() %>% check_error()

CK Code: B2_Code_UsingNormal_01

require(coursekata)

simulate_scores <- function(game, n, mean, sd) {
  scores <- rnorm(n, mean, sd)
  z <- (scores - mean) / sd
  interval <- ifelse(z > 0, trunc(1 + z), trunc(z - 1))
  data.frame(game = game, scores = scores, z = z, interval = interval, zone = abs(interval))
}

compare_score_distributions <- function(sd = 3500, mean = 35000, n = 1000, ..., .seed = 5) {
  set.seed(.seed)
  kargle <- simulate_scores("Kargle", 1000, 35000, 5000)
  bargle <- simulate_scores("Bargle", 1000, 35000, 1000)
  zargle <- simulate_scores("Zargle", n, mean, sd)
  games <- vctrs::vec_c(kargle, bargle, zargle)

  # combine all zones > 3 into a single "outside 3" zone
  games$zone <- ifelse(games$zone > 3, "outside 3", games$zone)
  # convert the proportions to cumulative proportions for all except "outside 3"
  props <- data.frame(tally(zone ~ game, data = games, format = "proportion"))
  props <- purrr::map_dfr(split(props, props$game), function(x) {
    x$Freq <- c(cumsum(x$Freq[1:3]), x$Freq[4])
    x
  })
  # re-format the table to be wide (one column per game)
  zone_table <- tidyr::pivot_wider(props, names_from = game, values_from = Freq)

  gf_histogram(~scores, fill = ~zone, data = games, bins = 160, alpha = .8) %>%
    gf_facet_grid(game ~ .) %>%
    print()

  data.frame(zone_table)
}

# change the standard deviation to whatever you'd like it to be
# try to break the empirical rule!
compare_score_distributions(sd = 3500, mean = 35000, n = 1000)

ex() %>% check_error()

CK Code: B2_Code_Empirical_01

require(coursekata)

# run your code here

CK Code: B2_Code_Review2_01

require(coursekata)

# run your code here

CK Code: B2_Code_Review2_02

require(coursekata)

# run your code here

CK Code: B2_Code_Review2_03

ckcode-chapter-b1-simple-model ckcode-chapter-b3-adding-explanatory

Course Outline

Items Test Book

ckcode ⌲ chapter-b2-quantifying-error

Responses

list Items Test Book

ckcode ⌲ chapter-b2-quantifying-error

Items Test Book