Course Outline
-
segmentLearnosity
-
segmentCKCode
-
ckcode-what-you-learned
list Items Test Book
Book
ckcode ⌲ what-you-learned
# ---- Setup ----
require(coursekata)

# ---- Starter prompt / solution ----
# use the str() function to see what is in the hate_crimes data frame
# use the str() function to see what is in the hate_crimes data frame
str(hate_crimes)

# ---- Submission checks ----
# Look for a call to str(), then inspect its `object` argument with
# check_equal(); each step carries its own feedback message.
ex() %>%
  check_function(
    "str",
    not_called_msg = "Did you use the str() function?"
  ) %>%
  check_arg("object") %>%
  check_equal(
    incorrect_msg = "Did you call str on the hate_crimes data set?"
  )
CK Code: ch11-26
# ---- Setup ----
require(coursekata)

# ---- Starter prompt / solution ----
# make a plot to help us explore the variation in avg_hatecrimes_per_100k_fbi
# all of these are ways to look at a single continuous variable
# some are more helpful than others
gf_histogram(~avg_hatecrimes_per_100k_fbi, data = hate_crimes)
gf_dhistogram(~avg_hatecrimes_per_100k_fbi, data = hate_crimes)
gf_freqpoly(~avg_hatecrimes_per_100k_fbi, data = hate_crimes)
gf_density(~avg_hatecrimes_per_100k_fbi, data = hate_crimes)
gf_boxplot(avg_hatecrimes_per_100k_fbi ~ 1, data = hate_crimes)
gf_violin(avg_hatecrimes_per_100k_fbi ~ 1, data = hate_crimes)

# ---- Submission checks ----
# One alternative per plotting function used in the solution above; each
# alternative finds the call and runs check_result() %>% check_equal() on it.
ex() %>%
  check_or(
    check_function(., "gf_histogram") %>%
      check_result() %>%
      check_equal(),
    check_function(., "gf_dhistogram") %>%
      check_result() %>%
      check_equal(),
    check_function(., "gf_freqpoly") %>%
      check_result() %>%
      check_equal(),
    check_function(., "gf_density") %>%
      check_result() %>%
      check_equal(),
    check_function(., "gf_boxplot") %>%
      check_result() %>%
      check_equal(),
    check_function(., "gf_violin") %>%
      check_result() %>%
      check_equal()
  )
CK Code: ch11-27
# ---- Setup ----
require(coursekata)

# ---- Starter prompt ----
# Can you arrange the data frame to show you the places with the highest hate crime rates? Can you just print the 6 states with the highest crime rates?

# ---- Solution ----
# Can you arrange the data frame to show you the places with the highest hate crime rates? Can you just print the 6 states with the highest crime rates?
# there are multiple ways to approach this task. Here are a few possible solutions:
head(arrange(hate_crimes, desc(avg_hatecrimes_per_100k_fbi)))
hate_crimes <- arrange(hate_crimes, desc(avg_hatecrimes_per_100k_fbi))
head(select(hate_crimes, state, avg_hatecrimes_per_100k_fbi))

# ---- Submission checks ----
# Three checks run against the same exercise state: desc() was called,
# arrange() was called (its result goes through check_equal()), and head()
# was called.
ex() %>% {
  check_function(
    .,
    "desc",
    not_called_msg = "Did you use the desc() function to arrange avg_hatecrimes_per_100k_fbi in descending order?"
  )
  check_function(
    .,
    "arrange",
    not_called_msg = "Did you use the arrange() function to arrange avg_hatecrimes_per_100k_fbi in descending order?"
  ) %>%
    check_result() %>%
    check_equal()
  check_function(
    .,
    "head",
    not_called_msg = "Did you remember to print the first 6 rows of the data frame?"
  )
}
CK Code: ch11-28
# ---- Setup ----
require(coursekata)

# ---- Starter prompts ----
# make a visualization of hate crimes explained by unemployment
# make a visualization of hate crimes explained by household income

# ---- Solution ----
# make a visualization of hate crimes explained by unemployment
gf_point(
  avg_hatecrimes_per_100k_fbi ~ share_unemp_seas,
  data = hate_crimes,
  color = "navy",
  size = 3
) %>%
  gf_lm(color = "orange")

# make a visualization of hate crimes explained by household income
gf_point(
  avg_hatecrimes_per_100k_fbi ~ median_house_inc,
  data = hate_crimes,
  color = "darkgreen",
  size = 3
) %>%
  gf_lm(color = "orange")

# ---- Submission checks ----
# The only check applied to the submission is check_error().
check_error(ex())
CK Code: ch11-29
# ---- Setup ----
require(coursekata)

# ---- Starter prompts ----
# find and print the best-fitting estimates for the unemployment model
# find and print the best-fitting estimates for the income model

# ---- Solution ----
# find and print the best-fitting estimates for the unemployment model
lm(avg_hatecrimes_per_100k_fbi ~ share_unemp_seas, data = hate_crimes)
# find and print the best-fitting estimates for the income model
lm(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes)

# ---- Submission checks ----
# Shared feedback used when either lm() call is missing.
lm_missing_msg <- "Did you use the lm() function to find and print the best-fitting estimates?"

# The first and second lm() calls are checked separately (index = 1 and 2),
# each with a message naming the predictor that call is expected to use.
ex() %>% {
  check_function(
    .,
    "lm",
    index = 1,
    not_called_msg = lm_missing_msg
  ) %>%
    check_result() %>%
    check_equal(
      incorrect_msg = "Did you model avg_hatecrimes_per_100k_fbi as a function of share_unemp_seas in the hate_crimes data frame?"
    )
  check_function(
    .,
    "lm",
    index = 2,
    not_called_msg = lm_missing_msg
  ) %>%
    check_result() %>%
    check_equal(
      incorrect_msg = "Did you model avg_hatecrimes_per_100k_fbi as a function of median_house_inc in the hate_crimes data frame?"
    )
}
CK Code: ch11-30
# ---- Setup ----
require(coursekata)

# ---- Starter code ----
# this code fits the models
unemp.model <- lm(avg_hatecrimes_per_100k_fbi ~ share_unemp_seas, data = hate_crimes)
income.model <- lm(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes)
# print the supernova table for unemp.model
# print the supernova table for income.model

# ---- Solution ----
# this code fits the models
unemp.model <- lm(avg_hatecrimes_per_100k_fbi ~ share_unemp_seas, data = hate_crimes)
income.model <- lm(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes)
# print the supernova table for unemp.model
supernova(unemp.model)
# print the supernova table for income.model
supernova(income.model)

# ---- Submission checks ----
# Feedback shown when either supernova table is missing from the output.
# It was previously assigned but never passed to the checks, so the learner
# never saw it; it is now supplied as `missing_msg` to both checks.
msg <- "Did you call supernova() on both unemp.model and income.model?"
ex() %>% {
  check_output_expr(., "supernova(unemp.model)", missing_msg = msg)
  check_output_expr(., "supernova(income.model)", missing_msg = msg)
}
CK Code: ch11-31
# ---- Setup ----
# Load coursekata, keep only the rows of hate_crimes where both
# avg_hatecrimes_per_100k_fbi and median_house_inc are complete, and seed the
# random number generator.
require(coursekata)
hate_crimes <- hate_crimes %>%
filter(complete.cases(avg_hatecrimes_per_100k_fbi, median_house_inc))
set.seed(41)
# ---- Starter code (presumably the scaffold shown to the learner) ----
# We've saved the sample PRE
sample_PRE <- PRE(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes)
# Create a sampling distribution of 1000 randomized PREs
# NOTE(review): the assignment below appears to be left unfinished for the
# learner to complete; read as plain R it continues onto the next code line,
# i.e. it parses as `SDoPRE <- tally()`.
SDoPRE <-
# Run a tally to calculate p-value
tally()
# This code will depict the sampling distribution in a histogram
gf_histogram(~PRE, data = SDoPRE, fill = ~PRE > sample_PRE)
# ---- Solution ----
# We've saved the sample PRE
sample_PRE <- PRE(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes)
# Create a sampling distribution of 1000 randomized PREs
SDoPRE <- do(1000) * PRE(shuffle(avg_hatecrimes_per_100k_fbi) ~ median_house_inc, data = hate_crimes)
# NOTE: Best solution is:
# Run a tally to calculate p-value
tally(~PRE > sample_PRE, data = SDoPRE, format = "proportion")
# This code will depict the sampling distribution in a histogram
gf_histogram(~PRE, data = SDoPRE, fill = ~PRE > sample_PRE)
# This longer solution is there for scoring purposes and can be ignored.
# Run a tally to calculate p-value
# tally(~PRE > PRE(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes), data = SDoPRE, format = "proportion")
# This code will depict the sampling distribution in a histogram
# gf_histogram(~PRE, data = SDoPRE, fill = ~PRE > PRE(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes))
# ---- Submission checks ----
# Custom comparison handed to check_equal() below. It ignores its second
# argument `y` and tests, with `==`, whether formula(x) matches the long-form
# formula that recomputes the sample PRE inline.
eq_fun <- function(x, y) (formula(x) == ~PRE > PRE(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes))
# Require a call to do() and a second call to PRE() (index = 2).
# Presumably the first PRE() call is the pre-written sample_PRE line - confirm.
ex() %>% {
check_function(., "do")
check_function(., "PRE", index = 2)
}
# Histogram check with two alternatives inside check_or():
#   1. check the `data`, `object` and `fill` arguments, with `fill` compared
#      unevaluated through eq_fun (the long-form formula);
#   2. swap in the short-form solution string via override_solution() and
#      check only `fill`, compared unevaluated against `~PRE > sample_PRE`.
ex() %>% check_or(
check_function(., "gf_histogram") %>% {
check_arg(., "data") %>% check_equal()
check_arg(., "object") %>% check_equal()
check_arg(., "fill") %>% check_equal(eval = FALSE, eq_fun = eq_fun)
},
override_solution(., "gf_histogram(~PRE, data = SDoPRE, fill = ~PRE > sample_PRE)") %>%
check_function("gf_histogram") %>%
check_arg("fill") %>% check_equal(eval = FALSE, eq_fun = function (x,y) (formula(x) == ~PRE > sample_PRE))
)
# tally() check, structured like the histogram check: alternative 1 checks
# `data`, `format` and `x` (the latter through eq_fun); alternative 2 overrides
# the solution with the short form and checks only `x` against
# `~PRE > sample_PRE`.
ex() %>% check_or(
check_function(., "tally") %>% {
check_arg(., "data") %>% check_equal()
check_arg(., "format") %>% check_equal()
check_arg(., "x") %>% check_equal(eval = FALSE, eq_fun = eq_fun)
},
override_solution(., 'tally(~PRE > sample_PRE, data = SDoPRE, format = "proportion")') %>%
check_function("tally") %>%
check_arg("x") %>% check_equal(eval = FALSE, eq_fun = function(x,y) (formula(x) == ~PRE > sample_PRE))
)
CK Code: ch11-32
# ---- Setup ----
# Load coursekata, keep only the rows of hate_crimes where both
# avg_hatecrimes_per_100k_fbi and median_house_inc are complete, and seed the
# random number generator.
require(coursekata)
hate_crimes <- hate_crimes %>%
filter(complete.cases(avg_hatecrimes_per_100k_fbi, median_house_inc))
set.seed(41)
# ---- Starter code (presumably the scaffold shown to the learner) ----
# We've saved the sample F
sample_F <- fVal(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes)
# Create sampling distribution of randomized Fs
# NOTE(review): the assignment below appears to be left unfinished for the
# learner to complete; read as plain R it continues onto the next code line,
# i.e. it parses as `SDoF <- tally()`.
SDoF <-
# Run a tally to calculate p-value
tally()
# This code will depict the sampling distribution in a histogram
gf_histogram(~fVal, data = SDoF, fill = ~fVal > sample_F)
# ---- Solution ----
# We've saved the sample F
sample_F <- fVal(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes)
# Create sampling distribution of randomized Fs
# NOTE(review): only 10 shuffles are drawn here, whereas the parallel PRE
# exercise draws 1000. The first check_or() alternative below compares the
# `data` argument, so a submission with a different count may not match -
# confirm the intended number of shuffles.
SDoF <- do(10) * fVal(shuffle(avg_hatecrimes_per_100k_fbi) ~ median_house_inc, data = hate_crimes)
# NOTE: Best solution is:
# Run a tally to calculate p-value
# tally(~fVal > sample_F, data = SDoF, format = "proportion")
# This code will depict the sampling distribution in a histogram
# gf_histogram(~fVal, data = SDoF, fill = ~fVal > sample_F)
# This longer solution is there for scoring purposes and can be ignored.
# Run a tally to calculate p-value
tally(~fVal > fVal(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes), data = SDoF, format = "proportion")
# This code will depict the sampling distribution in a histogram
gf_histogram(~fVal, data = SDoF, fill = ~fVal > fVal(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes))
# ---- Submission checks ----
# Custom comparison handed to check_equal() below. It ignores its second
# argument `y` and tests, with `==`, whether formula(x) matches the long-form
# formula that recomputes the sample F inline.
eq_fun <- function(x, y) (formula(x) == ~fVal > fVal(avg_hatecrimes_per_100k_fbi ~ median_house_inc, data = hate_crimes))
# Require a call to do() and a second call to fVal() (index = 2).
# Presumably the first fVal() call is the pre-written sample_F line - confirm.
ex() %>% {
check_function(., "do")
check_function(., "fVal", index = 2)
}
# Histogram check with two alternatives inside check_or():
#   1. check the `data`, `object` and `fill` arguments, with `fill` compared
#      unevaluated through eq_fun (the long-form formula);
#   2. swap in the short-form solution string via override_solution() and
#      check only `fill`, compared unevaluated against `~fVal > sample_F`.
ex() %>% check_or(
check_function(., "gf_histogram") %>% {
check_arg(., "data") %>% check_equal()
check_arg(., "object") %>% check_equal()
check_arg(., "fill") %>% check_equal(eval = FALSE, eq_fun = eq_fun)
},
override_solution(., "gf_histogram(~fVal, data = SDoF, fill = ~fVal > sample_F)") %>%
check_function("gf_histogram") %>%
check_arg("fill") %>% check_equal(eval = FALSE, eq_fun = function(x,y) (formula(x) == ~fVal > sample_F))
)
# tally() check, structured like the histogram check: alternative 1 checks
# `data`, `format` and `x` (the latter through eq_fun); alternative 2 overrides
# the solution with the short form and checks only `x` against
# `~fVal > sample_F`.
ex() %>% check_or(
check_function(., "tally") %>% {
check_arg(., "data") %>% check_equal()
check_arg(., "format") %>% check_equal()
check_arg(., "x") %>% check_equal(eval = FALSE, eq_fun = eq_fun)
},
override_solution(., 'tally(~fVal > sample_F, data = SDoF, format = "proportion")') %>%
check_function("tally") %>%
check_arg("x") %>% check_equal(eval = FALSE, eq_fun = function(x,y) (formula(x) == ~fVal > sample_F))
)
CK Code: ch11-33