# Question #4
#a
# Simulate 20 observations for each of three groups (a, b, c) whose
# population means are 10, 20 and 30 (sd is rnorm()'s default of 1).
# `each = 20` repeats every label 20 times in a row so that the labels line
# up with the three consecutive runs of 20 values in `vars`; a bare
# c("a", "b", "c") would be recycled as a, b, c, a, b, c, ... and mislabel
# most rows.
groups <- rep(c("a", "b", "c"), each = 20)
vars <- c(
  rnorm(n = 20, mean = 10),
  rnorm(n = 20, mean = 20),
  rnorm(n = 20, mean = 30)
)
df <- data.frame(groups, vars)
#b
# Draw one normal sample for every entry of `means` and return the sample
# mean of each draw.
#
# n:     passed straight through to rnorm() as its `n` argument (default 10).
# means: numeric vector of population means, one per group. Any length is
#        accepted (default c(10, 20, 30)). The standard deviation is
#        rnorm()'s default of 1.
#
# Returns an unnamed numeric vector holding one sample mean per element of
# `means`, in the same order.
shuffle <- function(n = 10, means = c(10, 20, 30)) {
  # Groups are sampled one after another in the order given, so the result
  # is reproducible under set.seed().
  vapply(
    means,
    function(group_mean) mean(rnorm(n = n, mean = group_mean)),
    numeric(1),
    USE.NAMES = FALSE
  )
}
#c
# Repeat the simulation 100 times and collect the three group means per run.
# shuffle() is called with its default arguments: passing `df` would bind
# the data frame to `n`, and rnorm() treats an `n` of length > 1 as "draw
# length(n) values", so every mean would come from only 2 observations.
n_reps <- 100
# replicate() returns one column per run; transpose so each run is a row.
sim_means <- t(replicate(n_reps, shuffle()))
df2 <- data.frame(
  rep = seq_len(n_reps),
  a = sim_means[, 1],
  b = sim_means[, 2],
  c = sim_means[, 3]
)
head(df2)
## rep a b c
## 1 1 10.050854 20.08542 29.78820
## 2 2 10.041449 21.13586 30.50175
## 3 3 10.196622 20.38981 30.79304
## 4 4 10.066933 20.74551 30.03030
## 5 5 9.969958 19.72048 30.37813
## 6 6 9.987650 19.17907 30.31938
#d
# Histogram of the simulated means for each group. ggplot() +
# geom_histogram() replaces qplot(), which is deprecated as of ggplot2 3.4.0.
# `bins = 30` keeps the binning that was previously picked by default and
# avoids the stat_bin() "Pick better value" message.
ggplot(df2, aes(x = a)) + geom_histogram(bins = 30)
ggplot(df2, aes(x = b)) + geom_histogram(bins = 30)
ggplot(df2, aes(x = c)) + geom_histogram(bins = 30)
# The reshuffled means are not always exactly equal to the originally specified means. However, the reshuffled means are very close to the specified means and are approximately normally distributed.