Packages
library(mosaic)
library(ggformula)
Reading in data
# Load a .txt file from a web link.
# header = TRUE: the first line of the file holds the column names.
Temps <- read.table(
  file = "http://www.isi-stats.com/isi/data/chap2/FemaleTemp.txt",
  header = TRUE
)

# Load a .csv file from a web link.
GSS <- read.csv(
  file = "https://raw.githubusercontent.com/IJohnson-math/Math138/main/GSS_clean.csv"
)

# Quick looks at the data.
glimpse(GSS)         # variable names, their types, and the first few values
head(GSS, n = 6L)    # first six rows
tail(GSS, n = 6L)    # last six rows
Working with Data
# Keep only the rows where self_emp_or_works_for_somebody is recorded,
# i.e. drop every row whose value for that variable is NA.
GSS <- GSS %>%
  filter(!is.na(self_emp_or_works_for_somebody))
Plotting Graphs
# One quantitative variable: histogram of hours worked last week,
# using bins that are 4 hours wide.
# NOTE(review): the sample size 1381 in the title is hard-coded -- confirm it
# matches the number of rows actually plotted.
gf_histogram(
  ~number_of_hours_worked_last_week,
  data = GSS,
  binwidth = 4,
  title = "Time Worked Last Week by a Random Sample of 1381 US Adults",
  xlab = "Time Worked (hours)"
)
Summary statistics
# NOTE(review): GSS22 is not created anywhere in the code shown here; it must
# be loaded before these lines run -- confirm where it comes from.

# One categorical variable: counts per category, then proportions.
tally(
  ~works_for,
  data = GSS22
)
tally(
  ~works_for,
  data = GSS22,
  format = "proportion"
)

# One quantitative variable: mean and standard deviation.
# na.rm = TRUE drops missing values before the statistic is computed.
mean(
  ~years_education,
  data = GSS22,
  na.rm = TRUE
)
sd(
  ~siblings,
  data = GSS22,
  na.rm = TRUE
)
Inference
# One-proportion test from a data frame.
#   success     : the category of `degree` that counts as a success
#   p           : null-hypothesis value of the parameter pi (here pi = 0.111)
#   alternative : one of "two.sided", "less", "greater"
#   correct     : FALSE turns off the continuity correction
# NOTE(review): GSS22 is not created anywhere in the code shown here; it must
# be loaded before this line runs -- confirm where it comes from.
prop.test(
  ~degree,
  data = GSS22,
  success = "less than high school",
  p = 0.111,
  alternative = "less",
  correct = FALSE
)
# One-proportion test without a data file (summary counts only).
#   x = 359      : the success count
#   n = 3544     : the sample size
#   p = 0.111    : null-hypothesis value of pi
#   alternative  : "less", i.e. the alternative hypothesis is pi < 0.111
#   correct      : FALSE turns off Yates' continuity correction, which
#                  prop.test() applies by default. This keeps the result
#                  consistent with the data-based prop.test() call, which
#                  also uses correct = FALSE.
prop.test(x = 359, n = 3544, p = 0.111, alternative = "less", correct = FALSE)
# One quantitative variable: one-sample t-test of the null hypothesis that
# the mean is mu = 40, against a two-sided alternative.
t.test(
  ~number_of_hours_worked_last_week,
  data = GSS,
  alternative = "two.sided",
  mu = 40
)