Packages

library(mosaic)
library(ggformula)

Reading in Data

# Read a whitespace-delimited .txt file straight from a URL; the first row
# of the file supplies the column names.
Temps <- read.table(
  file = "http://www.isi-stats.com/isi/data/chap2/FemaleTemp.txt",
  header = TRUE
)

# Read a comma-separated .csv file straight from a URL.
GSS <- read.csv(
  file = "https://raw.githubusercontent.com/IJohnson-math/Math138/main/GSS_clean.csv"
)

# Quick looks at the GSS data frame
glimpse(GSS)       # every column listed with its type and first few values
head(GSS, n = 6)   # first six rows
tail(GSS, n = 6)   # last six rows

Working with Data

# Keep only the rows where self_emp_or_works_for_somebody is not missing (NA);
# the filtered data frame replaces GSS.
GSS <- GSS %>%
  filter(!is.na(self_emp_or_works_for_somebody))

Plotting Graphs

# Histogram of a single quantitative variable, using bins 4 units wide
gf_histogram(
  ~ number_of_hours_worked_last_week,
  data = GSS,
  binwidth = 4,
  title = "Time Worked Last Week by a Random Sample of 1381 US Adults",
  xlab = "Time Worked (hours)"
)

Summary Statistics

# One categorical variable: a table of counts per category, then the same
# table expressed as proportions.
# NOTE(review): GSS22 is not created anywhere in the visible script (only
# GSS is read in) -- confirm it is loaded before these lines run.
tally(~ works_for, data = GSS22)
tally(~ works_for, data = GSS22, format = "proportion")

# One quantitative variable: mean and standard deviation, each computed
# after dropping missing values (na.rm = TRUE).
# NOTE(review): GSS22 is not created anywhere in the visible script (only
# GSS is read in) -- confirm it is loaded before these lines run.
mean(~ years_education, data = GSS22, na.rm = TRUE)
sd(~ siblings, data = GSS22, na.rm = TRUE)

Inference

# One-proportion test using a variable from a data frame.
#   success     - the value of `degree` that is counted as a success
#   p           - null-hypothesis value of the population proportion (pi)
#   alternative - one of "two.sided", "less", "greater"
#   correct     - FALSE turns off the continuity correction
# NOTE(review): GSS22 is not created anywhere in the visible script (only
# GSS is read in) -- confirm it is loaded before this line runs.
prop.test(
  ~ degree,
  data = GSS22,
  success = "less than high school",
  p = 0.111,
  alternative = "less",
  correct = FALSE
)

# One-proportion test from summary counts (no data file needed).
#   359   - number of successes
#   3544  - sample size n
#   p     - null-hypothesis value of pi; the alternative is pi < 0.111
# correct = FALSE turns off the continuity correction, matching the
# data-based prop.test() call in this file so both forms run the same test.
prop.test(359, 3544, p = 0.111, alternative = "less", correct = FALSE)

# One-sample t-test for a single quantitative variable.
# Null hypothesis: population mean mu = 40; the alternative is two-sided.
t.test(
  ~ number_of_hours_worked_last_week,
  data = GSS,
  mu = 40,
  alternative = "two.sided"
)