# Packages ----

library(mosaic)
library(ggformula)

# Reading in data ----

# Read the CollegeMidwest .txt file directly from its web link;
# header = TRUE takes the column names from the first line of the file.
CollegeMidwest <- read.table(
  file = "http://www.isi-stats.com/isi/data/chap3/CollegeMidwest.txt",
  header = TRUE
)

# Read the GSS_clean .csv file directly from its web link.
GSS <- read.csv(
  file = "https://raw.githubusercontent.com/IJohnson-math/Math138/main/GSS_clean.csv"
)

# Quick looks at the data: an overview of the columns, then the first six
# and the last six rows (n = 6L is the default, spelled out here).
glimpse(CollegeMidwest)
head(CollegeMidwest, n = 6L)
tail(CollegeMidwest, n = 6L)

# Working with Data ----

# Keep only the rows where self_emp_or_works_for_somebody is not NA.
# NOTE: this overwrites GSS, so every later use of GSS in this script
# (including the tests in the Inference section) sees only these rows.
GSS <- filter(
  .data = GSS,
  !is.na(self_emp_or_works_for_somebody)
)

# Plotting ----

# One categorical variable: bar chart of OnCampus.
# Replace the placeholder title and caption text with your own.
gf_bar(
  ~OnCampus,
  data = CollegeMidwest,
  title = "Make Title",
  caption = "write a caption"
)

# One quantitative variable: histogram of CumGpa with bins 0.05 wide.
gf_histogram(
  ~CumGpa,
  data = CollegeMidwest,
  binwidth = 0.05
)

# Summary statistics ----

# One categorical variable: table of OnCampus, first as tallied by default,
# then as proportions.
tally(
  ~OnCampus,
  data = CollegeMidwest
)
tally(
  ~OnCampus,
  data = CollegeMidwest,
  format = "proportion"
)

# One quantitative variable: mean, median, standard deviation, and range
# of CumGpa.
# na.rm = TRUE is set on every call so that missing CumGpa values are
# handled the same way by all four statistics; without it a single NA
# makes a statistic come back as NA.
mean(~CumGpa, data = CollegeMidwest, na.rm = TRUE)
median(~CumGpa, data = CollegeMidwest, na.rm = TRUE)
sd(~CumGpa, data = CollegeMidwest, na.rm = TRUE)
range(~CumGpa, data = CollegeMidwest, na.rm = TRUE)

# Inference ----

# One proportion, computed from a variable in the data.
# p = 0.1 is the null-hypothesis value of the proportion (pi = 0.1).
# success = "Self-employed" is the value of the variable counted as a success.
# Options for alternative are "two.sided", "less", and "greater".
# NOTE(review): `correct` is not set here, so presumably the default
# continuity correction applies, unlike the summary-count call that
# follows (correct = FALSE) -- confirm that this is intended.
prop.test(
  ~self_emp_or_works_for_somebody,
  data = GSS,
  success = "Self-employed",
  p = 0.1,
  alternative = "greater"
)
# One proportion from summary counts, without a data file:
# x = 232 is the number of successes, n = 2261 is the sample size, and
# p = 0.10 is the null-hypothesis value of the proportion.
# correct = FALSE turns off the continuity correction.
prop.test(
  x = 232,
  n = 2261,
  p = 0.10,
  alternative = "two.sided",
  correct = FALSE
)
# One quantitative variable: one-sample t-test of
# number_of_hours_worked_last_week against the null-hypothesis mean mu = 40,
# with a two-sided alternative.
t.test(
  ~number_of_hours_worked_last_week,
  data = GSS,
  mu = 40,
  alternative = "two.sided"
)