setInternet2(TRUE) # solution for https files
download.file("https://sites.google.com/site/statsr4us/intro/software/rcmdr-1/TeachingRatings.rda", "TeachingRatings.rda")
## Warning in download.file("https://sites.google.com/site/statsr4us/intro/
## software/rcmdr-1/TeachingRatings.rda", : downloaded length 6252 != reported
## length 6252
load("TeachingRatings.rda")
setwd("C:/Users/Murtaza/Google Drive/AEBE/Data/ch.06")
names(TeachingRatings)
## [1] "minority" "age" "gender" "credits" "beauty"
## [6] "eval" "division" "native" "tenure" "students"
## [11] "allstudents" "prof"
## Warning: package 'xtable' was built under R version 3.1.3
## Warning: package 'psych' was built under R version 3.1.3
library(quantmod) ## load the package "quantmod"
## Warning: package 'quantmod' was built under R version 3.1.3
## Loading required package: xts
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.1.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.1.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
getSymbols("AAPL") ## get daily Apple stock data from Yahoo Finance
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 165393 != reported length 200
## [1] "AAPL"
dim(AAPL) ## find the sizes of the data downloaded
## [1] 2271 6
getSymbols("AAPL",from="2011-01-01",to="2013-12-31") ## specify the data span
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 55540 != reported length 200
## [1] "AAPL"
y<-diff(log(AAPL$AAPL.Close))
x1<-rbind(mean=mean(y, na.rm=T), sdev=sd(y, na.rm=T))
colnames(x1)<-"descriptive stats" ; x1
## descriptive stats
## mean 0.0007064645
## sdev 0.0177433396
x <- coredata(y[-1,])
h<-hist(x, breaks=20, col="dark grey", xlab="daily returns for Apple, 2011-2013",
main="Histogram with Normal Curve" )
xfit<-seq(min(x),max(x),length=400)
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x))
yfit <- yfit*diff(h$mids[1:2])*length(x)
lines(xfit, yfit, col="black", lwd=2)
x=seq(-4,4,length=200)
y=1/sqrt(2*pi)*exp(-x^2/2)
plot(x,y,type="l",lwd=2,col="dark grey")
# Display the Student's t distributions with various
# degrees of freedom and compare to the normal distribution
# http://www.statmethods.net/advgraphs/probability.html
x <- seq(-4, 4, length=100)
hx <- dnorm(x)
degf <- c(1, 3, 8, 30)
colors <- c("red", "blue", "darkgreen", "gold", "black")
labels <- c("df=1", "df=3", "df=8", "df=30", "normal")
plot(x, hx, type="l", lty=2, xlab="x value",
ylab="Density", main="Comparing t with normal distribution",
sub="adapted from http://www.statmethods.net/advgraphs/probability.html")
for (i in 1:4){
lines(x, dt(x,degf[i]), lwd=2, lty=i+2, col=colors[i])
}
legend("topright", inset=.05, title="Distributions",
labels, lwd=2, lty=c(3,4,5,6,2), col=colors)
attach(TeachingRatings)
tab <- xtable(describe(cbind(eval, age, beauty, students, allstudents),
skew=F, ranges=F))
rownames(tab)<- c("teaching evaluation score", "instructor's age", "beauty score",
"students responding to survey","students registered in course")
print(tab, type="html")
vars | n | mean | sd | se | |
---|---|---|---|---|---|
teaching evaluation score | 1 | 463.00 | 4.00 | 0.55 | 0.03 |
instructor’s age | 2 | 463.00 | 48.37 | 9.80 | 0.46 |
beauty score | 3 | 463.00 | 0.00 | 0.79 | 0.04 |
students responding to survey | 4 | 463.00 | 36.62 | 45.02 | 2.09 |
students registered in course | 5 | 463.00 | 55.18 | 75.07 | 3.49 |
library(lattice)
## Warning: package 'lattice' was built under R version 3.1.3
histogram(TeachingRatings$eval, nint=15, aspect=2,
xlab="teaching evaluation score", col=c("dark grey"))
x=seq(1,7,length=200)
y=dnorm(x,mean=3.998,sd=.554)
plot(x,y,type="l",lwd=2,col="dark grey", main="Normal distribution",
xlab="teaching evaluation score", ylab= "density")
mtext("mean = 3.998, sd=0.554")
z.eval<-as.matrix((TeachingRatings$eval-3.998)/.554)
histogram(z.eval, nint=15, aspect=2,
xlab="nomalized beauty score", col=c("dark grey"))
dnorm(4.5,mean=3.998,sd=.554)
## [1] 0.4776437
1-pnorm(4.5,mean=3.998,sd=.554)
## [1] 0.1824316
x=seq(1,7,length=200)
y=dnorm(x,mean=3.998,sd=.554)
plot(x,y,type="l",lwd=2,col="black", main="Normal distribution",
sub="probability = 0.1824",
xlab="teaching evaluation score", ylab= "density")
mtext("probability of obtaining teching evaluation >4.5")
ticks = c(4.5)
axis(side = 1, at = ticks)
x=seq(4.5,7,length=200)
y=dnorm(x,mean=3.998,sd=.554)
polygon(c(4.5,x,7),c(0,y,0),col="gray")
dnorm(4.5,mean=3.998,sd=.554)
## [1] 0.4776437
pnorm(4.5,mean=3.998,sd=.554)
## [1] 0.8175684
x=seq(1,7,length=200)
y=dnorm(x,mean=3.998,sd=.554)
plot(x,y,type="l",lwd=2,col="black", main="Normal distribution",
sub="probability = 0.8176",
xlab="teaching evaluation score", ylab= "density")
mtext("probability of obtaining teching evaluation < 4.5")
ticks = c(4.5)
axis(side = 1, at = ticks)
x=seq(-7, 4.5,length=200)
y=dnorm(x,mean=3.998,sd=.554)
polygon(c(-7, x, 4.5),c(0,y,0),col="gray")
pnorm(4.2,mean=3.998,sd=.554)-pnorm(3.5,mean=3.998,sd=.554)
## [1] 0.4579544
x=seq(2,6,length=200)
y=dnorm(x,mean=3.998,sd=.554)
plot(x,y,type="l",lwd=2,col="black", main="Normal distribution",
sub="probability = 0.46",
xlab="teaching evaluation score", ylab= "density")
mtext("3.5 <= Teaching evaluation <= 4.2")
ticks = c(3.5, 4.2)
axis(side = 1, at = ticks, line =.8)
x=seq(3.5, 4.2,length=200)
y=dnorm(x,mean=3.998,sd=.554)
polygon(c(3.5,x,4.2),c(0,y,0),col="gray")
setInternet2(TRUE) # solution for https files
download.file("https://sites.google.com/site/econometriks/docs/basket.rda", "basket.rda")
load("basket.rda")
basket$player<-as.factor(basket$player)
attach(basket)
tapply(PTS, player,mean);tapply(PTS, player,sd)
## Michael Jordan Wilt Chamberlain
## 29.45333 30.63750
## Michael Jordan Wilt Chamberlain
## 4.763832 10.591813
x=seq(0,60,length=200)
y=dnorm(x,mean=29.45,sd=4.764)
plot(x,y,type="l",lwd=2,col="black", main="",
sub="", xlab="average points scored per game", ylab= "density")
y=dnorm(x,mean=30.63,sd=10.59)
lines(x,y,type="l",lwd=2,col="dark grey", main="",
sub="", xlab=" ", ylab= "")
mtext("Michael \nJordan",1,line=-10,at=40,col="black")
mtext("Wilt \nChamberlain",1,line=-6,at=10,col="blue")
##two tailed
x=seq(-4,4,length=200)
y=dnorm(x,mean=0,sd=1)
plot(x,y,type="l",lwd=2,col="black", main="Two-tailed test",
sub="", xlab=" ", ylab= "density", xaxt="n")
mtext("Normal distribution")
axis(1, c(-4, -3, 0, 3,4), line=0); axis(1, c(-1.96, 1.96), line=0)
x=seq(-4, -1.96, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-4,x,-1.96),c(0,y,0),col="gray")
x=seq(1.96, 4, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(1.96, x, 4),c(0,y,0),col="gray")
##less than
x=seq(-4,4,length=200)
y=dnorm(x,mean=0,sd=1)
plot(x,y,type="l",lwd=2,col="black", main="One-tailed test",
sub="", xlab=" ", ylab= "density", xaxt="n")
mtext("Normal distribution, left sided")
axis(1, c(-4, -3, 0, 3,4), line=0); axis(1, c(-1.64, 1.64), line=0)
x=seq(-4, -1.64, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-4,x,-1.64),c(0,y,0),col="gray")
##greater than
x=seq(-4,4,length=200)
y=dnorm(x,mean=0,sd=1)
plot(x,y,type="l",lwd=2,col="black", main="One-tailed test",
sub="", xlab=" ", ylab= "density", xaxt="n")
mtext("Normal distribution, right sided")
axis(1, c(-4, -3, 0, 3,4), line=0); axis(1, c(-1.64, 1.64), line=0)
x=seq(1.64, 4, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(1.64, x, 4.5),c(0,y,0),col="gray")
y=qt(.05, c(30,60,90, 200, 400, 500, 1000, 4000),lower.tail = T)
x= c(30,60,90, 200, 400, 500, 1000, 4000)
plot(x,y, pch=20, col="black", main="Critical values for t (lower tail)",
xlab="degrees of freedom", ylab="critical t values for 5%")
y
## [1] -1.697261 -1.670649 -1.661961 -1.652508 -1.648672 -1.647907 -1.646379
## [8] -1.645235
-1.42>-1.96
prob=pnorm(-1.96,mean=0,sd=1);prob*2
## [1] 0.04999579
prob=pnorm(10.7,mean=12,sd=5.5/sqrt(36));prob*2
## [1] 0.1561377
library(NCStats)
## Loading required package: FSA
## Warning: package 'FSA' was built under R version 3.1.3
##
##
## ############################################
## ## FSA package, version 0.8.4 ##
## ## Derek H. Ogle, Northland College ##
## ## ##
## ## Run ?FSA for documentation. ##
## ## Run citation('FSA') for citation ... ##
## ## please cite if used in publication. ##
## ## ##
## ## See derekogle.com/fishR/ for more ##
## ## thorough analytical vignettes. ##
## ############################################
##
## Attaching package: 'FSA'
## The following object is masked from 'package:psych':
##
## headtail
## Loading required package: car
## Warning: package 'car' was built under R version 3.1.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## Loading required package: FSAdata
## Warning: package 'FSAdata' was built under R version 3.1.3
##
##
## ##############################################
## ## FSAdata package, version 0.3.2 ##
## ## by Derek H. Ogle, Northland College ##
## ## ##
## ## Run ?FSAdata for documentation with ##
## ## search hints to find data for specific ##
## ## types of fisheries analyses. ##
## ##############################################
##
##
## ############################################
## ## NCStats package, version 0.4.2 ##
## ## by Derek H. Ogle, Northland College ##
## ## ##
## ## type ?NCStats for documentation. ##
## ############################################
##
## Attaching package: 'NCStats'
## The following objects are masked from 'package:FSA':
##
## compIntercepts, compSlopes, geomean, geosd
z.test(10.7,mu = 12,sd = 5.5/sqrt(36),alternative = "two.sided")
## One Sample z-test with 10.7
## z = -1.4182, n = 1.000, Std. Dev. = 0.917, SE of the sample mean =
## 0.917, p-value = 0.1561
## alternative hypothesis: true mean is not equal to 12
## 95 percent confidence interval:
## 8.903366 12.496634
## sample estimates:
## mean of 10.7
## 10.7
xbar=10.7; sdev=5.5; mu=12; n=36
z1<-(xbar-mu)/(sdev/sqrt(n));z1
## [1] -1.418182
zvalue=round(z1,2);zvalue
## [1] -1.42
x=seq(-4,4,length=200)
y=dnorm(x,mean=0,sd=1)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("Normal distribution")
x=seq(-4, -1.96, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-4,x,-1.96),c(0,y,0),col="gray")
x=seq(1.96, 4, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(1.96, x, 4),c(0,y,0),col="gray")
x=seq(-4,zvalue, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-4,x, zvalue),c(0,y,0),col=, lwd=3)
ticks = c(zvalue)
axis(side = 1, at = ticks, line=0.3)
## Axes and labels
axis(1, -4:4, labels = round(-4:4*sdev/sqrt(n)+mu,3), line=3,
col="blue",col.ticks="blue",col.axis="blue")
axis(1, zvalue, labels = round(zvalue*sdev/sqrt(n)+mu,1), line=2.2,
col="blue",col.ticks="blue",col.axis="blue")
mtext("raw \ndata",1,line=3,at=-5,col="blue")
mtext("normalized\ndata",1,line=1.3,at=-4.7,col="black")
# LABELS
mylabel2 = bquote(italic(z-value) == .(format(zvalue, digits = 3)))
text(x = 3, y = .4, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(prob*2, digits = 5)))
text(x = 3, y = .37, labels = mylabel3)
x=seq(-4,4,length=200)
y=dnorm(x,mean=0,sd=1)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub=" ", xlab=" ", ylab= "density")
mtext("95% confidence interval")
x=seq(-1.96, 1.96, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-1.96,x,1.96),c(0,y,0),col="gray")
x=seq(-4,zvalue, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-4,x, zvalue),c(0,y,0),col=, lwd=3)
ticks = c(zvalue)
axis(side = 1, at = ticks, line=0.3)
axis(1, -4:4, labels = round(-4:4*sdev/sqrt(n)+mu,3), line=3,
col="blue",col.ticks="blue",col.axis="blue")
axis(1, zvalue, labels = round(zvalue*sdev/sqrt(n)+mu,3), line=2.2,
col="blue",col.ticks="blue",col.axis="blue")
mtext("raw \ndata",1,line=3,at=-5,col="blue")
mtext("normalized\ndata",1,line=1.3,at=-4.7,col="black")
xbar=6.5; sdev=1.5; mu=7; n=20
prob=pnorm(xbar,mean=mu,sd=sdev/sqrt(n));prob
## [1] 0.06801856
z.test(xbar,mu = mu,sd = sdev/sqrt(n),alternative = "less")
## One Sample z-test with xbar
## z = -1.4907, n = 1.000, Std. Dev. = 0.335, SE of the sample mean =
## 0.335, p-value = 0.06802
## alternative hypothesis: true mean is less than 7
## 95 percent confidence interval:
## -Inf 7.051701
## sample estimates:
## mean of xbar
## 6.5
z1<-(xbar-mu)/(sdev/sqrt(n));z1
## [1] -1.490712
zvalue=round(z1,2);zvalue
## [1] -1.49
x=seq(-4,4,length=200)
y=dnorm(x,mean=0,sd=1)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("one-tailed test")
x=seq(-4, -1.64, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-4,x,-1.64),c(0,y,0),col="gray")
x=seq(-4, zvalue, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(-4, x, zvalue),c(0,y,0),col=, lwd=2)
ticks = c(-1.64); axis(side = 1, at = ticks, line=.8)
ticks = c(zvalue); axis(side = 1, at = ticks, line=.05)
axis(1, -4:4, labels = round(-4:4*sdev/sqrt(n)+mu,2), line=3,
col="blue",col.ticks="blue",col.axis="blue")
axis(1, zvalue, labels = round(zvalue*sdev/sqrt(n)+mu,2), line=3,
col="blue",col.ticks="blue",col.axis="blue")
mtext("raw \ndata",1,line=3,at=-5,col="blue")
mtext("normalized\ndata",1,line=1.3,at=-4.7,col="black")
# LABELS
mylabel2 = bquote(italic(z-value) == .(format(zvalue, digits = 3)))
text(x = 3, y = .4, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(prob, digits = 5)))
text(x = 3, y = .37, labels = mylabel3)
xbar=550; sdev=50; mu=500; n=5
prob=pnorm(550,mean=500,sd=50/sqrt(5));1-prob
## [1] 0.01267366
ztest=z.test(550,mu = 500,sd = 50/sqrt(5),alternative = "greater");ztest
## One Sample z-test with 550
## z = 2.2361, n = 1.000, Std. Dev. = 22.361, SE of the sample mean =
## 22.361, p-value = 0.01267
## alternative hypothesis: true mean is greater than 500
## 95 percent confidence interval:
## 513.22 Inf
## sample estimates:
## mean of 550
## 550
plot(ztest)
z1<-(xbar-mu)/(sdev/sqrt(n));z1
## [1] 2.236068
zvalue=round(z1,2);zvalue
## [1] 2.24
x=seq(-4,4,length=200)
y=dnorm(x,mean=0,sd=1)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("one-tailed, greater than test")
x=seq(1.64, 4, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(1.64, x, 4),c(0,y,0),col="gray")
x=seq(zvalue, 4, length=200)
y=dnorm(x,mean=0,sd=1)
polygon(c(zvalue, x, 4),c(0,y,0),col=, lwd=3)
ticks = c(zvalue, 1.64)
axis(side = 1, at = ticks, line=.7)
## Axes and labels
axis(1, -4:4, labels = round(-4:4*sdev/sqrt(n)+mu,2), line=3,
col="blue",col.ticks="blue",col.axis="blue")
axis(1, zvalue, labels = round(zvalue*sdev/sqrt(n)+mu,2), line=-6,
col="blue",col.ticks="blue",col.axis="blue")
mtext("raw \ndata",1,line=3,at=-5,col="blue")
mtext("normalized\ndata",1,line=1.3,at=-4.7,col="black")
# LABELS
mylabel2 = bquote(italic(z-test) == .(format(zvalue, digits = 3)))
text(x = 3, y = .4, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(1-prob, digits = 5)))
text(x = 3, y = .37, labels = mylabel3)
xbar=170000; sdev=25000; mu1=166000; n1=35
t.testx=(xbar-mu1)/(sdev/sqrt(n1)); t.testx
## [1] 0.9465728
prob=pt(t.testx,df=n1-1, lower.tail=F)
pvalue=prob*2 # two tailed prob;
pvalue
## [1] 0.3505358
tvalue=round(t.testx,2);tvalue
## [1] 0.95
tcrit=abs(qt(0.05/2, n1-1)); tcrit ## two-sided critical t value
## [1] 2.032245
x=seq(-4,4,length=200)
y=dt(x,df=n1-1)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("t distribution")
x=seq(-4, -tcrit, length=200)
y=dt(x,df=n1-1)
polygon(c(-4,x,-tcrit),c(0,y,0),col="gray")
x=seq(tcrit, 4, length=200)
y=dt(x,df=n1-1)
polygon(c(tcrit, x, 4),c(0,y,0),col="gray")
x=seq(tvalue, 4, length=200)
y=dt(x,df=n1-1)
polygon(c(tvalue, x, 4),c(0,y,0),col=, lwd=3)
ticks = c(tvalue)
axis(side = 1, at = ticks)
axis(1, -4:4, labels = round(-4:4*sdev/sqrt(n)+mu1,0), line=3,
col="blue",col.ticks="blue",col.axis="blue")
axis(1, tvalue, labels = round(tvalue*sdev/sqrt(n)+mu1,0), line=.72,
col="blue",col.ticks="blue",col.axis="blue")
mtext("raw \ndata",1,line=3,at=-5,col="blue")
mtext("normalized\ndata",1,line=1.3,at=-4.7,col="black")
# LABELS
mylabel1 = bquote(italic(dof) == .(format(n1-1, digits = 3)))
text(x = 3, y = .4, labels = mylabel1)
mylabel2 = bquote(italic(t-test) == .(format(tvalue, digits = 3)))
text(x = 3, y = .37, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(pvalue, digits = 5)))
text(x = 3, y = .34, labels = mylabel3)
x=TeachingRatings
t.mean<-tapply(x$eval,x$gender, mean)
t.sd<- tapply(x$eval,x$gender, sd)
round(cbind(mean=t.mean, s.dev=t.sd),2)
## mean s.dev
## male 4.07 0.56
## female 3.90 0.54
t2.ex <- t.test(eval~gender,var.equal = FALSE, data=x)
t2.ex
## Welch Two Sample t-test with eval by gender
## t = 3.2667, df = 425.756, p-value = 0.001176
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06691754 0.26909088
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
pt(c(3.267), df=425.76, lower.tail=F)*2
## [1] 0.001174933
plot(t2.ex)
x=TeachingRatings
t2 <- t.test(eval~gender,var.equal = FALSE, data=x); t2
## Welch Two Sample t-test with eval by gender
## t = 3.2667, df = 425.756, p-value = 0.001176
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06691754 0.26909088
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
df.x=as.numeric(t2[2]); df.x
## [1] 425.7558
tvalue=round(as.numeric(t2[1]),2);tvalue
## [1] 3.27
tcrit=abs(qt(0.05/2, df.x)); tcrit ## two-sided critical t value
## [1] 1.965551
x=seq(-4,4,length=200)
y=dt(x,df=df.x)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("t distribution")
x=seq(-4, -tcrit, length=200)
y=dt(x,df=df.x)
polygon(c(-4,x,-tcrit),c(0,y,0),col="gray")
x=seq(tcrit, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tcrit, x, 4),c(0,y,0),col="gray")
x=seq(tvalue, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tvalue, x, 4),c(0,y,0),lwd=3)
ticks = c(tvalue)
axis(side = 1, at = ticks)
# LABELS
mylabel1 = bquote(italic(dof) == .(format(df.x, digits = 5)))
text(x = 3, y = .4, labels = mylabel1)
mylabel2 = bquote(italic(t-test) == .(format(tvalue, digits = 3)))
text(x = 3, y = .37, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(t2[3], digits = 3)))
text(x = 3, y = .34, labels = mylabel3)
x=TeachingRatings
t2 <- t.test(eval~gender,var.equal = FALSE, data=x, alternative='greater');
df.x=as.numeric(t2[2]); df.x
## [1] 425.7558
tvalue=round(as.numeric(t2[1]),2);tvalue
## [1] 3.27
tcrit=abs(qt(0.05, df.x)); tcrit ## one-sided critical t value
## [1] 1.64844
x=seq(-4,4,length=200)
y=dt(x,df=df.x)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("t distribution")
x=seq(tcrit, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tcrit, x, 4),c(0,y,0),col="gray")
x=seq(tvalue, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tvalue, x, 4),c(0,y,0),lwd=3)
ticks = c(tvalue)
axis(side = 1, at = ticks)
ticks = c(round(tcrit,2))
axis(side = 1, at = ticks, line=1)
# LABELS
mylabel1 = bquote(italic(dof) == .(format(df.x, digits = 5)))
text(x = 3, y = .4, labels = mylabel1)
mylabel2 = bquote(italic(t-test) == .(format(tvalue, digits = 3)))
text(x = 3, y = .37, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(t2[3], digits = 3)))
text(x = 3, y = .34, labels = mylabel3)
x=TeachingRatings
t2 <- t.test(eval~gender,var.equal = FALSE, data=x, alternative='greater'); t2
## Welch Two Sample t-test with eval by gender
## t = 3.2667, df = 425.756, p-value = 0.0005881
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.0832263 Inf
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
plot(t2)
x=TeachingRatings
t2 <- t.test(eval~gender,var.equal = T, data=x); t2
## Two Sample t-test with eval by gender
## t = 3.2499, df = 461, p-value = 0.001239
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06641797 0.26959045
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
x=TeachingRatings
t2 <- t.test(eval~gender,var.equal = T, data=x); t2
## Two Sample t-test with eval by gender
## t = 3.2499, df = 461, p-value = 0.001239
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06641797 0.26959045
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
df.x=as.numeric(t2[2]); df.x
## [1] 461
tvalue=round(as.numeric(t2[1]),2);tvalue
## [1] 3.25
tcrit=abs(qt(0.05/2, df.x)); tcrit ## two-sided critical t value
## [1] 1.965123
x=seq(-4,4,length=200)
y=dt(x,df=df.x)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("t distribution")
x=seq(-4, -tcrit, length=200)
y=dt(x,df=df.x)
polygon(c(-4,x,-tcrit),c(0,y,0),col="gray")
x=seq(tcrit, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tcrit, x, 4),c(0,y,0),col="gray")
x=seq(tvalue, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tvalue, x, 4),c(0,y,0),lwd=3)
ticks = c(tvalue)
axis(side = 1, at = ticks)
# LABELS
mylabel1 = bquote(italic(dof) == .(format(df.x, digits = 5)))
text(x = 3, y = .4, labels = mylabel1)
mylabel2 = bquote(italic(t-test) == .(format(tvalue, digits = 3)))
text(x = 3, y = .37, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(t2[3], digits = 3)))
text(x = 3, y = .34, labels = mylabel3)
x=TeachingRatings
t2 <- t.test(eval~gender,var.equal = TRUE, data=x, alternative='greater'); t2
## Two Sample t-test with eval by gender
## t = 3.2499, df = 461, p-value = 0.0006194
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.08280296 Inf
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
x=TeachingRatings
t2 <- t.test(eval~gender,var.equal = TRUE, data=x, alternative='greater'); t2
## Two Sample t-test with eval by gender
## t = 3.2499, df = 461, p-value = 0.0006194
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.08280296 Inf
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
df.x=as.numeric(t2[2]); df.x
## [1] 461
tvalue=round(as.numeric(t2[1]),2);tvalue
## [1] 3.25
tcrit=abs(qt(0.05, df.x)); tcrit ## two-sided critical t value
## [1] 1.648166
x=seq(-4,4,length=200)
y=dt(x,df=df.x)
plot(x,y,type="l",lwd=2,col="black", main="Comparison of mean test",
sub="", xlab=" ", ylab= "density")
mtext("t distribution")
x=seq(tcrit, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tcrit, x, 4),c(0,y,0),col="gray")
x=seq(tvalue, 4, length=200)
y=dt(x,df=df.x)
polygon(c(tvalue, x, 4),c(0,y,0),lwd=3)
ticks = c(tvalue)
axis(side = 1, at = ticks)
ticks = c(round(tcrit,2))
axis(side = 1, at = ticks, line=1)
# LABELS
mylabel1 = bquote(italic(dof) == .(format(df.x, digits = 5)))
text(x = 3, y = .4, labels = mylabel1)
mylabel2 = bquote(italic(t-test) == .(format(tvalue, digits = 3)))
text(x = 3, y = .37, labels = mylabel2)
mylabel3 = bquote(italic(p-value) == .(format(t2[3], digits = 3)))
text(x = 3, y = .34, labels = mylabel3)
x1=110000;s1=5000; n1=65
x2=125000;s2=15000; n2=45
# Assuming equal variances
vpool= (s1^2*(n1-1)+s2^2*(n2-1))/(n1+n2-2); vpool
## [1] 106481481
sdev = sqrt(vpool*(n1+n2)/(n1*n2)); sdev
## [1] 2001.108
t = (x1-x2)/sdev ;t
## [1] -7.495849
dof= n1+n2-2; dof
## [1] 108
x<-TeachingRatings
# Gender and eval
x$gen2<-factor(x$gender, levels = c("female", "male"))
cbind(mean.eval=tapply(x$eval, x$gen2,mean),observations=table(x$gen2))
## mean.eval observations
## female 3.901026 195
## male 4.069030 268
plot(x$gen2,x$eval, pch=20)
# Fig. 6.35
t2 <- t.test(eval~gender,var.equal = TRUE, data=x); t2
## Two Sample t-test with eval by gender
## t = 3.2499, df = 461, p-value = 0.001239
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06641797 0.26959045
## sample estimates:
## mean in group male mean in group female
## 4.069030 3.901026
# Fig. 6.36
summary(lm(eval~gen2, data=x))
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.90103 0.03933 99.19 < 2e-16 ***
## gen2male 0.16800 0.05169 3.25 0.00124 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5492 on 461 degrees of freedom
## Multiple R-squared: 0.0224, Adjusted R-squared: 0.02028
## F-statistic: 10.56 on 1 and 461 DF, p-value: 0.001239
#age as a three factor
x$f.age <- cut(x$age, breaks = 3)
x$f.age <- factor(x$f.age,labels=c("young", "mid age", "old"))
# plot(x$f.age,x$eval, pch=20)
cbind(mean.eval=tapply(x$eval, x$f.age,mean),observations=table(x$f.age))
## mean.eval observations
## young 4.004348 161
## mid age 4.036036 222
## old 3.881250 80
# plot(x$age,x$eval,pch=20)
# Fig. 6.37
summary(lm(eval~f.age, data=x))
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.00435 0.04361 91.831 <2e-16 ***
## f.agemid age 0.03169 0.05727 0.553 0.580
## f.ageold -0.12310 0.07568 -1.626 0.105
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5533 on 460 degrees of freedom
## Multiple R-squared: 0.00997, Adjusted R-squared: 0.005665
## F-statistic: 2.316 on 2 and 460 DF, p-value: 0.09981
# Fig. 6.38
summary(aov(eval~f.age, data=x))
## Df Sum Sq Mean Sq F value Pr(>F)
## f.age 2 1.42 0.7090 2.316 0.0998 .
## Residuals 460 140.82 0.3061
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## beauty
x$f.beauty<-cut(x$beauty, breaks=3)
x$f.beauty<-factor(x$f.beauty, labels=c("low beauty", "average looking", "good looking"))
cbind(mean.eval=tapply(x$eval, x$f.beauty,mean),observations=table(x$f.beauty))
## mean.eval observations
## low beauty 3.945856 181
## average looking 3.988177 203
## good looking 4.144304 79
# Fig. 6.39:
summary(aov(eval~f.beauty, data=x))
## Df Sum Sq Mean Sq F value Pr(>F)
## f.beauty 2 2.2 1.1013 3.618 0.0276 *
## Residuals 460 140.0 0.3044
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t1<-table(x$gender,x$tenure);t1
##
## no yes
## male 52 216
## female 50 145
round(prop.table(t1,1)*100,2)
##
## no yes
## male 19.40 80.60
## female 25.64 74.36
chisq.test(t1, correct=F)
## Pearson's Chi-squared test with t1
## X-squared = 2.5571, df = 1, p-value = 0.1098
r1<-margin.table(t1, 1) # (summed over rows)
c1<-margin.table(t1, 2) # (summed over columns)
r1;c1
##
## male female
## 268 195
##
## no yes
## 102 361
e1<-r1%*%t(c1)/sum(t1);e1
##
## no yes
## male 59.04104 208.959
## female 42.95896 152.041
t2<-(t1-e1)^2/e1;t2;sum(t2)
##
## no yes
## male 0.8396905 0.2372533
## female 1.1540362 0.3260712
## [1] 2.557051
qchisq(.95, df = 1)
## [1] 3.841459
1-pchisq(sum(t2),(length(r1)-1)*(length(c1)-1))
## [1] 0.1098032