Code Snippets

Download

# simple arithmetic operations
5+3
5-3
5*3
5/3
5^3


(3+4)^2
3+4^2
3+2*4^2
3+2*4+2
(3+2)*(4+2)


5*abs(-3)
sqrt(17)/2


?sqrt


?log


log(100,base=10)
log(100,10)
log(100)


round(50/3)
round(exp(1),digits=4)
signif(50/3,digits=5)

Download

x <- 8
x
x+5
x <- 2*x
x
frog


y <- 3.4
class(y)
str <- "Economic statistics"
class(str)

Download

8 == 3+5
x <- 16
x > 12
xsmall <- (x<=9)
xsmall


x <- 16
(x>12) & (x<=9)
(x>12) | (x<=9)


x <- 16
(x>12)
!(x>12)


x <- 16
y <- 30
((y>2*x)|(y<3*x)) & (abs(x-y)!=10)

Download

str <- "intro to statistics"
str


str <- "intro to statistics"
nchar(str)
toupper(str)
substr(str,3,12)
x <- 6
y <- exp(1)
paste("The value of x is", x, "and the value of y is", round(y,4))
paste("The value of x is", x, "and the value of y is", round(y,4), sep="")
str2 <- paste(str,str)
str2


answers <- "ACBBAD"
grepl("AA", answers)
grepl("BB", answers)
grepl("AAA", answers)
grepl("AAA", answers) | grepl("BBB", answers) | grepl("CCC", answers) | grepl("DDD", answers)

Download

numvec <- c(8,12,5,10,3)
numvec
numvec <- c(numvec,18)
numvec
length(numvec)

answers <- c("A","C","B","B","A","D")
answers

tfvec <- c(TRUE,FALSE,FALSE,TRUE)
tfvec
length(tfvec)

c(8,12,"A")


seq(1,10)
seq(1,10,2)
seq(5,4,-0.1)
1:6
1.2:6
6:1
vec <- c(1:6,seq(10,20,2))
vec


rep(0,5)
rep(1,5)
rep(FALSE,8)
rep("abcd",6)


numvec <- c(8,12,5,10,3,18,7,10,2,8)
length(numvec)
numvec[3]
numvec[2:5]
numvec[(length(numvec)-2):length(numvec)]
numvec[c(4,1,9)]
sqrt(numvec)[2:4]


numvec <- c(8,12,5,10,3,18,7,10,2,8)
min(numvec)
max(numvec)
sort(numvec)
sort(numvec, decreasing = TRUE)
unique(numvec)
sort(unique(numvec))
sum(numvec)
mean(numvec)
cumsum(numvec)


numvec <- c(8,12,5,10,3,18,7,10,2,8)
numvec + 1
numvec + 1:10
sqrt(numvec)
numvec >= 9
prices <- c(1.24,3.12,0.78,2.22,4.57,2.89,4.08,1.83,3.78,2.66)
numvec*prices
revenue <- sum(numvec*prices)
revenue


strvec <- c("ab","cd","ef")
toupper(strvec)
paste(strvec,".",sep="")
strvec=="ab"
logicvec1 <- c(TRUE,FALSE,FALSE,TRUE)
logicvec2 <- c(TRUE,TRUE,FALSE,TRUE)
logicvec1|logicvec2
logicvec1&logicvec2


numvec <- c(8,12,5,10,3,18,7,10,2,8)
prices <- c(1.24,3.12,0.78,2.22,4.57,2.89,4.08,1.83,3.78,2.66)
all(numvec >= 9)
any(numvec >= 9)
ifelse(prices > 3.50, "high price", "low price")
ifelse(prices > 3.50, 3.50, prices)
which(prices > 3.50)
high_prices <- prices[which(prices > 3.50)]
high_prices


sum(numvec >= 9)
sum(prices > 3.50)


mean(numvec >= 9)
mean(prices > 3.50)


set.seed(1234)
x <- runif(1000)
x[1:10]
sum(x<0.3)
mean(x<0.3)
mean((x>0.6)*(x<0.8))

Download

x <- 5
x
x/3

Download

#  if (logical condition) {
#     ... commands ...
#  }


str <- "intro to statistics"

if (nchar(str) > 10) {
  print(paste(str,"is a long string."))
  print(paste(str,"has",nchar(str),"characters."))
}


#  if (logical condition) {
#     ... commands executed if condition is TRUE...
#  } else {
#     ... commands executed if condition is FALSE...
#  }


price <- 25.78

if (price > 20) {
  print("The price is too high.")
  price <- 0.9*price
} else {
  print("The price is not too high.")
}


price <- 25.78

if (price > 20) {
  print("The price is too high.")
  price <- 0.9*price
} else if (price < 10) {
  print("The price is too low.")
  price <- 1.1*price
} else {
  print("The price is not too high or too low.")
}


#  for (var in sequence) {
#     ... commands ...
#  }


sequence_length <- 10
fib_sequence <- rep(0,sequence_length)
fib_sequence[1] <- 1
fib_sequence[2] <- 1

for (i in 3:sequence_length) {
  fib_sequence[i] <- fib_sequence[i-2]+fib_sequence[i-1]
}

print(fib_sequence)


strvec <- c("cow","horse","pig","chicken","goat")

first_letters <- ""

for (str in strvec) {
  first_letters <- paste(first_letters,substr(str,1,1),sep="")
}

print(first_letters)


#  while (logical condition) {
#     ... commands ...
#  }


sales <- c(38,52,24,61,47,18,29,44,41)
total_sales <- 0
idx <- 0

while (total_sales <= 200) {
  idx <- idx + 1
  total_sales <- total_sales + sales[idx]
}

print(paste("It took",idx,"days to reach over 200 sales. There were",total_sales,"sales after",idx,"days."))

Download

triangle_area <- function(base, height) {
  area <- 0.5*base*height
  return(area)
}

triangle_area(10,5)
area


height <- 8
triangle_area(10,5)
height


base_vec <- c(10,15,20)
height_vec <- c(5,8,10)
triangle_area(base_vec,height_vec)


distance <- function(x1, y1, x2 = 0, y2 = 0) {
  return(sqrt((x1-x2)^2+(y1-y2)^2))
}

distance(3,4)
distance(3,4,5,-1)

Download

df <- data.frame(name = c("Amy", "Blake", "Chloe"), age = c(28, 41, 32), employed = c("Yes", "No", "Yes"))

print(df)

mean(df$age)

sapply(df,class)

df$employed <- factor(df$employed)
sapply(df,class)


df$name
df$age[1]
df[1,2]
df[,2]
df[,c("name","employed")]
df[1:2,]
df[df$age>30,]


setwd("/Users/ja8294/Library/CloudStorage/Box-Box/ECO 329 book/data")
exams <- read.csv("exams.csv")
exams[5:9,]
print(paste("Top score on exam #1:",max(exams$exam1)))
print(paste("Top score on exam #2:",max(exams$exam2)))


head(exams)
str(exams)
summary(exams)
names(exams)


exams$avg <- (exams$exam1+exams$exam2)/2
str(exams)
write.csv(exams,file="exams-edited.csv")

Download

hourly_wage <- c(12.50, NA, 10.75, 11.00, NA, NA, 14.80, 13.25)
age <- c(24, 42, 31, 61, 55, 26, 34, 59)
is.na(hourly_wage)
hourly_wage[!is.na(hourly_wage)]
sum(is.na(hourly_wage))
worker_df <- data.frame(wagehr = hourly_wage, age = age)
na.omit(worker_df)


mean(hourly_wage)
mean(hourly_wage, na.rm = TRUE)
max(hourly_wage, na.rm = TRUE)

Download

r = getOption("repos")
r["CRAN"] = "http://cran.us.r-project.org"
options(repos = r)


install.packages("stringr")


library(stringr)
library(help = stringr)
str_trim("  testing this out ")


x <- 16
sqrt(x)


x <- 16
x <- sqrt(x)
x <- c(x,x)


x <- c(79,16,53,44)
x <- sort(x, decreasing = TRUE)
length(x)


y <- c(6.7,-3.3,4.2)
x <- (1:3)*y
min(x)
x <- (x > 0)
sum(x)


sales <- c(38,52,24,61,47,18,29,44,41)


returns <- "UDUUUDDUDUDUUUUDUDUU"


setwd("/Users/ja8294/Library/CloudStorage/Box-Box/ECO 329 book/data")

exams <- read.csv("exams.csv")


cigdata <- read.csv("cigdata2019.csv")

Download

coin <- c("H","T")
coin
die <- 1:6
die
twocoins <- c("HH","HT","TH","TT")
twocoins

Download

S <- c("YYY","YYN","YNY","YNN","NYY","NYN","NNY","NNN")
A <- c("YYY","YYN")
B <- c("YYN","YNY","NYY")
length(S)
"YYY" %in% S
union(A,B)
intersect(A,B)
setdiff(S,B)
no_purchases <- c("NNN")
intersect(no_purchases,A)
length(intersect(no_purchases,A))

Download

sample(c("H","T"), 10, replace = TRUE)


set.seed(1234)
cointosses <- sample(c("H","T"), 10000, replace = TRUE)
mean(cointosses=="H")


set.seed(1234)

# toss a coin 10,000 times
cointosses <- sample(c("H","T"), 10000, replace = TRUE)

# show the results of the first 10 tosses
cointosses[1:10]

# calculate the cumulative sum of heads
# (e.g. cumul_heads[100] is number of heads through first 100 tosses)
cumul_heads <- cumsum(cointosses=="H")

# calculate the cumulative frequency of heads
# (e.g. freqheads[100] is the frequency (fraction) of heads through first 100 tosses)
freq_heads <- cumul_heads/(1:10000)

# plot the freqheads vector
plot(freq_heads,xlab="Simulation number",ylab="Cumulative frequency of heads",cex=0.5)
abline(h=0.5,col="blue",lty=3)


set.seed(1234)

# toss two coins 10,000 times
coin1tosses <- sample(c("H","T"), 10000, replace=TRUE)
coin2tosses <- sample(c("H","T"), 10000, replace=TRUE)

# calculate the cumulative number of "two heads" occurrences
cumul_twoheads <- cumsum(coin1tosses=="H" & coin2tosses=="H")

# calculate the cumulative frequency of "two heads" occurrences
freq_twoheads <- cumul_twoheads/(1:10000)

# plot the freq_twoheads vector
plot(freq_twoheads,xlab="Simulation number",ylab="Cumulative frequency of two-heads",cex=0.5)
abline(h=0.25,col="blue",lty=3) 


set.seed(1234)
coin1tosses <- sample(c("H","T"), 10000, replace=TRUE)
coin2tosses <- sample(c("H","T"), 10000, replace=TRUE)
mean(coin1tosses=="H" & coin2tosses=="H")


set.seed(1234)

# roll six-sided die 10,000 times
dierolls <- sample(1:6, 10000, replace = TRUE)

# show the results of the first 10 rolls
dierolls[1:10]

# calculate the cumulative number of "six" rolls
cumul_six <- cumsum(dierolls==6)

# calculate the cumulative frequency of "six" rolls
freq_six <- cumul_six/(1:10000)

# plot the freq_six vector
plot(freq_six,xlab="Simulation number",ylab="Cumulative frequency of sixes",cex=0.5)
abline(h=1/6,col="blue",lty=3) 


set.seed(1234)
dierolls <- sample(1:6, 10000, replace = TRUE)
mean(dierolls==6)

Download

factorial(20)/factorial(17)
20*19*18


choose(20,17)

Download

options(scipen=999)


set.seed(1234)

# initialize the number of simulations and number of tosses
num_simulations <- 100000
num_tosses <- 100

# initialize a vector to store the cumulative count of streaks
cumul_streaks <- rep(0, num_simulations)

# initialize a counter for the number of streaks observed
streak_counter <- 0

# loop with simulated coin tosses and check for streak
for (i in 1:num_simulations) {
    # simulate the coin tosses
    tosses <- sample(c("H","T"), num_tosses, replace = TRUE)

    # paste together the toss outcomes into a single string
    toss_string <- paste(tosses, collapse = "")

    # add one to streak counter if streak of five heads occurs
    streak_counter <- streak_counter + grepl("HHHHH", toss_string)

    cumul_streaks[i] <- streak_counter
}

# create a vector of the cumulative frequencies
freq_streaks <- cumul_streaks/(1:num_simulations)

# output the final frequency (approximated probability)
freq_streaks[num_simulations]

# plot the cumulative frequencies against the simulation number
plot(freq_streaks[1000:num_simulations], xlab="Simulation number",
    ylab="Cumulative frequency of five-head streak occurrence", cex=0.5, ylim=c(0.77,0.83))

Download

cps <- read.csv("cps2019.csv", na.strings = "", stringsAsFactors = TRUE)
str(cps)
summary(cps)


sp500 <- read.csv("sp500-monthly-returns.csv")
head(sp500[,c("Date","T","BAC")])

Download

table(cps$lfstatus)
table(cps$lfstatus)/nrow(cps)


# graph-display format (two rows, one column)
par(mfrow = c(2,1))

# bar chart with sample counts
barplot(table(cps$lfstatus), ylim=c(0,3000), main="Bar chart (counts) of labor-force status")

# bar chart with sample proportions
barplot(table(cps$lfstatus)/nrow(cps), ylim=c(0,0.8), main="Bar chart (proportions) of labor-force status")

Download

# graph-display format (two rows, two columns)
par(mfrow = c(2,2))

# histogram of age with bin width = one year, counts
hist(cps$age, breaks=seq(29.5,59.5,1), main="Bin width = 1 yr, counts", xlab="Age")

# histogram of age with bin width = one year, density
hist(cps$age, breaks=seq(29.5,59.5,1), freq=FALSE, main="Bin width = 1 yr, density", xlab="Age")

# histogram of age with bin width = one year, counts
hist(cps$age, breaks=seq(29.5,59.5,2), main="Bin width = 2 yrs, counts", xlab="Age")

# histogram of age with bin width = one year, density
hist(cps$age, breaks=seq(29.5,59.5,2), freq=FALSE, main="Bin width = 2 yrs, density", xlab="Age")



# create subsample of the data of only "Employed" individuals
cpsemployed <- cps[cps$lfstatus=="Employed",]

# show the number of employed individuals
nrow(cpsemployed)

# graph-display format (three rows, two columns)
par(mfrow = c(3,2))

# histogram of weekly earnings with 10 bins (density on y-axis)
hist(cpsemployed$earnwk, breaks=10, freq=FALSE, main="Weekly earnings, 10 bins", xlab="Weekly earnings")

# histogram of weekly earnings with 20 bins (density on y-axis)
hist(cpsemployed$earnwk, breaks=20, freq=FALSE, main="Weekly earnings, 20 bins", xlab="Weekly earnings")

# histogram of weekly earnings with 50 bins (density on y-axis)
hist(cpsemployed$earnwk, breaks=50, freq=FALSE, main="Weekly earnings, 50 bins", xlab="Weekly earnings")

# histogram of weekly earnings with 100 bins (density on y-axis)
hist(cpsemployed$earnwk, breaks=100, freq=FALSE, main="Weekly earnings, 100 bins", xlab="Weekly earnings")

# histogram of weekly earnings with 200 bins (density on y-axis)
hist(cpsemployed$earnwk, breaks=200, freq=FALSE, main="Weekly earnings, 200 bins", xlab="Weekly earnings")

# histogram of weekly earnings with 500 bins (density on y-axis)
hist(cpsemployed$earnwk, breaks=500, freq=FALSE, main="Weekly earnings, 500 bins", xlab="Weekly earnings")


# histogram of weekly earnings with 92 bins, with estimated density overlaid
hist(cpsemployed$earnwk, breaks=92, freq=FALSE, main="", xlab="Weekly earnings")
lines(density(cpsemployed$earnwk), col="blue", lwd=2)

Download

mean(cps$age)
median(cps$age)
mean(cps$earnwk, na.rm = TRUE)
median(cps$earnwk, na.rm = TRUE)


# histogram of weekly earnings with 92 bins (Freedman-Diaconis), with estimated density overlaid
hist(cpsemployed$earnwk, breaks=92, freq=FALSE, main="", xlab="Weekly earnings")
lines(density(cpsemployed$earnwk), col="blue", lwd=2)
abline(v=mean(cpsemployed$earnwk), col="red", lwd=2)
abline(v=median(cpsemployed$earnwk), col="green", lwd=2)
legend("topright", legend=c("Sample median","Sample mean"), col=c("green","red"), lty=c(1,1), lwd=c(2,2))


mean(sp500$T)
median(sp500$T)
mean(sp500$BAC)
median(sp500$BAC)


# graph-display format (two rows, one column)
par(mfrow = c(2,1))

# histograms of T and BAC returns, with density curves
hist(sp500$T, freq=FALSE, breaks=seq(-0.8,0.8,0.02), main="", xlab="Monthly return (T)")
lines(density(sp500$T), col="blue", lwd=2)

hist(sp500$BAC, freq=FALSE, breaks=seq(-0.8,0.8,0.02), main="", xlab="Monthly return (BAC)")
lines(density(sp500$BAC), col="blue", lwd=2)


# sample quartiles of weekly earnings
quantile(cpsemployed$earnwk, probs = c(0.25,0.50,0.75))

# sample quartiles of weekly earnings
quantile(cpsemployed$earnwk, probs = seq(0.1,0.9,0.1))


# histogram of weekly earnings with 92 bins (Freedman-Diaconis), with estimated density overlaid
hist(cpsemployed$earnwk, breaks=92, freq=FALSE, main="Weekly earnings distribution is right-skewed", xlab="Weekly earnings")
lines(density(cpsemployed$earnwk), col="blue", lwd=2)
abline(v=quantile(cpsemployed$earnwk,0.1), col="red", lwd=2)
abline(v=quantile(cpsemployed$earnwk,0.25), col="orange", lwd=2)
abline(v=median(cpsemployed$earnwk), col="blue", lwd=2)
abline(v=quantile(cpsemployed$earnwk,0.75), col="green", lwd=2)
abline(v=quantile(cpsemployed$earnwk,0.9), col="violet", lwd=2)
legend("topright", legend=c("Sample 10% quantile","Sample 25% quantile","Sample median","Sample 75% quantile","Sample 90% quantile"), col=c("red","orange","blue","green","violet"), lty=c(1,1),lwd=c(2,2))

Download

par(mfrow = c(2,2))
s <- seq(-4,+4,0.01)
plot(s, dnorm(s,0, 1), type="l", col="blue", xlab="",ylab="")
lines(s, dnorm(s,0,1.5), col="black", lty=3)
s <- seq(-4,+8,0.01)
plot(s, dnorm(s,4, 1), type="l", col="blue", xlab="",ylab="")
lines(s, dnorm(s,0,1), col="black", lty=3)
s <- seq(-6,+8,0.01)
plot(s, dnorm(s,4, 1), type="l", col="blue", xlab="",ylab="")
lines(s, dnorm(s,0,2), col="black", lty=3)
s <- seq(-4,+10,0.01)
plot(s, dnorm(s,0,1), type="l", col="blue", xlab="",ylab="")
lines(s, dnorm(s,4,2), col="black", lty=3)


# interquartile range of weekly earnings
IQR(cps$earnwk, na.rm = TRUE)

# interquartile range of weekly earnings
IQR(cps$age)


# graph-display format (one row, two columns)
par(mfrow = c(1,2))

# box plot with whiskers and outliers (the default in R)
boxplot(cpsemployed$earnwk, ylab="Weekly earnings", main="Box plot (whiskers and outliers)")

# box plot with whiskers at min and max values
boxplot(cpsemployed$earnwk, range=0, ylab="Weekly earnings", main="Box plot (whiskers at min/max)")


# graph-display format (one row, two columns)
par(mfrow = c(1,2))

# box plot (whiskers and outliers) for AT&T
boxplot(sp500$T, ylab="Monthly return (T)", main="Box plot (whiskers and outliers)",ylim=c(-0.6,0.8))

# box plot (whiskers and outliers) for Bank of America  
boxplot(sp500$BAC, ylab="Monthly return (BAC)", main="Box plot (whiskers and outliers)",ylim=c(-0.6,0.8))


# MAD for AT&T
mean(abs(sp500$T-mean(sp500$T)))

# MAD for Bank of America
mean(abs(sp500$BAC-mean(sp500$BAC)))


# sample variance and sample standard deviation for AT&T
var(sp500$T)
sd(sp500$T)

# sample variance and sample standard deviation for Bank of America
var(sp500$BAC)
sd(sp500$BAC)


cpsunion <- cpsemployed[cpsemployed$unionstatus=="Union",]
cpsnonunion <- cpsemployed[cpsemployed$unionstatus=="Non-union",]

par(mfrow = c(2,1))

hist(cpsunion$earnwk, breaks=seq(0,max(cpsunion$earnwk)+200,200), freq=FALSE, main="Weekly earnings (union)",xlim=c(0,6000),xlab="Weekly earnings")
lines(density(cpsunion$earnwk), col="blue", lwd=2)

hist(cpsnonunion$earnwk, breaks=seq(0,max(cpsnonunion$earnwk)+200,200), freq=FALSE, main="Weekly earnings (non-union)", xlim=c(0,6000),xlab="Weekly earnings")
lines(density(cpsnonunion$earnwk), col="blue", lwd=2)


cpsmale <- cpsemployed[cpsemployed$gender=="Male",]
cpsfemale <- cpsemployed[cpsemployed$gender=="Female",]

par(mfrow = c(2,1))

hist(cpsmale$earnwk, breaks=seq(0,max(cpsmale$earnwk)+200,200), freq=FALSE, main="Weekly earnings (male)",xlim=c(0,6000),xlab="Weekly earnings")
lines(density(cpsmale$earnwk), col="blue", lwd=2)

hist(cpsfemale$earnwk, breaks=seq(0,max(cpsfemale$earnwk)+200,200), freq=FALSE, main="Weekly earnings (female)", xlim=c(0,6000),xlab="Weekly earnings")
lines(density(cpsfemale$earnwk), col="blue", lwd=2)

Download

# graph-display format (one row, three columns)
par(mfrow = c(1,3))

# histogram with bin widths of one hour
hist(cpsemployed$hrslastwk, breaks=seq(0.5,99.5,1), freq=FALSE, main="1-hour bins, hrslastwk", xlab="Hours worked last week")
            
# histogram with bin widths of ten hours
hist(cpsemployed$hrslastwk, breaks=seq(-5,105,10), freq=FALSE, main="10-hour bins, hrslastwk", xlab="Hours worked last week")
            
# box plot with whiskers and outliers
boxplot(cpsemployed$hrslastwk, ylab="Hours worked last week")

Download

par(mfrow = c(2,2))
s <- seq(-2,+10,0.01)
plot(s, dnorm(s,2, 1), type="l", col="blue", xlab="",ylab="",ylim=c(0,1),main=expression(paste(a==0, ", ", b==2)))
lines(s, dnorm(s,4,2), col="red")
legend("topright", legend=c("x","y = 2x"), col=c("blue","red"), lty=c(1,1), inset=0.02)
s <- seq(-2,+10,0.01)
plot(s, dnorm(s,2,1), type="l", col="blue", xlab="",ylab="",ylim=c(0,1),main=expression(paste(a==0, ", ", b==0.5)))
lines(s, dnorm(s,1,0.5), col="red")
legend("topright", legend=c("x","y = 0.5x"), col=c("blue","red"), lty=c(1,1), inset=0.02)
s <- seq(-2,+10,0.01)
plot(s, dnorm(s,2,1), type="l", col="blue", xlab="",ylab="",ylim=c(0,1),main=expression(paste(a==3, ", ", b==1)))
lines(s, dnorm(s,5,1), col="red")
legend("topright", legend=c("x","y = 3+x"), col=c("blue","red"), lty=c(1,1), inset=0.02)
s <- seq(-2,+10,0.01)
plot(s, dnorm(s,2,1), type="l", col="blue", xlab="",ylab="",ylim=c(0,1),main=expression(paste(a==3, ", ", b==0.5)))
lines(s, dnorm(s,4,0.5), col="red")
legend("topright", legend=c("x","y = 3+0.5x"), col=c("blue","red"), lty=c(1,1), inset=0.02)


cps <- read.csv("cps2019.csv", na.strings="", stringsAsFactors=TRUE)
cpsemployed <- subset(cps,cps$lfstatus=="Employed")

nonworkhrs <- 168-cps$hrslastwk

par(mfrow = c(2,1))

# histogram with bin widths of 1 hour
hist(cpsemployed$hrslastwk, breaks=seq(0.5,99.5,1), freq=FALSE, main="", xlab="Hours worked last week")
            
# histogram with bin widths of 1 hour
hist(nonworkhrs, breaks=seq(68.5,168.5,1), freq=FALSE, main="", xlab="Non-working hours last week")

Download

plot(sp500$T, type="l", ylab="Monthly return (T)")


# create a time-series object for AT&T monthly returns
ts_T <- ts(sp500$T, start=c(1991,1), frequency=12)

plot(ts_T, ylab="Monthly return (T)")


# create the time-series objects for monthly returns
ts_T <- ts(sp500$T, start=c(1991,1), frequency=12)
ts_BAC <- ts(sp500$BAC, start=c(1991,1), frequency=12)

plot(ts_T, ylab="Monthly returns", ylim=c(-0.6,0.6))
lines(ts_BAC, col="blue")
legend("topright", c("T","BAC"), lty=1, col=c("black","blue"))


plot(cbind(ts_T,ts_BAC), main="")

Download

# table of joint sample counts for labor-force status and race
table(cps$lfstatus, cps$race)
addmargins(table(cps$lfstatus, cps$race))

# table of joint sample proportions for labor-force status and race
addmargins(table(cps$lfstatus, cps$race))/nrow(cps)


# table of sample proportions of labor-force status conditional on race
prop.table(table(cps$lfstatus, cps$race), margin=2)


# table of sample proportions of race conditional on labor-force status
prop.table(table(cps$lfstatus, cps$race), margin=1)


# create sample count table for race and labor-force status variables
tbl_racelf <- table(cps$race, cps$lfstatus)

# barplot command --- note that categories on x-axis are based upon columns (lfstatus) of the table
barplot(prop.table(tbl_racelf, margin=1), ylim=c(0,0.8), col=c("blue","dodgerblue","lightblue"),
    legend.text=rownames(tbl_racelf), beside=TRUE, main="")


# create sample count table table for race and labor-force status variables
tbl_lfrace <- table(cps$lfstatus, cps$race)

# barplot command --- categories on x-axis are based upon columns (race) of the table
barplot(prop.table(tbl_lfrace, margin=1), ylim=c(0,0.8), col=c("blue","dodgerblue","lightblue"),
    legend.text=rownames(tbl_lfrace), beside=TRUE,
    args.legend = list(x="topleft",inset=0.01), main="") 


# box plot of weekly earnings, by race 
boxplot(cps$earnwk~cps$race, ylab="Weekly earnings", xlab="Race")


# create weekly earnings vectors for the three subsamples, by race
earnwk_black <- cpsemployed[cpsemployed$race=="Black","earnwk"]
earnwk_other <- cpsemployed[cpsemployed$race=="Other","earnwk"]
earnwk_white <- cpsemployed[cpsemployed$race=="White","earnwk"]

# first plot the density of weekly earnings for black individuals
plot(density(earnwk_black), main="", xlab="Weekly earnings")

# overlay the density of weekly earnings for other-race individuals
lines(density(earnwk_other), lty=2)

# overlay the density of weekly earnings for white individuals
lines(density(earnwk_white), lty=3)

# draw the legend
legend("topright", legend=c("Black","Other","White"), lty=c(1,2,3), inset=0.01)

Download

# scatter plot of weekly earnings versus years of education
plot(cps$educ, cps$earnwk, xlab="Education", ylab="Weekly earnings")


# expanded scatter plot, with weekly earnings, years of education, and weekly hours worked
plot(cps[,c("educ","earnwk","hrslastwk")])


# scatter plot of Lowe's returns versus Home Depot returns
plot(sp500$HD, sp500$LOW, xlab="Monthly returns (HD)", ylab="Monthly returns (LOW)")


# expanded scatter plot of monthly returns
plot(sp500[,c("HD","LOW","BAC","WFC")])


tempx <- c(4,3,8,12,0,10,5)
tempy <- c(8,6,10,1,15,3,6) 

plot(tempx,tempy,xlab="x",ylab="y")


cov(sp500$HD, sp500$LOW)


# scatter plot with horizontal/vertical lines drawn at the sample means
plot(sp500$HD, sp500$LOW, main="", xlab="Monthly returns (HD)", ylab="Monthly returns (LOW)")
abline(h=mean(sp500$LOW))
abline(v=mean(sp500$HD))


cov(cpsemployed$educ, cpsemployed$earnwk)


# scatter plot with horizontal/vertical lines drawn at the sample means
plot(cpsemployed$educ, cpsemployed$earnwk, main="", xlab="Education", ylab="Weekly earnings")
abline(h=mean(cpsemployed$earnwk))
abline(v=mean(cpsemployed$educ))


set.seed(1234)

x <- rnorm(100)
y5 <- rnorm(100)
y1 <- 0.4*x + (1-(0.4)^2)*rnorm(100)
y2 <- 0.8*x + (1-(0.8)^2)*rnorm(100)
y3 <- -0.4*x + (1-(0.4)^2)*rnorm(100)
y4 <- -0.8*x + (1-(0.8)^2)*rnorm(100)
y6 <- x

par(mfrow=c(2,3))
plot(y1,x,main=r[xy]~" = +0.4",ylab="y",xlab="x")
plot(y2,x,main=r[xy]~" = +0.8",ylab="y",xlab="x")
plot(y6,x,main=r[xy]~" = +1",ylab="y",xlab="x")
plot(y3,x,main=r[xy]~" = -0.4",ylab="y",xlab="x")
plot(y4,x,main=r[xy]~" = -0.8",ylab="y",xlab="x")
plot(y5,x,main=r[xy]~" = 0",ylab="y",xlab="x")


temp <- read.csv("parabola-zero-correlation.csv")
par(mfrow=c(1,2))
plot(temp[,1],temp[,2],xlab="x",ylab="y")
plot(temp[,1],temp[,2],xlab="x",ylab="y")
abline(h=mean(temp[,2]),lty=3)
abline(v=mean(temp[,1]),lty=3)


cor(cpsemployed$educ, cpsemployed$earnwk)


cor(cpsemployed$hrslastwk, cpsemployed$earnwk)


cor(sp500$HD, sp500$LOW)


cor(sp500[,c("HD","LOW","BAC","WFC","MRO","COP")])


# sample mean of weekly earnings for union employees
mean(cpsemployed[cpsemployed$unionstatus=="Union","earnwk"])

# sample mean of weekly earnings for non-union employees
mean(cpsemployed[cpsemployed$unionstatus=="Non-union","earnwk"])

# sample correlation between union indicator and weekly earnings
union_var <- ifelse(cpsemployed$unionstatus=="Union",1,0)
cor(union_var, cpsemployed$earnwk)


set.seed(1234)

par(mfrow=c(3,2))
x<-rnorm(200)
y1 <- 0.85*x + (1-(0.85)^2)*rnorm(200)
y2 <- -0.85*x + (1-(0.85)^2)*rnorm(200)
y3 <- rnorm(200)
plot(x,y1,xlim=c(-3,3),ylim=c(-3,3),xlab="x",ylab="y",main="Variation in "~x+y~" for positive"~r[xy])
abline(a=-3,b=-1,lty=3)
abline(a=-2,b=-1,lty=3)
abline(a=-1,b=-1,lty=3)
abline(a=0,b=-1,lty=3)
abline(a=1,b=-1,lty=3)
abline(a=2,b=-1,lty=3)
abline(a=3,b=-1,lty=3)
hist(x+y1,breaks=seq(-7.5,7.5,0.5),freq=FALSE,main="Distribution of "~x+y,ylim=c(0,1),xlab="")
axis(side=1,at=seq(-6,6,1),labels=seq(-6,6,1))
plot(x,y3,xlim=c(-3,3),ylim=c(-3,3),xlab="x",ylab="y",main="Variation in"~x+y~" for "~r[xy]~"= 0")
abline(a=-3,b=-1,lty=3)
abline(a=-2,b=-1,lty=3)
abline(a=-1,b=-1,lty=3)
abline(a=0,b=-1,lty=3)
abline(a=1,b=-1,lty=3)
abline(a=2,b=-1,lty=3)
abline(a=3,b=-1,lty=3)
hist(x+y3,breaks=seq(-7.5,7.5,0.5),freq=FALSE,main="Distribution of "~x+y,ylim=c(0,1),xlab="")
axis(side=1,at=seq(-6,6,1),labels=seq(-6,6,1))
plot(x,y2,xlim=c(-3,3),ylim=c(-3,3),xlab="x",ylab="y",main="Variation in"~x+y~" for negative"~r[xy])
abline(a=-3,b=-1,lty=3)
abline(a=-2,b=-1,lty=3)
abline(a=-1,b=-1,lty=3)
abline(a=0,b=-1,lty=3)
abline(a=1,b=-1,lty=3)
abline(a=2,b=-1,lty=3)
abline(a=3,b=-1,lty=3)
hist(x+y2,breaks=seq(-7.5,7.5,0.5),freq=FALSE,main="Distribution of "~x+y,ylim=c(0,1),xlab="")
axis(side=1,at=seq(-6,6,1),labels=seq(-6,6,1))


# graph-display format (two rows, one column)
par(mfrow=c(2,1))

# create vectors of length nine
mean_bac_wfc <- rep(0,9) 
sd_bac_wfc <- rep(0,9)
mean_cop_wfc <- rep(0,9)
sd_cop_wfc <- rep(0,9)

# for loop with nine iterations (nine different weights)
for (i in 1:9) {
    mean_bac_wfc[i] <- mean(0.1*i*sp500$BAC + (1-0.1*i)*sp500$WFC) 
    sd_bac_wfc[i] <- sd(0.1*i*sp500$BAC + (1-0.1*i)*sp500$WFC) 
    mean_cop_wfc[i] <- mean(0.1*i*sp500$COP + (1-0.1*i)*sp500$WFC)
    sd_cop_wfc[i] <- sd(0.1*i*sp500$COP + (1-0.1*i)*sp500$WFC)
}

# plot standard deviation vs mean for BAC-WFC portolios
plot(mean_bac_wfc, sd_bac_wfc, ylim=c(0.07,0.11), ylab="Standard deviation",
    xlab="Mean", main="BAC-WFC Portfolios for Different Weights")
# add labels showing the weights above the points
for (i in 1:9) { text(mean_bac_wfc[i],sd_bac_wfc[i]+0.005,as.character(0.1*i),cex=0.5) }

# plot standard deviation vs mean for COP-WFC portolios
plot(mean_cop_wfc, sd_cop_wfc, ylim=c(0.06,0.10), ylab="Standard deviation",
    xlab="Mean", main="COP-WFC Portfolios for Different Weights")
# add labels showing the weights above the points
for (i in 1:9) { text(mean_cop_wfc[i],sd_cop_wfc[i]+0.005,as.character(0.1*i),cex=0.5) }

Download

temp <- read.csv("parabola-zero-correlation.csv")
plot(temp[11:20,1],temp[11:20,2],xlab="x",ylab="y")


cor(sp500[,c("HD","LOW","BAC","WFC","MRO","COP")], method="kendall")

Download

par(mfrow=c(2,2))

plot(0:1,c(0.5,0.5),type="h",axes=FALSE,main="pmf for fair coin toss",xlab=expression(paste(x[k],"*")),ylab="",xlim=c(0,1),ylim=c(0,1))
title(ylab=expression(paste("Probability  ",p[X],"(",x[k],"*",")")), line=2.5)
axis(1,at=c(0,1))
axis(2,at=seq(0,1,0.1))

plot(1:6,rep(1/6,6),type="h",axes=FALSE,main="pmf for fair die roll",xlab=expression(paste(x[k],"*")),ylab="",xlim=c(1,6),ylim=c(0,1))
title(ylab=expression(paste("Probability  ",p[X],"(",x[k],"*",")")), line=2.5)
axis(1,at=1:6)
axis(2,at=seq(0,1,0.1))

plot(0:3,c(0.512,0.384,0.096,0.008),type="h",axes=FALSE,main="pmf for # of purchases",xlab=expression(paste(x[k],"*")),ylab="",xlim=c(0,3),ylim=c(0,1))
title(ylab=expression(paste("Probability  ",p[X],"(",x[k],"*",")")), line=2.5)
axis(1,at=0:3)
axis(2,at=seq(0,1,0.1))

temp <- 1:20
probtemp <- 0.2*(0.8^(temp-1))
plot(1:20,probtemp,type="h",axes=FALSE,main="pmf for # visitors until purchase",xlab=expression(paste(x[k],"*")),ylab="",xlim=c(1,20),ylim=c(0,1))
title(ylab=expression(paste("Probability  ",p[X],"(",x[k],"*",")")), line=2.5)
axis(1,at=1:20)
axis(2,at=seq(0,1,0.1))


par(mfrow=c(1,1))

plot(-1:2,c(0,0.5,1,1),type="s",axes=FALSE,lty=3,main="",xlab=expression(paste(x[k],"*")),ylab="",xlim=c(-1,2),ylim=c(0,1))
title(ylab=expression(paste(F[X],"(",x[k],"*",")")), line=2.5)
axis(1,at=c(0,1))
axis(2,at=seq(0,1,0.1))
points(0,0,pch=1)
points(0,0.5,pch=16)
points(1,0.5,pch=1)
points(1,1,pch=16)
segments(-1,0,0,0)
segments(0,0.5,1,0.5)
segments(1,1,2,1)


par(mfrow=c(1,1))

plot(0:7,c(0,1/6,2/6,3/6,4/6,5/6,1,1),type="s",axes=FALSE,main="",xlab=expression(paste(x[k],"*")),ylab="",xlim=c(0,7),ylim=c(0,1),lty=3)
title(ylab=expression(paste(F[X],"(",x[k],"*",")")), line=2.5)
axis(1,at=1:6)
axis(2,at=seq(0,1,0.1))
points(1,0,pch=1)
points(1,1/6,pch=16)
points(2,1/6,pch=1)
points(2,2/6,pch=16)
points(3,2/6,pch=1)
points(3,3/6,pch=16)
points(4,3/6,pch=1)
points(4,4/6,pch=16)
points(5,4/6,pch=1)
points(5,5/6,pch=16)
points(6,5/6,pch=1)
points(6,1,pch=16)
segments(0,0,1,0)
segments(1,1/6,2,1/6)
segments(2,2/6,3,2/6)
segments(3,3/6,4,3/6)
segments(4,4/6,5,4/6)
segments(5,5/6,6,5/6)
segments(6,1,7,1)


par(mfrow=c(1,1))

plot(-1:4,c(0,0.512,0.896,0.992,1,1),type="s",axes=FALSE,main="",xlab=expression(paste(x[k],"*")),ylab="",xlim=c(-1,4),ylim=c(0,1),lty=3)
title(ylab=expression(paste(F[X],"(",x[k],"*",")")), line=2.5)
axis(1,at=0:3)
axis(2,at=seq(0,1,0.1))
points(0,0,pch=1)
points(0,0.512,pch=16)
points(1,0.512,pch=1)
points(1,0.896,pch=16)
points(2,0.896,pch=1)
points(2,0.992,pch=16)
points(3,0.992,pch=1)
points(3,1,pch=16)
segments(-1,0,0,0)
segments(0,0.512,1,0.512)
segments(1,0.896,2,0.896)
segments(2,0.992,3,0.992)
segments(3,1,4,1)

Download

set.seed(1234)

# initialize the number of simulations
num_simulations <- 5000

# roll a fair six-sided die for all simulations
dierolls <- sample(1:6, num_simulations, replace=TRUE)

# graph-display format (two rows, two columns)
par(mfrow=c(2,2))

# plot the sample frequency of a "6" being rolled
plot(cumsum(dierolls==6)/(1:num_simulations), main="Sample proportion of 6's",
    xlab="", ylab="", ylim=c(0,1), cex=0.5)
abline(h=1/6,col="red")

# plot the sample mean of die rolls
plot(cumsum(dierolls)/(1:num_simulations), main="Sample mean of die rolls",
    xlab="", ylab="", ylim=c(1,6), cex=0.5)
abline(h=3.5,col="red")

# plot the sample variance of die rolls
tempvar <- rep(0,num_simulations)
for (i in 2:num_simulations) {
  tempavg <- cumsum(dierolls)[i]/i
  tempvar[i] <- sum((dierolls[1:i]-tempavg)^2)/(i-1)
}
plot(tempvar[2:num_simulations], main="Sample variance of die rolls",
    xlab="", ylab="", ylim=c(2,4), cex=0.5)
abline(h=35/12,col="red")

# plot the sample standard deviation of die rolls
plot(sqrt(tempvar[2:num_simulations]), main="Sample standard deviation of die rolls",
    xlab="", ylab="", ylim=c(1.5,2.5), cex=0.5)
abline(h=sqrt(35/12),col="red")

Download

par(mfrow=c(1,1))
curve(x*(1-x),0,1,main="",xlab=expression(pi),ylab=("Variance"))
abline(v=0.5,lty=3)

Download

dbinom(3,10,0.2)
dbinom(4,10,0.2)


dbinom(0:10,10,0.2)
pbinom(0:10,10,0.2)


set.seed(1234)
rbinom(50,10,0.2)


plot(0:20, dbinom(0:20,20,0.6), type="h", axes=FALSE, main="pmf for # of days that stock goes up",
    xlab="", ylab="Probability", xlim=c(0,20), ylim=c(0,0.2))
axis(1, at=0:20)
axis(2, at=seq(0,0.2,0.02))


par(mfrow=c(2,2))
plot(0:10,dbinom(0:10,10,0.1),type="h",main="Binomial(10,0.1) pmf",xlab="",ylab="Probability")
plot(0:10,dbinom(0:10,10,0.2),type="h",main="Binomial(10,0.2) pmf",xlab="",ylab="Probability")
plot(0:10,dbinom(0:10,10,0.5),type="h",main="Binomial(10,0.5) pmf",xlab="",ylab="Probability")
plot(0:100,dbinom(0:100,100,0.1),type="h",main="Binomial(100,0.1) pmf",xlab="",ylab="Probability")


1-pbinom(100,200,0.52)


1-pbinom(500,1000,0.52)


par(mfrow=c(2,1))
plot((0:200)/200,dbinom(0:200,200,0.52)/200,type="h",main="pmf for P (200-voter poll)",xlab="",ylab="Probability")
abline(v=0.5,col="red",lwd=2)
abline(v=0.52,col="blue",lwd=2)
plot((0:1000)/1000,dbinom(0:1000,1000,0.52)/1000,type="h",main="pmf for P (1000-voter poll)",xlab="",ylab="Probability")
abline(v=0.5,col="red",lwd=2)
abline(v=0.52,col="blue",lwd=2)

Download

dgeom(0:10,0.2)
pgeom(0:10,0.2)


# graph-display format (two rows, two columns)
par(mfrow=c(2,2))

# plots of four different geometric pmf's
plot(0:20, dgeom(0:20,0.1), type="h", main="Geo(0.1) pmf", xlab="", ylab="Probability", ylim=c(0,0.8))
plot(0:20, dgeom(0:20,0.2), type="h", main="Geo(0.2) pmf", xlab="", ylab="Probability", ylim=c(0,0.8))
plot(0:20, dgeom(0:20,0.5), type="h", main="Geo(0.5) pmf", xlab="", ylab="Probability", ylim=c(0,0.8))
plot(0:20, dgeom(0:20,0.8), type="h", main="Geo(0.8) pmf", xlab="", ylab="Probability", ylim=c(0,0.8))

Download

dnbinom(0:2,3,0.2)


# graph-display format (three rows, one column)
par(mfrow=c(3,1))

# plots of three different negative binomial pmf's
plot(0:50, dnbinom(0:50,3,0.2), type="h", main="NegBin(3,0.2) pmf", xlab="", ylab="Probability", ylim=c(0,0.06))
plot(0:50, dnbinom(0:50,3,0.1), type="h", main="NegBin(3,0.1) pmf", xlab="", ylab="Probability", ylim=c(0,0.06))
plot(0:50, dnbinom(0:50,4,0.2), type="h", main="NegBin(4,0.2) pmf", xlab="", ylab="Probability", ylim=c(0,0.06))

Download

# graph-display format (two rows, one column)
par(mfrow=c(2,1))

# plots of two different Poisson pmf's
plot(0:10, dpois(0:10,2), type="h", main="Poisson(2) pmf", xlab="Number of patent applications", ylab="Probability")
plot(0:10, dpois(0:10,4), type="h", main="Poisson(4) pmf", xlab="Number of patent applications", ylab="Probability")


plot(0:40, dpois(0:40,20), type="h", main="", xlab="Customers between 10am and 11am", ylab="Probability")


plot(0:6, dpois(0:6,1/3), type="h", main="", xlab="Customers during a one-minute interval", ylab="Probability")

Download

par(mfrow=c(2,1))

x_beta <- seq(0,1,by=0.01)
y_beta <- dbeta(x_beta,shape1=2,shape2=7)
plot(x_beta,y_beta,type="l",yaxs="i",ylim=c(0,3.5),ylab="",xlab="",xaxt="n",yaxt="n",main="pdf example")
lines(x=c(0.3,0.3),y=c(0,dbeta(0.3,shape1=2,shape2=7)),lty=3)
lines(x=c(0.5,0.5),y=c(0,dbeta(0.5,shape1=2,shape2=7)),lty=3)
mtext("a",side=1,at=0.3)
mtext("b",side=1,at=0.5)
mtext(expression(f[X](x)),side=2,at=3.5)

plot(x_beta,y_beta,type="l",yaxs="i",ylim=c(0,3.5),ylab="",xlab="",xaxt="n",yaxt="n",main="pdf example with interval probability")
xshade <- seq(0.3,0.5,length=100)
yshade <- dbeta(xshade,shape1=2,shape2=7)
polygon(c(0.3,xshade,0.5),c(0,yshade,0),col="blue")
mtext("a",side=1,at=0.3)
mtext("b",side=1,at=0.5)
mtext(expression(f[X](x)),side=2,at=3.5)


temp <- seq(-1,2,0.01)
plot(temp,dunif(temp),type='l',main="",ylab=expression(f[X](x)),xlab="x")


temp <- seq(3,12,0.01)
plot(temp,dunif(temp,5,10),type='l',main="",ylab=expression(f[X](x)),xlab="x")


dunif(4,5,10)
dunif(6,5,10)
dunif(8,5,10)
set.seed(1234)
runif(20,5,10)


# define a function that returns the pdf of a triangular distribution
tri_pdf <- function(x) {
    return( (x>=0)*(x<=1)*x + (x>1)*(x<=2)*(2-x) )
}

# create a grid of values to evaluate the pdf
temp <- seq(-1,3,0.01)

# graph the pdf
plot(temp, tri_pdf(temp), type='l', main="", ylab=expression(f[X](x)), xlab="x")

Download

par(mfrow=c(3,1))

x_beta <- seq(0,1,by=0.01)
y_beta <- dbeta(x_beta,shape1=2,shape2=7)
plot(x_beta,y_beta,type="l",xaxs="i",yaxs="i",ylim=c(0,3.5),xaxt="n",xlab="",ylab=expression(f[X](x)),main="pdf example")
xshade <- seq(0,0.3,length=100)
yshade <- dbeta(xshade,shape1=2,shape2=7)
polygon(c(0,xshade,0.3),c(0,yshade,0),col="blue")
mtext("a",side=1,at=0.3)
mtext("b",side=1,at=0.4)

plot(x_beta,y_beta,type="l",xaxs="i",yaxs="i",ylim=c(0,3.5),xaxt="n",xlab="",ylab=expression(f[X](x)),main="pdf example")
xshade <- seq(0,0.4,length=100)
yshade <- dbeta(xshade,shape1=2,shape2=7)
polygon(c(0,xshade,0.4),c(0,yshade,0),col="blue")
mtext("a",side=1,at=0.3)
mtext("b",side=1,at=0.4)

y_beta <- pbeta(x_beta,shape1=2,shape2=7)
plot(x_beta,y_beta,type="l",xaxs="i",yaxs="i",xaxt="n",yaxt="n",xlab="",ylab=expression(F[X](x)),main="cdf example")
axis(2,seq(0,1.2,by=0.2),las=2)
lines(x=c(0.3,0.3),y=c(0,pbeta(0.3,shape1=2,shape2=7)),lty=3)
lines(x=c(0,0.3),y=c(pbeta(0.3,shape1=2,shape2=7),pbeta(0.3,shape1=2,shape2=7)),lty=3)
lines(x=c(0.4,0.4),y=c(0,pbeta(0.4,shape1=2,shape2=7)),lty=3)
lines(x=c(0,0.4),y=c(pbeta(0.4,shape1=2,shape2=7),pbeta(0.4,shape1=2,shape2=7)),lty=3)
mtext("a",side=1,at=0.3)
mtext("b",side=1,at=0.4)


temp <- seq(-1,2,0.01)
plot(temp,punif(temp),type='l',main="",ylab=expression(F[X](x)),xlab="x")


punif(0.5)-punif(0.2)


# define a function that returns the cdf of a triangular distribution
tri_cdf <- function(x) {
    return( (x>=0)*(x<=1)*(x^2/2) + (x>1)*(x<=2)*(1-((2-x)^2/2)) + (x>2)*1 )
}

# create a grid of values to evaluate the pdf
temp <- seq(-1,3,0.01)

# graph the cdf
plot(temp, tri_cdf(temp), type='l', main="", ylab=expression(F[X](x)), xlab="x")


tri_cdf(1.3)-tri_cdf(0.5)


integrate(dunif,0.2,0.8)
integrate(dunif,-Inf,0.8)
integrate(tri_pdf,0.5,1.3)


par(mfrow=c(1,1))

plot(c(-1,0,0,1,1),c(0,0,0.3,0.8,1),type="l",axes=FALSE,lty=3,main="",xlab="x",ylab=expression(F[X](x)),xlim=c(-1,2),ylim=c(0,1))
axis(1,at=c(0,1))
axis(2,at=seq(0,1,0.1))
points(0,0,pch=1)
points(0,0.3,pch=16)
points(1,0.8,pch=1)
points(1,1,pch=16)
segments(-1,0,0,0)
segments(0,0.3,1,0.8)
segments(1,1,2,1)


par(mfrow=c(2,1))

x_beta <- seq(0,1,by=0.01)
y_beta <- pbeta(x_beta,shape1=2,shape2=7)
plot(x_beta,y_beta,type="l",xaxs="i",yaxs="i",xaxt="n",yaxt="n",ylab=expression(F[X](x)),xlab="x",main="cdf example with quantiles")
axis(2,seq(0,1.2,by=0.1),las=2)
lines(x=c(0,qbeta(0.5,shape1=2,shape2=7)),y=c(0.5,0.5),lty=3)
lines(x=c(qbeta(0.5,shape1=2,shape2=7),qbeta(0.5,shape1=2,shape2=7)),y=c(0,0.5),lty=3)
lines(x=c(0,qbeta(0.8,shape1=2,shape2=7)),y=c(0.8,0.8),lty=3)
lines(x=c(qbeta(0.8,shape1=2,shape2=7),qbeta(0.8,shape1=2,shape2=7)),y=c(0,0.8),lty=3)
mtext(expression(tau[X][","][0.5]),side=1,at=qbeta(0.5,shape1=2,shape2=7))
mtext(expression(tau[X][","][0.8]),side=1,at=qbeta(0.8,shape1=2,shape2=7))

y_beta <- dbeta(x_beta,shape1=2,shape2=7)
plot(x_beta,y_beta,type="l",xaxs="i",yaxs="i",ylim=c(0,3.5),xaxt="n",ylab=expression(f[X](x)),xlab="x",main="corresponding pdf")
lines(x=c(qbeta(0.5,shape1=2,shape2=7),qbeta(0.5,shape1=2,shape2=7)),y=c(0,dbeta(qbeta(0.5,shape1=2,shape2=7),shape1=2,shape2=7)),lty=3)
lines(x=c(qbeta(0.8,shape1=2,shape2=7),qbeta(0.8,shape1=2,shape2=7)),y=c(0,dbeta(qbeta(0.8,shape1=2,shape2=7),shape1=2,shape2=7)),lty=3)
mtext(expression(tau[X][","][0.5]),side=1,at=qbeta(0.5,shape1=2,shape2=7))
mtext(expression(tau[X][","][0.8]),side=1,at=qbeta(0.8,shape1=2,shape2=7))


qunif(0.8)
qunif(c(0.50,0.75),5,10)


# auxiliary function that returns x*f(x) for a U(0,1) random variable
exp_unif <- function(x) {
    return( x*dunif(x) )
}

# numerical integration to evaluate E(X) for X~U(0,1)
integrate(exp_unif,-Inf,Inf)
integrate(exp_unif,-Inf,Inf)$value


# store E(X) for X~U(0,1) in a variable
mean_unif <- integrate(exp_unif,-Inf,Inf)$value

# auxiliary function that returns (x-E(X))^2*f(x) for a U(0,1) random variable
var_unif <- function(x) {
    return( (x-mean_unif)^2*dunif(x) )
}

# numerical integration to evaluate Var(X) for X~U(0,1)
integrate(var_unif,-Inf,Inf)

# numerical integration to evaluate sd(X) for X~U(0,1)
sqrt(integrate(var_unif,-Inf,Inf)$value)


# auxiliary function that returns x*f(x) for a triangular random variable
exp_tri <- function(x) {
    return( x*tri_pdf(x) )
}

# store and print E(X) for X~U(0,1) in a variable
mean_tri <- integrate(exp_tri,-Inf,Inf)$value
print(mean_tri)

# auxiliary function that returns (x-E(X))^2*f(x) for a U(0,1) random variable
var_tri <- function(x) {
    return( (x-mean_tri)^2*tri_pdf(x) )
}

# numerical integration to evaluate Var(X) for a triangular random variable
integrate(var_tri,-Inf,Inf)

# numerical integration to evaluate sd(X) for a triangular random variable
sqrt(integrate(var_tri,-Inf,Inf)$value)

Download

par(mfrow=c(1,1))

budget_line <- function(x) { 2*(90-x) }
temp <- seq(30,110,1)
plot(temp,budget_line(temp),type='l',main="",ylab="Y",xlab="X",ylim=c(30,60))
segments(60,40,80,40,lwd=3)
segments(60,50,80,50,lwd=3)
segments(60,40,60,50,lwd=3)
segments(80,40,80,50,lwd=3)
polygon(c(70,65,80,80,70),c(40,50,50,40,40),col="blue")
text(70,38.5,"(70,40)",adj=c(1,0),cex=0.7)
text(65,50.5,"(65,50)",adj=c(0,0),cex=0.7)


set.seed(1234)

# initialize the number of simulations
num_simulations <- 100000

# create simulated draws
days_filming_draws <- runif(num_simulations,min=60,max=80)
days_editing_draws <- runif(num_simulations,min=40,max=50)

# calculate frequency corresponding to costs being over-budget
mean(days_filming_draws + 0.5*days_editing_draws > 90)



library(unifed)

par(mfrow=c(2,2))

unif2_pdf <- function(x) {
    temp <- rep(0,length(x))
    for (i in 1:length(x)) {
        if (x[i]<0)
            temp[i]=0
        else if (x[i]>2)
            temp[i]=0
        else
            temp[i] = dirwin.hall(x[i],2)
    }
    return(temp)
}

unif3_pdf <- function(x) {
    temp <- rep(0,length(x))
    for (i in 1:length(x)) {
        if (x[i]<0)
            temp[i]=0
        else if (x[i]>3)
            temp[i]=0
        else        
            temp[i] = dirwin.hall(x[i],3)
    }
    return(temp)
}

unif10_pdf <- function(x) {
    temp <- rep(0,length(x))
    for (i in 1:length(x)) {
        if (x[i]<0)
            temp[i]=0
        else if (x[i]>10)
            temp[i]=0
        else
            temp[i] = dirwin.hall(x[i],10)
    }
    return(temp)
}

x <- seq(-1,2,0.001)
plot(x,dunif(x),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==1))
plot(x,unif2_pdf(2*x),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==2))
plot(x,unif3_pdf(3*x),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==3))
plot(x,unif10_pdf(10*x),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==10))



par(mfrow=c(2,2))

x = seq(0.01,5,0.01)

plot(x,dgamma(x,1,1/2),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==1))
plot(x,dgamma(3*x,3,1/2),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==3))
plot(x,dgamma(10*x,10,1/2),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==10))
plot(x,dgamma(20*x,20,1/2),type="l",xlab="v",ylab=expression(f[V](v)),main=expression(m==20))

Download

par(mfrow=c(1,1))

x_normal <- seq(-4,4,by=0.01)
y_normal <- dnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,0.5),ylab=expression(f[X](x)),xlab="x",xaxt="n",yaxt="n")
#axis(1,labels=FALSE,xaxt="n")
mtext(expression(mu),side=1,at=0)
lines(x=c(0,0),y=c(0,0.5),lty=3)


par(mfrow=c(2,1))

x_normal <- seq(-4,6,by=0.01)
y_normal <- dnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,0.5),ylab=expression(f[X](x)),xlab="x",xaxt="n",yaxt="n",main="Shift in location")
lines(x_normal,dnorm(x_normal-1.5),col="blue",lty=3)
axis(1,labels=FALSE,xaxt="n")
mtext(expression(mu),side=1,at=0)
lines(x=c(0,0),y=c(0,0.5),lty=3)
lines(x=c(1.5,1.5),y=c(0,0.5),lty=3,col="blue")

x_normal <- seq(-6,6,by=0.01)
y_normal <- dnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,0.5),ylab=expression(f[X](x)),xlab="x",xaxt="n",yaxt="n",main="Increase in variance")
lines(x_normal,dnorm(x_normal,0,2),col="blue",lty=3)
axis(1,labels=FALSE,xaxt="n")
mtext(expression(mu),side=1,at=0)
lines(x=c(0,0),y=c(0,0.5),lty=3)


par(mfrow=c(2,1))

x_normal <- seq(-4,4,by=0.01)
y_normal <- dnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,0.5),ylab=expression(f[X](x)),xlab="x",xaxt="n",yaxt="n",main="pdf of normal random variable")
axis(1,labels=FALSE,xaxt="n")
mtext(expression(mu),side=1,at=0)
lines(x=c(0,0),y=c(0,0.5),lty=3)

x_normal <- seq(-4,4,by=0.01)
y_normal <- pnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,1),ylab=expression(F[X](x)),xlab="x",xaxt="n",yaxt="n",main="cdf of normal random variable")
axis(1,labels=FALSE,xaxt="n")
axis(2,at=seq(0,1,by=0.1),las=2)
mtext(expression(mu),side=1,at=0)
lines(x=c(0,0),y=c(0,1),lty=3)
lines(x=c(-5,0),y=c(0.5,0.5),lty=3)


set.seed(1234)

dnorm(0)
dnorm(1)
pnorm(1)
rnorm(10)

dnorm(0,mean=0,sd=3)
dnorm(1,mean=0,sd=3)
pnorm(1,mean=0,sd=3)
rnorm(10,mean=0,sd=3)


par(mfrow=c(2,1))

x_normal <- seq(-4,4,by=0.01)
y_normal <- dnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,0.5),ylab=expression(f[X](x)),xlab="x",xaxt="n",yaxt="n",main=expression(paste("probability of being within ",sigma," of the mean ",mu)))
axis(1,labels=FALSE,xaxt="n")
mtext(expression(mu),side=1,at=0)
mtext(expression(mu-sigma),side=1,at=-1)
mtext(expression(mu+sigma),side=1,at=1)
xshade <- seq(-1,1,length=100)
yshade <- dnorm(xshade)
polygon(c(-1,xshade,1),c(0,yshade,0),col="blue")
lines(x=c(0,0),y=c(0,0.5),lty=3)

x_normal <- seq(-4,4,by=0.01)
y_normal <- dnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,0.5),ylab=expression(f[X](x)),xlab="x",xaxt="n",yaxt="n",main=expression(paste("probability of being within 2",sigma," of the mean ",mu)))
axis(1,labels=FALSE,xaxt="n")
mtext(expression(mu),side=1,at=0)
mtext(expression(mu-2*sigma),side=1,at=-2)
mtext(expression(mu+2*sigma),side=1,at=2)
xshade <- seq(-2,2,length=100)
yshade <- dnorm(xshade)
polygon(c(-2,xshade,2),c(0,yshade,0),col="blue")
lines(x=c(0,0),y=c(0,0.5),lty=3)


par(mfrow=c(1,1))

x_normal <- seq(-4,4,by=0.01)
y_normal <- dnorm(x_normal)
plot(x_normal,y_normal,type="l",yaxs="i",ylim=c(0,0.5),xaxt="n",ylab=expression(phi(z)),xlab=expression(z),main="")
axis(1,at=seq(-4,4,by=1),las=1)
lines(x=c(0,0),y=c(0,0.5),lty=3)
lines(x=c(-5,0),y=c(dnorm(0),dnorm(0)),lty=3)


# return the 2.5%, 5%, 95%, and 97.5% quantiles of N(0,1)
qnorm(c(0.025,0.05,0.95,0.975))

# confirm the quantiles and probability intervals using the cdf
pnorm(-1.96)
pnorm(1.96)
pnorm(1.96)-pnorm(-1.96)
pnorm(-1.645)
pnorm(1.645)
pnorm(1.645)-pnorm(-1.645)


qnorm(0.85)


qnorm(0.90)


1-pnorm(-0.875)


qnorm(0.925)


1-pnorm(-2.5/sqrt(17))


1-pnorm(0.04/sqrt(0.00618))


wgtx <- seq(0,1,by=0.01)

portfolio_mean <- wgtx*(0.03)+(1-wgtx)*(0.07)

portfolio_variance <- (wgtx^2)*(0.01^2)+((1-wgtx)^2)*(0.08^2)+2*wgtx*(1-wgtx)*(0.2)*(0.01)*(0.08)

portfolio_stdev <- sqrt(portfolio_variance)

plot(portfolio_mean,portfolio_stdev,type="l",main="",xlab="Population mean of portfolio return",ylab="Population standard deviation of portfolio return")
points(0.05,sqrt(0.001705),pch=16)
points(0.2*0.07+0.8*0.03,sqrt(0.04*0.08*0.08+0.64*0.01*0.01+2*0.2*0.8*0.00016),pch=16)
points(0.8*0.07+0.2*0.03,sqrt(0.64*0.08*0.08+0.04*0.01*0.01+2*0.2*0.8*0.00016),pch=16)
points(0.03,0.01,pch=16)
points(0.07,0.08,pch=16)
text(0.03 + 0.001,0.01 - 0.001,"w=0",cex=0.5)
text(0.05 + 0.0015,sqrt(0.001705),"w=0.5",cex=0.5)
text(0.2*0.07+0.8*0.03 + 0.0015,sqrt(0.04*0.08*0.08+0.64*0.01*0.01+2*0.2*0.8*0.00016),"w=0.2",cex=0.5)
text(0.8*0.07+0.2*0.03 + 0.0015,sqrt(0.64*0.08*0.08+0.04*0.01*0.01+2*0.2*0.8*0.00016),"w=0.8",cex=0.5)
text(0.07 - 0.0013,0.08,"w=1",cex=0.5)

Download

par(mfrow=c(1,1))

x_lnormal <- seq(0.01,10,by=0.01)
y_lnormal <- dlnorm(x_lnormal)
plot(x_lnormal,y_lnormal,type="l",main="",xlab="x",ylab=expression(f[X](x)),xaxt="n",yaxt="n")


# create log-earnings variable
logwage <- log(cpsemployed$earnwk)

# graph-display format (two rows, one column)
par(mfrow=c(2,1))

# histogram of earnings, with normal distribution overlaid
hist(cpsemployed$earnwk, breaks=50, freq=FALSE, main="Distribution of weekly earnings", xlab="weekly earnings")
lines(0:8000, dnorm(0:8000,mean(cpsemployed$earnwk),sd(cpsemployed$earnwk)), col="blue")

# histogram of log-earnings, with normal distribution overlaid
hist(logwage, breaks=50, freq=FALSE, main="Distribution of ln(weekly earnings)", xlab="ln(weekly earnings)")
lines(seq(3,9,by=0.01), dnorm(seq(3,9,by=0.01),mean(logwage),sd(logwage)), col="blue")


qlnorm(c(0.025,0.975), meanlog=6.5, sdlog=0.7)


1-plnorm(1000, meanlog=6.5, sdlog=0.7)
set.seed(1234)
rlnorm(10, meanlog=6.5, sdlog=0.7)


exp(6.5+0.5*(0.7)^2)
exp(6.5+0.5*(0.7)^2)*sqrt(exp(0.7)^2-1)

Download

par(mfrow=c(2,2))
temp <- seq(0.01,20,by=0.01)
plot(temp,dchisq(temp,4),type="l",xlab="x",ylab=expression(f[X](x)),main="chi-square random variable, df=4",ylim=c(0,0.2),xlim=c(0,20))
plot(temp,dchisq(temp,6),type="l",xlab="x",ylab=expression(f[X](x)),main="chi-square random variable, df=6",ylim=c(0,0.2),xlim=c(0,20))
plot(temp,dchisq(temp,8),type="l",xlab="x",ylab=expression(f[X](x)),,main="chi-square random variable, df=8",ylim=c(0,0.2),xlim=c(0,20))
plot(temp,dchisq(temp,10),type="l",xlab="x",ylab=expression(f[X](x)),,main="chi-square random variable, df=10",ylim=c(0,0.2),xlim=c(0,20))


qchisq(0.10,4)
qchisq(0.10,6)
qchisq(0.10,8)
qchisq(0.10,10)
pchisq((1.96)^2,1)
pchisq((1.645)^2,1)

Download

par(mfrow=c(1,3))
temp <- seq(0.01,20,by=0.01)
plot(temp,dexp(temp,1),type="l",xlab="x",ylab=expression(f[X](x)),ylim=c(0,0.8),xlim=c(0,10),main=expression(paste("Exponential RV, ",theta==1)))
plot(temp,dexp(temp,0.5),type="l",xlab="x",ylab=expression(f[X](x)),ylim=c(0,0.8),xlim=c(0,10),main=expression(paste("Exponential RV, ",theta==0.5)))
plot(temp,dexp(temp,0.2),type="l",xlab="x",ylab=expression(f[X](x)),ylim=c(0,0.8),xlim=c(0,10),main=expression(paste("Exponential RV, ",theta==0.2)))


pexp(1,rate=0.5)-pexp(0,rate=0.5)
pexp(2,rate=0.5)-pexp(1,rate=0.5)
pexp(3,rate=0.5)-pexp(2,rate=0.5)

set.seed(1234)
rexp(10,rate=0.5)
temp <- rexp(100000,rate=0.5)
mean(temp)
sd(temp)

Download

par(mfrow=c(2,2))

plot((0:2)/2,dbinom(0:2,2,prob=0.2),type="h",main=expression(n==2),xlab=expression(v),ylab=expression(p[bar(X)](v)),ylim=c(0,1))
plot((0:4)/4,dbinom(0:4,4,prob=0.2),type="h",main=expression(n==4),xlab=expression(v),ylab=expression(p[bar(X)](v)),ylim=c(0,1))
plot((0:10)/10,dbinom(0:10,10,prob=0.2),type="h",main=expression(n==10),xlab=expression(v),ylab=expression(p[bar(X)](v)),ylim=c(0,1))
plot((0:20)/20,dbinom(0:20,20,prob=0.2),type="h",main=expression(n==20),xlab=expression(v),ylab=expression(p[bar(X)](v)),ylim=c(0,1))


library(unifed)
temp <- seq(0.001,0.999,by=0.001)
unif2 <- rep(0,999)
unif3 <- rep(0,999)
unif5 <- rep(0,999)
unif10 <- rep(0,999)

for (i in 1:999) {
    unif2[i] <- 2*dirwin.hall(2*temp[i],2)
    unif3[i] <- 3*dirwin.hall(3*temp[i],3)
    unif5[i] <- 5*dirwin.hall(5*temp[i],5)
    unif10[i] <- 10*dirwin.hall(10*temp[i],10)
}

par(mfrow=c(2,2))
plot(temp,unif2,type="l",main=expression(n==2),ylim=c(0,4.3),xlab=expression(v),ylab=expression(f[bar(X)](v)))
plot(temp,unif3,type="l",main=expression(n==3),ylim=c(0,4.3),xlab=expression(v),ylab=expression(f[bar(X)](v)))
plot(temp,unif5,type="l",main=expression(n==5),ylim=c(0,4.3),xlab=expression(v),ylab=expression(f[bar(X)](v)))
plot(temp,unif10,type="l",main=expression(n==10),ylim=c(0,4.3),xlab=expression(v),ylab=expression(f[bar(X)](v)))


set.seed(1234)

# initialize the number of simulations
num_simulations <- 100000

# create a vector of sample averages for n=2
xmean_2 <- replicate(num_simulations, mean(rlnorm(2, meanlog=0, sdlog=1))) 

# create a vector of sample averages for n=5
xmean_5 <- replicate(num_simulations, mean(rlnorm(5, meanlog=0, sdlog=1))) 

# create a vector of sample averages for n=10
xmean_10 <- replicate(num_simulations, mean(rlnorm(10, meanlog=0, sdlog=1))) 

# create a vector of sample averages for n=20
xmean_20 <- replicate(num_simulations, mean(rlnorm(20, meanlog=0, sdlog=1))) 

# graph-display format (two rows, two columns)
par(mfrow=c(2,2))

# plot smoothed densities for all four sample sizes
plot(density(xmean_2), main=expression(n==2), xlim=c(0,8),
    xlab=expression(v), ylab=expression(f[bar(X)](v)))
plot(density(xmean_5), main=expression(n==5), xlim=c(0,8),
    xlab=expression(v), ylab=expression(f[bar(X)](v)))
plot(density(xmean_10), main=expression(n==10), xlim=c(0,8),
    xlab=expression(v), ylab=expression(f[bar(X)](v)))
plot(density(xmean_20), main=expression(n==20), xlim=c(0,8),
    xlab=expression(v), ylab=expression(f[bar(X)](v)))

Download

qchisq(c(0.025,0.975),9)


pchisq(6.25,9)


set.seed(1234)

# initialize the number of simulations
num_simulations <- 100000

# create a vector of sample variances for n=2
varunif_2 <- replicate(num_simulations, var(runif(2)))

# create a vector of sample variances for n=3
varunif_3 <- replicate(num_simulations, var(runif(3)))

# create a vector of sample variances for n=5
varunif_5 <- replicate(num_simulations, var(runif(5)))

# create a vector of sample variances for n=10
varunif_10 <- replicate(num_simulations, var(runif(10)))

# graph-display format (two rows, two columns)
par(mfrow=c(2,2))

# plot smoothed densities for all four sample sizes
plot(density(varunif_2), xlim=c(0,0.5), ylim=c(0,15), main=expression(n==2),
    xlab=expression(v), ylab=expression(f[s[X]^2](v)))
plot(density(varunif_3), xlim=c(0,0.5), ylim=c(0,15), main=expression(n==3),
    xlab=expression(v), ylab=expression(f[s[X]^2](v)))
plot(density(varunif_5), xlim=c(0,0.5), ylim=c(0,15), main=expression(n==5),
    xlab=expression(v), ylab=expression(f[s[X]^2](v)))
plot(density(varunif_10), xlim=c(0,0.5), ylim=c(0,15), main=expression(n==10),
    xlab=expression(v), ylab=expression(f[s[X]^2](v)))


sdunif_2 <- sqrt(varunif_2)
sdunif_3 <- sqrt(varunif_3)
sdunif_5 <- sqrt(varunif_5)
sdunif_10 <- sqrt(varunif_10)

par(mfrow=c(2,2))
plot(density(sdunif_2), xlim=c(0,0.8), ylim=c(0,10), main=expression(n==2),
    xlab=expression(v), ylab=expression(f[s[X]](v)))
plot(density(sdunif_3), xlim=c(0,0.8), ylim=c(0,10), main=expression(n==3),
    xlab=expression(v),ylab=expression(f[s[X]](v)))
plot(density(sdunif_5), xlim=c(0,0.8), ylim=c(0,10), main=expression(n==5),
    xlab=expression(v),ylab=expression(f[s[X]](v)))
plot(density(sdunif_10), xlim=c(0,0.8), ylim=c(0,10), main=expression(n==10),
    xlab=expression(v),ylab=expression(f[s[X]](v)))


set.seed(1234)

# initialize the number of simulations
num_simulations <- 100000

# create a vector of sample standard deviations for n=10
sdlogn_10 <- replicate(num_simulations, sd(rlnorm(10)))

# create a vector of sample standard deviations for n=30
sdlogn_30 <- replicate(num_simulations, sd(rlnorm(30)))

# graph-display format (two rows, one column)
par(mfrow=c(2,1))

# plot smoothed densities for the two sample sizes
plot(density(sdlogn_10), main=expression(n==10), xlim=c(0,8),
    xlab=expression(v), ylab=expression(f[s[X]](v)))
plot(density(sdlogn_30), main=expression(n==30), xlim=c(0,8),
    xlab=expression(v), ylab=expression(f[s[X]](v)))

Download

temp <- seq(-2,4,0.01)

par(mfrow=c(3,1))
plot(temp,5*dnorm(temp)*(pnorm(temp)^4),type="l",main=expression(n==5),xlim=c(-2,4),ylim=c(0,1),xlab=expression(v),ylab=expression(f[max[X]](v)))
plot(temp,10*dnorm(temp)*(pnorm(temp)^9),type="l",main=expression(n==10),xlim=c(-2,4),ylim=c(0,1),xlab=expression(v),ylab=expression(f[max[X]](v)))
plot(temp,30*dnorm(temp)*(pnorm(temp)^29),type="l",main=expression(n==30),xlim=c(-2,4),ylim=c(0,1),xlab=expression(v),ylab=expression(f[max[X]](v)))


set.seed(1234)

# initialize the number of simulations
num_simulations <- 100000

# create a vector of sample medians for n=3
mednorm_3 <- replicate(num_simulations, median(rnorm(3)))

# create a vector of sample medians for n=10
mednorm_10 <- replicate(num_simulations, median(rnorm(10)))

# create a vector of sample medians for n=20
mednorm_20 <- replicate(num_simulations, median(rnorm(20)))

# graph-display format (three rows, one column)
par(mfrow=c(3,1))

# plot smoothed densities for the two sample sizes
# for n=20, plot the sampling dist'n of the sample mean for comparison
plot(density(mednorm_3), ylim=c(0,2), xlim=c(-2,2), main=expression(n==3),
    xlab=expression(v), ylab=expression(f[widetilde(X)[0.5]](v)))
plot(density(mednorm_10), ylim=c(0,2), xlim=c(-2,2), main=expression(n==10),
    xlab=expression(v), ylab=expression(f[widetilde(X)[0.5]](v)))
plot(density(mednorm_20), ylim=c(0,2), xlim=c(-2,2), main=expression(n==20),
    xlab=expression(v), ylab=expression(f[widetilde(X)[0.5]](v)))
lines(seq(-2,2,0.01), dnorm(seq(-2,2,0.01),sd=1/sqrt(20)), lty=3)

Download

pnorm((18.5-20)/(4/sqrt(60)))


par(mfrow=c(2,2))
plot((0:10)/10,dbinom(0:10,10,0.2),type="h",main=expression(n==10),xlab=expression(v),ylab=expression(P(bar(X)==v)),xlim=c(-0.2,1))
lines(seq(-0.2,1,0.01),dnorm(seq(-0.2,1,0.01),mean=.2,sd=sqrt(.2*.8/10))/10,col="blue")
plot((0:20)/20,dbinom(0:20,20,0.2),type="h",main=expression(n==20),xlab=expression(v),ylab=expression(P(bar(X)==v)),xlim=c(-0.2,1))
lines(seq(-0.2,1,0.01),dnorm(seq(-0.2,1,0.01),mean=.2,sd=sqrt(.2*.8/20))/20,col="blue")
plot((0:50)/50,dbinom(0:50,50,0.2),type="h",main=expression(n==50),xlab=expression(v),ylab=expression(P(bar(X)==v)),xlim=c(-0.2,1))
lines(seq(-0.2,1,0.01),dnorm(seq(-0.2,1,0.01),mean=.2,sd=sqrt(.2*.8/50))/50,col="blue")
plot((0:100)/100,dbinom(0:100,100,0.2),type="h",main=expression(n==100),xlab=expression(v),ylab=expression(P(bar(X)==v)),xlim=c(-0.2,1))
lines(seq(-0.2,1,0.01),dnorm(seq(-0.2,1,0.01),mean=.2,sd=sqrt(.2*.8/100))/100,col="blue")


dbinom(10,50,0.2)


pnorm((10.5-10)/sqrt(8))-pnorm((9.5-10)/sqrt(8))


pbinom(54,100,0.40)-pbinom(34,100,0.40)


pnorm((0.455-0.4)/0.0490)-pnorm((0.345-0.4)/0.0490)


1-pnorm(-0.08/sqrt(0.009536))

Download

par(mfrow=c(2,2))
temp <- seq(0.01,400,0.01)
temp2 <- seq(0.01,3,0.01)
plot(temp/9,9*dchisq(temp,9),type="l",main=expression(n==10),xlab=expression(v),ylab=expression(f[s[X]^2](v)),xlim=c(0,3),ylim=c(0,3))
lines(temp2,dnorm(temp2,mean=1,sd=sqrt(2/10)),col="blue",lty=3)
plot(temp/19,19*dchisq(temp,19),type="l",main=expression(n==20),xlab=expression(v),ylab=expression(f[s[X]^2](v)),xlim=c(0,3),ylim=c(0,3))
lines(temp2,dnorm(temp2,mean=1,sd=sqrt(2/20)),col="blue",lty=3)
plot(temp/49,49*dchisq(temp,49),type="l",main=expression(n==50),xlab=expression(v),ylab=expression(f[s[X]^2](v)),xlim=c(0,3),ylim=c(0,3))
lines(temp2,dnorm(temp2,mean=1,sd=sqrt(2/50)),col="blue",lty=3)
plot(temp/99,99*dchisq(temp,99),type="l",main=expression(n==100),xlab=expression(v),ylab=expression(f[s[X]^2](v)),xlim=c(0,3),ylim=c(0,3))
lines(temp2,dnorm(temp2,mean=1,sd=sqrt(2/100)),col="blue",lty=3)

Download

set.seed(1234)

corr_50_00 <- rep(0,100000)
corr_50_40 <- rep(0,100000)
corr_50_80 <- rep(0,100000)
corr_100_00 <- rep(0,100000)
corr_100_40 <- rep(0,100000)
corr_100_80 <- rep(0,100000)

for (i in 1:100000) {
    x1 <- rnorm(50)
    x2 <- rnorm(50)
    x3 <- rnorm(50,0.4*x1,sqrt((1-0.4*0.4)))
    x4 <- rnorm(50,0.8*x1,sqrt((1-0.8*0.8)))
    x5 <- rnorm(100)
    x6 <- rnorm(100)
    x7 <- rnorm(100,0.4*x5,sqrt((1-0.4*0.4)))
    x8 <- rnorm(100,0.8*x5,sqrt((1-0.8*0.8)))

    corr_50_00[i] <- cor(x1,x2)
    corr_50_40[i] <- cor(x1,x3)
    corr_50_80[i] <- cor(x1,x4)
    corr_100_00[i] <- cor(x5,x6)
    corr_100_40[i] <- cor(x5,x7)
    corr_100_80[i] <- cor(x5,x8)
}

par(mfrow=c(2,3))
plot(density(corr_50_00),main=expression(n==50~", "~rho[XY]==0),xlab=expression(v),ylab=expression(f[r[XY]](v)),xlim=c(-0.4,0.4))
lines(seq(-0.6,0.6,0.01),dnorm(seq(-0.6,0.6,0.01),mean=0,sd=sqrt(1/50)),col="blue",lty=3)
plot(density(corr_50_40),main=expression(n==50~", "~rho[XY]==0.4),xlab=expression(v),ylab=expression(f[r[XY]](v)),xlim=c(0,0.8))
lines(seq(-0.2,1,0.01),dnorm(seq(-0.2,1,0.01),mean=0.4,sd=sqrt((1-0.16)^2/50)),col="blue",lty=3)
plot(density(corr_50_80),main=expression(n==50~", "~rho[XY]==0.8),xlab=expression(v),ylab=expression(f[r[XY]](v)),xlim=c(0.6,1))
lines(seq(0.5,1,0.01),dnorm(seq(0.5,1,0.01),mean=0.8,sd=sqrt((1-0.64)^2/50)),col="blue",lty=3)
plot(density(corr_100_00),main=expression(n==100~", "~rho[XY]==0),xlab=expression(v),ylab=expression(f[r[XY]](v)),xlim=c(-0.4,0.4))
lines(seq(-0.6,0.6,0.01),dnorm(seq(-0.6,0.6,0.01),mean=0,sd=sqrt(1/100)),col="blue",lty=3)
plot(density(corr_100_40),main=expression(n==100~", "~rho[XY]==0.4),xlab=expression(v),ylab=expression(f[r[XY]](v)),xlim=c(0,0.8))
lines(seq(-0.2,1,0.01),dnorm(seq(-0.2,1,0.01),mean=0.4,sd=sqrt((1-0.16)^2/100)),col="blue",lty=3)
plot(density(corr_100_80),main=expression(n==100~", "~rho[XY]==0.8),xlab=expression(v),
ylab=expression(f[r[XY]](v)),xlim=c(0.6,1))
lines(seq(0.5,1,0.01),dnorm(seq(0.5,1,0.01),mean=0.8,sd=sqrt((1-0.64)^2/100)),col="blue",lty=3)


temp <- seq(2.5,5.5,0.01)

par(mfrow=c(2,1))
plot(temp,5000*dnorm(temp)*(pnorm(temp)^4999),type="l",main=expression(n==5000),xlim=c(2.5,5.5),ylim=c(0,2),xlab=expression(v),ylab=expression(f[max[X]](v)))
plot(temp,20000*dnorm(temp)*(pnorm(temp)^19999),type="l",main=expression(n==20000),xlim=c(2.5,5.5),ylim=c(0,2),xlab=expression(v),ylab=expression(f[max[X]](v)))

Download

par(mfrow=c(2,2))
temp <- seq(-4,4,by=0.01)
plot(temp,dt(temp,3),type="l",xlab="v",ylab=expression(f[t[3]](v)),main="df=3",ylim=c(0,0.4))
lines(temp,dnorm(temp),col="blue",lty=3)
plot(temp,dt(temp,5),type="l",xlab="v",ylab=expression(f[t[5]](v)),main="df=5",ylim=c(0,0.4))
lines(temp,dnorm(temp),col="blue",lty=3)
plot(temp,dt(temp,10),type="l",xlab="v",ylab=expression(f[t[10]](v)),main="df=10",ylim=c(0,0.4))
lines(temp,dnorm(temp),col="blue",lty=3)
plot(temp,dt(temp,30),type="l",xlab="v",ylab=expression(f[t[30]](v)),main="df=30",ylim=c(0,0.4))
lines(temp,dnorm(temp),col="blue",lty=3)


qt(c(0.975,0.95),3)
qt(c(0.975,0.95),5)
qt(c(0.975,0.95),10)
qt(c(0.975,0.95),30)
qt(c(0.975,0.95),100)
qt(c(0.975,0.95),500)


par(mfrow=c(2,2))

temp <- seq(-4,4,by=0.01)
y_values <- dt(temp,5)
plot(temp,y_values,type="l",yaxs="i",ylim=c(0,0.42),ylab=expression(f[t[5]](v)),xlab="v",xaxt="n",yaxt="n",main=expression(paste(t[5]," distribution")))
axis(1,labels=FALSE,xaxt="n")
mtext("0",side=1,at=0,cex=0.6)
mtext(expression(-t[5*","*0.025]),side=1,at=qt(0.025,5)-0.15,cex=0.6)
mtext(expression(t[5*","*0.025]),side=1,at=qt(0.975,5)+0.15,cex=0.6)
xshade <- seq(-4,qt(0.025,5),length=100)
yshade <- dt(xshade,5)
polygon(c(-4,xshade,qt(0.025,5)),c(0,yshade,0),col="blue")
xshade <- seq(qt(0.975,5),4,length=100)
yshade <- dt(xshade,5)
polygon(c(qt(0.975,5),xshade,4),c(0,yshade,0),col="blue")
lines(c(0,0),c(0,0.42),lty=3)

temp <- seq(-4,4,by=0.01)
y_values <- dt(temp,30)
plot(temp,y_values,type="l",yaxs="i",ylim=c(0,0.42),ylab=expression(f[t[30]](v)),xlab="v",xaxt="n",yaxt="n",main=expression(paste(t[30]," distribution")))
axis(1,labels=FALSE,xaxt="n")
mtext("0",side=1,at=0,cex=0.6)
mtext(expression(-t[30*","*0.025]),side=1,at=qt(0.025,30)-0.5,cex=0.6)
mtext(expression(t[30*","*0.025]),side=1,at=qt(0.975,30)+0.5,cex=0.6)
xshade <- seq(-4,qt(0.025,5),length=100)
yshade <- dt(xshade,30)
polygon(c(-4,xshade,qt(0.025,5)),c(0,yshade,0),col="blue")
xshade <- seq(qt(0.975,5),4,length=100)
yshade <- dt(xshade,30)
polygon(c(qt(0.975,5),xshade,4),c(0,yshade,0),col="blue")
lines(c(0,0),c(0,0.42),lty=3)

temp <- seq(-4,4,by=0.01)
y_values <- dt(temp,5)
plot(temp,y_values,type="l",yaxs="i",ylim=c(0,0.42),ylab=expression(f[t[5]](v)),xlab="v",xaxt="n",yaxt="n",main=expression(paste(t[5]," distribution")))
axis(1,labels=FALSE,xaxt="n")
mtext("0",side=1,at=0,cex=0.6)
mtext(expression(-t[5*","*0.05]),side=1,at=qt(0.05,5)-0.15,cex=0.6)
mtext(expression(t[5*","*0.05]),side=1,at=qt(0.95,5)+0.15,cex=0.6)
xshade <- seq(-4,qt(0.05,5),length=100)
yshade <- dt(xshade,5)
polygon(c(-4,xshade,qt(0.05,5)),c(0,yshade,0),col="blue")
xshade <- seq(qt(0.95,5),4,length=100)
yshade <- dt(xshade,5)
polygon(c(qt(0.95,5),xshade,4),c(0,yshade,0),col="blue")
lines(c(0,0),c(0,0.42),lty=3)

temp <- seq(-4,4,by=0.01)
y_values <- dt(temp,30)
plot(temp,y_values,type="l",yaxs="i",ylim=c(0,0.42),ylab=expression(f[t[30]](v)),xlab="v",xaxt="n",yaxt="n",main=expression(paste(t[30]," distribution")))
axis(1,labels=FALSE,xaxt="n")
mtext("0",side=1,at=0,cex=0.6)
mtext(expression(-t[30*","*0.05]),side=1,at=qt(0.05,30)-0.5,cex=0.6)
mtext(expression(t[30*","*0.05]),side=1,at=qt(0.95,30)+0.5,cex=0.6)
xshade <- seq(-4,qt(0.05,5),length=100)
yshade <- dt(xshade,30)
polygon(c(-4,xshade,qt(0.05,5)),c(0,yshade,0),col="blue")
xshade <- seq(qt(0.95,5),4,length=100)
yshade <- dt(xshade,30)
polygon(c(qt(0.95,5),xshade,4),c(0,yshade,0),col="blue")
lines(c(0,0),c(0,0.42),lty=3)


qt(0.90,19)


set.seed(1234)

mu_x <- 10
sigma_x <- 2
nobs <- 8
r <- 100
CIlower <- numeric(r)
CIupper <- numeric(r)
mu_notinCI <- logical(r)

for (j in 1:r) {
  sample <- rnorm(nobs,mean=mu_x,sd=sigma_x)
  CIlower[j] <- mean(sample)-qt(0.975,nobs-1)*(sd(sample)/sqrt(nobs))
  CIupper[j] <- mean(sample)+qt(0.975,nobs-1)*(sd(sample)/sqrt(nobs))
  mu_notinCI[j] <- ((mu_x<CIlower[j]) | (mu_x>CIupper[j])) 
}

color <- rep(gray(0.7),r)
color[which(mu_notinCI[1:r])] <- "black"

plot(0, xlim=c(mu_x-6*sigma_x/sqrt(nobs),mu_x+6*sigma_x/sqrt(nobs)),ylim=c(1,r),ylab="Simulation number",xlab="",main="")
abline(v=mu_x,lty=2)
for (j in 1:r) {
  lines(c(CIlower[j],CIupper[j]),c(j,j),col=color[j],lwd=2) 
}


# function to calculate the se of the sample mean for a vector
se_meanx <- function (x, na.rm = FALSE) {
    if (na.rm) {
       x <- x[!is.na(x)]
    }
    return(sd(x)/sqrt(length(x)))
}


par(mfrow=c(1,1))

plot(c(0.20,0.22,0.15),c(3,2,1),xlim=c(0,0.4),ylim=c(0,4),pch=16,yaxt="n",ann=FALSE,cex=1.2)
lines(c(0.155,0.245),c(3,3),lwd=2)

text(0.30,3,expression("e-mail A ("*pi[A]*")"),cex=0.8)
lines(c(0.173,0.267),c(2,2),lwd=2)
text(0.32,2,expression("e-mail B ("*pi[B]*")"),cex=0.8)
lines(c(0.136,0.164),c(1,1),lwd=2)
text(0.22,1,expression("no e-mail ("*pi[C]*")"),cex=0.8)


# create union indicator variable from unionstatus factor variable
cps$union <- 1*(cps$unionstatus=="Union")

# loop through variables of interest
for (varname in c("age","educ","ownchild","earnwk","union")) {
    # sample size is number of non-missing observations
    nobs <- sum(!is.na(cps[,varname]))

    # calculate sample mean of variable
    mean_var <- mean(cps[,varname], na.rm=TRUE)

    # calculate standard error of sample mean
    se_mean <- se_meanx(cps[,varname], na.rm=TRUE)

    # output results
    print(paste(varname,":","n",nobs,"Mean",signif(mean_var,digits=5),"SE(mean)",signif(se_mean,digits=5)))
}


# function to calculate the se of the sample stdev for a vector
se_sx <- function (x, na.rm = FALSE) {
    if (na.rm) {
       x <- x[!is.na(x)]
    }
    nobs <- length(x)
    return(sqrt(mean((x-mean(x))^4) - sd(x)^4)/(sqrt(nobs) * sd(x)))
}


# determine the sample size
nobs <- nrow(sp500)

# loop through the stocks of interest
for (stock in c("HD","LOW","BAC","WFC","MRO","COP")) {
    returns <- sp500[,stock]

    # calculate the stdev of returns 
    sx <- sd(returns)

    # calculate the se of the stdev of returns
    stderror_sx <- se_sx(returns)

    # output the stock name, along with stdev and se(stdev)
    print(paste(stock,":","sx",round(sx,4),"se_sx",round(stderror_sx,5)))
}


# function to calculate the se of the sample stdev for a vector
se_rxy <- function (x, y, na.rm = FALSE) {
    # if na.rm is TRUE, keep only observations where both are non-missing
    if (na.rm) {
       nonmissing <- (!is.na(x) & !is.na(y))
       x <- x[nonmissing]
       y <- y[nonmissing]
    }
    nobs <- length(x)
    return((1-cor(x,y)^2)/sqrt(nobs))
}


se_rxy(cps$educ, cps$earnwk, na.rm = TRUE)


# determine the sample size
nobs <- nrow(sp500)

# initialize variables with stock names and number of stocks
stocks <- c("HD","LOW","BAC","WFC","MRO","COP")
num_stocks <- length(stocks)

# double loop to loop through possible pairs of stocks
for (i in 1:(num_stocks-1)) {
    for (j in (i+1):num_stocks) {
        # calculate sample correlation
        rxy <- cor(sp500[,stocks[i]],sp500[,stocks[j]])

        # calculate standard error of sample correlation
        stderr_rxy <- se_rxy(sp500[,stocks[i]],sp500[,stocks[j]])

        # output stock-pair names, along with corr and se(corr)
        print(paste("Pair",stocks[i],stocks[j],": rxy",round(rxy,3),"se(rxy)",round(stderr_rxy,3)))
    }
}


# sample mean of exam-score difference
mean(exams$exam1-exams$exam2)

# sample stdev of exam-score difference
sd(exams$exam1-exams$exam2)

# std error of sample mean of exam-score difference
se_meanx(exams$exam1-exams$exam2)


# calculate means based upon table of counts
meanA <- (44+16)/200
meanB <- (30+16)/200
meandiff <- meanA-meanB

# calculate stdev of linkA-linkB using sample proportion method
sd_1 <- sqrt( (200/199) * ((110/200)*((0-0)-meandiff)^2 + (30/200)*((0-1)-meandiff)^2
                          + (44/200)*((1-0)-meandiff)^2 + (16/200)*((1-1)-meandiff)^2) )

print(paste("SD of linkA-linkB from sample proportion method:",round(sd_1,5)))

# create a data frame based upon the table of counts
links <- data.frame(linkA = c(rep(0,110),rep(0,30),rep(1,44),rep(1,16)),
                    linkB = c(rep(0,110),rep(1,30),rep(0,44),rep(1,16)))

# calculate stdev of linkA-linkB using the sample
sd_2 <- sd(links$linkA - links$linkB)

print(paste("SD of linkA-linkB from sample stdev method:",round(sd_2,5)))


# calculate std errors of the sample averages of union and non-union wages
se_union <- se_meanx(cps[cps$unionstatus=="Union","earnwk"], na.rm=TRUE)
se_nonunion <- se_meanx(cps[cps$unionstatus=="Non-union","earnwk"], na.rm=TRUE)

se_union
se_nonunion

# calculate std error of the difference in sample averages
sqrt(se_union^2 + se_nonunion^2)


# calculate std errors of the stdevs of earnings
se_sx_union <- se_sx(cps[cps$unionstatus=="Union","earnwk"], na.rm=TRUE)
se_sx_nonunion <- se_sx(cps[cps$unionstatus=="Non-union","earnwk"], na.rm=TRUE)

se_sx_union
se_sx_nonunion

# calculate std error of difference in stdevs of earnings
sqrt(se_sx_union^2 + se_sx_nonunion^2)


cor(cps[cps$unionstatus=="Union","earnwk"],cps[cps$unionstatus=="Union","educ"], use="complete.obs")
cor(cps[cps$unionstatus=="Non-union","earnwk"],cps[cps$unionstatus=="Non-union","educ"], use="complete.obs")


se_rxy_union <- se_rxy(cps[cps$unionstatus=="Union","earnwk"],cps[cps$unionstatus=="Union","educ"], na.rm=TRUE)
se_rxy_nonunion <- se_rxy(cps[cps$unionstatus=="Non-union","earnwk"],cps[cps$unionstatus=="Non-union","educ"], na.rm=TRUE)

se_rxy_union
se_rxy_nonunion

sqrt(se_rxy_union^2 + se_rxy_nonunion^2)

Download

set.seed(1234)

# create a data frame with the original data
df <- data.frame(x = c(4,3,8,12,0,10,5), y = c(8,6,10,1,15,3,6))

# determine the number of observations
nobs <- nrow(df)

# create a vector of index values by sampling with replacement
bs_index <- sample(1:nobs, nobs, replace = TRUE)
bs_index

# create a data frame corresponding for the bootstrap sample
bs_df <- df[bs_index,]

bs_df
print(paste("Means for bootstrap sample: x", round(mean(bs_df$x),2), ", y", round(mean(bs_df$y),2)))
print(paste("Medians for bootstrap sample: x", median(bs_df$x), ", y", median(bs_df$y)))
print(paste("Stdevs for bootstrap sample: x", round(sd(bs_df$x),2), ", y", round(sd(bs_df$y),2)))
print(paste("Correlation for bootstrap sample:", round(cor(bs_df$x,bs_df$y),3)))

Download

set.seed(1234)

# create a data frame with the original data
df <- data.frame(x = c(4,3,8,12,0,10,5), y = c(8,6,10,1,15,3,6))

# determine the number of observations
nobs <- nrow(df)

# initialize the number of bootstrap replications
B <- 1000

# initialize vectors to hold bootstrap statistics
bs_meanx <- rep(0,B)
bs_meany <- rep(0,B)
bs_sx <- rep(0,B)
bs_rxy <- rep(0,B)

# loop B times to create bootstrap samples and calculate bootstrap statistics
for (i in 1:B) {
    # create a vector of index values by sampling with replacement
    bs_index <- sample(1:nobs, nobs, replace = TRUE)

    # create a data frame corresponding for the bootstrap sample
    bs_df <- df[bs_index,]

    # calculate the bootstrap statistics
    bs_meanx[i] <- mean(bs_df$x)
    bs_meany[i] <- mean(bs_df$y)
    bs_sx[i] <- sd(bs_df$x)
    bs_rxy[i] <- cor(bs_df$x,bs_df$y)
}

# graph-display format (two rows, two columns)
par(mfrow=c(2,2))

hist(bs_meanx, freq=FALSE, xlab="", ylab="", main=expression(paste("Sample mean of ",x)),
    xlim=c(2,10), yaxt='n', breaks=20)
lines(density(bs_meanx), col="blue", lwd=2)
abline(v=mean(df$x), col="red", lwd=3)

hist(bs_meany, freq=FALSE, xlab="", ylab="", main=expression(paste("Sample mean of ",y)),
    xlim=c(2,12), yaxt='n', breaks=20)
lines(density(bs_meany), col="blue", lwd=2)
abline(v=mean(df$y), col="red", lwd=3)

hist(bs_sx, freq=FALSE, xlab="", ylab="", main=expression(paste("Sample stdev of ",x)),
    xlim=c(1,6), yaxt='n', breaks=40)
lines(density(bs_sx), col="blue", lwd=2)
abline(v=sd(df$x), col="red", lwd=3)

hist(bs_rxy, freq=FALSE, xlab="", ylab="",
    main=expression(paste("Sample correlation of ",x," and ",y)), 
    xlim=c(-1,0), yaxt='n', breaks=40)
lines(density(bs_rxy), col="blue", lwd=2)
abline(v=cor(df$x,df$y), col="red", lwd=3)

Download

set.seed(1234)

nobs <- 100
x <- rlnorm(nobs)
mean(x)
median(x)

# asymptotic standard error of sample mean
sd(x)/sqrt(nobs)

# use bootstrap sampling to calculate bootstrap standard errors
B <- 10000
bs_meanx <- rep(0,B)
bs_medianx <- rep(0,B)

for (i in 1:B) {
    bs_index <- sample(1:nobs, nobs, replace = TRUE)
    bs_x <- x[bs_index]
    bs_meanx[i] <- mean(bs_x)
    bs_medianx[i] <- median(bs_x) 
}

print(paste("Bootstrap standard error of the sample mean:", round(sd(bs_meanx),4)))
print(paste("Bootstrap standard error of the sample median:", round(sd(bs_medianx),4)))


print(paste("Bootstrap standard error of the difference:", round(sd(bs_meanx-bs_medianx),4)))


set.seed(1234)

# calculate difference r(HD,LOW)-r(HD,BAC)
cor(sp500$HD,sp500$LOW)-cor(sp500$HD,sp500$BAC)

# initialize variables
nobs <- nrow(sp500)
B <- 1000
bs_rdiff <- rep(0,B)

# loop to create bootstrap samples and calculate the bootstrap statistic
for (i in 1:B) {
    bs_index <- sample(1:nobs, nobs, replace = TRUE)
    bs_df <- sp500[bs_index,]
    bs_rdiff[i] <- cor(bs_df$HD,bs_df$LOW)-cor(bs_df$HD,bs_df$BAC)
}

# output the bootstrap standard error for r(HD,LOW)-r(HD,BAC)
sd(bs_rdiff)


set.seed(1234)

# construct a vector with the quantiles of interest (deciles and quartiles)
quantiles <- sort(c(seq(0.1,0.9,0.1),0.25,0.75))

# initialize variables
nobs <- nrow(cpsemployed)
B <- 5000

# loop over the quantiles of interest
for (q in quantiles) {
    bs_quantile <- rep(0,B)

    # loop to create bootstrap samples and calculate the bootstrap statistics
    for (i in 1:B) {
        bs_index <- sample(1:nobs, nobs, replace = TRUE)
        bs_quantile[i] <- quantile(cpsemployed[bs_index,"earnwk"],q)
    }

    # output the bootstrap se and normal-based bootstrap CI for this quantile
    quantile_estimate <- quantile(cpsemployed$earnwk,q)
    print(paste("Quantile ", q, ": sample quantile ", round(quantile_estimate),
                ", bootstrap se ", round(sd(bs_quantile),1),
                ", 95% CI (", round(quantile_estimate-1.96*sd(bs_quantile),1),
                ",", round(quantile_estimate+1.96*sd(bs_quantile),1), ")", sep=""))
}


set.seed(1234)

# initialize variables
nobs <- nrow(cpsemployed)
B <- 5000
bs_iqr <- rep(0,B)

# loop to create bootstrap samples and calculate the bootstrap statistics
for (i in 1:B) {
    bs_index <- sample(1:nobs, nobs, replace = TRUE)
    bs_iqr[i] <- IQR(cpsemployed[bs_index,"earnwk"])
}

# output the bootstrap standard error and normal-based CI for the IQR
iqr_earnwk <- IQR(cpsemployed$earnwk)
print(paste("IQR: ", round(iqr_earnwk,1),
                ", bootstrap se ", round(sd(bs_iqr),1),
                ", 95% CI (", round(iqr_earnwk-1.96*sd(bs_iqr),1),
                ",", round(iqr_earnwk+1.96*sd(bs_iqr),1), ")", sep=""))


# output the 2.5% and 97.5% quantiles of the bootstrap distributions
quantile(bs_meanx, c(0.025,0.975))
quantile(bs_medianx, c(0.025,0.975))
quantile(bs_meanx-bs_medianx, c(0.025,0.975))


# output the 5% and 95% quantiles of the bootstrap distributions
quantile(bs_meanx, c(0.05,0.95))
quantile(bs_medianx, c(0.05,0.95))
quantile(bs_meanx-bs_medianx, c(0.05,0.95))


# output the 2.5% and 97.5% quantiles of the bootstrap distribution
quantile(bs_rdiff, c(0.025,0.975))


# output the 2.5% and 97.5% quantiles of the bootstrap distribution
quantile(bs_iqr, c(0.025,0.975))

Download

qchisq(0.95,2)
qchisq(0.90,2)


par(mfrow=c(1,1))

x_chisq <- seq(0,20,by=0.01)
y_chisq <- dchisq(x_chisq,6)
plot(x_chisq,y_chisq,type="l",yaxs="i",ylim=c(0,0.15),ylab="",xlab="",xaxt="n",yaxt="n",main="")
axis(1,labels=FALSE,xaxt="n")
mtext(expression(w[paste(Q,",",0.05)]),side=1,at=qchisq(0.95,6),cex=0.7)
xshade <- seq(qchisq(0.95,6),20,length=100)
yshade <- dchisq(xshade,6)
polygon(c(qchisq(0.95,6),xshade,20),c(0,yshade,0),col="blue")


par(mfrow=c(1,1))

x_chisq <- seq(0,20,by=0.01)
y_chisq <- dchisq(x_chisq,6)
plot(x_chisq,y_chisq,type="l",yaxs="i",ylim=c(0,0.15),ylab="",xlab="",xaxt="n",yaxt="n",main="")
axis(1,labels=FALSE,xaxt="n")
mtext("Wald stat",side=1,at=qchisq(0.90,6),cex=0.7)
xshade <- seq(qchisq(0.90,6),20,length=100)
yshade <- dchisq(xshade,6)
polygon(c(qchisq(0.90,6),xshade,20),c(0,yshade,0),col="blue")


1-pchisq(11.17,2)


1-pchisq(81.59,3)

Download

wald_test <- function(gamma_hat, var_gamma_hat, R=diag(length(gamma_hat)), r=rep(0,length(gamma_hat))) {
    # gamma_hat: L x 1 vector of parameter estimates
    # var_theta_hat: L x L variance-covariance matrix of the parameter estimates
    # R: Q x L matrix of linear constraints to be tested
      ## default value of R is the identity matrix (for testing parameters equal to zero)
    # r: Q x 1 vector of values of the linear constraints to be tested
      ## default value of r is a vector of zeros (for testing parameters equal to zero) 

    # when R has one row (one restriction), make sure R has matrix type
    if (!is.matrix(R)) {
        R <- t(as.matrix(R))
    } 

    # calculate the Wald statistic using linear algebra
    W <- t(R %*% gamma_hat - r) %*% solve(R %*% var_gamma_hat %*% t(R)) %*% (R %*% gamma_hat - r)
    W <- as.numeric(W)

    # calculate the p-value of the Wald test
    p_value <- 1 - pchisq(W, nrow(R))

    # return the Wald test statistic and p-value
    return(list(W = W, p_value = p_value))
}


var_prop_indep <- function(pi_hat, nobs) {
    # pi_hat: vector of sample proportions
    # nobs: vector of sample sizes

    # calculate the variance-covariance matrix
    var_pi_hat <- diag(pi_hat*(1-pi_hat)/nobs)

    return(var_pi_hat)
}

var_mean_indep <- function(x_vectors) {
    # x_vectors: list of sample vectors

    # initialize variables
    num_means <- length(x_vectors)
    tempvec <- rep(0, num_means)

    # calculate the variance-covariance matrix
    for (i in 1:num_means) {
        tempvec[i] <- var(x_vectors[[i]])/length(x_vectors[[i]])
    }
    var_mean <- diag(tempvec)

    return(var_mean)
}

var_mean_onesample <- function(df, vars) {
    # df: data frame containing the sample
    # vars: vector of variable names or indices

    # calculate the variance-covariance matrix
    var_mean <- cov(df[,vars])/nrow(df)

    return(var_mean)
}

Download

options(digits=5)


cps$union <- 1*(cps$unionstatus=="Union")

lm(earnwk~union, data=cps)


lm(earnwk~unionstatus, data=cps)


cigdata <- read.csv("cigdata2019.csv")
cigdata$cigsales <- cigdata$pack_sales_percapita
cigdata$cigtax <- cigdata$statetax_pack

lm(cigsales~cigtax, data=cigdata)


cigdata2019 <- read.csv("cigdata2019.csv",na.strings="",stringsAsFactors=TRUE)

par(mfrow = c(1,1))

plot(cigdata2019$statetax_pack,cigdata2019$pack_sales_percapita,cex=0.5,xlab="State cigarette tax (per pack)",ylab="2019 cigarette sales (packs per capita)",xaxt="n",yaxt="n")
abline(lm(cigdata2019$pack_sales_percapita~cigdata2019$statetax_pack),col="blue",lwd=2)
axis(1, at=seq(0,5,by=1), las=1)
axis(2, at=seq(0,80,by=20), las=1) 


lm(HD~IDX, data=sp500)


sp500 <- read.csv("sp500-monthly-returns.csv",na.strings="",stringsAsFactors=TRUE)

par(mfrow = c(1,1))

plot(sp500$IDX,sp500$HD,cex=0.5,xlab="S&P 500 monthly return",ylab="Home Depot monthly return",xaxt="n",yaxt="n") 
abline(lm(sp500$HD~sp500$IDX),col="blue",lwd=2) 
axis(1, at=seq(-1,1,by=0.05), las=1) 
axis(2, at=seq(-1,1,by=0.1), las=1) 

abline(v=0,lty=3) 
abline(h=0.008731551,lty=3)

Download

# store results of least-squares estimation
results <- lm(cigsales~cigtax, data=cigdata)

# access estimated residuals and fitted values
uhat <- results$residuals
yhat <- results$fitted.values

# output first seven estimated residuals and fitted values
uhat[1:7]
yhat[1:7]


mean(uhat)
mean(cigdata$cigsales)
mean(yhat)


cor(uhat,cigdata$cigtax)
cor(uhat,yhat)


sighatsq_u <- sum(uhat^2)/(nrow(cigdata)-2)
sighat_u <- sqrt(sighatsq_u)
sighat_u
summary(results)$sigma


summary(results)


y <- cigdata$cigsales
x <- cigdata$cigtax

var(yhat)/var(y)
1-var(uhat)/var(y)
cor(y,yhat)^2
cor(y,x)^2
summary(results)$r.squared


# loop through the six stocks of interest
for (varname in c("HD","LOW","BAC","WFC","MRO","COP")) {
    # run the SLR regression of company returns vs index (IDX) returns
    results <- lm(sp500[,varname]~sp500$IDX)

    # access statistics from the regression results
    rsq <- summary(results)$r.squared
    sighat_u <- summary(results)$sigma

    # output the statistics
    print(paste(varname,"versus index:", "R-sq", round(rsq,3), "sighat_u", round(sighat_u,4)))
}

Download

set.seed(1234)

x <- runif(500)
y <- 1+8*x+rnorm(500)
y2 <- 1+8*x+rnorm(500,sd=2*x)

par(mfrow = c(2,1))
plot(x,y,ylab="y",xlab="x",cex=0.5,main="Sample 1 (homoskedastic)")
abline(1,8,col="blue",lwd=2)
plot(x,y2,ylab="y",xlab="x",cex=0.5,main="Sample 2 (heteroskedastic)")
abline(1,8,col="blue",lwd=2)


r = getOption("repos")
r["CRAN"] = "http://cran.us.r-project.org"
options(repos = r)


install.packages("estimatr")
library(estimatr)


options(scipen=0)

results <- lm_robust(cigsales~cigtax, data=cigdata)
results
summary(results)


results <- lm_robust(HD~IDX, data=sp500)
results


# output results with 95% confidence intervals (default alpha=0.05)
lm_robust(cigsales~cigtax, data=cigdata)

# output results with 90% confidence intervals (alpha=0.10)
lm_robust(cigsales~cigtax, data=cigdata, alpha=0.10)


results <- lm_robust(cigsales~cigtax, data=cigdata)
results


results <- lm_robust(HD~IDX, data=sp500)
results


results <- lm_robust(earnwk~union, data=cps)
results

Download

lm(HD~IDX+LOW, data=sp500)


lm(cigsales~cigtax+producer, data=cigdata)


# define the female indicator variable
cps$female <- 1*(cps$gender=="Female")

# define the experience variable
cps$exper <- cps$age-cps$educ-6

# least-squares estimation of the MLR model
lm(earnwk~educ+exper+union+female, data=cps)


# store results of least-squares estimation
results <- lm(earnwk~educ+exper+union+female, data=cps)

# access estimated residuals and fitted values
uhat <- results$residuals
yhat <- results$fitted.values

# output first ten estimated residuals and fitted values, rounded to the nearest dollar
round(uhat[1:10])
round(yhat[1:10])


# store MLR estimation results (HD on IDX and LOW)
results_mlr <- lm(HD~IDX+LOW, data=sp500)

# store SLR estimation results (HD on IDX)
results_slr1 <- lm(HD~IDX, data=sp500)

# store SLR estimation results (HD on LOW)
results_slr2 <- lm(HD~LOW, data=sp500)

# output the R-squared values and estimated residual standard deviations
print(paste("MLR results (IDX, LOW)", "R-sq", round(summary(results_mlr)$r.squared,3),
            "sighat_u", round(summary(results_mlr)$sigma,4)))

print(paste("SLR results (IDX)", "R-sq", round(summary(results_slr1)$r.squared,3),
            "sighat_u", round(summary(results_slr1)$sigma,4)))

print(paste("SLR results (LOW)", "R-sq", round(summary(results_slr2)$r.squared,3),
            "sighat_u", round(summary(results_slr2)$sigma,4)))


results <- lm_robust(cigsales~cigtax+producer, data=cigdata)
results


# MLR model of HD on IDX and LOW
mlr_results <- lm_robust(HD~IDX+LOW, data=sp500)
mlr_results

# SLR model of HD on IDX
slr_results1 <- lm_robust(HD~IDX, data=sp500)
slr_results1

# SLR model of HD on LOW
slr_results2 <- lm_robust(HD~LOW, data=sp500)
slr_results2


results <- lm_robust(earnwk~educ+exper+union+female, data=cps)
results


results <- lm_robust(earnwk~educ+exper+union+female, data=cps, alpha=0.10)
results


mlr_results <- lm_robust(HD~IDX+LOW, data=sp500)
mlr_results


mlr_results <- lm_robust(cigsales~cigtax+producer, data=cigdata)
mlr_results


linear_combination <- function(regresults, R) {
    R <- t(as.matrix(R))

    estimate <- R %*% regresults$coefficients
    estimate <- as.numeric(estimate)

    se <- sqrt(R %*% regresults$vcov %*% t(R))
    se <- as.numeric(se)

    pvalue <- 2*(1-pnorm(abs(estimate/se)))

    return(list(estimate = estimate, se = se, pvalue = pvalue)) 
}


results <- lm_robust(HD~IDX+LOW, data=sp500)
linear_combination(results, c(0,1,-1))


results <- lm_robust(HD~IDX+LOW+BAC+WFC, data=sp500)
results


test_linear_restrictions <- function(regresults, R, r) {
    return(wald_test(regresults$coefficients, regresults$vcov, R, r)) 
} 


results <- lm_robust(HD~IDX+LOW+BAC+WFC, data=sp500)

# use rbind function to create a matrix consisting of the two restrictions:
#   first restriction is beta3=0, second restriction is beta4=0
R <- rbind(c(0,0,0,1,0), c(0,0,0,0,1))

# create a vector of zeros for the two restrictions
r <- c(0,0)

# conduct the Wald test
test_linear_restrictions(results, R, r)

Download

# create marital-status indicator variables
cps$married <- 1*(cps$marstatus=="Married")
cps$divorced <- 1*(cps$marstatus=="Divorced")
cps$widowed <- 1*(cps$marstatus=="Widowed")
cps$nevermarried <- 1*(cps$marstatus=="Never married")

# estimate Model I
model1 <- lm_robust(earnwk~married+divorced+widowed, data=cps)
model1

# estimate Model II
model2 <- lm_robust(earnwk~married+divorced+widowed+educ+exper+union+female, data=cps)
model2


results <- lm_robust(earnwk~educ+I(educ^2)+exper+union+female, data=cps)
results
summary(results)$r.squared


educ_vec <- c(10,12,14,16)
for (educ_star in educ_vec) {
    print(linear_combination(results, c(0,1,2*educ_star+1,0,0,0)))
}


options(digits=4)

results <- lm_robust(earnwk~educ+exper+I(educ*exper)+union+female, data=cps)
results
summary(results)$r.squared

options(digits=5)


exper_vec <- c(10,20)
for (exper_star in exper_vec) {
    print(linear_combination(results, c(0,1,0,exper_star,0,0)))
}

educ_vec <- c(12,14)
for (educ_star in educ_vec) {
    print(linear_combination(results, c(0,0,1,educ_star,0,0)))
}


options(digits=4)

# create the post-2005 indicator variable
sp500$post2005 <- c(rep(0,180),rep(1,184))

# least-squares estimation for models with post-2005 interaction variable
results_HD <- lm_robust(HD~post2005+IDX+I(post2005*IDX), data=sp500)
results_HD
results_LOW <- lm_robust(LOW~post2005+IDX+I(post2005*IDX), data=sp500)
results_LOW
results_BAC <- lm_robust(BAC~post2005+IDX+I(post2005*IDX), data=sp500)
results_BAC
results_COP <- lm_robust(COP~post2005+IDX+I(post2005*IDX), data=sp500)
results_COP

# partial effect (with standard error) of IDX for post-2005 period in BAC model
linear_combination(results_BAC,c(0,0,1,1))

options(digits=5)

Download

options(digits=4)

logwage_results <- lm_robust(I(log(earnwk))~educ+exper+union+female, data=cps)
logwage_results
logwage_results$r.squared

options(digits=5)


options(digits=4)

logwage_results <- lm_robust(I(log(earnwk))~educ+I(log(exper))+union+female, data=cps)
logwage_results
logwage_results$r.squared

options(digits=5)


options(digits=4)

lm_robust(I(log(cigsales))~I(log(cigtax)), data=cigdata)

options(digits=5)

Download

# input the births2021.csv dataset
births2021 <- read.csv("births2021.csv") 

# create the squared residuals
lmresults <- lm(bweight~age+hsgrad+somecoll+collgrad+married+smoke+male, data=births2021)
births2021$uhatsq <- (lmresults$residuals)^2

# MLR model with birthweight as the outcome
lm_robust(bweight~age+hsgrad+somecoll+collgrad+married+smoke+male, data=births2021)

# Var(U|X) model with squared residuals as the outcome
lm_robust(uhatsq~age+hsgrad+somecoll+collgrad+married+smoke+male, data=births2021)

Download

# input the widgets.csv dataset
widgets <- read.csv("widgets.csv")

# estimate the LPM model
lpm_results <- lm_robust(purchase~emailA+emailB, data=widgets)
lpm_results

# test the equality of the emailA and emailB slopes
linear_combination(lpm_results, c(0,1,-1))


options(digits=4)

union_lpm <- lm_robust(union~educ+exper+I(exper^2)+female, data=cps)
union_lpm

options(digits=5)


linear_combination(union_lpm, c(0,0,1,31,0))
linear_combination(union_lpm, c(0,0,1,41,0))

R Code from the Book

Chapter 1

Section 1.2

Section 1.3

Section 1.4

Section 1.5

Section 1.6

Section 1.7

Section 1.8

Section 1.9

Section 1.10

Section 1.11

Section 1.12

Chapter 2

Section 2.1

Section 2.2

Section 2.3

Chapter 3

Chapter 4

Section 4.2

Section 4.3

Chapter 5

Chapter 6

Section 6.1

Section 6.2

Section 6.3

Section 6.4

Section 6.5

Section 6.6

Section 6.7

Section 6.8

Chapter 7

Section 7.1

Section 7.2

Section 7.4

Chapter 8

Section 8.2

Section 8.3

Chapter 9

Section 9.1

Section 9.2

Section 9.3

Section 9.4

Section 9.5

Chapter 10

Section 10.2

Section 10.3

Section 10.5

Chapter 11

Section 11.1

Section 11.2

Section 11.3

Section 11.4

Chapter 12

Section 12.1

Section 12.2

Section 12.3

Chapter 13

Section 13.1

Section 13.2

Section 13.3

Chapter 14

Section 14.1

Chapter 15

Section 15.1

Section 15.2

Section 15.3

Chapter 16

Section 16.2

Appendix

Chapter 17

Section 17.2

Section 17.3

Section 17.4

Chapter 18

Section 18.2

Section 18.3

Section 18.4

Section 18.5