rm(list = ls())
con = dbConnect(SQLite(),dbname = "../../data/data_anonymized.db");
df.data = dbReadTable(con,"causal_expectations")
dbDisconnect(con)
# filter out incompletes
df.data = df.data %>%
filter(status %in% 3:5)
# demographic information
df.demographics = df.data$datastring %>%
spread_values(age = jstring('questiondata','age'),
gender = jstring('questiondata','sex'),
feedback = jstring('questiondata','feedback')
) %>%
rename(participant = document.id) %>%
mutate(time = difftime(df.data$endhit,df.data$beginhit,units = 'mins'),
age = as.numeric(age)) %>%
bind_cols(df.data %>% select(experiment = codeversion))
# practice data: count how many of the 30 practice questions (3 for each of the 10 trials) participants
# answered correctly
df.practice = df.data$datastring %>%
as.tbl_json() %>%
spread_values(structure = jstring(questiondata, structure)) %>%
enter_object('data') %>%
gather_array('order') %>%
enter_object('trialdata') %>%
gather_array('index') %>%
append_values_string('values') %>%
mutate(index = rep(c('name', 'value'), nrow(.)/2)) %>%
filter(index == 'value') %>%
mutate(index = rep(c('id', 'blockA', 'blockB', 'answerA', 'answerB', 'answerOutcome'), nrow(.)/6)) %>%
rename(participant = document.id) %>%
left_join(df.demographics %>% select(participant, experiment), by = 'participant') %>%
spread(index, values) %>%
mutate_at(vars('blockA', 'blockB'), funs(ifelse(. == 'FALSE', 'no', 'yes'))) %>%
mutate(outcome = ifelse(structure == 'conjunctive' & (blockA == 'no' & blockB == 'no'), 'yes', 'no'),
outcome = ifelse(structure == 'disjunctive' & (blockA == 'no' | blockB == 'no'), 'yes', outcome),
outcome = ifelse(structure == 'xor' & (blockA != blockB), 'yes', outcome),
scoreA = ifelse(blockA == answerA, 1, 0),
scoreB = ifelse(blockB == answerB, 1, 0),
scoreOutcome = ifelse(outcome == answerOutcome, 1, 0)) %>%
group_by(participant,experiment,structure) %>%
summarise(score = sum(scoreA,scoreB,scoreOutcome)) %>%
ungroup()
# main data
df.long = df.data$datastring %>%
as.tbl_json() %>%
enter_object('questiondata') %>%
gather_object('name') %>%
append_values_string('value') %>%
as.data.frame() %>%
rename(participant = document.id) %>%
left_join(df.demographics %>% select(participant, experiment), by = 'participant') %>%
spread(name,value) %>%
mutate_at(vars('Awill', 'Bwill', 'counterfactual', 'pA', 'pB'), funs(as.numeric(.))) %>%
mutate(outcome = NA,
outcome = ifelse(experiment == 'experiment_1', 'positive','negative'),
outcome = factor(outcome, levels = c('negative', 'positive')),
balls = ifelse(experiment %in% paste0("experiment_", c(1, 3)), 'both balls go through', 'both balls are blocked'),
balls = factor(balls, levels = c('both balls go through', 'both balls are blocked')),
choice = 'normal',
choice = ifelse(pA == 0.2 & sentenceChoice == 'Acause', 'abnormal', choice),
choice = ifelse(pB == 0.2 & sentenceChoice == 'Bcause', 'abnormal', choice)
) %>%
left_join(df.practice %>% select(participant,score),by = 'participant') %>%
select(-age,-sex,-condition,-counterbalance) %>%
select(participant,experiment,outcome,balls,structure,pA,pB,Awill,Bwill,everything(),-feedback,feedback)
# apply exclusion criteria
df.long = df.long %>%
filter(score >= 28) %>% #exclude based on answers to practice trials
filter((pA == 0.2 & pB == 0.8 & (Awill + 20) < Bwill) |
(pA == 0.8 & pB == 0.2 & (Bwill + 20) < Awill) |
(pA == pB & abs(Awill-Bwill) < 20)) %>% #exclude based on probability ratings
filter((structure == 'conjunctive' & counterfactual < 50) |
(structure == 'disjunctive' & counterfactual > 50) |
(structure == 'xor' & counterfactual > 50)) #exclude based on counterfactual rating
# only use first 50 participants in each condition
df.long = df.long %>%
group_by(balls, structure) %>%
filter(row_number() <= 50) %>%
ungroup() %>%
arrange(participant)
# remove excluded participants from demographics
df.demographics = df.demographics %>%
filter(participant %in% unique(df.long$participant))
df.demographics %>%
summarise(age.mean = mean(age),
age.sd = sd(age),
n.participants = nrow(.),
n.female = sum(gender == 'female'),
time.mean = mean(time),
time.sd = sd(time)) %>%
mutate_at(vars(contains('age')), funs(round(.))) %>%
mutate_at(vars(contains('time')), funs(round(.,2))) %>%
kable()
age.mean | age.sd | n.participants | n.female | time.mean | time.sd |
---|---|---|---|---|---|
35 | 11 | 300 | 137 | 5.59 mins | 2.82 mins |
tmp = df.long %>%
mutate(success = ifelse(choice == 'normal',0,1)) %>%
group_by(balls, structure) %>%
summarise(success = sum(success),
n = n()
)
tmp2 = tmp %>%
rowwise() %>%
do(test = tidy(binom.test(.$success, .$n, p=0.5))) %>%
unnest()
df.stats = tmp %>%
bind_cols(tmp2) %>%
select(balls, structure, estimate, conf.low, conf.high, p.value) %>%
mutate_at(vars(estimate, contains('conf')), funs(round(.*100, 2))) %>%
mutate(result = paste0(estimate,'%, CI = [', conf.low, '%, ', conf.high, '%], p ',
ifelse(p.value < .001, '< .001', paste0('= ', round(p.value, 3)))))
df.stats %>%
select(balls, structure, result) %>%
kable()
balls | structure | result |
---|---|---|
both balls go through | conjunctive | 96%, CI = [86.29%, 99.51%], p < .001 |
both balls go through | disjunctive | 22%, CI = [11.53%, 35.96%], p < .001 |
both balls go through | xor | 94%, CI = [83.45%, 98.75%], p < .001 |
both balls are blocked | conjunctive | 54%, CI = [39.32%, 68.19%], p = 0.672 |
both balls are blocked | disjunctive | 12%, CI = [4.53%, 24.31%], p < .001 |
both balls are blocked | xor | 8%, CI = [2.22%, 19.23%], p < .001 |
# percentage of selecting the ball that normally gets blocked
df.selections = df.long %>%
mutate(structure = factor(structure,levels = c('conjunctive', 'disjunctive', 'xor'), labels = c('and', 'or', 'xor'))) %>%
group_by(balls, structure) %>%
summarise(percentage = sum(choice == 'abnormal')/n()) %>%
ungroup() %>%
arrange(balls, structure)
# functions
func_softmax = function(a, b, beta){
return(exp(a*beta)/rowSums(exp(cbind(a, b)*beta)))
}
func_outcome_probability = function(a, b, structure){
if (structure == 'or'){
out = 1-(1-a)*(1-b) #theoretical
# out = 0.9 #empirical
}else if(structure == 'and'){
out = (1-a)*(1-b) #theoretical
# out = 0.1 #empirical
}
else if (structure == 'xor'){
out = a*(1-b)+(1-a)*b #theoretical
# out = 0.8 #empirical
}
return(out)
}
func_icard = function(a, b, structure,balls){
if (balls == 1){ #both balls go through
if (structure == 'and'){
out = a*b+(1-a)
}else if (structure == 'or'){
out = a
}
else if (structure == 'xor'){
out = a*b+(1-a)
}
} else if (balls == 0){ #both balls are blocked
if (structure == 'and'){
out = 1-a
}else if (structure == 'or'){
out = a*b+(1-b)
}
else if (structure == 'xor'){
out = a*b+(1-b)
}
}
return(out)
}
# predictions
balls = c(0, 1) #balls go through or don't
structure = c('and', 'or', 'xor')
change = 0.1
a.prob = 0.8 #normally goes through
b.prob = 0.2 #normally is blocked
df.prediction = expand.grid(balls = balls, structure = structure) %>%
mutate(a.prob = a.prob,
b.prob = b.prob,
a.intervention = ifelse(balls == 0, a.prob + change, a.prob-change),
b.intervention = ifelse(balls == 0, b.prob + change, b.prob-change),
outcome = ifelse(balls == 0 | structure == 'xor', 0, 1)
) %>%
mutate_at(vars(contains('intervention')), funs(ifelse(. < 0, 0, .))) %>%
mutate_at(vars(contains('intervention')), funs(ifelse(. > 1, 1, .))) %>%
rowwise() %>%
# correspondence model
mutate(a.prob.actual = ifelse(balls == 1, a.prob, (1 - a.prob)),
b.prob.actual = ifelse(balls == 1, b.prob, (1 - b.prob)),
p.outcome = func_outcome_probability(a.prob, b.prob, structure),
p.outcome.actual = ifelse(outcome == 1, p.outcome,(1 - p.outcome)),
a.correspondent = 1 - abs(p.outcome.actual - a.prob.actual),
b.correspondent = 1 - abs(p.outcome.actual - b.prob.actual)
) %>%
# icard model
mutate(a.icard = func_icard(a.prob, b.prob, structure, balls),
b.icard = func_icard(b.prob, a.prob, structure, balls)
) %>%
ungroup() %>%
mutate(
balls = factor(balls, levels = c(1, 0), labels = c('both balls go through', 'both balls are blocked')),
outcome = factor(outcome, levels = c(0, 1), labels = c('negative', 'positive'))
) %>%
arrange(balls, structure)
# fit beta
func_fit_beta = function(data, cause, alternative){
# sum of squared error
func_sse = function(x){
prediction = func_softmax(cause, alternative, beta = x[1])
out = (data - prediction)^2
return(sum(out))
}
# fite beta
fit = optim(par = runif(1), fn = func_sse, method = "BFGS")
return(fit)
}
# save betas in a separate data frame
model.names = c('abnormal selection', 'correspondence', 'necessity and sufficiency')
df.betas = tibble(model = model.names,
beta = NA)
df.betas$beta[1] = func_fit_beta(data = df.selections$percentage, cause = rep(c(1,0),each=3), alternative = rep(c(0,1),each=3))$par
df.betas$beta[2] = func_fit_beta(data = df.selections$percentage, cause = df.prediction$b.correspondent, alternative = df.prediction$a.correspondent)$par
df.betas$beta[3] = func_fit_beta(data = df.selections$percentage, cause = df.prediction$b.icard, alternative = df.prediction$a.icard)$par
# add predictions with fitted betas
df.prediction = df.prediction %>%
mutate(abnormal_selection = func_softmax(balls == 'both balls go through', balls != 'both balls go through', df.betas$beta[1]),
correspondence = func_softmax(b.correspondent, a.correspondent, df.betas$beta[2]),
icard = func_softmax(b.icard, a.icard, df.betas$beta[3])
)
# display table with fit betas
df.betas %>%
mutate(beta = round(beta,1)) %>%
kable()
model | beta |
---|---|
abnormal selection | 1.0 |
correspondence | 3.2 |
necessity and sufficiency | 2.7 |
# function to get confidence intervals from an exact binomial test
getIntervals = function(nsuccess, ntrials){return(binom.test(nsuccess,ntrials)$conf.int[1:2])}
# data frame with data
df.plot = df.long %>%
select(participant, outcome, balls, structure, choice) %>%
mutate(choice = factor(choice,
levels = c('normal', 'abnormal'),
labels = c(' ball that normally goes through',
' ball that normally gets blocked')),
response = choice %>% as.numeric()-1,
structure = factor(structure,
levels = c('conjunctive', 'disjunctive', 'xor'),
labels = c('and', 'or', 'xor'))
) %>%
count(outcome, balls, structure, choice) %>%
group_by(outcome, balls, structure) %>%
mutate(percentage = n/sum(n),
groupsize = sum(n)) %>%
group_by(outcome, balls, structure, choice) %>%
mutate(ci_low = getIntervals(nsuccess = n, ntrials = groupsize)[1],
ci_high = getIntervals(nsuccess = n, ntrials = groupsize)[2]) %>%
mutate_at(vars(contains('ci_')), funs(ifelse(choice == ' ball that normally goes through', NA, .))) %>%
ungroup()
# data frame with predictions
df.plot.prediction = df.plot %>%
na.omit() %>%
left_join(df.prediction %>% select(balls, structure, outcome,
model.abnormal_selection = abnormal_selection,
model.correspondence = correspondence,
model.icard = icard
)) %>%
mutate(normal = ifelse(balls == 'both balls go through', 0, 1)) %>%
select(balls, structure, outcome, contains('model')) %>%
gather('index', 'value', -c(balls, structure, outcome)) %>%
mutate(index = factor(index,
levels = c('model.abnormal_selection',
'model.correspondence',
'model.icard'
),
labels = c('abnormal selection',
'correspondence',
'necessity and sufficiency'
)))
ggplot(df.plot, aes(x = structure, y = percentage))+
geom_bar(stat= "identity", aes(fill = choice), color = "black")+
geom_hline(yintercept = 0.5,linetype=2,size=1)+
facet_grid(~balls)+
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0, color = "black", size = 1)+
geom_point(data = df.plot.prediction, aes(y = value, group = index, color = index), position = position_dodge(0.9), size = 4)+
scale_y_continuous(labels = percent_format())+
scale_fill_manual(values = c(' ball that normally gets blocked' = "#0C67E0",
' ball that normally goes through' = "#72E669"
))+
scale_color_grey(start = 0.8, end = 0)+
coord_cartesian(ylim = c(0, 1.01), x = c(0.5, 3.5), expand = F)+
labs(y = "percentage selected", x = 'causal structure')+
scale_x_discrete(labels = toupper(levels(df.plot$structure)))+
theme_bw()+
theme(text = element_text(size=24),
panel.grid = element_blank(),
legend.position = "bottom",
legend.direction = 'vertical',
legend.title = element_blank(),
panel.spacing.x = unit(1,"cm"),
axis.title.x = element_text(margin = margin(t = 0.5, r = 0, b = 0.1, l = 0,"cm")),
strip.background = element_blank())+
guides(fill = guide_legend(order = 1),
color = guide_legend(order = 2))