-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathNet Benefit analysis_How do people feel about AI.Rmd
253 lines (191 loc) · 7.82 KB
/
Net Benefit analysis_How do people feel about AI.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
---
title: "Net benefit analysis"
output: html_document
date: "2023-01-31"
---
```{r setup, include=FALSE}
# Echo the R source of every chunk in the knitted document by default.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# setwd("..") # Use this line to set the working directory on your machine where the data is located.
```
```{r}
# Install only the required packages that are not already available.
# Checking first avoids re-downloading and re-installing every package each
# time this document is knitted.
required_packages <- c(
  "tidyverse", "ggplot2", "haven", "scales", "readr", "data.table",
  "epiDisplay"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
```
```{r}
library(tidyverse)
library(ggplot2)
library(haven)
library(scales)
library(readr)
library(data.table)
library(epiDisplay)
```
Read raw data
```{r}
# Load the raw survey responses from the Stata file.
df1 <- read_dta("262400371_Public Attitudes to AI_client version.dta")

# Prep data
# Remove participants identifying as 'other' (RS_Sex coded 3; rows with a
# missing RS_Sex are kept) and remove the 16 year old.
df1 <- filter(
  df1,
  is.na(RS_Sex) | RS_Sex != 3,
  CS_Age != "16"
)
```
Transform to data.table format
```{r}
# Convert the filtered data to a data.table; the re-coding further down
# relies on data.table's `:=` assignment.
df1_dt <- as.data.table(df1)
```
Now we will re-code all the variables of interest. We need to consider the "DK" and "Prefer not to say" as missing, in order to compute the net benefit.
For starters, let's list the variable names (for benefits) so we are on the same page:
BENA_MP: Face recognition unblock tech
BENA_BC: Face recognition border control
BENA_PS: Face recognition policing
BENA_WEL: Eligibility welfare benefits
BENA_JOB: Eligibility jobs
BENA_CAN: Predict risk cancer
BENA_LOAN: Predict risk no pay loan
BENA_SMC: Social media ads consumer products
BENA_SMP: Social media ads political parties
BENA_VASS: Virtual speakers
BENA_VAH: Healthcare virtual assistant
BENA_RVC: Robotic vacuum cleaner
BENA_RCA: Robotic care assistant
BENA_DC: Driverless cars
BENA_AW: Autonomous weapons
BENA_CCR: AI simulation for climate change research
BENA_VR: VR education
CONA_* is the same, but about the concerns.
```{r}
## We copy the dataset so that the in-place `:=` re-coding below never
## modifies df1_dt. copy() already returns a data.table, so no further
## conversion is needed.
df1_dt_1 <- copy(df1_dt)

## We create a list with each AI application identifier. These suffixes are
## pasted onto prefixes such as "BENA_", "CONA_" and "AWARE_" to build the
## column names.
list_variables <- c(
  "MP", "BC", "PS", "WEL", "JOB", "CAN", "LOAN", "SMC", "SMP",
  "VASS", "VAH", "RVC", "RCA", "DC", "AW", "CCR", "VR"
)
## Re-code the benefit (BENA_*) and concern (CONA_*) items: codes 5 and 6
## become missing and the remaining scale is flipped.
##
## Each step is c(from, to) and the steps run in this exact order. Codes 1-3
## are first parked on negative values so that a freshly written value is
## never matched by a later step; the last three steps move them to their
## final positive value. Assuming the raw codes are 1-6, the net effect is
## 1 -> 3, 2 -> 2, 3 -> 1, 4 -> 0, 5 -> NA, 6 -> NA.
scale_steps <- list(
  c(6, NA), c(5, NA),
  c(4, 0), c(3, -1), c(2, -2), c(1, -3),
  c(-1, 1), c(-2, 2), c(-3, 3)
)
for (app_id in list_variables) {
  for (item_prefix in c("BENA_", "CONA_")) {
    target_col <- paste0(item_prefix, app_id)
    for (scale_step in scale_steps) {
      df1_dt_1[get(target_col) == scale_step[1], (target_col) := scale_step[2]]
    }
  }
}
## Re-code the AWARENESS items (AWARE_*): code 4 becomes missing, codes 2
## and 3 both become 0; any other value is left unchanged.
## Each step is c(from, to), applied in order.
aware_steps <- list(c(4, NA), c(2, 0), c(3, 0))
for (app_id in list_variables) {
  aware_col <- paste0("AWARE_", app_id)
  for (aware_step in aware_steps) {
    df1_dt_1[get(aware_col) == aware_step[1], (aware_col) := aware_step[2]]
  }
}
## Re-code the EXPERIENCE items (EXP_*), which are only re-coded for the
## subset of applications listed here: codes 5 and 4 become missing, code 2
## becomes 1 and code 3 becomes 0; any other value is left unchanged.
list_variables_2 <- c(
  "MP", "BC", "PS", "WEL", "JOB", "CAN", "LOAN", "SMC", "SMP",
  "VASS", "VAH", "RVC", "VR"
)
## Each step is c(from, to), applied in order (2 -> 1 runs before 3 -> 0).
exp_steps <- list(c(5, NA), c(4, NA), c(2, 1), c(3, 0))
for (app_id in list_variables_2) {
  exp_col <- paste0("EXP_", app_id)
  for (exp_step in exp_steps) {
    df1_dt_1[get(exp_col) == exp_step[1], (exp_col) := exp_step[2]]
  }
}
```
Now, following the same approach, we can create the net benefit variables
```{r}
## Net benefit = re-coded benefit minus re-coded concern, one NETBEN_<id>
## column per application. The value is only assigned for respondents who
## answered both items; rows missing either item are not assigned (so they
## are NA in a newly created column).
for (app_id in list_variables) {
  bena_col <- paste0("BENA_", app_id)
  cona_col <- paste0("CONA_", app_id)
  netben_col <- paste0("NETBEN_", app_id)
  df1_dt_1[
    !is.na(get(bena_col)) & !is.na(get(cona_col)),
    (netben_col) := get(bena_col) - get(cona_col)
  ]
}
# Frequency table of the climate-change net benefit.
df1_dt_1[, tab1(NETBEN_CCR, graph = FALSE)] ## 2320 NA
# Double check in several ways:
# - among rows missing either input, NETBEN_CCR should be entirely NA
df1_dt_1[
  is.na(BENA_CCR) | is.na(CONA_CCR),
  tab1(NETBEN_CCR, graph = FALSE)
]
# - the direct difference should reproduce the table above
df1_dt_1[, tab1(BENA_CCR - CONA_CCR, graph = FALSE)]

# Same checks for the virtual-speaker net benefit.
df1_dt_1[, tab1(NETBEN_VASS, graph = FALSE)] ## 2179
# Double check in several ways
df1_dt_1[
  is.na(BENA_VASS) | is.na(CONA_VASS),
  tab1(NETBEN_VASS, graph = FALSE)
]
df1_dt_1[, tab1(BENA_VASS - CONA_VASS, graph = FALSE)]
```
We can now check whether the mean values make sense for each of the technologies
```{r}
# Unweighted summary of every net-benefit variable, one call per AI
# application. The calls are kept as separate top-level statements so each
# summary is printed in the knitted output.
df1_dt_1[, summ(NETBEN_MP, graph = FALSE)]
df1_dt_1[, summ(NETBEN_BC, graph = FALSE)]
df1_dt_1[, summ(NETBEN_PS, graph = FALSE)]
df1_dt_1[, summ(NETBEN_WEL, graph = FALSE)]
df1_dt_1[, summ(NETBEN_JOB, graph = FALSE)]
df1_dt_1[, summ(NETBEN_CAN, graph = FALSE)]
df1_dt_1[, summ(NETBEN_LOAN, graph = FALSE)]
df1_dt_1[, summ(NETBEN_SMC, graph = FALSE)]
df1_dt_1[, summ(NETBEN_SMP, graph = FALSE)]
df1_dt_1[, summ(NETBEN_VASS, graph = FALSE)]
df1_dt_1[, summ(NETBEN_VAH, graph = FALSE)]
df1_dt_1[, summ(NETBEN_RVC, graph = FALSE)]
df1_dt_1[, summ(NETBEN_RCA, graph = FALSE)]
df1_dt_1[, summ(NETBEN_DC, graph = FALSE)]
df1_dt_1[, summ(NETBEN_AW, graph = FALSE)]
df1_dt_1[, summ(NETBEN_CCR, graph = FALSE)]
df1_dt_1[, summ(NETBEN_VR, graph = FALSE)]
```
Now it is time to produce a graph comparing the net benefit of each of the 17 technologies
```{r}
# FIRST WE CREATE THE WEIGHTED MEANS FOR EACH TECHNOLOGY
# One weighted mean per NETBEN_* column, weighted by PV25_Weight. With
# na.rm = TRUE, weighted.mean() drops missing net-benefit values together
# with their weights.
netben_cols <- c(
  "NETBEN_MP", "NETBEN_BC", "NETBEN_PS", "NETBEN_WEL", "NETBEN_JOB",
  "NETBEN_CAN", "NETBEN_LOAN", "NETBEN_SMC", "NETBEN_SMP", "NETBEN_VASS",
  "NETBEN_VAH", "NETBEN_RVC", "NETBEN_RCA", "NETBEN_DC", "NETBEN_AW",
  "NETBEN_CCR", "NETBEN_VR"
)
WeightedMeans <- lapply(
  df1_dt_1[, netben_cols, with = FALSE],
  weighted.mean,
  w = df1_dt_1$PV25_Weight,
  na.rm = TRUE
)
# Now we put this in the correct format: a one-row table with one column
# per technology.
WeightedMeans_df <- data.frame(WeightedMeans)
WeightedMeans_dt <- as.data.table(WeightedMeans_df)
# Transpose to one row per technology. The namespace is spelled out because
# purrr (attached via tidyverse) also exports a transpose().
t_WeightedMeans_dt <- data.table::transpose(WeightedMeans_dt)
t_WeightedMeans_dt[, VarNames := colnames(WeightedMeans_dt)]
# Rename the value column by reference (setnames avoids copying the table).
setnames(t_WeightedMeans_dt, 1, "Net Benefit")
# data.frame() makes the column names syntactic, so "Net Benefit" becomes
# "Net.Benefit" here; the plotting code refers to it by that name.
t_WeightedMeans_df <- data.frame(t_WeightedMeans_dt)
# We re-code the names so they look nice in the graph: each NETBEN_* column
# name is replaced by the label shown on the plot axis.
t_WeightedMeans_df_RC <- t_WeightedMeans_df %>%
  mutate(
    VarNames = dplyr::recode(
      VarNames,
      "NETBEN_MP" = "Face recognition: unblock",
      "NETBEN_BC" = "Face recognition: border",
      "NETBEN_PS" = "Face recognition: policing",
      "NETBEN_WEL" = "Eligibility: welfare",
      "NETBEN_JOB" = "Eligibility: jobs",
      "NETBEN_CAN" = "Predict: risk cancer",
      "NETBEN_LOAN" = "Predict: loan",
      "NETBEN_SMC" = "Social media ads: product",
      "NETBEN_SMP" = "Social media ads: political",
      "NETBEN_VASS" = "Virtual assistant: speaker",
      "NETBEN_VAH" = "Virtual assistant: health",
      "NETBEN_RVC" = "Robotics: vacuum",
      "NETBEN_RCA" = "Robotics: care assistant",
      "NETBEN_DC" = "Autonomous: cars",
      "NETBEN_AW" = "Autonomous: weapons",
      "NETBEN_CCR" = "Simulation: climate change",
      "NETBEN_VR" = "Simulation: VR education"
    )
  )
# And now we can plot.
# Order the applications by their net benefit so the bars appear sorted.
t_WeightedMeans_df_RC <- t_WeightedMeans_df_RC %>%
  mutate(VarNames = fct_reorder(VarNames, Net.Benefit))

# Horizontal bar chart of the weighted mean net benefit per application.
# A single geom_col() layer draws the bars: geom_col() is the
# stat = "identity" form of geom_bar(), so a second bar layer underneath it
# would be completely hidden.
p <- ggplot(
  data = t_WeightedMeans_df_RC,
  aes(x = Net.Benefit, y = VarNames)
) +
  geom_col(fill = "#FF6A58") +
  ggtitle("Net benefit across AI applications") +
  xlab("Net benefit") +
  ylab("AI application") +
  scale_x_continuous(limits = c(-3, 3), n.breaks = 6) +
  theme(plot.title = element_text(hjust = 0.5))
p
```