forked from NicoRiedel/OpenAccessWhitelist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathJournal_Positive_List_functions.R
136 lines (112 loc) · 4.52 KB
/
Journal_Positive_List_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#currency exchange rates for conversion to EUR
get_exchange_rate <- function(currency_to)
{
exchange_rates <- paste0("http://api.exchangeratesapi.io/v1/latest?access_key=56a7ab0a1771e5918692ae48cfd32c0a&format=1") |>
readLines(warn = FALSE) |>
jsonlite::fromJSON() |>
purrr::pluck("rates")
exchange_rate <- exchange_rates |>
purrr::pluck(currency_to)
return(exchange_rate)
}
#transforms logical columns to yes/no columns
logical_to_yes_no <- function(logic_vec)
{
yes_no <- c("no", "yes")
return(yes_no[logic_vec + 1])
}
#there are some regionally focused journals that will not be that relevant
#for our scientists in Berlin
#non-comprehensive list of regional terms to filter those regional journals
#some of those excluded journals might still have a more international focus
#but I did not check all journals individually
is_regional_journal <- function(journal_name)
{
regional_terms <- c("Korean", "Canadian", "Oman", "Libyan", "Iran",
"Macedonian", "Chinese", "India", "Scandinavi",
"Saudi", "Arab", "Asia", "Nigeria", "Yoga",
"Israel", "Australasian", "Bosnian", "Qaboos",
"Japan", "Ethiopian", "Africa", "Korea",
"Slovenian", "Polish", "Brazilian", "Malaysia",
"Egyptian", "Alexandria", "Bali", "Indonesia",
"Nepal", "Pakistan", "Jundishapur", "South Eastern",
"Anatolia", "Rambam Maimonides", "Sri Lanka",
"Bangabandhu Sheikh Mujib", "Irish", "Zahedan",
"Middle East", "Bangladesh", "Motriz", "Nordic",
"Medicinski", "Medyczne", "Dānish", "Instituto",
"Egyptian", "Balkan", "Turkish", "Istanbul",
"Brasileira", "Upsala", "stanbul", "Sahara")
grep_results <- sapply(regional_terms, grepl, x=as.character(journal_name))
return(any(grep_results))
}
#splits the subject categories and selects only the most narrow subcategories
subject_simplification <- function(subject)
{
subject_categories <- strsplit(subject, "|", fixed = TRUE)[[1]]
subject_subcategories <- strsplit(subject_categories, ":")
#if there are several different main categories take the most detailed category for each of them
if(length(subject_categories) > 1) {
subjects_simplified <- sapply(subject_subcategories, tail, n=1)
} else {
#if there is only one main category, take the two most detailed subcategories
#unless there are only two hiracy levels, then only take one
if(length(subject_subcategories) > 2) {
subjects_simplified <- tail(subject_subcategories[[1]], n=2)
subjects_simplified <- rev(subjects_simplified) #reverse order such that the more specific is the first element
} else {
subjects_simplified <- c(tail(subject_subcategories[[1]], n=1), "")
}
}
subjects_simplified <- trimws(subjects_simplified)
return(subjects_simplified)
}
#define separate function instead of using regular join because the
#scopus eISSN as well as pISSN has to be checked against both DOAJ pISSN/eISSN
get_scopus_var <- function(eISSN, pISSN, scopus_data, varname)
{
#try al matches of pISSN/eISSN
if(!is.na(eISSN)) {
ee_var <- get_var_sub(eISSN, scopus_data$eISSN, scopus_data[[varname]])
ep_var <- get_var_sub(eISSN, scopus_data$pISSN, scopus_data[[varname]])
} else {
ee_var <- NA
ep_var <- NA
}
if(!is.na(pISSN)) {
pe_var <- get_var_sub(pISSN, scopus_data$eISSN, scopus_data[[varname]])
pp_var <- get_var_sub(pISSN, scopus_data$pISSN, scopus_data[[varname]])
} else {
pe_var <- NA
pp_var <- NA
}
#get nonzero entry
var_vec <- c(ee_var, ep_var, pe_var, pp_var)
var_vec <- var_vec[!is.na(var_vec)]
if(length(var_vec) == 0) {
var <- NA
} else {
var <- var_vec[1]
}
return(var)
}
#obtains var value for one combination of eISSN/pISSN
get_var_sub <- function(ISSN, scopus_ISSN, scopus_var)
{
SRJ_idx <- match(ISSN, scopus_ISSN)
if(is.na(SRJ_idx)) {
var <- NA
} else {
var <- scopus_var[[SRJ_idx]]
}
return(var)
}
#calculates JIF quartiles (beware that quartile definition is ambiguous)
calculate_quartiles <- function(n)
{
q_vec <- rep("", n)
q_vec[1:ceiling(n/4)] <- "Q1"
q_vec[(ceiling(n/4) + 1):ceiling(n/2)] <- "Q2"
q_vec[(ceiling(n/2) + 1):ceiling(n*3/4)] <- "Q3"
q_vec[(ceiling(n*3/4) + 1):n] <- "Q4"
return(q_vec)
}