Skip to content

Commit 973daf7

Browse files
authored
5 exploiter le endpoint range pour obtenir facilement les codes et libellés (#6)
* ajout des fonctions get_range et get_range_geo (travaux Christophe) * fix version de R à cause du pipe
1 parent 829c3a8 commit 973daf7

14 files changed

+10797
-3
lines changed

DESCRIPTION

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: melodi
22
Title: Retrieve Data from the Insee Melodi APIs
3-
Version: 0.2.0
3+
Version: 0.3.0
44
Authors@R: c(
55
person("Cédric", "Bobinec", email = "cedric.bobinec@insee.fr", role = c("aut", "cre")),
66
person(family = "Institut national de la statistique et des études économiques", role = "cph")
@@ -9,14 +9,16 @@ Description: A wrapper for the Insee Melodi APIs that returns data frames and me
99
License: MIT + file LICENSE
1010
Encoding: UTF-8
1111
Roxygen: list(markdown = TRUE)
12-
RoxygenNote: 7.3.2
12+
RoxygenNote: 7.3.3
1313
Language: fr
1414
Suggests:
1515
knitr,
1616
rmarkdown,
1717
testthat (>= 3.0.0),
1818
vcr (>= 0.6.0)
1919
Config/testthat/edition: 3
20+
Depends:
21+
R (>= 4.1.0)
2022
Imports:
2123
dplyr,
2224
tidyr,

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ export(get_file)
88
export(get_local_data)
99
export(get_local_data_by_com)
1010
export(get_metadata)
11+
export(get_range)
12+
export(get_range_geo)
1113
importFrom(magrittr,"%>%")

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# melodi 0.3.0
2+
- #5 : ajout de fonctions permettant de récupérer les libellés des codes associés à un jeu de données :
3+
`get_range_geo` (géographie) et `get_range` (toutes les autres dimensions)
4+
15
# melodi 0.2.0
26
- #16 : `get_all_data` : au lieu de construire la requête d'accès et les noms des fichiers dans le zip manuellement, récupération de l'URL via le catalogue (plus fiable et permet de gérer le cas où le CSV du DS est millésimé)
37
- #17 : `get_catalog` : ajout des millésimes géo et niveaux géographiques

R/get_range.R

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#' Get dataset range (codes and labels)
#'
#' Retrieves the list of dimensions and all their possible modality values
#' (codes and human-readable labels) for a given dataset.
#' For the GEO dimension, it is recommended to use the dedicated function
#' `get_range_geo()`.
#'
#' @param ds_name dataset name
#' @param base_url_melodi API Melodi URL - default production URL
#' @param lang french or english labels - default french ("fr")
#' @param exclusions_list codes of the dimensions to leave out for a faster
#'   and lighter result - default : "GEO"
#'
#' @return A data frame with one row per dimension/modality pair and the
#'   columns `DIM`, `DIM_LABEL`, `MOD`, `MOD_LABEL`, sorted by `DIM` then
#'   `MOD`. Labels missing in the requested language are `NA`.
#' @export
#'
#' @examples
#' get_range("DS_POPULATIONS_REFERENCE")
#' get_range(ds_name = "DS_EC_DECES", lang = "en")
get_range <- function(
  ds_name,
  base_url_melodi = "https://api.insee.fr/melodi",
  lang = "fr",
  exclusions_list = c("GEO")
) {
  # check parameters
  if (length(lang) != 1L || !lang %in% c("fr", "en")) {
    stop("lang must be : fr or en")
  }
  url <- paste0(base_url_melodi, "/range/", ds_name)

  message("Request dataset range : ", url)

  dataset <- httr2::request(url) |>
    httr2::req_perform() |>
    httr2::resp_body_json(simplifyVector = FALSE)

  # Drop the excluded dimensions (GEO by default). isTRUE() keeps the
  # predicate a single TRUE/FALSE even when a concept has no code, which
  # Filter() needs to keep its mask aligned with the list.
  range <- Filter(function(x) {
    !isTRUE(x[["concept"]][["code"]] %in% exclusions_list)
  }, dataset[["range"]])

  # for null cases (English labels missing...): always yield one string
  as_scalar_chr <- function(x) {
    if (length(x) == 0L) NA_character_ else as.character(x[[1L]])
  }

  # concepts returned by the API are effectively dimensions of the dataset:
  # build one data frame per dimension instead of one per modality
  rows <- lapply(range, function(concept) {
    values <- concept[["values"]]
    n_values <- length(values)
    data.frame(
      DIM = rep(as_scalar_chr(concept[["concept"]][["code"]]), n_values),
      DIM_LABEL = rep(
        as_scalar_chr(concept[["concept"]][["label"]][[lang]]),
        n_values
      ),
      MOD = vapply(
        values,
        function(v) as_scalar_chr(v[["code"]]),
        character(1),
        USE.NAMES = FALSE
      ),
      MOD_LABEL = vapply(
        values,
        function(v) as_scalar_chr(v[["label"]][[lang]]),
        character(1),
        USE.NAMES = FALSE
      ),
      stringsAsFactors = FALSE
    )
  })

  # The empty template guarantees a data frame with the expected columns
  # even when every dimension has been excluded.
  template <- data.frame(
    DIM = character(),
    DIM_LABEL = character(),
    MOD = character(),
    MOD_LABEL = character(),
    stringsAsFactors = FALSE
  )
  codebook_df <- do.call(rbind, c(list(template), rows))

  # Sort on the columns that actually exist (DIM / MOD); radix ordering is
  # locale-independent so the result is reproducible across machines.
  sort_idx <- order(
    codebook_df[["DIM"]],
    codebook_df[["MOD"]],
    method = "radix"
  )
  codebook_df <- codebook_df[sort_idx, , drop = FALSE]
  rownames(codebook_df) <- NULL

  return(codebook_df)
}

R/get_range_geo.R

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#' Get dataset's geography : codes and labels
#'
#' Retrieves the list of geo dimensions and all their possible values
#' (codes and human-readable labels) for a given dataset.
#'
#' For other dimensions, refer to `get_range()`.
#'
#' @param ds_name dataset name
#' @param base_url_melodi API Melodi URL - default production URL
#' @param lang french or english labels - default french ("fr")
#'
#' @return A data frame with columns `GEO_REF`, `GEO_OBJECT`, `GEO`,
#'   `GEO_LABEL`, sorted by `GEO_OBJECT` then `GEO`. The first three columns
#'   are obtained by splitting each value's `id` on "-". Labels missing in
#'   the requested language are `NA`.
#' @export
#'
#' @examples
#' get_range_geo("DS_POPULATIONS_REFERENCE")
#' get_range_geo("DS_TICM_PRATIQUES")
get_range_geo <- function(
  ds_name,
  base_url_melodi = "https://api.insee.fr/melodi",
  lang = "fr"
) {
  # check parameters
  if (length(lang) != 1L || !lang %in% c("fr", "en")) {
    stop("lang must be : fr or en")
  }
  url <- paste0(base_url_melodi, "/range/", ds_name)

  message("Request dataset range : ", url)

  dataset <- httr2::request(url) |>
    httr2::req_perform() |>
    httr2::resp_body_json(simplifyVector = FALSE)

  # Keep GEO only. identical() always yields a single TRUE/FALSE, even for
  # a concept without a code, which Filter() needs to keep its mask aligned.
  range <- Filter(function(x) {
    identical(x[["concept"]][["code"]], "GEO")
  }, dataset[["range"]])

  if (length(range) == 0) {
    stop("Error: 'GEO' dimension is not present in the dataset.")
  }

  # for null cases (English GEO labels...): always yield one string
  as_scalar_chr <- function(x) {
    if (length(x) == 0L) NA_character_ else as.character(x[[1L]])
  }

  # concepts returned by the API are effectively dimensions of the dataset:
  # build one data frame per concept from whole vectors rather than one data
  # frame per geographic code, which is far too slow for large geographies
  rows <- lapply(range, function(concept) {
    values <- concept[["values"]]
    n_values <- length(values)
    data.frame(
      dimension = rep(
        as_scalar_chr(concept[["concept"]][["code"]]),
        n_values
      ),
      dimension_label = rep(
        as_scalar_chr(concept[["concept"]][["label"]][[lang]]),
        n_values
      ),
      value = vapply(
        values,
        function(v) as_scalar_chr(v[["code"]]),
        character(1),
        USE.NAMES = FALSE
      ),
      value_label = vapply(
        values,
        function(v) as_scalar_chr(v[["label"]][[lang]]),
        character(1),
        USE.NAMES = FALSE
      ),
      value_id = vapply(
        values,
        function(v) as_scalar_chr(v[["id"]]),
        character(1),
        USE.NAMES = FALSE
      ),
      stringsAsFactors = FALSE
    )
  })

  codebook_df <- do.call(rbind, rows)
  rownames(codebook_df) <- NULL

  # value_id is split on "-" into reference / object type / code
  codebook_df <- codebook_df |>
    dplyr::arrange(dimension, value) |>
    tidyr::separate(
      value_id,
      into = c("GEO_REF", "GEO_OBJECT", "GEO"),
      sep = "-"
    ) |>
    dplyr::mutate(GEO_LABEL = value_label) |>
    dplyr::select(GEO_REF, GEO_OBJECT, GEO, GEO_LABEL) |>
    dplyr::arrange(GEO_OBJECT, GEO)

  return(codebook_df)
}

R/globals.R

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ globalVariables(
99
# get_all_data
1010
"COD_VAR", "LIB_VAR", "language", "accessURL", "filename",
1111
# get_data
12-
"value", "GEO"
12+
"value", "GEO",
13+
# get_range_geo
14+
"GEO_REF", "GEO_OBJECT", "GEO_LABEL"
1315
)
1416
)

man/get_range.Rd

Lines changed: 34 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_range_geo.Rd

Lines changed: 33 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)