Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Results of sanitary controls in France can be found on data.gouv.fr however, only the running year is available… Thanks to @cquest@amicale.net we can access the archives since 2017.
< section id="config-and-data-cleaning" class="level2">Config and data cleaning
library(tidyverse) library(janitor) library(fs) library(sf) library(glue) library(ggspatial) library(rnaturalearth) library(patchwork) post_dir <- "" # "posts/where_and_when_not_to_eat_in_france/" # smoothing source(glue("{post_dir}fonctions_lissage.R"), encoding = "UTF-8") # move DROM source(glue("{post_dir}translation.R"), encoding = "UTF-8") # params ------------------------------------------------------------------ rayon <- 30000 # smoothing radius (m) pixel <- 2000 # pixel resolution output for smoothing (m) proj_liss <- "EPSG:3035" # an equal area projection
# 2017-2018 : yearly archive ; extract the last doy export # # http://data.cquest.org/alim_confiance/exports_alim_confiance_2017.7z # -> export_alimconfiance_2017-12-31.csv # http://data.cquest.org/alim_confiance/exports_alim_confiance_2018.7z # -> export_alimconfiance_2018-12-31.csv alim_dcq <- dir_ls(glue("{post_dir}data/data.cquest.org"), regexp = "\\.csv") |> map_dfr(read_csv2) |> clean_names() # 2019-2023 # archives at the end of each year + last export # # http://files.opendatarchives.fr/dgal.opendatasoft.com/archives/export_alimconfiance/20191231T053939Z%20export_alimconfiance.csv.gz # http://files.opendatarchives.fr/dgal.opendatasoft.com/archives/export_alimconfiance/20201231T083455Z%20export_alimconfiance.csv.gz # http://files.opendatarchives.fr/dgal.opendatasoft.com/archives/export_alimconfiance/20211231T083316Z%20export_alimconfiance.csv.gz # http://files.opendatarchives.fr/dgal.opendatasoft.com/archives/export_alimconfiance/20221231T150844Z%20export_alimconfiance.csv.gz # http://files.opendatarchives.fr/dgal.opendatasoft.com/export_alimconfiance.csv.gz alim_oda <- dir_ls(glue("{post_dir}data/files.opendatarchives.fr"), regexp = "\\.csv") |> map_dfr(read_csv2) |> clean_names() |> distinct() |> filter(date_inspection < "2023-03-01") # https://datanova.laposte.fr/explore/dataset/laposte_hexasmal/download?format=shp cp <- read_sf("~/data/laposte/laposte_hexasmal.shp") # Built from IGN Adminexpress # http://r.iresmi.net/wp-content/uploads/2023/04/admin_express_simpl_2022.zip dep <- read_sf("~/data/adminexpress/adminexpress_cog_simpl_000_2022.gpkg", layer = "departement") |> translater_drom(ajout_zoom_pc = TRUE) reg_fx <- read_sf("~/data/adminexpress/adminexpress_cog_simpl_000_2022.gpkg", layer = "region") |> filter(insee_reg > "06") fx <- read_sf("~/data/adminexpress/adminexpress_cog_simpl_000_2022.gpkg", layer = "region") |> filter(insee_reg > "06") |> summarise() |> st_transform(proj_liss) # projection names nom_proj_liss <- str_extract(st_crs(fx)$wkt, '(?<=PROJCRS\\[").*(?=",)') nom_proj_legale <- str_extract(st_crs(dep)$wkt, '(?<=PROJCRS\\[").*(?=",)') # basemap countries <- ne_countries(50, returnclass = "sf") |> filter(admin != "France")
alim_raw <- bind_rows(alim_dcq, alim_oda) |> mutate(insee_dep = case_when(str_sub(code_postal, 1, 2) >= "97" ~ str_sub(code_postal, 1, 3), str_sub(code_postal, 1, 3) %in% c("200", "201") ~ "2A", str_sub(code_postal, 1, 3) %in% c("202", "206") ~ "2B", TRUE ~ str_sub(code_postal, 1, 2))) # trying to add coordinates to unknown locations using postal codes # only 5800 on 7500 georef <- cp |> group_by(code_postal) |> summarise(geometry = first(geometry)) |> inner_join(alim_raw |> filter(is.na(geores), !is.na(code_postal)), by = "code_postal") # build sf object alim <- alim_raw |> filter(!is.na(geores)) |> separate(geores, c("y", "x"), sep = ",") |> st_as_sf(coords = c("x", "y"), crs = "EPSG:4326") |> bind_rows(georef) alim |> write_sf(glue("{post_dir}results/alim.gpkg"))
Exploring data
First a global view of the dataset:
alim |> st_drop_geometry() |> count(annee = year(date_inspection), semaine = isoweek(date_inspection)) |> ggplot(aes(semaine, n, group = annee, color = as_factor(annee))) + geom_point() + geom_smooth(span = 0.4) + scale_y_continuous(labels = scales::label_number(big.mark = " ")) + labs(title = "Contrôles officiels sanitaires", subtitle = "France - 2017-2023", color = "année") + guides(color = guide_legend(reverse=TRUE))
About 800 controls per week, except during the lock-down in 2020, and a slightly lower control pressure in 2021 and 2022.
alim |> st_drop_geometry() |> group_by(mois = ymd(paste0("2020", str_pad(month(date_inspection), 2, "left", "0"), "01"))) |> summarise(pcent_neg = sum(synthese_eval_sanit %in% c("A améliorer", "A corriger de manière urgente")) / n()) |> ggplot(aes(mois, pcent_neg)) + geom_col() + scale_x_date(date_labels = "%b", date_breaks = "2 months", expand = c(0.01, 0.01)) + scale_y_continuous(breaks = scales::breaks_pretty(), labels = scales::percent_format(decimal.mark = ",", suffix = " %")) + labs(title = "Résultats des contrôles officiels sanitaires", subtitle = "France - 2017-2023", y = "% mauvais", caption = glue("http://r.iresmi.net/ données : Alim'confiance via opendatarchives.fr mauvais = 'À améliorer' ou 'À corriger de manière urgente'")) + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.ticks.x = element_blank(), plot.caption = element_text(size = 7))
Poor results (To be improved or To be corrected urgently) are stable at around 6 %, except a recent spike in 2023?
alim |> st_drop_geometry() |> group_by(annee = year(date_inspection)) |> summarise(pcent_neg = sum(synthese_eval_sanit %in% c("A améliorer", "A corriger de manière urgente")) / n()) |> ggplot(aes(annee, pcent_neg)) + geom_col() + scale_y_continuous(breaks = scales::breaks_pretty(), labels = scales::label_percent(decimal.mark = ",", suffix = " %")) + labs(title = "Résultats des contrôles officiels sanitaires", subtitle = "France - 2017-2023", x = "année", y = "% mauvais", caption = glue("http://r.iresmi.net/ données : Alim'confiance via opendatarchives.fr mauvais = 'À améliorer' ou 'À corriger de manière urgente'")) + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.ticks.x = element_blank(), plot.caption = element_text(size = 7))
It seems that poor results increase during the year, from June to November.
This surprising periodic phenomenon is also visible by day:
alim |> st_drop_geometry() |> group_by(date_inspection = as.Date(date_inspection)) |> summarise(pcent_neg = sum(synthese_eval_sanit %in% c("A améliorer", "A corriger de manière urgente")) / n()) |> ggplot(aes(date_inspection, pcent_neg)) + geom_point() + geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 30)) + scale_x_date(labels = ~ format(.x, "%b\n%Y"), date_breaks = "6 months") + scale_y_continuous(breaks = scales::breaks_pretty(), labels = scales::percent_format(decimal.mark = ",", suffix = " %")) + labs(title = "Résultats des contrôles officiels sanitaires", subtitle = "France - 2017-2023", x = "jour", y = "% mauvais", caption = glue("http://r.iresmi.net/ données : Alim'confiance via opendatarchives.fr mauvais = 'À améliorer' ou 'À corriger de manière urgente'")) + theme(plot.caption = element_text(size = 7))
So for the « when », it is: not in summer or autumn.
< section id="maps" class="level2">Maps
What about the « where »? It seems you also could be careful in some départements…
bilan_dep <- alim |> st_drop_geometry() |> group_by(insee_dep) |> summarise(pcent_neg = sum(synthese_eval_sanit %in% c("A améliorer", "A corriger de manière urgente")) / n() * 100) |> arrange(desc(pcent_neg)) dep |> left_join(bilan_dep, by = "insee_dep") |> ggplot() + geom_sf(aes(fill = pcent_neg), color = "white", linewidth = 0.05) + geom_sf(data = reg_fx, fill = NA, color = "white", linewidth = .1) + scale_fill_viridis_b("% mauvais", breaks = c(2, 5, 10, 15), na.value = NA) + labs(title = "Résultats des contrôles officiels sanitaires", subtitle = "France - 2017-2023", caption = glue("http://r.iresmi.net/ données : Alim'confiance via opendatarchives.fr mauvais = 'À améliorer' ou 'À corriger de manière urgente' proj. métropole {nom_proj_legale} fond carto. : d'après IGN adminexpress")) + theme_void() + theme(plot.caption = element_text(size = 7))
Not good in Guadeloupe, Guyane, Réunion and the southern lower Seine valley, west of Paris.
Can we see more in details? Using a 30 km kernel smoothing:
lissage_alim <- function(annee = NULL) { if (is.null(annee)) { alim <- alim |> filter(insee_dep < "97") |> mutate(poids = 1) annee_titre <- glue("{year(min(alim$date_inspection))}-{year(max(alim$date_inspection))}") } else { alim <- alim |> filter(insee_dep < "97", year(date_inspection) == annee) |> mutate(poids = 1) annee_titre <- annee } smooth_total <- alim |> lissage(poids, rayon, pixel, zone = fx) smooth_mauvais <- alim |> filter(synthese_eval_sanit %in% c("A améliorer", "A corriger de manière urgente")) |> lissage(poids, bandwidth = rayon, resolution = pixel, zone = fx) pcent_mauvais <- smooth_mauvais / smooth_total * 100 raster::writeRaster(pcent_mauvais, glue("{post_dir}results/alimconfiance_pcent_mauvais_{annee_titre}_fx.tif"), overwrite = TRUE) p <- ggplot() + geom_sf(data = countries, color = "grey", fill = "white") + layer_spatial(as(pcent_mauvais, "SpatRaster")) + geom_sf(data = dep, fill = NA, color = "white", linewidth = .05) + geom_sf(data = reg_fx, fill = NA, color = "white", linewidth = .1) + scale_fill_viridis_b("% mauvais", breaks = c(2, 5, 10, 15), na.value = NA) + coord_sf(xlim = st_bbox(pcent_mauvais)[c(1, 3)], ylim = st_bbox(pcent_mauvais)[c(2, 4)], crs = proj_liss) + theme_void() + theme(plot.caption = element_text(size = 7)) if (!is.null(annee)) { p + labs(title = glue("{annee_titre} - {format(nrow(alim), big.mark = ' ')} inspections")) } else { p + annotation_scale(height = unit(0.1, "cm"), bar_cols = c("grey", "white"), text_col = "grey", line_col = "grey") + labs(title = "Résultats des contrôles officiels sanitaires", subtitle = glue("France métropolitaine - {annee_titre}"), caption = glue("http://r.iresmi.net/ données : Alim'confiance via opendatarchives.fr mauvais = 'À améliorer' ou 'À corriger de manière urgente' lissage à {rayon / 1000} km proj. {nom_proj_liss} fond carto. : d'après IGN adminexpress")) } } lissage_alim()
It confirms some hot-spots west of Paris, in Alsace, in Indre, Cher, Alpes-Maritimes and between Gironde and Landes. You are safer in Paris and Bretagne…
Has it changed?
2018:(year(max(alim$date_inspection)) - 1) |> map(lissage_alim) |> reduce(`+`) + plot_layout(ncol = 3, guides = "collect") + plot_annotation( title = "Résultats des contrôles officiels sanitaires - France métropolitaine", caption = glue("http://r.iresmi.net/ données : Alim'confiance via opendatarchives.fr mauvais = 'À améliorer' ou 'À corriger de manière urgente' lissage à {rayon / 1000} km proj. {nom_proj_liss} fond carto. : d'après IGN adminexpress")) & theme(plot.caption = element_text(size = 7), plot.title = element_text(size = 10))
No real trend…
< !-- -->R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.