Catégories
R

Post to Mastodon from R

Mastodon is a decentralized microblogging platform.

We can analyse some data and directly post our findings to a Mastodon instance.

For example we can plot the different TLDs used by the Mastodon Fediverse and publish it on mastodon.cloud.

library(tidyverse)
library(httr)
library(jsonlite)
library(scales)
library(ggthemes)
library(ggrepel)

# data source
url <- "https://instances.mastodon.xyz/"

mastodon <- GET(paste0(url, "instances.json")) %>% 
  content(as = "text") %>% 
  fromJSON() %>% 
  select(-info) %>% 
  filter(!str_detect(name, "((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)")) %>% # remove IP
  mutate(statuses = as.integer(statuses),
         name_clean = str_remove(str_to_lower(name), "/$|\\.$|:\\d*$"), # remove trailing dots, slashes and port number
         tld = str_replace_all(name_clean, "(.*?)(\\.[a-z0-9]*)$", "\\2")) %>% # extract TLD
  filter(! tld %in% c("0")) # remove special cases

download_time <- format(Sys.time(), tz = "UTC", usetz = TRUE)

# saveRDS(mastodon, paste0("data/mastodon_", download_time, ".rds"))

nbi <- nrow(mastodon)
nbu <- sum(mastodon$users, na.rm = TRUE)

# cleaning and
# plot the TLDs (top level domains) of Mastondon  instances -----------------------

mastodon %>% 
  filter(! str_detect(tld, "xn--")) %>% 
  group_by(tld) %>% 
  summarise(nb = n(),
            users = sum(users, na.rm = TRUE),
            statuses = sum(statuses, na.rm = TRUE)) %>% 
  filter(nb > 0 & users > 0 & statuses > 0) %>% {
  ggplot(., aes(users, statuses, label = tld, size = nb, color = nb)) +
    geom_text_repel(segment.size = 0, force = 0.5) +
    #geom_point(alpha = 0.4) +
    labs(title = "Mastodon Top Level Domains",
         subtitle = paste(nrow(.), "TLDs for", nbi, "instances having", nbu, "users -", download_time),
         x = "users", 
         y = "statuses",
         caption = paste("r.iresmi.net\ndata from", url, "\nIDN removed ; positions adapted for clarity")) +
    scale_x_log10(labels = comma) +
    scale_y_log10(labels = comma) +
    scale_color_viridis_c(trans = "log", name = "# of\ninstances", labels = function(x) round(x, 0)) +
    scale_size(range = c(3, 10), guide = FALSE) +
    theme(plot.caption = element_text(size = 7)) }

(plot_file <- paste0("img/mastodon_tld_", download_time, ".png")) %>% 
  ggsave(width = 15, height = 10, units = "cm", dpi = 100, scale = 2)

Before posting we have to create an authorization token once.

# Registration
# run this part once and write down client_id and client_secret,
# you can then comment this part

r <- POST(paste0(instance , "api/v1/apps"),
     body = list(client_name = "my_application_name",
                 redirect_uris = "urn:ietf:wg:oauth:2.0:oob",
                 scopes = "write"))
stop_for_status(r)
apps <- content(r)

paste("client_id :", apps[["client_id"]])
paste("client_secret", apps[["client_secret"]])

# end of registration ; set your client id/secret below

Login

# Your instance, login and password

instance <- "https://mastodon.cloud/"
user <- "*******"
pass <- "*******"

client_id <- "********************"
client_secret <- "******************"

# Login -------------------------------------------------------------------

r <- POST(paste0(instance , "oauth/token"),
          body = list(client_id = client_id,
                      client_secret = client_secret,
                      grant_type = "password",
                      username = user,
                      password = pass,
                      scope = "write"))
stop_for_status(r)
token <- content(r)

We can then post the created image and its accompanying status.

# post image
r <- POST(paste0(instance , "api/v1/media"),
          add_headers(Authorization = paste("Bearer", token[["access_token"]])),
          body = list(file = upload_file(plot_file)))
stop_for_status(r)
media <- content(r)

# post status
r <- POST(paste0(instance , "/api/v1/statuses"),
          add_headers(Authorization = paste("Bearer", token[["access_token"]])),
          body = list(status = paste0("#Mastodon Top Level Domains\n#Rstats\n", media[["text_url"]]),
                      "media_ids[]" = media[["id"]]))
stop_for_status(r)
statuses <- content(r)
Catégories
R

Generate multiple language version plots

The use case is to create the same plot in different languages. I used this technique for Wikipedia plots.

We are going to build a list containing all translations, we will then loop over each language, generating and saving the plot.

# Mauna Loa atmospheric CO2 change
# multi language plot for Wikipedia

# Required packages
library(tidyverse)
library(gridExtra)
library(scales)
library(lubridate)
library(Hmisc)

# Translations ------------------------------------------------------------

language <- list(
  en_US = list(
    locale_lc_time = "en_US.UTF-8",
    title = expression(paste("Monthly mean ", CO[2], " concentration ")),
    caption = paste("Data : R. F. Keeling, S. J. Walker, S. C. Piper and A. F. Bollenbacher\nScripps CO2 Program (http://scrippsco2.ucsd.edu). Accessed ", Sys.Date()),
    x = "Year",
    y = expression(paste(CO[2], " fraction in dry air (", mu, "mol/mol)")),
    x2 = "Month",
    y2 = expression(atop(paste(CO[2], " fraction in dry air (", mu, "mol/mol)"), "Departure from yearly average")),
    title2 = "Seasonal variation"
  ),
  fr_FR = list(
    locale_lc_time = "fr_FR.UTF-8",
    title = expression(paste("Moyenne mensuelle de la concentration de ", CO[2])),
    caption = paste("données : R. F. Keeling, S. J. Walker, S. C. Piper et A. F. Bollenbacher\nScripps CO2 Program (http://scrippsco2.ucsd.edu). Accédé le", Sys.Date()),
    x = "année",
    y = expression(paste("fraction de ", CO[2], " dans l'air sec (", mu, "mol/mol)")),
    x2 = "mois",
    y2 = expression(atop(paste("fraction de ", CO[2], " dans l'air sec (", mu, "mol/mol)"), "en écart à la moyenne annuelle")),
    title2 = "Variation saisonnière"
  ),
  de_DE = list(
    locale_lc_time = "de_DE.UTF-8",
    title = expression(paste("Monatliche durchschnittliche ", CO[2], "-Konzentration")),
    caption = paste("Datei : R. F. Keeling, S. J. Walker, S. C. Piper und A. F. Bollenbacher\nScripps CO2 Program (http://scrippsco2.ucsd.edu). Zugänglich am", Sys.Date()),
    x = "Jahr",
    y = expression(paste(CO[2], "-Anteil in trockener Luft (", mu, "mol/mol)")),
    x2 = "Monate",
    y2 = expression(atop(paste(CO[2], "-Anteil in trockener Luft (", mu, "mol/mol)"), "Abweichung vom Jahresmittel")),
    title2 = "Monatliche Variation"
  ),
  es_ES = list(
    locale_lc_time = "es_ES.UTF-8",
    title = expression(paste("Media mensual de la concentración de ", CO[2])),
    caption = paste("dato : R. F. Keeling, S. J. Walker, S. C. Piper y A. F. Bollenbacher\nScripps CO2 Program (http://scrippsco2.ucsd.edu). Visitada", Sys.Date()),
    x = "Año",
    y = expression(paste("Fraccion de ", CO[2],  " en aire secco (", mu, "mol/mol)")),
    x2 = "Mes",
    y2 = expression(atop(paste("Fraccion de ", CO[2],  " en aire secco (", mu, "mol/mol)"), "Desviación de la media anual")),
    title2 = "Variación mensual"
  ),
  cs_CZ = list(
    locale_lc_time = "cs_CZ.UTF-8",
    title = expression(paste("Průměrné měsíční koncentrace oxidu uhličitého")),
    caption = paste("data : R. F. Keeling, S. J. Walker, S. C. Piper a A. F. Bollenbacher\nScripps CO2 Program (http://scrippsco2.ucsd.edu). Přístupné", Sys.Date()),
    x = "rok",
    y = expression(paste("koncentrace ", CO[2], " v suchém vzduchu (", mu, "mol/mol)")),
    x2 = "měsíc",
    y2 = expression(atop(paste("koncentrace ", CO[2], " v suchém vzduchu (", mu, "mol/mol)"), "odchylka od ročního průměru")),
    title2 = "Měsíční změna (průměrná roční odchylka)"
  ),
  nn_NO = list(
    locale_lc_time = "nn_NO.UTF-8",
    title = expression(paste("Gjennomsnittlig månedlig ", CO[2], "-konsentrasjon")),
    caption = paste("data : R. F. Keeling, S. J. Walker, S. C. Piper og A. F. Bollenbacher\nScripps CO2 Program (http://scrippsco2.ucsd.edu). Vist", Sys.Date()),
    x = "År",
    y = expression(paste(CO[2],"-andel i tørr luft (", mu, "mol/mol)")),
    x2 = "Måned",
    y2 = expression(atop(paste(CO[2],"-andel i tørr luft (", mu, "mol/mol)"),
                         "Avvik fra årlig gjennomsnitt")),
    title2 = "Årlig variasjon"
  )
)


# Data --------------------------------------------------------------------

# http://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record
# used during US gov shutdown
co2ml <- read_csv("http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/monthly/monthly_in_situ_co2_mlo.csv",
                  col_names = c("year", "month", "xls_date", "decimal",
                                "co2", "co2_seas_adj", "fit", "fit_seas_adj",
                                "co2_filled", "co2_filled_seas_adj"),
                  col_types = "iiiddddddd",
                  skip = 57,
                  na = "-99.99",
                  comment = "\"") %>% 
  group_by(year) %>% 
  mutate(year_mean = mean(co2_filled, na.rm = TRUE),
         delta = co2_filled - year_mean,
         vdate = ymd(paste0("2015-", month, "-01")))

# Generate the plot for each language -------------------------------------

for (l in names(language)) {
  message(l)
  current <- language[[l]]
  
  # format the date in local names
  Sys.setlocale("LC_TIME", current$locale_lc_time)
  
  # main plot
  p1 <- ggplot(co2ml, aes(decimal, co2_filled)) + 	
    geom_line(color = "pink") +
    geom_point(color = "red", size = 0.6) +
    stat_smooth(span = 0.1) +
    scale_x_continuous(breaks = pretty_breaks()) +
    scale_y_continuous(breaks = pretty_breaks(4), minor_breaks = pretty_breaks(8)) +
    labs(
      x = current$x,
      y = current$y,
      title = current$title,
      subtitle = paste("Mauna Loa", min(co2ml$year), "-", max(co2ml$year)),
      caption = current$caption) +
    theme_bw() +
    theme(plot.caption = element_text(size = 7))
  
  # inset plot
  p2 <- ggplot(co2ml, aes(vdate, delta)) +
    geom_hline(yintercept = 0) +
    stat_smooth(span = 0.4, se = FALSE) +
    stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 0.3) + 
    scale_x_date(breaks = pretty_breaks(4), minor_breaks = pretty_breaks(12), labels = date_format("%b")) +
    labs(
      x = current$x2,
      y = current$y2,
      title = current$title2) +
    theme_bw()
  
  # merge the plots and export in SVG
  p1 + annotation_custom(grob = ggplotGrob(p2), xmin = 1957, xmax = 1991, ymin = 361, ymax = 412)
  ggsave(file = paste("co2_mauna_loa", l, Sys.Date(), "wp.svg", sep = "_"), width = 20, height = 20, units = "cm", device = svg)
  
}

Our plots as a nice gallery :

Last version of the code on Gitlab.

Catégories
R

Mapping multiple trends with confidence

A tutorial to compute trends by groups and plot/map the results

We will use dplyr::nest to create a list-column and will apply a model (with purrr::map) to each row, then we will extract each slope and its p value with map and broom::tidy.

Setup

library(tidyverse)
library(httr)
library(sf)
library(readxl)
library(janitor)
library(fs)
library(broom)
library(scales)
library(rnaturalearth)
library(rnaturalearthdata)
library(rgeos)

fk <- function(x) format(x, big.mark = " ")

Data

Map data. Départements polygons from OSM.

if(!file_exists("departements-20140528-100m.shp")) {
  f <- tempfile()
  GET("http://osm13.openstreetmap.fr/~cquest/openfla/export/departements-20140528-100m-shp.zip",
      write_disk(f))
  unzip(f)
}

dep <- st_read("departements-20140528-100m.shp")

Population data by département 1990-2008 from INSEE.

if(!file_exists("pop.xls")) {
  GET("https://www.insee.fr/fr/statistiques/fichier/2012713/TCRD_004.xls", 
      write_disk("pop.xls"))
}
  
pop <- read_xls("pop.xls", skip = 3) %>% 
  clean_names() %>% 
  head(-1) %>% 
  rename(insee_dep = x1,
         dep = x2,
         x2018 = x2018_p) %>% 
  select(-4) %>% 
  gather(annee, pop, 3:7) %>% 
  mutate(annee = as.integer(str_replace(annee, "x", "")))

Population trends for each département

pop_model <- function(df) {
  lm(pop ~ annee, data = df)
}

trends <- pop %>% 
  group_by(insee_dep, dep) %>% 
  nest() %>% 
  mutate(model = map(data, pop_model),
         glance = map(model, glance),
         coeff = map(model, tidy, conf.int = TRUE)) 

Plot

trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee",
         !insee_dep %in% c("F", "M")) %>% 
  ggplot(aes(fct_reorder(insee_dep, estimate), estimate,
             color = if_else(p.value <= .05,
                             if_else(estimate >= 0, "positive", "négative"),
                             "n.s."))) +
    geom_point() +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = .5) +
    scale_color_manual(name = "Tendance",
                       values = c("positive" = "red",  "n.s." = "lightgray", "négative" = "blue")) +
    scale_y_continuous(labels = fk) +
    labs(title = "Évolution des populations départementales françaises",
         subtitle = "1990-2018",
         x = "dép.",
         y = bquote(Delta[population] ~ (habitant %.% an^{-1})),
         caption = "r.iresmi.net\ndonnées INSEE") +
    guides(color = guide_legend(reverse = TRUE)) +
    theme(plot.caption = element_text(size = 7))
Only 9 départements have a clearly decreasing population

Map

pop_dep <- trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee") %>% 
  right_join(dep, by = c("insee_dep" = "code_insee")) %>%
  left_join(filter(pop, annee == 2018, !insee_dep %in% c("F", "M")), by = "insee_dep") %>% 
  st_as_sf() %>% 
  st_transform(2154) 

moy_fr <- trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee",  !insee_dep %in% c("F", "M")) %>% 
  summarise(mean(estimate, na.rm = TRUE)) %>% 
  pull()

world <- ne_countries(scale = "medium", returnclass = "sf") %>% 
  filter(continent == "Europe") %>% 
  st_transform(2154) 

pop_dep %>% 
  ggplot() +
    geom_sf(data = world, fill = "grey97", color = 0) +
    geom_sf(color = "lightgrey", fill = "floralwhite", size = .2) +
    stat_sf_coordinates(data = filter(pop_dep, p.value > .05),
                        aes(size = pop),
                        fill = "lightgrey", color = "lightgrey", shape = 21, alpha = 0.8) +
    stat_sf_coordinates(data = filter(pop_dep, p.value <= .05),
                        aes(size = pop, fill = estimate),
                        color = "lightgrey", shape = 21, alpha = 0.8) +
    coord_sf(xlim = c(100000, 1200000), ylim = c(6000000, 7200000)) +
    scale_fill_gradient2(name = bquote(atop(displaystyle(atop(Delta ~ population[1990-2018],
                                                               (habitant %.% an^{-1}))), 
                                             moy. == .(round(moy_fr)) ~ ", en gris : n.s.")),
                          labels = fk,
                          low = "darkblue", mid = "white", high = "darkred", midpoint = moy_fr) +
    scale_size_area(name = "habitants (2018)", labels = fk, max_size = 10) +
    labs(title = "Évolution des populations départementales françaises",
         subtitle = "Métropole, 1990-2018",
         x = "",
         y = "",
         caption = "r.iresmi.net\ndonnées INSEE 2018\nfond cartographique : contributeurs Openstreetmap 2014\nNaturalearth") +
    theme_bw() +
    theme(plot.caption = element_text(size = 7),
          legend.text.align = 1)
Population is growing stronger in Paris suburbs and in peripheral southern départements

Catégories
R

Bibliography with knitr : cite your references and packages

A tutorial to use your Zotero references with {rmarkdown} : easily add the citations, automatically generate your bibliography, including the references of the packages used in your analysis.

It’s a good practice to cite the R packages you use in your analysis. However it can be cumbersome to maintain the list of your package’s references in Zotero while the packages used can change when you update your script. Here we use the automatic updates of Zotero to generate our main bibliography and we auto-generate the package bibliography and then merge them.

Prerequisites

Install R, Rstudio, Zotero.

Install Better BibTex for Zotero.

To easily add citations in the rmarkdown text in RStudio, we can also add the {citr} package.

Build the Zotero library

Add references to Zotero.

Create a collection for your project and drag and drop the references needed in this collection.

Zotero

Export the collection (BibLatex format). Tick the update option.

Export

Save it in your R project folder as zotero.bib. Each time you modify this Zotero collection, the zotero.bib file will be updated. Check in the Zotero settings :

Preferences

You can also add a personalized CSL file (used to format references) in your R project folder (see the repository). Below I chose ISO-690 (author-date, no abstract, French).

In your rmarkdown text

In the setup chunk, we load the required packages, generate the .bib file for the packages, merge it with our Zotero bibliography and add the packages as nocite (they are not cited in the text but have to appear in the references).

We must run the setup chunk at least once to generate the files and check which citation to use for the packages that provide multiple references.

We can then carry on our analysis with R chunks and some text with citations.

The citations can be added with [@key] where key is the citation key generated by Better Bibtex and found at the top of the Zotero info panel. Or we can use {citr} which will help us find it (Addins menu in Rstudio > Insert citations).

The bibliography will be added at the end of the document.

---
title: "Bibliography with knitr : cite your references and packages"
author: "r.iresmi.net"
date: "`r Sys.Date()`"
output: html_document
bibliography: biblio.bib
csl: iso690-author-date-fr-no-abstract.csl
link-citations: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

# all your necessary packages
packages <- c("tidyverse", 
             "knitr",     
             "bibtex"
             # add your other packages here     
             )  

# install if needed and load packages
to_install <- packages[! packages %in% installed.packages()[, "Package"]]
if (length(to_install)) { 
  install.packages(to_install, repos = "https://cloud.r-project.org")
}
invisible(lapply(packages, library, character.only = TRUE))

# get the packages version 
packages_versions <- function(p) {
  paste(packageDescription(p)$Package, packageDescription(p)$Version, sep = " ")
}

# Get the packages references
write.bib(packages, "packages.bib")

# merge the Zotero references and the packages references
cat(paste("% Automatically generated", Sys.time()), "\n% DO NOT EDIT",
    { readLines("zotero.bib") %>% 
      paste(collapse = "\n") },
    { readLines("packages.bib") %>% 
      paste(collapse = "\n") },
    file = "biblio.bib",
    sep = "\n")


# Some packages reference keys must be modified
# (their key is not the package name)
# check in packages.bib
packages_keys <- packages %>% 
  enframe() %>% 
  mutate(value = case_when(value == "knitr" ~ "@knitr1",
                           #value == "boot" ~ "@boot1",
                           TRUE ~ paste0("@", value)))
```

---
nocite: |
  @Svenssonguideornitho2007
  `r paste(packages_keys$value, collapse = "\n  ")`
---

Any text...

Data analyzed with R [@r_development_core_team_r:_2010][^r_version].

[^r_version]: `r version$version.string`, with : `r paste(lapply(sort(packages), packages_versions), collapse = ", ")`.

We can easily cite references (menu Addins / Insert citations) after having run the setup chunk to create the bibliography file [@moreno_measuring_2017].

Lorem Ipsum

## References

After knitting we get the file :

knitted

See the result.