boB Rudis
5 years ago
commit
8fb58d1b3a
10 changed files with 1997 additions and 0 deletions
@ -0,0 +1,38 @@ |
|||||
|
# ---> R |
||||
|
# History files |
||||
|
.Rhistory |
||||
|
.Rapp.history |
||||
|
|
||||
|
# Session Data files |
||||
|
.RData |
||||
|
|
||||
|
# Example code in package build process |
||||
|
*-Ex.R |
||||
|
|
||||
|
# Output files from R CMD build |
||||
|
/*.tar.gz |
||||
|
|
||||
|
# Output files from R CMD check |
||||
|
/*.Rcheck/ |
||||
|
|
||||
|
# RStudio files |
||||
|
.Rproj.user/ |
||||
|
|
||||
|
# produced vignettes |
||||
|
vignettes/*.html |
||||
|
vignettes/*.pdf |
||||
|
|
||||
|
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 |
||||
|
.httr-oauth |
||||
|
|
||||
|
# knitr and R markdown default cache directories |
||||
|
/*_cache/ |
||||
|
/cache/ |
||||
|
|
||||
|
# Temporary files created by R markdown |
||||
|
*.utf8.md |
||||
|
*.knit.md |
||||
|
|
||||
|
# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html |
||||
|
rsconnect/ |
||||
|
|
@ -0,0 +1,21 @@ |
|||||
|
MIT License |
||||
|
|
||||
|
Copyright (c) <year> <copyright holders> |
||||
|
|
||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
of this software and associated documentation files (the "Software"), to deal |
||||
|
in the Software without restriction, including without limitation the rights |
||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
copies of the Software, and to permit persons to whom the Software is furnished |
||||
|
to do so, subject to the following conditions: |
||||
|
|
||||
|
The above copyright notice and this permission notice (including the next |
||||
|
paragraph) shall be included in all copies or substantial portions of the |
||||
|
Software. |
||||
|
|
||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS |
||||
|
OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
||||
|
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF |
||||
|
OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@ -0,0 +1,3 @@ |
|||||
|
# cran-mirror-security |
||||
|
|
||||
|
Exploring CRAN's claims about the "security" of CRAN mirrors |
@ -0,0 +1,15 @@ |
|||||
|
Version: 1.0 |
||||
|
|
||||
|
RestoreWorkspace: Default |
||||
|
SaveWorkspace: Default |
||||
|
AlwaysSaveHistory: Default |
||||
|
|
||||
|
EnableCodeIndexing: Yes |
||||
|
UseSpacesForTab: Yes |
||||
|
NumSpacesForTab: 2 |
||||
|
Encoding: UTF-8 |
||||
|
|
||||
|
RnwWeave: Sweave |
||||
|
LaTeX: pdfLaTeX |
||||
|
|
||||
|
StripTrailingWhitespace: Yes |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
@ -0,0 +1,200 @@ |
|||||
|
library(xml2) |
||||
|
library(httr) |
||||
|
library(curl) |
||||
|
library(stringi) |
||||
|
library(urltools) |
||||
|
library(ipinfo) |
||||
|
library(openssl) |
||||
|
library(furrr) |
||||
|
library(vershist) |
||||
|
library(ggalt) |
||||
|
library(ggbeeswarm) |
||||
|
library(hrbrthemes) |
||||
|
library(tidyverse) |
||||
|
|
||||
|
# Parse the locally saved copy of the CRAN mirrors page as HTML.
mdoc <- xml2::read_xml("~/data/mirrors.html", as_html = TRUE)

# Distinct hrefs of the table-cell links whose href contains "https".
ssl_mirrors <- unique(
  xml_attr(
    xml_find_all(mdoc, ".//td/a[contains(@href, 'https')]"),
    "href"
  )
)

ssl_mirrors
||||
|
|
||||
|
# Resolve futures in parallel background R sessions. The `multiprocess`
# strategy used previously is deprecated in the future package; `multisession`
# is the portable replacement.
plan(multisession)

# Wrap every network-touching call so that a failure yields NULL instead of
# aborting the whole collection run.
dl_cert <- possibly(openssl::download_ssl_cert, NULL)
HEAD_ <- possibly(httr::HEAD, NULL)
dig <- possibly(curl::nslookup, NULL)
query_ip_ <- possibly(ipinfo::query_ip, NULL)

# For each mirror URL collect a list with: the host name, the downloaded
# certificate(s), the HEAD response, the resolved address and the ipinfo
# lookup result for that address. Any element may be NULL if its call failed.
mir_dat <- future_map(ssl_mirrors, function(mirror_url) {
  host <- domain(mirror_url)
  # TRUE is nslookup's second positional argument (`ipv4_only` per curl docs).
  ip <- dig(host, TRUE)
  # Only query ipinfo when name resolution actually produced an address.
  ip_info <- if (length(ip) > 0) query_ip_(ip) else NULL
  list(
    host = host,
    cert = dl_cert(host),
    head = HEAD_(mirror_url),
    ip = ip,
    ip_info = ip_info
  )
})

# Cache the raw results so the analysis below can be re-run offline.
saveRDS(mir_dat, "~/data/mir-dat.rds")

str(mir_dat, 3)
||||
|
|
||||
|
# World polygons as a data frame for the base map layer, Antarctica excluded.
world <- filter(
  fortify(maps::map("world", ".", exact = FALSE, plot = FALSE, fill = TRUE)),
  region != "Antarctica"
)
||||
|
|
||||
|
# Coordinates of each mirror, taken from the comma-separated `loc` string of
# the ipinfo result. Mirrors whose lookup failed have a NULL `ip_info`, which
# would make map_chr() error; substitute NA so they become NA rows instead.
mirror_locs <- map_chr(mir_dat, ~.x$ip_info$loc %||% NA_character_)

# Split "a,b" into a two-column character matrix (first piece, second piece).
loc_parts <- stri_split_fixed(
  mirror_locs, pattern = ",", n = 2, simplify = TRUE
)

# Name the columns V1/V2 explicitly (the map plot refers to them) rather than
# relying on as_tibble()'s deprecated auto-naming of unnamed matrices.
# NOTE(review): V1 is plotted on y and V2 on x, so presumably "lat,lon".
wheres_cran <- tibble(
  V1 = as.numeric(loc_parts[, 1]),
  V2 = as.numeric(loc_parts[, 2])
)
||||
|
|
||||
|
# Map of mirror locations: world outline in gray with one white point per
# mirror, drawn in the Winkel tripel ("wintri") projection with no axis text.
ggplot() +
  ggalt::geom_cartogram(
    data = world,
    map = world,
    aes(long, lat, map_id = region),
    color = ft_cols$gray,
    size = 0.125
  ) +
  geom_point(
    data = wheres_cran,
    aes(V2, V1),
    color = "white"
  ) +
  ggalt::coord_proj("+proj=wintri") +
  labs(x = NULL, y = NULL) +
  theme_ft_rc(grid = "") +
  theme(axis.text = element_blank())
||||
|
|
||||
|
# One row per (host, subject-alt-name) describing the certificate chain that
# each mirror presented. Fields are NA when the certificate download failed.
certs <- map_df(mir_dat, function(mirror) {
  chain <- mirror$cert

  # Guard the positional lookups: `chain[[2]]` raises "subscript out of
  # bounds" when a server returns only a single certificate.
  site_cert <- if (length(chain) >= 1) chain[[1]] else NULL
  issuer_cert <- if (length(chain) >= 2) chain[[2]] else NULL

  tibble(
    host = mirror$host,
    s_issuer = site_cert$issuer %||% NA_character_,   # issuer of 1st cert
    i_issuer = issuer_cert$issuer %||% NA_character_, # issuer of 2nd cert
    algo = site_cert$algorithm %||% NA_character_,
    # alt_names may hold several entries; tibble() recycles the scalar
    # columns so each alternative name becomes its own row.
    names = site_cert$alt_names %||% NA_character_,
    nm_ct = length(site_cert$alt_names),
    key_size = site_cert$pubkey$size %||% NA_integer_
  )
})
||||
|
|
||||
|
# How often each alternative name appears across all mirrors.
certs %>%
  count(names, sort = TRUE)

# Signature algorithm / key size combinations, counted once per host.
certs %>%
  distinct(host, algo, key_size) %>%
  count(algo, key_size, sort = TRUE)

# Issuers of the second certificate in each chain, counted once per host.
certs %>%
  distinct(host, i_issuer) %>%
  count(i_issuer, sort = TRUE) %>%
  print(n = 28)

# Distribution of the number of certificate rows (alt names) per host.
certs %>%
  count(host, sort = TRUE) %>%
  ggplot() +
  geom_quasirandom(aes("", n))

# Alternative names listed for a few individual hosts.
certs %>%
  filter(host == "cran.cnr.berkeley.edu") %>%
  select(names)

certs %>%
  filter(host == "cran.rapporter.net") %>%
  select(names)

certs %>%
  filter(host == "cran-r.c3sl.ufpr.br") %>%
  select(names)

certs %>%
  filter(host == "fourdots.com") %>%
  select(names)
||||
|
|
||||
|
# Long-format table of HTTP response headers: one row per (site, header).
# The list is labelled with each mirror's position in `mir_dat` *before*
# compact() drops failed requests; otherwise `site_num` would be renumbered
# and stop matching `mir_dat` as soon as one HEAD request had failed.
hdrs <- map(mir_dat, ~.x$head$headers) %>%
  set_names(as.character(seq_along(mir_dat))) %>%
  compact() %>%
  map_df(~{
    # Flatten one site's headers into a single row, then reshape to
    # name/value pairs.
    flatten_df(.x) %>%
      gather(name, value)
  }, .id = "site_num")
||||
|
|
||||
|
# Frequency of each header name across all responding mirrors.
hdrs %>%
  count(name, sort = TRUE) %>%
  print(n = 31)

# Split the "server" header at the first "/" into product kind and version;
# values without a "/" get an NA version, and any later "/" stays in version.
svr <- hdrs %>%
  filter(name == "server") %>%
  separate(
    value,
    c("kind", "version"),
    sep = "/",
    fill = "right",
    extra = "merge"
  )

svr %>%
  count(kind, sort = TRUE)
||||
|
|
||||
|
# Apache httpd release history ordered by release date, with `vers` turned
# into a factor whose levels follow that chronological order.
apa_all <- apache_httpd_version_history() %>%
  arrange(rls_date) %>%
  mutate(
    vers = factor(as.character(vers), levels = as.character(vers))
  )

# Count of mirrors per reported Apache version, normalised to "maj.min.pat"
# and expressed on the same factor levels as `apa_all$vers`.
apa_vers <- svr %>%
  filter(kind == "Apache", !is.na(version)) %>%
  # Drop everything after the first space in the version string.
  mutate(version = stri_replace_all_regex(version, " .*$", "")) %>%
  count(version) %>%
  separate(
    version,
    c("maj", "min", "pat"),
    sep = "\\.",
    convert = TRUE,
    fill = "right"
  ) %>%
  mutate(
    pat = ifelse(is.na(pat), 1, pat), # a missing patch level is set to 1
    v = sprintf("%s.%s.%s", maj, min, pat),
    v = factor(v, levels = apa_all$vers)
  ) %>%
  arrange(v)

# For each release year, the earliest release among the versions actually
# seen on mirrors; used as year markers on the plot.
apa_yrs <- apa_all %>%
  filter(vers %in% apa_vers$v) %>%
  arrange(rls_date) %>%
  group_by(rls_year) %>%
  slice(1) %>%
  ungroup() %>%
  arrange(rls_date)
||||
|
|
||||
|
# Number of mirrors per Apache version, with dotted vertical guides and
# labels marking the first observed release of each year.
ggplot() +
  # Invisible layer that establishes the x (version) and y (count) scales.
  geom_blank(data = apa_vers, aes(v, n)) +
  # Dotted year markers running the full height of the panel.
  geom_segment(
    data = apa_yrs,
    aes(vers, 0, xend = vers, yend = Inf),
    linetype = "dotted",
    size = 0.25,
    color = "white"
  ) +
  # Thick segments acting as bars for the per-version counts.
  geom_segment(
    data = apa_vers,
    aes(v, n, xend = v, yend = 0),
    color = ft_cols$gray,
    size = 8
  ) +
  # Year labels pinned to the top of each marker.
  geom_label(
    data = apa_yrs,
    aes(vers, Inf, label = rls_year),
    family = font_rc,
    color = "white",
    fill = "#262a31",
    size = 4,
    vjust = 1,
    hjust = 0,
    nudge_x = 0.01,
    label.size = 0
  ) +
  scale_y_comma(limits = c(0, 15)) +
  labs(x = "Apache Version #", y = "# Servers") +
  theme_ft_rc(grid = "Y") +
  theme(axis.text.x = element_text(family = "mono", color = "white"))
||||
|
|
||||
|
# Previously collected data with (at least) `ip` and `port` columns.
cran_mirror_other_things <- readRDS("~/data/cran-mirror-other-things.rds")

# Number of distinct ports per IP.
cran_mirror_other_things %>%
  distinct(ip, port) %>%
  count(ip, sort = TRUE)

# Number of distinct IPs per port.
cran_mirror_other_things %>%
  distinct(ip, port) %>%
  count(port, sort = TRUE)

# Ports seen on a few individual IPs.
cran_mirror_other_things %>%
  distinct(ip, port) %>%
  filter(ip == "104.25.94.23")

cran_mirror_other_things %>%
  distinct(ip, port) %>%
  filter(ip == "143.107.10.17")

cran_mirror_other_things %>%
  distinct(ip, port) %>%
  filter(ip == "137.208.57.37")

# Previously collected data with (at least) `ip` and `cpe` columns.
cran_recog <- readRDS("~/data/cran-recog.rds")

cran_recog %>%
  count(ip, sort = TRUE)

cran_recog %>%
  distinct(ip, cpe) %>%
  filter(ip == "192.75.96.254")

# Number of distinct `cpe` values per IP.
cran_recog %>%
  distinct(ip, cpe) %>%
  count(ip, sort = TRUE)

cran_dns <- readRDS("~/data/cran-dns.rds")
||||
|
|
||||
|
|
||||
|
|
||||
|
|
Loading…
Reference in new issue