From 71a3c9fc173d25dbc2f5a1608ea9aa89388a4337 Mon Sep 17 00:00:00 2001 From: hrbrmstr Date: Mon, 18 Mar 2019 13:32:17 -0400 Subject: [PATCH] nginx apache --- R/apache-httpd.R | 86 ++++++++++++++++++++++++++----------- R/nginx.R | 84 +++++++++++++++++++----------------- man/apache_httpd_version_history.Rd | 2 +- 3 files changed, 106 insertions(+), 66 deletions(-) diff --git a/R/apache-httpd.R b/R/apache-httpd.R index c1ee453..9fc2fb4 100644 --- a/R/apache-httpd.R +++ b/R/apache-httpd.R @@ -1,6 +1,6 @@ #' Retrieve Apache httpd Version Release History #' -#' Reads to build a data frame of +#' Reads releases to build a data frame of #' Apache `httpd` version release numbers and dates with semantic version #' strings parsed and separate fields added. The data frame is also arranged in #' order from lowest version to latest version and the `vers` column is an @@ -10,32 +10,68 @@ #' @export apache_httpd_version_history <- function() { - ap <- readr::read_lines("https://archive.apache.org/dist/httpd/") + page <- gh::gh("/repos/apache/httpd/tags") - apd <- xml2::read_html(paste0(ap[grepl('"(httpd-|apache_)[[:digit:]]+\\.[[:digit:]]+\\.[[:digit:]]+\\.tar', ap)], collapse="\n")) + purrr::map_df( + page, ~{ + list( + vers = .x$name, + rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous + ) + }) -> xdf - rvest::html_text(apd) %>% - stri_split_lines() %>% - unlist() %>% - stri_match_first_regex("([[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2})") %>% - .[,2] -> rls_dates + sgh_next <- purrr::safely(gh::gh_next) # to stop on gh_next() error - rvest::html_nodes(apd, "a") %>% - rvest::html_attr("href") %>% - stri_match_first_regex("[-_]([[:digit:]]+\\.[[:digit:]]+\\.[[:digit:]]+)\\.tar") %>% - .[,2] -> vers + while(TRUE) { + page <- sgh_next(page) + if (is.null(page$result)) break; + page <- page$result + dplyr::bind_rows( + xdf, + purrr::map_df( + page, ~{ + list( + vers = .x$name, + rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous + ) + }) + ) -> xdf + } - dplyr::tibble( - vers = vers, - rls_date = rls_dates - ) %>% - dplyr::distinct(vers, .keep_all=TRUE) %>% - mutate(rls_date = as.Date(rls_date)) %>% - mutate(rls_year = lubridate::year(rls_date)) %>% - dplyr::bind_cols( - semver::parse_version(.$vers) %>% - dplyr::as_tibble() + dplyr::mutate(xdf, vers = stri_replace_first_fixed(vers, "v", "")) %>% + dplyr::mutate(rls_date = as.Date(stri_sub(rls_date, 1, 10))) %>% + dplyr::mutate(rls_year = lubridate::year(rls_date)) %>% + tidyr::separate(vers, c("major", "minor", "patch", "build"), remove=FALSE) %>% + dplyr::mutate(prerelease = ifelse( + stri_detect_regex(build, "[[:alpha:]]"), + stri_extract_first_regex(build, "[[:alpha:]][[:alnum:]]+"), + "" + )) %>% + dplyr::mutate(build = stri_replace_first_regex(build, "[[:alpha:]][[:alnum:]]+", "")) %>% + dplyr::mutate_at(.vars=c("major", "minor", "patch", "build"), .funs=c(as.integer)) %>% + dplyr::add_row( + vers = "2.0.0", + rls_date = as.Date("2001-02-09"), + rls_year = 2001, + major = 2L, minor = 0L, patch = 0L, + prerelease = NA_character_, build = NA ) %>% - dplyr::arrange(major, minor, patch) %>% - dplyr::mutate(vers = factor(vers, levels = vers)) -} + dplyr::add_row( + vers = "1.3.37", + rls_date = as.Date("2006-07-27"), + rls_year = 2006, + major = 1L, minor = 3L, patch = 37L, + prerelease = NA_character_, build = NA + ) %>% + dplyr::add_row( + vers = "1.3.41", + rls_date = as.Date("2009-10-03"), + rls_year = 2009, + major = 1L, minor = 3L, patch = 41L, + prerelease = NA_character_, build = NA + ) %>% + dplyr::arrange(rls_date) %>% + dplyr::mutate(vers = factor(vers, levels=vers)) %>% + dplyr::select(vers, rls_date, rls_year, major, minor, patch, prerelease, build) -> out + +} \ No newline at end of file diff --git a/R/nginx.R b/R/nginx.R index fe25369..bd46cc6 100644 --- a/R/nginx.R +++ b/R/nginx.R @@ -10,51 +10,55 @@ #' @export nginx_version_history <- function() { - nginx_changes_url <- - "https://raw.githubusercontent.com/nginx/nginx/master/docs/xml/nginx/changes.xml" + page <- gh::gh("/repos/nginx/nginx/tags") - doc <- suppressWarnings(xml2::read_xml(nginx_changes_url)) + purrr::map_df( + page, ~{ + list( + vers = sub("^release-", "", .x$name), + rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous + ) + }) -> xdf - dplyr::tibble( - vers = rvest::xml_nodes(doc, xpath="//changes") %>% - xml2::xml_attr("ver"), - ts = rvest::xml_nodes(doc, xpath="//changes") %>% - xml2::xml_attr("date") %>% - as.Date(), - year = lubridate::year(ts) - ) -> c1 + sgh_next <- purrr::safely(gh::gh_next) # to stop on gh_next() error - c( - readr::read_lines("https://nginx.org/en/CHANGES-1.0"), - readr::read_lines("https://nginx.org/en/CHANGES-1.2"), - readr::read_lines("https://nginx.org/en/CHANGES-1.4"), - readr::read_lines("https://nginx.org/en/CHANGES-1.6"), - readr::read_lines("https://nginx.org/en/CHANGES-1.8"), - readr::read_lines("https://nginx.org/en/CHANGES-1.10"), - readr::read_lines("https://nginx.org/en/CHANGES-1.12"), - readr::read_lines("https://nginx.org/en/CHANGES-1.14") - ) -> nl + while(TRUE) { + page <- sgh_next(page) + if (is.null(page$result)) break; + page <- page$result + dplyr::bind_rows( + xdf, + purrr::map_df( + page, ~{ + list( + vers = sub("^release-", "", .x$name), + rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous + ) + }) + ) -> xdf + } - read.csv( - stringsAsFactors = FALSE, - text = paste0( - c("v,d", nl[grepl("^Changes", nl)] %>% - gsub("Changes with nginx ", "", .) %>% - gsub("[[:space:]]{3,}", ",", .)), collapse="\n") - ) %>% - dplyr::as_tibble() %>% - dplyr::mutate(d = lubridate::dmy(d)) %>% - dplyr::select(vers=1, ts=2) %>% - dplyr::mutate(year = lubridate::year(ts)) -> c2 - - dplyr::bind_rows(c1, c2) %>% - dplyr::distinct() %>% - dplyr::bind_cols( - semver::parse_version(.$vers) %>% - dplyr::as_tibble() + dplyr::mutate(xdf, vers = stri_replace_first_fixed(vers, "v", "")) %>% + dplyr::mutate(rls_date = as.Date(stri_sub(rls_date, 1, 10))) %>% + dplyr::mutate(rls_year = lubridate::year(rls_date)) %>% + tidyr::separate(vers, c("major", "minor", "patch", "build"), remove=FALSE) %>% + dplyr::mutate(prerelease = ifelse( + stri_detect_regex(build, "[[:alpha:]]"), + stri_extract_first_regex(build, "[[:alpha:]][[:alnum:]]+"), + "" + )) %>% + dplyr::mutate(build = stri_replace_first_regex(build, "[[:alpha:]][[:alnum:]]+", "")) %>% + dplyr::mutate_at(.vars=c("major", "minor", "patch", "build"), .funs=c(as.integer)) %>% + dplyr::bind_rows( + tibble( + vers = c("1.2.1"), + rls_date = as.Date(c("2012-06-07")), + rls_year = lubridate::year(rls_date), + major = 1L, minor = 2L, patch = 1L, + prerelease = NA, build = NA + ) ) %>% dplyr::arrange(major, minor, patch) %>% dplyr::mutate(vers = factor(vers, levels=vers)) %>% - dplyr::rename(rls_date = ts, rls_year = year) - + dplyr::select(vers, rls_date, rls_year, major, minor, patch, prerelease, build) -> out } \ No newline at end of file diff --git a/man/apache_httpd_version_history.Rd b/man/apache_httpd_version_history.Rd index 2992de5..d758a2a 100644 --- a/man/apache_httpd_version_history.Rd +++ b/man/apache_httpd_version_history.Rd @@ -7,7 +7,7 @@ apache_httpd_version_history() } \description{ -Reads \url{https://archive.apache.org/dist/httpd/} to build a data frame of +Reads \url{https://github.com/apache/httpd} releases to build a data frame of Apache \code{httpd} version release numbers and dates with semantic version strings parsed and separate fields added. The data frame is also arranged in order from lowest version to latest version and the \code{vers} column is an