diff --git a/.Rbuildignore b/.Rbuildignore index c9a5c92..19cdbd8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 +19,4 @@ ^CRAN-RELEASE$ ^appveyor\.yml$ ^tools$ +^LICENSE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 9bc44cc..ff03e52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pwhois Type: Package -Title: pwhois title goes here otherwise CRAN checks fail +Title: Issue 'WHOIS' Queries and Process Various 'WHOIS' Responses Version: 0.1.0 Date: 2021-02-13 Authors@R: c( @@ -8,17 +8,24 @@ Authors@R: c( comment = c(ORCID = "0000-0001-5670-2640")) ) Maintainer: Bob Rudis -Description: A good description goes here otherwise CRAN checks fail. +Description: WHOIS (pronounced as the phrase "who is") is a query and response protocol + that is widely used for querying databases that store the registered users + or assignees of an Internet resource, such as a domain name, an IP address + block or an autonomous system, but is also used for a wider range of other + information. The protocol stores and delivers database content in a + human-readable format. The current iteration of the WHOIS protocol was drafted + by the Internet Society, and is documented in RFC 3912. Tools are provided + to issue query requests and process query responses.
URL: https://git.rud.is/hrbrmstr/pwhois BugReports: https://git.rud.is/hrbrmstr/pwhois/issues Encoding: UTF-8 -License: AGPL +License: MIT + file LICENSE Suggests: covr, tinytest Depends: R (>= 3.5.0) Imports: - httr, + stringi, jsonlite Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f51d1b2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2021 +COPYRIGHT HOLDER: Bob Rudis diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..c2304a0 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2021 Bob Rudis + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/NAMESPACE b/NAMESPACE index 5b4b9ae..290b1d0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,6 @@ # Generated by roxygen2: do not edit by hand -import(httr) -importFrom(jsonlite,fromJSON) +export(tidy_cymru) +export(tidy_pwhois) +export(whois) +import(stringi) diff --git a/R/pwhois-package.R b/R/pwhois-package.R index 00e88bf..e51c8ef 100644 --- a/R/pwhois-package.R +++ b/R/pwhois-package.R @@ -1,9 +1,17 @@ -#' ... -#' +#' pwhois +#' +#' WHOIS (pronounced as the phrase "who is") is a query and response protocol +#' that is widely used for querying databases that store the registered users +#' or assignees of an Internet resource, such as a domain name, an IP address +#' block or an autonomous system, but is also used for a wider range of other +#' information. The protocol stores and delivers database content in a human- +#' readable format. The current iteration of the WHOIS protocol was drafted +#' by the Internet Society, and is documented in RFC 3912. Tools are provided +#' to issue query requests and process query responses. 
+#' #' @md #' @name pwhois #' @keywords internal #' @author Bob Rudis (bob@@rud.is) -#' @import httr -#' @importFrom jsonlite fromJSON +#' @import stringi "_PACKAGE" diff --git a/R/tidy.R b/R/tidy.R new file mode 100644 index 0000000..799bde2 --- /dev/null +++ b/R/tidy.R @@ -0,0 +1,52 @@ +#' Turn a Prefix WhoIs server response into a data frame +#' +#' @param pwhois_raw_response the raw response from a call to [whois()] +#' @return data frame +#' @export +#' @examples +#' res <- whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois +#' tidy_pwhois(res) +tidy_pwhois <- function(pwhois_raw_response) { + + if (any(grepl("Org-Record", pwhois_raw_response))) { + + starts <- which(grepl("Org-Record", pwhois_raw_response)) + ends <- c(starts[-1]-1, length(pwhois_raw_response)) + + mapply( + FUN = function(start, end) process_pwhois_response(pwhois_raw_response[start:end]), + starts, ends, SIMPLIFY = FALSE + ) -> out + + bind_rows(out) + + } else { + process_pwhois_response(pwhois_raw_response) + } + +} + +#' Turn a Team Cymru server response into a data frame +#' +#' @param cymru_raw_response the raw response from a call to [whois()] +#' @return data frame +#' @export +#' @examples +#' res <- whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois +#' tidy_cymru(res) +tidy_cymru <- function(cymru_raw_response) { + + tmp <- stri_split_fixed(cymru_raw_response, "|", simplify = TRUE) + tmp <- apply(tmp, c(1, 2), stri_trim_both) + + stri_replace_all_regex( + stri_trans_tolower(tmp[1,]), + "[ -]", " " + ) -> cols + + set_names( + as.data.frame(tmp[-1, , drop=FALSE]), + cols + ) + +} diff --git a/R/utils-mappers.R b/R/utils-mappers.R new file mode 100644 index 0000000..9cdb8db --- /dev/null +++ b/R/utils-mappers.R @@ -0,0 +1,320 @@ +# NOTE: At the bottom of this source file show the equivalents to purrr mappers +# +# NOTE these aren't 100% equivalent to the purrr mappers but cover very common use-cases +# +# NOTE
formula function (e.g. ~{}) are 100% supported +# +# NOTE: THESE DO NOT SUPPORT list EXTRACTORS + +set_names <- function(object = nm, nm) { + names(object) <- nm + object +} + +map <- function(.x, .f, ..., .default) { + + default_exists <- !missing(.default) + + if (inherits(.f, "formula")) { + .body <- dimnames(attr(terms(.f), "factors"))[[1]] + .f <- function(.x, . = .x) {} + body(.f) <- as.expression(parse(text=.body)) + } + + nm <- names(.x) + + if (inherits(.f, "function")) { + + lapply(.x, function(x) { + res <- .f(x, ...) + if ((length(res) == 0) & default_exists) res <- .default + res + }) -> out + + } else if (is.numeric(.f) | is.character(.f)) { + + lapply(.x, function(x) { + res <- try(x[[.f]], silent = TRUE) + if (inherits(res, "try-error")) res <- NULL + if ((length(res) == 0) & default_exists) res <- .default + res + }) -> out + + } + + if (length(nm) > 0) out <- set_names(out, nm) + + out + +} + +map2 <- function(.x, .y, .f, ..., .default) { + + default_exists <- !missing(.default) + + if (inherits(.f, "formula")) { + .body <- dimnames(attr(terms(.f), "factors"))[[1]] + .f <- function(.x, .y, . = .x) {} + body(.f) <- as.expression(parse(text=.body)) + } + + if (inherits(.f, "function")) { + mapply( + function(x, ...) { + res <- .f(x, ...) + if ((length(res) == 0) & default_exists) res <- .default + res + }, + .x, .y, + ..., + SIMPLIFY=FALSE, USE.NAMES=FALSE + ) + } + +} + +map_chr <- function(.x, .f, ..., .default) { + nm <- names(.x) + out <- as.character(unlist(map(.x, .f, ..., .default = .default))) + if (length(nm) > 0) set_names(out, nm) else out +} + +map2_chr <- function(.x, .y, .f, ..., .default) { + as.character(unlist(map2(.x, .y, .f, ..., .default = .default))) +} + +map_lgl <- function(.x, .f, ..., .default) { + nm <- names(.x) + out <- as.logical(unlist(map(.x, .f, ..., .default = .default))) + if (length(nm) > 0) set_names(out, nm) else out +} + +map2_lgl <- function(.x, .y, .f, ..., .default)
{ + as.logical(unlist(map2(.x, .y, .f, ..., .default = .default))) +} + +map_dbl <- function(.x, .f, ..., .default) { + nm <- names(.x) + out <- as.double(unlist(map(.x, .f, ..., .default = .default))) + if (length(nm) > 0) set_names(out, nm) else out +} + +map2_dbl <- function(.x, .y, .f, ..., .default) { + as.double(unlist(map2(.x, .y, .f, ..., .default = .default))) +} + +map_int <- function(.x, .f, ..., .default) { + nm <- names(.x) + out <- as.integer(unlist(map(.x, .f, ..., .default = .default))) + if (length(nm) > 0) set_names(out, nm) else out +} + +map2_int <- function(.x, .y, .f, ..., .default) { + as.integer(unlist(map2(.x, .y, .f, ..., .default = .default))) +} + + +map_df <- function(.x, .f, ..., .id=NULL) { + + res <- map(.x, .f, ...) + out <- bind_rows(res, .id=.id) + out + +} + +map_dfr <- map_df + +map_dfc <- function(.x, .f, ...) { + + res <- map(.x, .f, ...) + out <- bind_cols(res) + out + +} + +map2_df <- function(.x, .y, .f, ..., .id=NULL) { + + res <- map2(.x, .y, .f, ...) + out <- bind_rows(res, .id = .id) + out + +} + + +map2_dfc <- function(.x, .y, .f, ...) { + + res <- map2(.x, .y, .f, ...) + out <- bind_cols(res) + out + +} + +# this has limitations and is more like 75% of dplyr::bind_rows() +# this is also orders of magnitude slower than dplyr::bind_rows() +bind_rows <- function(..., .id = NULL) { + + res <- list(...)
+ + if (length(res) == 1) res <- res[[1]] + + cols <- unique(unlist(lapply(res, names), use.names = FALSE)) + + if (!is.null(.id)) { + inthere <- cols[cols %in% .id] + if (length(inthere) > 0) { + .id <- make.unique(c(inthere, .id))[2] + } + } + + id_vals <- if (is.null(names(res))) seq_along(res) else names(res) + + saf <- getOption("stringsAsFactors", FALSE) + options(stringsAsFactors = FALSE) + on.exit(options(stringsAsFactors = saf)) + + idx <- 1 + do.call( + rbind.data.frame, + lapply(res, function(.x) { + x_names <- names(.x) + moar_names <- setdiff(cols, x_names) + if (length(moar_names) > 0) { + for (i in 1:length(moar_names)) { + .x[[moar_names[i]]] <- rep(NA, length(.x[[1]])) + } + } + if (!is.null(.id)) { + .x[[.id]] <- id_vals[idx] + idx <<- idx + 1 + } + .x + }) + ) -> out + + rownames(out) <- NULL + + class(out) <- c("tbl_df", "tbl", "data.frame") + + out + +} + +bind_cols <- function(...) { + + res <- list(...) + + row_mismatch <- lapply(res, nrow) != nrow(res[[1]]) + + if (any(row_mismatch)) { + first_mismatch_pos <- which(row_mismatch)[1] + stop(paste0("Argument ", first_mismatch_pos, + " must be length ", nrow(res[[1]]), + ", not ", nrow(res[[first_mismatch_pos]]))) + } + + if (length(res) == 1) res <- res[[1]] + + col_names <- unlist(lapply(res, names), use.names = FALSE) + col_names <- make.unique(col_names, sep = "") + + saf <- getOption("stringsAsFactors", FALSE) + options(stringsAsFactors = FALSE) + on.exit(options(stringsAsFactors = saf)) + + out <- do.call(cbind.data.frame, res) + + names(out) <- col_names + rownames(out) <- NULL + + class(out) <- c("tbl_df", "tbl", "data.frame") + + out + +} + + +# set.seed(1) +# 1:10 %>% +# map(rnorm, n = 10) %>% +# map_dbl(mean) +# +# set.seed(1) +# 1:10 %>% +# purrr::map(rnorm, n = 10) %>% +# purrr::map_dbl(mean) +# +# +# # Or use an anonymous function +# set.seed(1) +# 1:10 %>% +# map(function(x) rnorm(10, x)) +# +# set.seed(1) +# 1:10 %>% +# purrr::map(function(x) rnorm(10, x)) +# +# # Or a formula +# set.seed(1) +# 1:10
%>% +# map(~ rnorm(10, .x)) +# +# set.seed(1) +# 1:10 %>% +# purrr::map(~ rnorm(10, .x)) +# +# # Extract by name or position +# # .default specifies value for elements that are missing or NULL +# l1 <- list(list(a = 1L), list(a = NULL, b = 2L), list(b = 3L)) +# l1 %>% map("a", .default = "???") +# l1 %>% purrr::map("a", .default = "???") +# +# l1 %>% map_int("b", .default = NA) +# l1 %>% purrr::map_int("b", .default = NA) +# +# l1 %>% map_int(2, .default = NA) +# l1 %>% purrr::map_int(2, .default = NA) +# +# # Supply multiple values to index deeply into a list +# l2 <- list( +# list(num = 1:3, letters[1:3]), +# list(num = 101:103, letters[4:6]), +# list() +# ) +# l2 %>% map(c(2, 2)) +# l2 %>% purrr::map(c(2, 2)) +# +# +# # A more realistic example: split a data frame into pieces, fit a +# # model to each piece, summarise and extract R^2 +# mtcars %>% +# split(.$cyl) %>% +# map(~ lm(mpg ~ wt, data = .x)) %>% +# map(summary) %>% +# map_dbl("r.squared") +# +# mtcars %>% +# split(.$cyl) %>% +# purrr::map(~ lm(mpg ~ wt, data = .x)) %>% +# purrr::map(summary) %>% +# purrr::map_dbl("r.squared") +# +# +# # Use map_lgl(), map_dbl(), etc to reduce to a vector. 
+# # * list +# mtcars %>% map(sum) +# mtcars %>% purrr::map(sum) +# # * vector +# mtcars %>% map_dbl(sum) +# mtcars %>% purrr::map_dbl(sum) +# +# # If each element of the output is a data frame, use +# # map_dfr to row-bind them together: +# mtcars %>% +# split(.$cyl) %>% +# map(~ lm(mpg ~ wt, data = .x)) %>% +# map_dfr(~ as.data.frame(t(as.matrix(coef(.))))) +# +# mtcars %>% +# split(.$cyl) %>% +# purrr::map(~ lm(mpg ~ wt, data = .x)) %>% +# purrr::map_dfr(~ as.data.frame(t(as.matrix(coef(.))))) diff --git a/R/utils-safely.R b/R/utils-safely.R new file mode 100644 index 0000000..8e7f90d --- /dev/null +++ b/R/utils-safely.R @@ -0,0 +1,90 @@ +# Less cool counterparts to purrr's side-effect capture-rs +# +# Most of the helper functions are 100% from output.R in purrr repo +# +# @param quiet Hide errors (`TRUE`, the default), or display them +# as they occur? +# @param otherwise Default value to use when an error occurs. +# +# @return `safely`: wrapped function instead returns a list with +# components `result` and `error`. One value is always `NULL`. +# +# `quietly`: wrapped function instead returns a list with components +# `result`, `output`, `messages` and `warnings`. +# +# `possibly`: wrapped function uses a default value (`otherwise`) +# whenever an error occurs. +safely <- function(.f, otherwise = NULL, quiet = TRUE) { + function(...) capture_error(.f(...), otherwise, quiet) +} + +quietly <- function(.f) { + function(...) capture_output(.f(...)) +} + +possibly <- function(.f, otherwise, quiet = TRUE) { + force(otherwise) + function(...) { + tryCatch(.f(...), + error = function(e) { + if (!quiet) + message("Error: ", e$message) + otherwise + }, + interrupt = function(e) { + stop("Terminated by user", call.
= FALSE) + } + ) + } +} + +capture_error <- function(code, otherwise = NULL, quiet = TRUE) { + tryCatch( + list(result = code, error = NULL), + error = function(e) { + if (!quiet) + message("Error: ", e$message) + + list(result = otherwise, error = e) + }, + interrupt = function(e) { + stop("Terminated by user", call. = FALSE) + } + ) +} + +capture_output <- function(code) { + warnings <- character() + wHandler <- function(w) { + warnings <<- c(warnings, w$message) + invokeRestart("muffleWarning") + } + + messages <- character() + mHandler <- function(m) { + messages <<- c(messages, m$message) + invokeRestart("muffleMessage") + } + + temp <- file() + sink(temp) + on.exit({ + sink() + close(temp) + }) + + result <- withCallingHandlers( + code, + warning = wHandler, + message = mHandler + ) + + output <- paste0(readLines(temp, warn = FALSE), collapse = "\n") + + list( + result = result, + output = output, + warnings = warnings, + messages = messages + ) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..86c579d --- /dev/null +++ b/R/utils.R @@ -0,0 +1,40 @@ +open_socket <- possibly(socketConnection, "") +close_socket <- safely(close) + +process_pwhois_response <- function(resp) { + + tmp <- stri_split_fixed(resp, ": ", n = 2L, simplify = TRUE) + cols <- stri_replace_all_fixed(stri_trans_tolower(tmp[,1]), "-", "_") + out <- as.data.frame(as.list(setNames(tmp[,2], cols)), stringsAsFactors = FALSE) + + if (hasName(out, "route_originated_ts")) { + out[["route_originated_ts"]] <- as.POSIXct(as.numeric(out[["route_originated_ts"]]), origin = "1970-01-01") + } + + if (hasName(out, "cache_date")) { + out[["cache_date"]] <- as.POSIXct(as.numeric(out[["cache_date"]]), origin = "1970-01-01") + } + + if (hasName(out, "latitude")) { + out[["latitude"]] <- as.numeric(out[["latitude"]]) + } + + if (hasName(out, "longitude")) { + out[["longitude"]] <- as.numeric(out[["longitude"]]) + } + + if (hasName(out, "modify_date")) { + out[["modify_date"]] <- as.POSIXct(out[["modify_date"]], format = "%b %d %Y %H:%M:%S") + } + + if
(hasName(out, "create_date")) { + out[["create_date"]] <- as.POSIXct(out[["create_date"]], format = "%b %d %Y %H:%M:%S") + } + + if (hasName(out, "can_allocate")) { + out[["can_allocate"]] <- as.logical(as.integer(out[["can_allocate"]])) + } + + out + +} \ No newline at end of file diff --git a/R/whois.R b/R/whois.R new file mode 100644 index 0000000..5efd1d5 --- /dev/null +++ b/R/whois.R @@ -0,0 +1,85 @@ +#' Issue a 'WHOIS' query and retrieve the response +#' +#' WHOIS (pronounced as the phrase "who is") is a query and response protocol +#' that is widely used for querying databases that store the registered users +#' or assignees of an Internet resource, such as a domain name, an IP address +#' block or an autonomous system, but is also used for a wider range of other +#' information. The protocol stores and delivers database content in a +#' human-readable format. The current iteration of the WHOIS protocol was drafted +#' by the Internet Society, and is documented in RFC 3912. +#' +#' The default server — `whois.pwhois.org` — is [The Prefix WhoIs Project](https://pwhois.org/) +#' WHOIS server which provides a whois-compatible client and server framework for +#' disclosing various up-to-date routing information. Instead of using +#' registrar-originated network information (which is often unspecific or +#' inaccurate), Prefix WhoIs uses the Internet's global routing table as +#' gleaned from a number of routing peers around the world. Other sources of +#' information, such as imported data from ARIN are also supported. +#' +#' The pwhois service supports special query types including: +#' +#' - `registry key=value`: you can search the pwhois database for any registry +#' field.
The ones available as of the release date of +#' this package are: "`Origin-AS`", "`Prefix`", "`AS-Path`", +#' "`AS-Org-Name`", "`Org-Name`", "`Net-Name`", "`Cache-Date`", +#' "`Latitude`", "`Longitude`", "`City`", "`Region`", +#' "`Country`", and "`Country-Code`" +#' - `type=cymru`: The record(s) will be returned in +#' [Team Cymru format](https://team-cymru.com/community-services/ip-asn-mapping/) +#' and [tidy_cymru()] can be used to post-process the response. +#' - `type=rpsl`: The record(s) will be returned in +#' [Routing Policy Specification Language](https://tools.ietf.org/html/rfc2650) +#' (RPSL) format. A 'tidy' post-processor is on the TODO list. PRs are welcome. +#' +#' This function can also be used to query the +#' [Team Cymru WHOIS](https://team-cymru.com/community-services/ip-asn-mapping/) +#' server which supports various enhanced queries. See the link for more +#' information. [tidy_cymru()] can be used to post-process these responses. +#' +#' @param query <chr> the text query to send to the destination whois `host` +#' @param host <chr> the WHOIS host to query; This defaults to +#' [The Prefix WhoIs Project](https://pwhois.org/) WHOIS server +#' (a.k.a. "pwhois") as it allows for more robust queries to be performed. +#' @param port <int> TCP port the `host` WHOIS server is running on; +#' defaults to `43` (the standard WHOIS port) +#' @param timeout connection timeout; see [connections] +#' @return raw character response from the `host` WHOIS server. Use +#' the built-in tidying functions to post-process the response.
+#' @export +#' @examples +#' whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois +#' whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois +#' whois('type=rpsl 17.253.144.10') # 'type=rpsl' is only supported on pwhois +#' whois('-v AS23028', "whois.cymru.com") # this only works with the Team Cymru WHOIS service +#' whois("17.253.144.10") +whois <- function(query, host = "whois.pwhois.org", port = 43L, + timeout = getOption("timeout")) { + + query <- as.character(query[1]) + host <- as.character(host[1]) + port <- as.integer(port[1]) + + if (!endsWith(query, "\n")) query <- sprintf("%s\n", query) + + open_socket( + host = host, + port = port, + blocking = TRUE, + open = "r+", + timeout = timeout + ) -> con + + on.exit(close_socket(con)) + + if (!inherits(con, "sockconn")) { + warning(sprintf("Error opening connection to %s:%s", host, port)) + return(NA_character_) + } + + cat(query, file = con) + + res <- readLines(con) + + res + +} diff --git a/README.Rmd b/README.Rmd index fa571bd..4bb4776 100644 --- a/README.Rmd +++ b/README.Rmd @@ -39,6 +39,45 @@ packageVersion("pwhois") ``` +A basic query: + +```{r ex-01} +(res <- whois("17.253.144.10")) + +str(tidy_pwhois(res), 1) +``` + +The Prefix WhoIs project supports some advanced queries including the ability to search on any of their WHOIS registry fields: + +```{r ex-02} +(res <- whois("registry org-name=apple, inc")) + +tibble::as_tibble(tidy_pwhois(res)) +``` + +and can return results in different formats including Team Cymru tables: + +```{r ex-03} +(res <- whois('type=cymru 17.253.144.10')) + +str(tidy_cymru(res), 1) +``` + +That `tidy_cymru()` function can be used on responses from the Team Cymru +WHOIS service as well: + +```{r ex-03a} +(res <- whois('-v AS23028', "whois.cymru.com")) + +str(tidy_cymru(res)) +``` + +The Prefix WhoIs project can also return responses in Routing Policy Specification Language (RPSL), but there is no 'tidy' function for this format
yet: + +```{r ex-04} +whois('type=rpsl 17.253.144.10') +``` + ## pwhois Metrics ```{r cloc, echo=FALSE} diff --git a/README.md b/README.md new file mode 100644 index 0000000..3f5b63a --- /dev/null +++ b/README.md @@ -0,0 +1,221 @@ + +[![Project Status: Active – The project has reached a stable, usable +state and is being actively +developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) +[![Signed +by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr) +![Signed commit +%](https://img.shields.io/badge/Signed_Commits-100%25-lightgrey.svg) +[![Linux build +Status](https://travis-ci.org/hrbrmstr/pwhois.svg?branch=master)](https://travis-ci.org/hrbrmstr/pwhois) +![Minimal R +Version](https://img.shields.io/badge/R%3E%3D-3.5.0-blue.svg) +![License](https://img.shields.io/badge/License-MIT-blue.svg) + +# pwhois + +Issue ‘WHOIS’ Queries and Process Various ‘WHOIS’ Responses + +## Description + +WHOIS (pronounced as the phrase “who is”) is a query and response +protocol that is widely used for querying databases that store the +registered users or assignees of an Internet resource, such as a domain +name, an IP address block or an autonomous system, but is also used for +a wider range of other information. The protocol stores and delivers +database content in a human-readable format. The current iteration of +the WHOIS protocol was drafted by the Internet Society, and is +documented in RFC 3912. Tools are provided to issue query requests and +process query responses.
+ +## What’s Inside The Tin + +The following functions are implemented: + +- `tidy_cymru`: Turn a Team Cymru server response into a data frame +- `tidy_pwhois`: Turn a Prefix WhoIs server response into a data frame +- `whois`: Issue a ‘WHOIS’ query and retrieve the response + +## Installation + +``` r +remotes::install_git("https://git.rud.is/hrbrmstr/pwhois.git") +# or +remotes::install_gitlab("hrbrmstr/pwhois") +# or +remotes::install_bitbucket("hrbrmstr/pwhois") +``` + +NOTE: To use the ‘remotes’ install options you will need to have the +[{remotes} package](https://github.com/r-lib/remotes) installed. + +## Usage + +``` r +library(pwhois) + +# current version +packageVersion("pwhois") +## [1] '0.1.0' +``` + +A basic query: + +``` {ex-01} +(res <- whois("17.253.144.10")) + +str(tidy_pwhois(res), 1) +``` + +The Prefix WhoIs project supports some advanced queries including the +ability to search on any of their WHOIS registry fields: + +``` r +(res <- whois("registry org-name=apple, inc")) +## [1] "Org-Record: 0" "Org-ID: GRNA" +## [3] "Org-Name: Green Apple, Inc." "Can-Allocate: 0" +## [5] "Street-1: 5222 33rd Street SE" "City: Grand Rapids" +## [7] "State: MI" "Postal-Code: 49512-2070" +## [9] "Country: US" "Register-Date: 1997-12-19" +## [11] "Update-Date: 2011-09-24" "Create-Date: Nov 23 2005 02:48:10" +## [13] "Modify-Date: Feb 13 2021 01:35:47" "Admin-0-Handle: INE-ARIN" +## [15] "NOC-0-Handle: INE-ARIN" "Abuse-0-Handle: INE-ARIN" +## [17] "Tech-0-Handle: INE-ARIN" "Referral-Server: rwhois://rwhois.iserv.net:4321" +## [19] "Comment: http://www.greenapple.com" "Org-Record: 1" +## [21] "Org-ID: Network of Apple, Inc." "Org-Name: Network of Apple, Inc." 
+## [23] "Can-Allocate: 1" "Register-Date: 2011-03-02" +## [25] "Update-Date: 2011-03-02" "Create-Date: Mar 01 2011 13:07:13" +## [27] "Modify-Date: Mar 01 2011 13:07:13" "Org-Record: 2" +## [29] "Org-ID: C03342993" "Org-Name: APPLE, INC - CORP PROJECT" +## [31] "Can-Allocate: 0" "Street-1: 1 INFINITE LOOP # MS60" +## [33] "City: CUPERTINO" "State: CA" +## [35] "Postal-Code: 95014-2083" "Country: US" +## [37] "Register-Date: 2013-03-14" "Update-Date: 2013-03-14" +## [39] "Create-Date: May 30 2013 16:03:51" "Modify-Date: Feb 13 2021 01:35:47" +## [41] "Org-Record: 3" "Org-ID: C03342938" +## [43] "Org-Name: APPLE, INC - CORP PROJECT" "Can-Allocate: 0" +## [45] "Street-1: 1 INFINITE LOOP # MS60" "City: CUPERTINO" +## [47] "State: CA" "Postal-Code: 95014-2083" +## [49] "Country: US" "Register-Date: 2013-03-14" +## [51] "Update-Date: 2013-03-14" "Create-Date: May 30 2013 16:03:51" +## [53] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 4" +## [55] "Org-ID: C03342954" "Org-Name: APPLE, INC - CORP PROJECT" +## [57] "Can-Allocate: 0" "Street-1: 1 INFINITE LOOP # MS60" +## [59] "City: CUPERTINO" "State: CA" +## [61] "Postal-Code: 95014-2083" "Country: US" +## [63] "Register-Date: 2013-03-14" "Update-Date: 2013-03-14" +## [65] "Create-Date: May 30 2013 16:03:51" "Modify-Date: Feb 13 2021 01:35:47" +## [67] "Org-Record: 5" "Org-ID: Network of Apple, Inc." +## [69] "Org-Name: Network of Apple, Inc." 
"Can-Allocate: 1" +## [71] "Register-Date: 2013-05-31" "Update-Date: 2013-05-31" +## [73] "Create-Date: May 30 2013 18:55:56" "Modify-Date: May 30 2013 18:55:56" +## [75] "Org-Record: 6" "Org-ID: C04676970" +## [77] "Org-Name: APPLE, INC - CORP PROJECT" "Can-Allocate: 0" +## [79] "Street-1: 1 INFINITE LOOP # MS60" "City: CUPERTINO" +## [81] "State: CA" "Postal-Code: 95014-2083" +## [83] "Country: US" "Register-Date: 2013-08-21" +## [85] "Update-Date: 2013-08-21" "Create-Date: Aug 23 2013 02:32:19" +## [87] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 7" +## [89] "Org-ID: C04716251" "Org-Name: APPLE, INC - RETAIL PROJECT" +## [91] "Can-Allocate: 0" "Street-1: 1500 POLARIS PKWY" +## [93] "City: COLUMBUS" "State: OH" +## [95] "Postal-Code: 43240" "Country: US" +## [97] "Register-Date: 2013-09-30" "Update-Date: 2013-09-30" +## [99] "Create-Date: Oct 02 2013 01:52:33" "Modify-Date: Mar 23 2015 08:01:17" +## [101] "Org-Record: 8" "Org-ID: Apple, Inc." +## [103] "Org-Name: Apple, Inc." "Can-Allocate: 1" +## [105] "Register-Date: 2014-10-19" "Update-Date: 2014-10-19" +## [107] "Create-Date: Oct 18 2014 05:49:09" "Modify-Date: Oct 18 2014 05:49:09" +## [109] "Org-Record: 9" "Org-ID: C06092907" +## [111] "Org-Name: Apple, Inc IP - MRC" "Can-Allocate: 0" +## [113] "Street-1: 21625 Gresham Dr" "City: Ashburn" +## [115] "State: VA" "Postal-Code: 20147" +## [117] "Country: US" "Register-Date: 2016-04-14" +## [119] "Update-Date: 2016-06-21" "Create-Date: Dec 13 2016 10:45:20" +## [121] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 10" +## [123] "Org-ID: C07098191" "Org-Name: APPLE, INC.-COLOCATION - APPLE, INC." 
+## [125] "Can-Allocate: 0" "Street-1: 340 CUMBERLAND AVE" +## [127] "City: PORTLAND" "State: ME" +## [129] "Postal-Code: 04101" "Country: US" +## [131] "Register-Date: 2018-10-24" "Update-Date: 2018-10-24" +## [133] "Create-Date: Oct 26 2018 02:13:03" "Modify-Date: Feb 13 2021 01:35:47" + +tibble::as_tibble(tidy_pwhois(res)) +## # A tibble: 11 x 19 +## org_record org_id org_name can_allocate street_1 city state postal_code country register_date update_date +## +## 1 0 GRNA Green A… FALSE 5222 33… Gran… MI 49512-2070 US 1997-12-19 2011-09-24 +## 2 1 Netwo… Network… TRUE 2011-03-02 2011-03-02 +## 3 2 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14 +## 4 3 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14 +## 5 4 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14 +## 6 5 Netwo… Network… TRUE 2013-05-31 2013-05-31 +## 7 6 C0467… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-08-21 2013-08-21 +## 8 7 C0471… APPLE, … FALSE 1500 PO… COLU… OH 43240 US 2013-09-30 2013-09-30 +## 9 8 Apple… Apple, … TRUE 2014-10-19 2014-10-19 +## 10 9 C0609… Apple, … FALSE 21625 G… Ashb… VA 20147 US 2016-04-14 2016-06-21 +## 11 10 C0709… APPLE, … FALSE 340 CUM… PORT… ME 04101 US 2018-10-24 2018-10-24 +## # … with 8 more variables: create_date , modify_date , admin_0_handle , noc_0_handle , +## # abuse_0_handle , tech_0_handle , referral_server , comment +``` + +and can return results in different formats including Team Cymru tables: + +``` r +(res <- whois('type=cymru 17.253.144.10')) +## [1] "AS | IP | ORG NAME | CC | NET NAME | AS ORG NAME" +## [2] "714 | 17.253.144.10 | Apple Inc. | US | APPLE-WWNET | Apple Inc." + +str(tidy_cymru(res), 1) +## 'data.frame': 1 obs. of 6 variables: +## $ as : chr "714" +## $ ip : chr "17.253.144.10" +## $ org name : chr "Apple Inc." +## $ cc : chr "US" +## $ net name : chr "APPLE-WWNET" +## $ as org name: chr "Apple Inc." 
+``` + +That `tidy_cymru()` function can be used on responses from the Team +Cymru WHOIS service as well: + +``` r +(res <- whois('-v AS23028', "whois.cymru.com")) +## [1] "AS | CC | Registry | Allocated | AS Name" "23028 | US | arin | 2002-01-04 | TEAM-CYMRU, US" + +str(tidy_cymru(res)) +## 'data.frame': 1 obs. of 5 variables: +## $ as : chr "23028" +## $ cc : chr "US" +## $ registry : chr "arin" +## $ allocated: chr "2002-01-04" +## $ as name : chr "TEAM-CYMRU, US" +``` + +The Prefix WhoIs project can also return responses in Routing Policy +Specification Language (RPSL), but there is no ‘tidy’ function for this +format yet: + +``` r +whois('type=rpsl 17.253.144.10') +## [1] "Origin: AS714" "Route: 17.253.144.0/21" +## [3] "Date: 20210213" "AS-Org-Name: Apple Inc." +## [5] "Org-Name: Apple Inc." "Net-Name: APPLE-WWNET" +## [7] "Source: PWHOIS Server 208.74.248.120:43 at 20210213" +``` + +## pwhois Metrics + +| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | +|:-----|---------:|-----:|----:|-----:|------------:|----:|---------:|-----:| +| R | 7 | 0.44 | 303 | 0.47 | 110 | 0.4 | 196 | 0.41 | +| Rmd | 1 | 0.06 | 17 | 0.03 | 29 | 0.1 | 44 | 0.09 | +| SUM | 8 | 0.50 | 320 | 0.50 | 139 | 0.5 | 240 | 0.50 | + +clock Package Metrics for pwhois + +## Code of Conduct + +Please note that this project is released with a Contributor Code of +Conduct. By participating in this project you agree to abide by its +terms. diff --git a/man/pwhois.Rd b/man/pwhois.Rd index 3236199..f8b08c1 100644 --- a/man/pwhois.Rd +++ b/man/pwhois.Rd @@ -4,9 +4,16 @@ \name{pwhois} \alias{pwhois} \alias{pwhois-package} -\title{...} +\title{pwhois} \description{ -A good description goes here otherwise CRAN checks fail. 
+WHOIS (pronounced as the phrase "who is") is a query and response protocol +that is widely used for querying databases that store the registered users +or assignees of an Internet resource, such as a domain name, an IP address +block or an autonomous system, but is also used for a wider range of other +information. The protocol stores and delivers database content in a human- +readable format. The current iteration of the WHOIS protocol was drafted +by the Internet Society, and is documented in RFC 3912. Tools are provided +to issue query requests and process query responses. } \seealso{ Useful links: diff --git a/man/tidy_cymru.Rd b/man/tidy_cymru.Rd new file mode 100644 index 0000000..2f89f2c --- /dev/null +++ b/man/tidy_cymru.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy.R +\name{tidy_cymru} +\alias{tidy_cymru} +\title{Turn a Team Cymru server response into a data frame} +\usage{ +tidy_cymru(cymru_raw_response) +} +\arguments{ +\item{cymru_raw_response}{the raw response from a call to \code{\link[=whois]{whois()}}} +} +\value{ +data frame +} +\description{ +Turn a Team Cymru server response into a data frame +} +\examples{ +res <- whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois +tidy_cymru(res) +} diff --git a/man/tidy_pwhois.Rd b/man/tidy_pwhois.Rd new file mode 100644 index 0000000..935dfe6 --- /dev/null +++ b/man/tidy_pwhois.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy.R +\name{tidy_pwhois} +\alias{tidy_pwhois} +\title{Turn a Prefix WhoIs server response into a data frame} +\usage{ +tidy_pwhois(pwhois_raw_response) +} +\arguments{ +\item{pwhois_raw_response}{the raw response from a call to \code{\link[=whois]{whois()}}} +} +\value{ +data frame +} +\description{ +Turn a Prefix WhoIs server response into a data frame +} +\examples{ +res <- whois("registry org-name=apple, inc") # 'registry' is only supported on
pwhois +tidy_pwhois(res) +} diff --git a/man/whois.Rd b/man/whois.Rd new file mode 100644 index 0000000..77da101 --- /dev/null +++ b/man/whois.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/whois.R +\name{whois} +\alias{whois} +\title{Issue a 'WHOIS' query and retrieve the response} +\usage{ +whois( + query, + host = "whois.pwhois.org", + port = 43L, + timeout = getOption("timeout") +) +} +\arguments{ +\item{query}{ the text query to send to the destination whois \code{host}} + +\item{host}{ the WHOIS host to query; this defaults to +\href{https://pwhois.org/}{The Prefix WhoIs Project} WHOIS server +(a.k.a. "pwhois") as it allows for more robust queries to be performed.} + +\item{port}{ TCP port the \code{host} WHOIS server is running on; +defaults to \code{43} (the standard WHOIS port)} + +\item{timeout}{connection timeout; see \link{connections}} +} +\value{ +raw character response from the \code{host} WHOIS server. Use +the built-in tidying functions to post-process the response. +} +\description{ +WHOIS (pronounced as the phrase "who is") is a query and response protocol +that is widely used for querying databases that store the registered users +or assignees of an Internet resource, such as a domain name, an IP address +block or an autonomous system, but is also used for a wider range of other +information. The protocol stores and delivers database content in a +human-readable format. The current iteration of the WHOIS protocol was drafted +by the Internet Society, and is documented in RFC 3912. +} +\details{ +The default server — \code{whois.pwhois.org} — is \href{https://pwhois.org/}{The Prefix WhoIs Project} +WHOIS server which provides a whois-compatible client and server framework for +disclosing various up-to-date routing information.
Instead of using +registrar-originated network information (which is often unspecific or +inaccurate), Prefix WhoIs uses the Internet's global routing table as +gleaned from a number of routing peers around the world. Other sources of +information, such as imported data from ARIN are also supported. + +The pwhois service supports special query types including: +\itemize{ +\item \verb{registry key=value}: you can search the pwhois database for any registry +field. The ones available as of the release date of +this package are: "\code{Origin-AS}", "\code{Prefix}", "\code{AS-Path}", +"\code{AS-Org-Name}", "\code{Org-Name}", "\code{Net-Name}", "\code{Cache-Date}", +"\code{Latitude}", "\code{Longitude}", "\code{City}", "\code{Region}", +"\code{Country}", and "\code{Country-Code}" +\item \code{type=cymru}: The record(s) will be returned in +\href{https://team-cymru.com/community-services/ip-asn-mapping/}{Team Cymru format} +and \code{\link[=tidy_cymru]{tidy_cymru()}} can be used to post-process the response. +\item \code{type=rpsl}: The record(s) will be returned in +\href{https://tools.ietf.org/html/rfc2650}{Routing Policy Specification Language} +(RPSL) format. A 'tidy' post-processor is on the TODO list. PRs are welcome. +} + +This function can also be used to query the +\href{https://team-cymru.com/community-services/ip-asn-mapping/}{Team Cymru WHOIS} +server which supports various enhanced queries. See the link for more +information. \code{\link[=tidy_cymru]{tidy_cymru()}} can be used to post-process these responses. +} +\examples{ +whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois +whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois +whois('type=rpsl 17.253.144.10') # 'type=rpsl' is only supported on pwhois +whois('-v AS23028', "whois.cymru.com") # this only works with the Team Cymru WHOIS service +whois("17.253.144.10") +}