boB Rudis
3 years ago
17 changed files with 1024 additions and 12 deletions
@ -0,0 +1,2 @@ |
|||
YEAR: 2021 |
|||
COPYRIGHT HOLDER: Bob Rudis |
@ -0,0 +1,21 @@ |
|||
# MIT License |
|||
|
|||
Copyright (c) 2021 Bob Rudis |
|||
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
of this software and associated documentation files (the "Software"), to deal |
|||
in the Software without restriction, including without limitation the rights |
|||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
copies of the Software, and to permit persons to whom the Software is |
|||
furnished to do so, subject to the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be included in all |
|||
copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
SOFTWARE. |
@ -1,4 +1,6 @@ |
|||
# Generated by roxygen2: do not edit by hand |
|||
|
|||
import(httr) |
|||
importFrom(jsonlite,fromJSON) |
|||
export(tidy_cymru) |
|||
export(tidy_pwhois) |
|||
export(whois) |
|||
import(stringi) |
|||
|
@ -1,9 +1,17 @@ |
|||
#' ... |
|||
#' |
|||
#' pwhois |
|||
#' |
|||
#' WHOIS (pronounced as the phrase "who is") is a query and response protocol |
|||
#' that is widely used for querying databases that store the registered users |
|||
#' or assignees of an Internet resource, such as a domain name, an IP address |
|||
#' block or an autonomous system, but is also used for a wider range of other |
|||
#' information. The protocol stores and delivers database content in a human- |
|||
#' readable format. The current iteration of the WHOIS protocol was drafted |
|||
#' by the Internet Society, and is documented in RFC 3912. Tools are provided |
|||
#' to issue query requests and process query responses. |
|||
#' |
|||
#' @md |
|||
#' @name pwhois |
|||
#' @keywords internal |
|||
#' @author Bob Rudis (bob@@rud.is) |
|||
#' @import httr |
|||
#' @importFrom jsonlite fromJSON |
|||
#' @import stringi |
|||
"_PACKAGE" |
|||
|
@ -0,0 +1,52 @@ |
|||
#' Turn a Prefix WhoIs server response into a data frame
#'
#' Responses containing `Org-Record` marker lines are cut into one chunk per
#' marker; each chunk is converted separately and the pieces are row-bound.
#' Responses without such markers are converted as a single record.
#'
#' @param pwhois_raw_response the raw response from a call to [whois()]
#' @return data frame
#' @export
#' @examples
#' res <- whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois
#' tidy_pwhois(res)
tidy_pwhois <- function(pwhois_raw_response) {

  is_record_start <- grepl("Org-Record", pwhois_raw_response)

  # No markers at all: hand the whole response to the record parser.
  if (!any(is_record_start)) {
    return(process_pwhois_response(pwhois_raw_response))
  }

  # A record runs from its marker line up to the line before the next
  # marker; the last record runs to the end of the response.
  first_line <- which(is_record_start)
  last_line <- c(first_line[-1] - 1, length(pwhois_raw_response))

  records <- vector("list", length(first_line))
  for (i in seq_along(first_line)) {
    records[[i]] <- process_pwhois_response(
      pwhois_raw_response[first_line[i]:last_line[i]]
    )
  }

  bind_rows(records)

}
|||
|
|||
#' Turn a Team Cymru server response into a data frame
#'
#' The first line of the response is used as the header: it is lower-cased
#' and every `-` is replaced by a space to form the column names. Each
#' remaining line becomes one row of the result.
#'
#' @param cymru_raw_response the raw response from a call to [whois()]
#' @return data frame of character columns; it has zero rows when the
#'   response consists of a header line only
#' @export
#' @examples
#' res <- whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois
#' tidy_cymru(res)
tidy_cymru <- function(cymru_raw_response) {

  # One matrix row per response line, one column per "|"-delimited field.
  tmp <- stri_split_fixed(cymru_raw_response, "|", simplify = TRUE)
  tmp <- apply(tmp, c(1, 2), stri_trim_both)

  cols <- stri_replace_all_regex(stri_trans_tolower(tmp[1, ]), "[ -]", " ")

  # Negative indexing (rather than 2:nrow(tmp)) keeps a header-only response
  # from indexing backwards; stringsAsFactors = FALSE keeps the columns
  # character on R versions where factors were the default.
  set_names(
    as.data.frame(tmp[-1, , drop = FALSE], stringsAsFactors = FALSE),
    cols
  )

}
@ -0,0 +1,320 @@ |
|||
# NOTE: The bottom of this source file shows usage equivalents to the purrr mappers |
|||
# |
|||
# NOTE these aren't 100% equivalent to the purrr mappers but cover very common use-cases |
|||
# |
|||
# NOTE formula function (e.g. ~{}) are 100% supported |
|||
# |
|||
# NOTE: THESE DO NOT SUPPORT list EXTRACTORS |
|||
|
|||
# Attach names to an object and return it. When `object` is not supplied it
# defaults to `nm`, so the result is `nm` named by itself.
set_names <- function(object = nm, nm) {
  `names<-`(object, value = nm)
}
|||
|
|||
# Apply `.f` to every element of `.x` and return a list.
#
# .x        vector or list to iterate over
# .f        a function, a one-sided formula (body may use `.x` or `.`), or a
#           numeric/character index used to extract from each element with `[[`
# ...       extra arguments passed on to `.f` when it is a function
# .default  optional replacement for any zero-length (e.g. NULL) result
#
# Returns a list the same length as `.x`, carrying the names of `.x` if any.
map <- function(.x, .f, ..., .default) {

  default_exists <- !missing(.default)

  if (inherits(.f, "formula")) {
    # Use the formula's right-hand side directly as the function body.
    # Going through terms() splits the expression into model terms, so a
    # body such as `~ .x + 1` would be reduced to just `.x`.
    .rhs <- .f[[length(.f)]]
    .env <- environment(.f)
    .f <- function(.x, . = .x) {}
    body(.f) <- .rhs
    # Evaluate in the formula's own environment so it can see the caller's
    # variables.
    if (!is.null(.env)) environment(.f) <- .env
  }

  nm <- names(.x)

  # Swap in `.default` for empty results, but only when one was supplied.
  fill_default <- function(res) {
    if (length(res) == 0 && default_exists) .default else res
  }

  if (is.function(.f)) {

    out <- lapply(.x, function(x) fill_default(.f(x, ...)))

  } else if (is.numeric(.f) || is.character(.f)) {

    out <- lapply(.x, function(x) {
      # A failed extraction (e.g. subscript out of bounds) counts as NULL.
      res <- tryCatch(x[[.f]], error = function(e) NULL)
      fill_default(res)
    })

  } else {
    stop(
      "`.f` must be a function, a formula, or a numeric/character index",
      call. = FALSE
    )
  }

  if (length(nm) > 0) names(out) <- nm

  out

}
|||
|
|||
# Apply `.f` to the elements of `.x` and `.y` in parallel and return an
# unnamed list.
#
# .x, .y    vectors or lists iterated over together (via mapply)
# .f        a function of two arguments, or a one-sided formula whose body
#           may use `.x`, `.y` or `.`
# ...       passed to mapply() alongside `.x` and `.y`
# .default  optional replacement for any zero-length (e.g. NULL) result
#
# When `.f` is neither a function nor a formula nothing is computed and
# NULL is returned invisibly.
map2 <- function(.x, .y, .f, ..., .default) {

  default_exists <- !missing(.default)

  if (inherits(.f, "formula")) {
    # Use the formula's right-hand side directly as the function body.
    # Going through terms() splits `~ .x + .y` into separate terms, so only
    # the first one would survive.
    .rhs <- .f[[length(.f)]]
    .env <- environment(.f)
    .f <- function(.x, .y, . = .x) {}
    body(.f) <- .rhs
    # Evaluate in the formula's own environment so it can see the caller's
    # variables.
    if (!is.null(.env)) environment(.f) <- .env
  }

  if (!is.function(.f)) {
    return(invisible(NULL))
  }

  mapply(
    function(x, ...) {
      res <- .f(x, ...)
      if (length(res) == 0 && default_exists) res <- .default
      res
    },
    .x, .y,
    ...,
    SIMPLIFY = FALSE, USE.NAMES = FALSE
  )

}
|||
|
|||
# map() variant that coerces the result list to a character vector, keeping
# the names of `.x` when it has any.
#
# `.default` is declared as a formal (as map_int() does) so that it can be
# forwarded to map(); without it, any empty result failed with
# "object '.default' not found". When omitted, map() sees it as missing.
map_chr <- function(.x, .f, ..., .default) {
  nm <- names(.x)
  out <- as.character((map(.x, .f, ..., .default = .default)))
  if (length(nm) > 0) set_names(out, nm) else out
}
|||
|
|||
# map2() variant that flattens the result to a character vector.
#
# `.default` is declared as a formal so it can be forwarded to map2();
# without it, any empty result failed with "object '.default' not found".
map2_chr <- function(.x, .y, .f, ..., .default) {
  as.character(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
|||
|
|||
# map() variant that flattens the result to a logical vector, keeping the
# names of `.x` when it has any.
#
# `.default` is declared as a formal (as map_int() does) so that it can be
# forwarded to map(); without it, any empty result failed with
# "object '.default' not found".
map_lgl <- function(.x, .f, ..., .default) {
  nm <- names(.x)
  out <- as.logical(unlist(map(.x, .f, ..., .default = .default)))
  if (length(nm) > 0) set_names(out, nm) else out
}
|||
|
|||
# map2() variant that flattens the result to a logical vector.
#
# `.default` is declared as a formal so it can be forwarded to map2();
# without it, any empty result failed with "object '.default' not found".
map2_lgl <- function(.x, .y, .f, ..., .default) {
  as.logical(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
|||
|
|||
# map() variant that flattens the result to a double vector, keeping the
# names of `.x` when it has any.
#
# `.default` is declared as a formal (as map_int() does) so that it can be
# forwarded to map(); without it, any empty result failed with
# "object '.default' not found".
map_dbl <- function(.x, .f, ..., .default) {
  nm <- names(.x)
  out <- as.double(unlist(map(.x, .f, ..., .default = .default)))
  if (length(nm) > 0) set_names(out, nm) else out
}
|||
|
|||
# map2() variant that flattens the result to a double vector.
#
# `.default` is declared as a formal so it can be forwarded to map2();
# without it, any empty result failed with "object '.default' not found".
map2_dbl <- function(.x, .y, .f, ..., .default) {
  as.double(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
|||
|
|||
# map() variant that flattens the result to an integer vector. The names of
# `.x`, when present, are carried over to the output; `.default` is handed
# through to map().
map_int <- function(.x, .f, ..., .default) {
  labels <- names(.x)
  values <- as.integer(unlist(map(.x, .f, ..., .default = .default)))
  if (length(labels) == 0) {
    return(values)
  }
  set_names(values, labels)
}
|||
|
|||
# map2() variant that flattens the result to an integer vector.
#
# `.default` is declared as a formal so it can be forwarded to map2();
# without it, any empty result failed with "object '.default' not found".
map2_int <- function(.x, .y, .f, ..., .default) {
  as.integer(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
|||
|
|||
|
|||
# Map `.f` over `.x`, then stack the per-element results row-wise with
# bind_rows(); `.id` is handed straight to bind_rows().
map_df <- function(.x, .f, ..., .id=NULL) {
  bind_rows(map(.x, .f, ...), .id = .id)
}

# map_dfr is an alias for map_df.
map_dfr <- map_df
|||
|
|||
# Map `.f` over `.x`, then combine the per-element results column-wise with
# bind_cols().
map_dfc <- function(.x, .f, ...) {
  bind_cols(map(.x, .f, ...))
}
|||
|
|||
# Map `.f` over `.x` and `.y` in parallel, then stack the results row-wise
# with bind_rows(); `.id` is handed straight to bind_rows().
map2_df <- function(.x, .y, .f, ..., .id=NULL) {
  bind_rows(map2(.x, .y, .f, ...), .id = .id)
}
|||
|
|||
|
|||
# Map `.f` over `.x` and `.y` in parallel, then combine the results
# column-wise with bind_cols().
map2_dfc <- function(.x, .y, .f, ...) {
  bind_cols(map2(.x, .y, .f, ...))
}
|||
|
|||
# this has limitations and is more like 75% of dplyr::bind_rows()
# this is also orders of magnitude slower than dplyr::bind_rows()
#
# Row-bind data frames (or named lists), filling columns that are absent
# from an input with NA.
#
# ...  data frames to bind; a single argument is unwrapped, so a list of
#      data frames may be passed as one argument
# .id  optional name of a column to add that identifies the source of each
#      row (the input's name, or its position when inputs are unnamed). If
#      the name clashes with an existing column it is made unique.
#
# Returns a data frame with class c("tbl_df", "tbl", "data.frame").
bind_rows <- function(..., .id = NULL) {

  res <- list(...)

  if (length(res) == 1) res <- res[[1]]

  cols <- unique(unlist(lapply(res, names), use.names = FALSE))

  # Pick a non-clashing identifier name. (Indexing `cols` with the scalar
  # `.id %in% cols` selected every column, so `.id` was replaced by the
  # second existing column name and overwrote that column.)
  if (!is.null(.id) && .id %in% cols) {
    .id <- make.unique(c(cols, .id))[length(cols) + 1L]
  }

  id_vals <- if (is.null(names(res))) seq_along(res) else names(res)

  # options() returns the previous values, so no call to the deprecated
  # default.stringsAsFactors() is needed to restore them afterwards.
  old_opts <- options(stringsAsFactors = FALSE)
  on.exit(options(old_opts), add = TRUE)

  padded <- Map(
    function(.x, id_val) {
      for (missing_col in setdiff(cols, names(.x))) {
        .x[[missing_col]] <- rep(NA, length(.x[[1]]))
      }
      if (!is.null(.id)) .x[[.id]] <- id_val
      .x
    },
    res, id_vals
  )

  # unname() so input names are never mistaken for rbind.data.frame()
  # arguments; row names are reset below anyway.
  out <- do.call(rbind.data.frame, unname(padded))

  rownames(out) <- NULL

  class(out) <- c("tbl_df", "tbl", "data.frame")

  out

}
|||
|
|||
# Column-bind data frames, de-duplicating repeated column names.
#
# ...  data frames to bind; a single plain list argument is unwrapped, so a
#      list of data frames may be passed as one argument
#
# Stops with an error naming the first input whose row count differs from
# the first input's. Returns a data frame with class
# c("tbl_df", "tbl", "data.frame").
bind_cols <- function(...) {

  res <- list(...)

  # Unwrap before validating: checking first meant the row-count test was
  # skipped whenever the inputs arrived as a single list.
  if (length(res) == 1 && is.list(res[[1]]) && !is.data.frame(res[[1]])) {
    res <- res[[1]]
  }

  n_rows <- vapply(res, NROW, integer(1))
  row_mismatch <- n_rows != n_rows[1]

  if (any(row_mismatch)) {
    first_mismatch_pos <- which(row_mismatch)[1]
    stop(paste0("Argument ", first_mismatch_pos,
                " must be length ", n_rows[1],
                ", not ", n_rows[first_mismatch_pos]))
  }

  col_names <- as.character(unlist(lapply(res, names), use.names = FALSE))
  col_names <- make.unique(col_names, sep = "")

  # options() returns the previous values, so no call to the deprecated
  # default.stringsAsFactors() is needed to restore them afterwards.
  old_opts <- options(stringsAsFactors = FALSE)
  on.exit(options(old_opts), add = TRUE)

  out <- do.call(cbind.data.frame, res)

  names(out) <- col_names
  rownames(out) <- NULL

  class(out) <- c("tbl_df", "tbl", "data.frame")

  out

}
|||
|
|||
|
|||
# set.seed(1) |
|||
# 1:10 %>% |
|||
# map(rnorm, n = 10) %>% |
|||
# map_dbl(mean) |
|||
# |
|||
# set.seed(1) |
|||
# 1:10 %>% |
|||
# purrr::map(rnorm, n = 10) %>% |
|||
# purrr::map_dbl(mean) |
|||
# |
|||
# |
|||
# # Or use an anonymous function |
|||
# set.seed(1) |
|||
# 1:10 %>% |
|||
# map(function(x) rnorm(10, x)) |
|||
# |
|||
# set.seed(1) |
|||
# 1:10 %>% |
|||
# purrr::map(function(x) rnorm(10, x)) |
|||
# |
|||
# # Or a formula |
|||
# set.seed(1) |
|||
# 1:10 %>% |
|||
# map(~ rnorm(10, .x)) |
|||
# |
|||
# set.seed(1) |
|||
# 1:10 %>% |
|||
# purrr::map(~ rnorm(10, .x)) |
|||
# |
|||
# # Extract by name or position |
|||
# # .default specifies value for elements that are missing or NULL |
|||
# l1 <- list(list(a = 1L), list(a = NULL, b = 2L), list(b = 3L)) |
|||
# l1 %>% map("a", .default = "???") |
|||
# l1 %>% purrr::map("a", .default = "???") |
|||
# |
|||
# l1 %>% map_int("b", .default = NA) |
|||
# l1 %>% purrr::map_int("b", .default = NA) |
|||
# |
|||
# l1 %>% map_int(2, .default = NA) |
|||
# l1 %>% purrr::map_int(2, .default = NA) |
|||
# |
|||
# # Supply multiple values to index deeply into a list |
|||
# l2 <- list( |
|||
# list(num = 1:3, letters[1:3]), |
|||
# list(num = 101:103, letters[4:6]), |
|||
# list() |
|||
# ) |
|||
# l2 %>% map(c(2, 2)) |
|||
# l2 %>% purrr::map(c(2, 2)) |
|||
# |
|||
# |
|||
# # A more realistic example: split a data frame into pieces, fit a |
|||
# # model to each piece, summarise and extract R^2 |
|||
# mtcars %>% |
|||
# split(.$cyl) %>% |
|||
# map(~ lm(mpg ~ wt, data = .x)) %>% |
|||
# map(summary) %>% |
|||
# map_dbl("r.squared") |
|||
# |
|||
# mtcars %>% |
|||
# split(.$cyl) %>% |
|||
# purrr::map(~ lm(mpg ~ wt, data = .x)) %>% |
|||
# purrr::map(summary) %>% |
|||
# purrr::map_dbl("r.squared") |
|||
# |
|||
# |
|||
# # Use map_lgl(), map_dbl(), etc to reduce to a vector. |
|||
# # * list |
|||
# mtcars %>% map(sum) |
|||
# mtcars %>% purrr::map(sum) |
|||
# # * vector |
|||
# mtcars %>% map_dbl(sum) |
|||
# mtcars %>% purrr::map_dbl(sum) |
|||
# |
|||
# # If each element of the output is a data frame, use |
|||
# # map_dfr to row-bind them together: |
|||
# mtcars %>% |
|||
# split(.$cyl) %>% |
|||
# map(~ lm(mpg ~ wt, data = .x)) %>% |
|||
# map_dfr(~ as.data.frame(t(as.matrix(coef(.))))) |
|||
# |
|||
# mtcars %>% |
|||
# split(.$cyl) %>% |
|||
# purrr::map(~ lm(mpg ~ wt, data = .x)) %>% |
|||
# purrr::map_dfr(~ as.data.frame(t(as.matrix(coef(.))))) |
@ -0,0 +1,90 @@ |
|||
# Less cool counterparts to purrr's side-effect capture-rs |
|||
# |
|||
# Most of the helper functions are 100% from output.R in purrr repo |
|||
# |
|||
# @param quiet Hide errors (`TRUE`, the default), or display them |
|||
# as they occur? |
|||
# @param otherwise Default value to use when an error occurs. |
|||
# |
|||
# @return `safely`: wrapped function instead returns a list with |
|||
# components `result` and `error`. One value is always `NULL`. |
|||
# |
|||
# `quietly`: wrapped function instead returns a list with components |
|||
# `result`, `output`, `messages` and `warnings`. |
|||
# |
|||
# `possibly`: wrapped function uses a default value (`otherwise`) |
|||
# whenever an error occurs. |
|||
# Wrap `.f` so that calling it yields whatever capture_error() returns for
# the call, given `otherwise` and `quiet`.
safely <- function(.f, otherwise = NULL, quiet = TRUE) {
  wrapped <- function(...) {
    capture_error(.f(...), otherwise, quiet)
  }
  wrapped
}
|||
|
|||
# Wrap `.f` so that calling it yields whatever capture_output() returns for
# the call.
quietly <- function(.f) {
  wrapped <- function(...) {
    capture_output(.f(...))
  }
  wrapped
}
|||
|
|||
# Wrap `.f` so that an error yields `otherwise` instead of propagating.
# With `quiet = FALSE` the error message is reported via message(). A user
# interrupt is converted into an error.
possibly <- function(.f, otherwise, quiet = TRUE) {

  # Evaluate the fallback now so a broken `otherwise` fails at wrap time.
  force(otherwise)

  on_error <- function(e) {
    if (!quiet) {
      message("Error: ", e$message)
    }
    otherwise
  }

  on_interrupt <- function(e) {
    stop("Terminated by user", call. = FALSE)
  }

  function(...) {
    tryCatch(.f(...), error = on_error, interrupt = on_interrupt)
  }

}
|||
|
|||
# Evaluate `code` and return list(result, error). On success `error` is
# NULL; on error `result` is `otherwise` and `error` is the condition
# object. With `quiet = FALSE` the error message is reported via message().
# A user interrupt is converted into an error.
capture_error <- function(code, otherwise = NULL, quiet = TRUE) {

  on_error <- function(e) {
    if (!quiet) {
      message("Error: ", e$message)
    }
    list(result = otherwise, error = e)
  }

  on_interrupt <- function(e) {
    stop("Terminated by user", call. = FALSE)
  }

  tryCatch(
    list(result = code, error = NULL),
    error = on_error,
    interrupt = on_interrupt
  )

}
|||
|
|||
# Evaluate `code` while diverting printed output and muffling warnings and
# messages. Returns a list with the value of `code` (`result`), the diverted
# output joined by newlines (`output`), and the texts of the collected
# `warnings` and `messages`.
capture_output <- function(code) {

  seen_warnings <- character()
  seen_messages <- character()

  # Divert standard output into an anonymous file connection; the diversion
  # is undone and the connection closed when the function exits.
  sink_con <- file()
  sink(sink_con)
  on.exit({
    sink()
    close(sink_con)
  })

  result <- withCallingHandlers(
    code,
    warning = function(w) {
      seen_warnings <<- c(seen_warnings, w$message)
      invokeRestart("muffleWarning")
    },
    message = function(m) {
      seen_messages <<- c(seen_messages, m$message)
      invokeRestart("muffleMessage")
    }
  )

  printed <- readLines(sink_con, warn = FALSE)

  list(
    result = result,
    output = paste0(printed, collapse = "\n"),
    warnings = seen_warnings,
    messages = seen_messages
  )

}
@ -0,0 +1,40 @@ |
|||
# socketConnection() wrapped so that a failure yields "" instead of an error.
open_socket <- possibly(.f = socketConnection, otherwise = "")

# close() wrapped so that a failure is captured instead of raised.
close_socket <- safely(.f = close)
|||
|
|||
# Convert one block of "Key: value" response lines into a one-row data frame.
#
# resp  character vector of response lines, each of the form "Key: value"
#
# Keys are lower-cased and "-" is replaced by "_" to form column names.
# Columns with recognised names are then converted from text:
#   route_originated_ts, cache_date -> anytime::anytime() of the numeric value
#   latitude, longitude             -> numeric
#   modify_date, create_date        -> POSIXct parsed as "%b %d %Y %H:%M:%S"
#   can_allocate                    -> logical, via integer
# All other columns stay character.
process_pwhois_response <- function(resp) {

  # Split on the first ": " only, so values that themselves contain ": "
  # are kept whole instead of spilling into extra columns.
  tmp <- stri_split_fixed(resp, ": ", n = 2, simplify = TRUE)
  cols <- stri_replace_all_fixed(stri_trans_tolower(tmp[, 1]), "-", "_")

  # stringsAsFactors = FALSE keeps values as text on R versions where
  # factors were the default; as.numeric() on a factor returns level codes.
  out <- as.data.frame(
    as.list(setNames(tmp[, 2], cols)),
    stringsAsFactors = FALSE
  )

  numeric_to_time <- function(x) anytime::anytime(as.numeric(x))
  stamp_to_time <- function(x) as.POSIXct(x, format = "%b %d %Y %H:%M:%S")

  converters <- list(
    route_originated_ts = numeric_to_time,
    cache_date = numeric_to_time,
    latitude = as.numeric,
    # Each field converts its own column; longitude was previously filled
    # from the latitude column.
    longitude = as.numeric,
    modify_date = stamp_to_time,
    create_date = stamp_to_time,
    can_allocate = function(x) as.logical(as.integer(x))
  )

  for (field in intersect(names(converters), names(out))) {
    out[[field]] <- converters[[field]](out[[field]])
  }

  out

}
@ -0,0 +1,85 @@ |
|||
#' Issue a 'WHOIS' query and retrieve the response
#'
#' WHOIS (pronounced as the phrase "who is") is a query and response protocol
#' that is widely used for querying databases that store the registered users
#' or assignees of an Internet resource, such as a domain name, an IP address
#' block or an autonomous system, but is also used for a wider range of other
#' information. The protocol stores and delivers database content in a
#' human-readable format. The current iteration of the WHOIS protocol was
#' drafted by the Internet Society, and is documented in RFC 3912.
#'
#' The default server — `whois.pwhois.org` — is [The Prefix WhoIs Project](https://pwhois.org/)
#' WHOIS server which provides a whois-compatible client and server framework for
#' disclosing various up-to-date routing information. Instead of using
#' registrar-originated network information (which is often unspecific or
#' inaccurate), Prefix WhoIs uses the Internet's global routing table as
#' gleaned from a number of routing peers around the world. Other sources of
#' information, such as imported data from ARIN are also supported.
#'
#' The pwhois service supports special query types including:
#'
#' - `registry key=value`: you can search the pwhois database for any registry
#'   field. The ones available as of the date of this package are:
#'   "`Origin-AS`", "`Prefix`", "`AS-Path`",
#'   "`AS-Org-Name`", "`Org-Name`", "`Net-Name`", "`Cache-Date`",
#'   "`Latitude`", "`Longitude`", "`City`", "`Region`",
#'   "`Country`", and "`Country-Code`"
#' - `type=cymru`: The record(s) will be returned in
#'   [Team Cymru format](https://team-cymru.com/community-services/ip-asn-mapping/)
#'   and [tidy_cymru()] can be used to post-process the response.
#' - `type=rpsl`: The record(s) will be returned in
#'   [Routing Policy Specification Language](https://tools.ietf.org/html/rfc2650)
#'   (RPSL) format. A 'tidy' post-processor is on the TODO list. PRs are welcome.
#'
#' This function can also be used to query the
#' [Team Cymru WHOIS](https://team-cymru.com/community-services/ip-asn-mapping/)
#' server which supports various enhanced queries. See the link for more
#' information. [tidy_cymru()] can be used to post-process these responses.
#'
#' @param query <chr> the text query to send to the destination whois `host`;
#'        only the first element is used
#' @param host <chr> the WHOIS host to query; This defaults to
#'        [The Prefix WhoIs Project](https://pwhois.org/) WHOIS server
#'        (a.k.a. "pwhois") as it allows for more robust queries to be performed.
#' @param port <int> TCP port the `host` WHOIS server is running on;
#'        defaults to `43` (the standard WHOIS port)
#' @param timeout connection timeout; see [connections]
#' @return raw character response from the `host` WHOIS server. Use
#'         the built-in tidying functions to post-process the response.
#'         If the connection cannot be opened a warning is issued and
#'         `NA_character_` is returned.
#' @export
#' @examples
#' whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois
#' whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois
#' whois('type=rpsl 17.253.144.10') # 'type=rpsl' is only supported on pwhois
#' whois('-v AS23028', "whois.cymru.com") # this only works with the Team Cymru WHOIS service
#' whois("17.253.144.10")
whois <- function(query, host = "whois.pwhois.org", port = 43L,
                  timeout = getOption("timeout")) {

  query <- as.character(query[1])
  host <- as.character(host[1])
  port <- as.integer(port[1])

  # The query is sent as a single newline-terminated line.
  if (!endsWith(query, "\n")) query <- sprintf("%s\n", query)

  con <- open_socket(
    host = host,
    port = port,
    blocking = TRUE,
    open = "r+",
    timeout = timeout
  )

  # open_socket() hands back "" rather than a connection on failure.
  if (!inherits(con, "sockconn")) {
    # Report the host that could not be reached (the message previously
    # interpolated the query text, trailing newline included).
    warning(sprintf("Error opening connection to %s:%s", host, port))
    return(NA_character_)
  }

  on.exit(close_socket(con), add = TRUE)

  cat(query, file = con)

  readLines(con)

}
@ -0,0 +1,221 @@ |
|||
|
|||
[![Project Status: Active – The project has reached a stable, usable |
|||
state and is being actively |
|||
developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) |
|||
[![Signed |
|||
by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr) |
|||
![Signed commit |
|||
%](https://img.shields.io/badge/Signed_Commits-100%25-lightgrey.svg) |
|||
[![Linux build |
|||
Status](https://travis-ci.org/hrbrmstr/pwhois.svg?branch=master)](https://travis-ci.org/hrbrmstr/pwhois) |
|||
![Minimal R |
|||
Version](https://img.shields.io/badge/R%3E%3D-3.5.0-blue.svg) |
|||
![License](https://img.shields.io/badge/License-MIT-blue.svg) |
|||
|
|||
# pwhois |
|||
|
|||
Issue ‘WHOIS’ Queries and Process Various ‘WHOIS’ Responses |
|||
|
|||
## Description |
|||
|
|||
WHOIS (pronounced as the phrase “who is”) is a query and response |
|||
protocol that is widely used for querying databases that store the |
|||
registered users or assignees of an Internet resource, such as a domain |
|||
name, an IP address block or an autonomous system, but is also used for |
|||
a wider range of other information. The protocol stores and delivers |
|||
database content in a human-readable format. The current iteration of |
|||
the WHOIS protocol was drafted by the Internet Society, and is |
|||
documented in RFC 3912. Tools are provided to issue query requests and |
|||
process query responses. |
|||
|
|||
## What’s Inside The Tin |
|||
|
|||
The following functions are implemented: |
|||
|
|||
- `tidy_cymru`: Turn a Team Cymru server response into a data frame |
|||
- `tidy_pwhois`: Turn a Prefix WhoIs server response into a data frame |
|||
- `whois`: Issue a ‘WHOIS’ query and retrieve the response |
|||
|
|||
## Installation |
|||
|
|||
``` r |
|||
remotes::install_git("https://git.rud.is/hrbrmstr/pwhois.git") |
|||
# or |
|||
remotes::install_gitlab("hrbrmstr/pwhois") |
|||
# or |
|||
remotes::install_bitbucket("hrbrmstr/pwhois") |
|||
``` |
|||
|
|||
NOTE: To use the ‘remotes’ install options you will need to have the |
|||
[{remotes} package](https://github.com/r-lib/remotes) installed. |
|||
|
|||
## Usage |
|||
|
|||
``` r |
|||
library(pwhois) |
|||
|
|||
# current version |
|||
packageVersion("pwhois") |
|||
## [1] '0.1.0' |
|||
``` |
|||
|
|||
A basic query: |
|||
|
|||
``` {ex-01} |
|||
(res <- whois("17.253.144.10")) |
|||
|
|||
str(tidy_pwhois(res), 1) |
|||
``` |
|||
|
|||
The Prefix WhoIs project supports some advanced queries including the |
|||
ability to search on any of their WHOIS registry fields: |
|||
|
|||
``` r |
|||
(res <- whois("registry org-name=apple, inc")) |
|||
## [1] "Org-Record: 0" "Org-ID: GRNA" |
|||
## [3] "Org-Name: Green Apple, Inc." "Can-Allocate: 0" |
|||
## [5] "Street-1: 5222 33rd Street SE" "City: Grand Rapids" |
|||
## [7] "State: MI" "Postal-Code: 49512-2070" |
|||
## [9] "Country: US" "Register-Date: 1997-12-19" |
|||
## [11] "Update-Date: 2011-09-24" "Create-Date: Nov 23 2005 02:48:10" |
|||
## [13] "Modify-Date: Feb 13 2021 01:35:47" "Admin-0-Handle: INE-ARIN" |
|||
## [15] "NOC-0-Handle: INE-ARIN" "Abuse-0-Handle: INE-ARIN" |
|||
## [17] "Tech-0-Handle: INE-ARIN" "Referral-Server: rwhois://rwhois.iserv.net:4321" |
|||
## [19] "Comment: http://www.greenapple.com" "Org-Record: 1" |
|||
## [21] "Org-ID: Network of Apple, Inc." "Org-Name: Network of Apple, Inc." |
|||
## [23] "Can-Allocate: 1" "Register-Date: 2011-03-02" |
|||
## [25] "Update-Date: 2011-03-02" "Create-Date: Mar 01 2011 13:07:13" |
|||
## [27] "Modify-Date: Mar 01 2011 13:07:13" "Org-Record: 2" |
|||
## [29] "Org-ID: C03342993" "Org-Name: APPLE, INC - CORP PROJECT" |
|||
## [31] "Can-Allocate: 0" "Street-1: 1 INFINITE LOOP # MS60" |
|||
## [33] "City: CUPERTINO" "State: CA" |
|||
## [35] "Postal-Code: 95014-2083" "Country: US" |
|||
## [37] "Register-Date: 2013-03-14" "Update-Date: 2013-03-14" |
|||
## [39] "Create-Date: May 30 2013 16:03:51" "Modify-Date: Feb 13 2021 01:35:47" |
|||
## [41] "Org-Record: 3" "Org-ID: C03342938" |
|||
## [43] "Org-Name: APPLE, INC - CORP PROJECT" "Can-Allocate: 0" |
|||
## [45] "Street-1: 1 INFINITE LOOP # MS60" "City: CUPERTINO" |
|||
## [47] "State: CA" "Postal-Code: 95014-2083" |
|||
## [49] "Country: US" "Register-Date: 2013-03-14" |
|||
## [51] "Update-Date: 2013-03-14" "Create-Date: May 30 2013 16:03:51" |
|||
## [53] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 4" |
|||
## [55] "Org-ID: C03342954" "Org-Name: APPLE, INC - CORP PROJECT" |
|||
## [57] "Can-Allocate: 0" "Street-1: 1 INFINITE LOOP # MS60" |
|||
## [59] "City: CUPERTINO" "State: CA" |
|||
## [61] "Postal-Code: 95014-2083" "Country: US" |
|||
## [63] "Register-Date: 2013-03-14" "Update-Date: 2013-03-14" |
|||
## [65] "Create-Date: May 30 2013 16:03:51" "Modify-Date: Feb 13 2021 01:35:47" |
|||
## [67] "Org-Record: 5" "Org-ID: Network of Apple, Inc." |
|||
## [69] "Org-Name: Network of Apple, Inc." "Can-Allocate: 1" |
|||
## [71] "Register-Date: 2013-05-31" "Update-Date: 2013-05-31" |
|||
## [73] "Create-Date: May 30 2013 18:55:56" "Modify-Date: May 30 2013 18:55:56" |
|||
## [75] "Org-Record: 6" "Org-ID: C04676970" |
|||
## [77] "Org-Name: APPLE, INC - CORP PROJECT" "Can-Allocate: 0" |
|||
## [79] "Street-1: 1 INFINITE LOOP # MS60" "City: CUPERTINO" |
|||
## [81] "State: CA" "Postal-Code: 95014-2083" |
|||
## [83] "Country: US" "Register-Date: 2013-08-21" |
|||
## [85] "Update-Date: 2013-08-21" "Create-Date: Aug 23 2013 02:32:19" |
|||
## [87] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 7" |
|||
## [89] "Org-ID: C04716251" "Org-Name: APPLE, INC - RETAIL PROJECT" |
|||
## [91] "Can-Allocate: 0" "Street-1: 1500 POLARIS PKWY" |
|||
## [93] "City: COLUMBUS" "State: OH" |
|||
## [95] "Postal-Code: 43240" "Country: US" |
|||
## [97] "Register-Date: 2013-09-30" "Update-Date: 2013-09-30" |
|||
## [99] "Create-Date: Oct 02 2013 01:52:33" "Modify-Date: Mar 23 2015 08:01:17" |
|||
## [101] "Org-Record: 8" "Org-ID: Apple, Inc." |
|||
## [103] "Org-Name: Apple, Inc." "Can-Allocate: 1" |
|||
## [105] "Register-Date: 2014-10-19" "Update-Date: 2014-10-19" |
|||
## [107] "Create-Date: Oct 18 2014 05:49:09" "Modify-Date: Oct 18 2014 05:49:09" |
|||
## [109] "Org-Record: 9" "Org-ID: C06092907" |
|||
## [111] "Org-Name: Apple, Inc IP - MRC" "Can-Allocate: 0" |
|||
## [113] "Street-1: 21625 Gresham Dr" "City: Ashburn" |
|||
## [115] "State: VA" "Postal-Code: 20147" |
|||
## [117] "Country: US" "Register-Date: 2016-04-14" |
|||
## [119] "Update-Date: 2016-06-21" "Create-Date: Dec 13 2016 10:45:20" |
|||
## [121] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 10" |
|||
## [123] "Org-ID: C07098191" "Org-Name: APPLE, INC.-COLOCATION - APPLE, INC." |
|||
## [125] "Can-Allocate: 0" "Street-1: 340 CUMBERLAND AVE" |
|||
## [127] "City: PORTLAND" "State: ME" |
|||
## [129] "Postal-Code: 04101" "Country: US" |
|||
## [131] "Register-Date: 2018-10-24" "Update-Date: 2018-10-24" |
|||
## [133] "Create-Date: Oct 26 2018 02:13:03" "Modify-Date: Feb 13 2021 01:35:47" |
|||
|
|||
tibble::as_tibble(tidy_pwhois(res)) |
|||
## # A tibble: 11 x 19 |
|||
## org_record org_id org_name can_allocate street_1 city state postal_code country register_date update_date |
|||
## <chr> <chr> <chr> <lgl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> |
|||
## 1 0 GRNA Green A… FALSE 5222 33… Gran… MI 49512-2070 US 1997-12-19 2011-09-24 |
|||
## 2 1 Netwo… Network… TRUE <NA> <NA> <NA> <NA> <NA> 2011-03-02 2011-03-02 |
|||
## 3 2 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14 |
|||
## 4 3 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14 |
|||
## 5 4 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14 |
|||
## 6 5 Netwo… Network… TRUE <NA> <NA> <NA> <NA> <NA> 2013-05-31 2013-05-31 |
|||
## 7 6 C0467… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-08-21 2013-08-21 |
|||
## 8 7 C0471… APPLE, … FALSE 1500 PO… COLU… OH 43240 US 2013-09-30 2013-09-30 |
|||
## 9 8 Apple… Apple, … TRUE <NA> <NA> <NA> <NA> <NA> 2014-10-19 2014-10-19 |
|||
## 10 9 C0609… Apple, … FALSE 21625 G… Ashb… VA 20147 US 2016-04-14 2016-06-21 |
|||
## 11 10 C0709… APPLE, … FALSE 340 CUM… PORT… ME 04101 US 2018-10-24 2018-10-24 |
|||
## # … with 8 more variables: create_date <dttm>, modify_date <dttm>, admin_0_handle <chr>, noc_0_handle <chr>, |
|||
## # abuse_0_handle <chr>, tech_0_handle <chr>, referral_server <chr>, comment <chr> |
|||
``` |
|||
|
|||
and can return results in different formats including Team Cymru tables: |
|||
|
|||
``` r |
|||
(res <- whois('type=cymru 17.253.144.10')) |
|||
## [1] "AS | IP | ORG NAME | CC | NET NAME | AS ORG NAME" |
|||
## [2] "714 | 17.253.144.10 | Apple Inc. | US | APPLE-WWNET | Apple Inc." |
|||
|
|||
str(tidy_cymru(res), 1) |
|||
## 'data.frame': 1 obs. of 6 variables: |
|||
## $ as : chr "714" |
|||
## $ ip : chr "17.253.144.10" |
|||
## $ org name : chr "Apple Inc." |
|||
## $ cc : chr "US" |
|||
## $ net name : chr "APPLE-WWNET" |
|||
## $ as org name: chr "Apple Inc." |
|||
``` |
|||
|
|||
That `tidy_cymru()` function can be used on responses from the Team |
|||
Cymru WHOIS service as well: |
|||
|
|||
``` r |
|||
(res <- whois('-v AS23028', "whois.cymru.com")) |
|||
## [1] "AS | CC | Registry | Allocated | AS Name" "23028 | US | arin | 2002-01-04 | TEAM-CYMRU, US" |
|||
|
|||
str(tidy_cymru(res)) |
|||
## 'data.frame': 1 obs. of 5 variables: |
|||
## $ as : chr "23028" |
|||
## $ cc : chr "US" |
|||
## $ registry : chr "arin" |
|||
## $ allocated: chr "2002-01-04" |
|||
## $ as name : chr "TEAM-CYMRU, US" |
|||
``` |
|||
|
|||
The Prefix WhoIs project can also return responses in Routing Policy |
|||
Specification Language (RPSL), but there is no ‘tidy’ function for this |
|||
format yet: |
|||
|
|||
``` r |
|||
whois('type=rpsl 17.253.144.10') |
|||
## [1] "Origin: AS714" "Route: 17.253.144.0/21" |
|||
## [3] "Date: 20210213" "AS-Org-Name: Apple Inc." |
|||
## [5] "Org-Name: Apple Inc." "Net-Name: APPLE-WWNET" |
|||
## [7] "Source: PWHOIS Server 208.74.248.120:43 at 20210213" |
|||
``` |
|||
|
|||
## pwhois Metrics |
|||
|
|||
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | |
|||
|:-----|---------:|-----:|----:|-----:|------------:|----:|---------:|-----:| |
|||
| R | 7 | 0.44 | 303 | 0.47 | 110 | 0.4 | 196 | 0.41 | |
|||
| Rmd | 1 | 0.06 | 17 | 0.03 | 29 | 0.1 | 44 | 0.09 | |
|||
| SUM | 8 | 0.50 | 320 | 0.50 | 139 | 0.5 | 240 | 0.50 | |
|||
|
|||
clock Package Metrics for pwhois |
|||
|
|||
## Code of Conduct |
|||
|
|||
Please note that this project is released with a Contributor Code of |
|||
Conduct. By participating in this project you agree to abide by its |
|||
terms. |
@ -0,0 +1,21 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/tidy.R |
|||
\name{tidy_cymru} |
|||
\alias{tidy_cymru} |
|||
\title{Turn a Team Cymru server response into a data frame} |
|||
\usage{ |
|||
tidy_cymru(cymru_raw_response) |
|||
} |
|||
\arguments{ |
|||
\item{cymru_raw_response}{the raw response from a call to \code{\link[=whois]{whois()}}} |
|||
} |
|||
\value{ |
|||
data frame |
|||
} |
|||
\description{ |
|||
Turn a Team Cymru server response into a data frame |
|||
} |
|||
\examples{ |
|||
res <- whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois |
|||
tidy_cymru(res) |
|||
} |
@ -0,0 +1,21 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/tidy.R |
|||
\name{tidy_pwhois} |
|||
\alias{tidy_pwhois} |
|||
\title{Turn a Prefix WhoIs server response into a data frame} |
|||
\usage{ |
|||
tidy_pwhois(pwhois_raw_response) |
|||
} |
|||
\arguments{ |
|||
\item{pwhois_raw_response}{the raw response from a call to \code{\link[=whois]{whois()}}} |
|||
} |
|||
\value{ |
|||
data frame |
|||
} |
|||
\description{ |
|||
Turn a Prefix WhoIs server response into a data frame |
|||
} |
|||
\examples{ |
|||
res <- whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois |
|||
tidy_pwhois(res) |
|||
} |
@ -0,0 +1,75 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/whois.R |
|||
\name{whois} |
|||
\alias{whois} |
|||
\title{Issue a 'WHOIS' query and retrieve the response} |
|||
\usage{ |
|||
whois( |
|||
query, |
|||
host = "whois.pwhois.org", |
|||
port = 43L, |
|||
timeout = getOption("timeout") |
|||
) |
|||
} |
|||
\arguments{ |
|||
\item{query}{<chr> the text query to send to the destination whois \code{host}} |
|||
|
|||
\item{host}{<chr> the WHOIS host to query; this defaults to |
|||
\href{https://pwhois.org/}{The Prefix WhoIs Project} WHOIS server |
|||
(a.k.a. "pwhois") as it allows for more robust queries to be performed.} |
|||
|
|||
\item{port}{<int> TCP port the \code{host} WHOIS server is running on; |
|||
defaults to \code{43} (the standard WHOIS port)} |
|||
|
|||
\item{timeout}{connection timeout; see \link{connections}} |
|||
} |
|||
\value{ |
|||
raw character response from the \code{host} WHOIS server. Use |
|||
the built-in tidying functions to post-process the response. |
|||
} |
|||
\description{ |
|||
WHOIS (pronounced as the phrase "who is") is a query and response protocol |
|||
that is widely used for querying databases that store the registered users |
|||
or assignees of an Internet resource, such as a domain name, an IP address |
|||
block or an autonomous system, but is also used for a wider range of other |
|||
information. The protocol stores and delivers database content in a human- |
|||
readable format. The current iteration of the WHOIS protocol was drafted |
|||
by the Internet Society, and is documented in RFC 3912. |
|||
} |
|||
\details{ |
|||
The default server — \code{whois.pwhois.org} — is \href{https://pwhois.org/}{The Prefix WhoIs Project} |
|||
WHOIS server which provides a whois-compatible client and server framework for |
|||
disclosing various up-to-date routing information. Instead of using |
|||
registrar-originated network information (which is often unspecific or |
|||
inaccurate), Prefix WhoIs uses the Internet's global routing table as |
|||
gleaned from a number of routing peers around the world. Other sources of |
|||
information, such as imported data from ARIN, are also supported. |
|||
|
|||
The pwhois service supports special query types including: |
|||
\itemize{ |
|||
\item \verb{registry key=value}: you can search the pwhois database for any registry |
|||
field. The fields available as of the date of this |
|||
package are: "\code{Origin-AS}", "\code{Prefix}", "\code{AS-Path}", |
|||
"\code{AS-Org-Name}", "\code{Org-Name}", "\code{Net-Name}", "\code{Cache-Date}", |
|||
"\code{Latitude}", "\code{Longitude}", "\code{City}", "\code{Region}", |
|||
"\code{Country}", and "\code{Country-Code}" |
|||
\item \code{type=cymru}: The record(s) will be returned in |
|||
\href{https://team-cymru.com/community-services/ip-asn-mapping/}{Team Cymru format} |
|||
and \code{\link[=tidy_cymru]{tidy_cymru()}} can be used to post-process the response. |
|||
\item \code{type=rpsl}: The record(s) will be returned in |
|||
\href{https://tools.ietf.org/html/rfc2650}{Routing Policy Specification Language} |
|||
(RPSL) format. A 'tidy' post-processor is on the TODO list. PRs are welcome. |
|||
} |
|||
|
|||
This function can also be used to query the |
|||
\href{https://team-cymru.com/community-services/ip-asn-mapping/}{Team Cymru WHOIS} |
|||
server which supports various enhanced queries. See the link for more |
|||
information. \code{\link[=tidy_cymru]{tidy_cymru()}} can be used to post-process these responses. |
|||
} |
|||
\examples{ |
|||
whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois |
|||
whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois |
|||
whois('type=rpsl 17.253.144.10') # 'type=rpsl' is only supported on pwhois |
|||
whois('-v AS23028', "whois.cymru.com") # this only works with the Team Cymru WHOIS service |
|||
whois("17.253.144.10") |
|||
} |
Loading…
Reference in new issue