Browse Source

initial commit

master
boB Rudis 2 months ago
parent
commit
28aff2d54a
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
17 changed files with 1024 additions and 12 deletions
  1. +1
    -0
      .Rbuildignore
  2. +11
    -4
      DESCRIPTION
  3. +2
    -0
      LICENSE
  4. +21
    -0
      LICENSE.md
  5. +4
    -2
      NAMESPACE
  6. +12
    -4
      R/pwhois-package.R
  7. +52
    -0
      R/tidy.R
  8. +320
    -0
      R/utils-mappers.R
  9. +90
    -0
      R/utils-safely.R
  10. +40
    -0
      R/utils.R
  11. +85
    -0
      R/whois.R
  12. +39
    -0
      README.Rmd
  13. +221
    -0
      README.md
  14. +9
    -2
      man/pwhois.Rd
  15. +21
    -0
      man/tidy_cymru.Rd
  16. +21
    -0
      man/tidy_pwhois.Rd
  17. +75
    -0
      man/whois.Rd

+ 1
- 0
.Rbuildignore View File

@ -19,3 +19,4 @@
^CRAN-RELEASE$
^appveyor\.yml$
^tools$
^LICENSE\.md$

+ 11
- 4
DESCRIPTION View File

@ -1,6 +1,6 @@
Package: pwhois
Type: Package
Title: pwhois title goes here otherwise CRAN checks fail
Title: Issue 'WHOIS' Queries and Process Various 'WHOIS' Responses
Version: 0.1.0
Date: 2021-02-13
Authors@R: c(
@ -8,17 +8,24 @@ Authors@R: c(
comment = c(ORCID = "0000-0001-5670-2640"))
)
Maintainer: Bob Rudis <bob@rud.is>
Description: A good description goes here otherwise CRAN checks fail.
Description: WHOIS (pronounced as the phrase "who is") is a query and response protocol
that is widely used for querying databases that store the registered users
or assignees of an Internet resource, such as a domain name, an IP address
block or an autonomous system, but is also used for a wider range of other
information. The protocol stores and delivers database content in a human-
readable format. The current iteration of the WHOIS protocol was drafted
by the Internet Society, and is documented in RFC 3912. Tools are provided
to issue query requests and process query responses.
URL: https://git.rud.is/hrbrmstr/pwhois
BugReports: https://git.rud.is/hrbrmstr/pwhois/issues
Encoding: UTF-8
License: AGPL
License: MIT + file LICENSE
Suggests:
covr, tinytest
Depends:
R (>= 3.5.0)
Imports:
httr,
stringi,
jsonlite
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1

+ 2
- 0
LICENSE View File

@ -0,0 +1,2 @@
YEAR: 2021
COPYRIGHT HOLDER: Bob Rudis

+ 21
- 0
LICENSE.md View File

@ -0,0 +1,21 @@
# MIT License
Copyright (c) 2021 Bob Rudis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

+ 4
- 2
NAMESPACE View File

@ -1,4 +1,6 @@
# Generated by roxygen2: do not edit by hand
import(httr)
importFrom(jsonlite,fromJSON)
export(tidy_cymru)
export(tidy_pwhois)
export(whois)
import(stringi)

+ 12
- 4
R/pwhois-package.R View File

@ -1,9 +1,17 @@
#' ...
#'
#' pwhois
#'
#' WHOIS (pronounced as the phrase "who is") is a query and response protocol
#' that is widely used for querying databases that store the registered users
#' or assignees of an Internet resource, such as a domain name, an IP address
#' block or an autonomous system, but is also used for a wider range of other
#' information. The protocol stores and delivers database content in a human-
#' readable format. The current iteration of the WHOIS protocol was drafted
#' by the Internet Society, and is documented in RFC 3912. Tools are provided
#' to issue query requests and process query responses.
#'
#' @md
#' @name pwhois
#' @keywords internal
#' @author Bob Rudis (bob@@rud.is)
#' @import httr
#' @importFrom jsonlite fromJSON
#' @import stringi
"_PACKAGE"

+ 52
- 0
R/tidy.R View File

@ -0,0 +1,52 @@
#' Convert a Prefix WhoIs server response to a data frame
#'
#' If the response contains one or more `Org-Record` lines it is cut into
#' one chunk per record (each chunk runs from an `Org-Record` line up to the
#' line before the next one) and the parsed chunks are row-bound together.
#' Otherwise the whole response is parsed as a single record.
#'
#' @param pwhois_raw_response the raw response from a call to [whois()]
#' @return data frame
#' @export
#' @examples
#' res <- whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois
#' tidy_pwhois(res)
tidy_pwhois <- function(pwhois_raw_response) {

  record_starts <- grep("Org-Record", pwhois_raw_response)

  # no record markers: the response is one flat record
  if (length(record_starts) == 0) {
    return(process_pwhois_response(pwhois_raw_response))
  }

  # each record ends right before the next one starts; the last one runs
  # to the end of the response
  record_ends <- c(record_starts[-1] - 1, length(pwhois_raw_response))

  records <- Map(
    function(from, to) process_pwhois_response(pwhois_raw_response[from:to]),
    record_starts,
    record_ends
  )

  bind_rows(records)

}
#' Turn a Team Cymru server response into a data frame
#'
#' The response is split on `|`, every cell is trimmed, and the first line
#' is used (lower-cased) as the column names. All columns are returned as
#' character vectors. A response that only contains the header line yields
#' a zero-row data frame.
#'
#' @param cymru_raw_response the raw response from a call to [whois()]
#' @return data frame
#' @export
#' @examples
#' res <- whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois
#' tidy_cymru(res)
tidy_cymru <- function(cymru_raw_response) {

  tmp <- stri_split_fixed(cymru_raw_response, "|", simplify = TRUE)
  tmp[] <- stri_trim_both(tmp) # trim each cell, keep the matrix shape

  # NOTE(review): hyphens become spaces here, so names such as "org name"
  # contain blanks; "_" may have been intended -- confirm before changing
  # since callers may rely on the current names.
  cols <- stri_replace_all_regex(stri_trans_tolower(tmp[1, ]), "[ -]", " ")

  # drop the header by negative index so a header-only response gives an
  # empty table instead of a subscript error (2:nrow() counts down to 1)
  body_rows <- tmp[-1, , drop = FALSE]

  set_names(
    as.data.frame(body_rows, stringsAsFactors = FALSE),
    cols
  )

}

+ 320
- 0
R/utils-mappers.R View File

@ -0,0 +1,320 @@
# NOTE: The bottom of this source file shows the equivalents to purrr mappers
#
# NOTE these aren't 100% equivalent to the purrr mappers but cover very common use-cases
#
# NOTE formula function (e.g. ~{}) are 100% supported
#
# NOTE: THESE DO NOT SUPPORT list EXTRACTORS
# Return `object` with its names replaced by `nm`. When `object` is not
# supplied it defaults to `nm`, i.e. the vector is named after itself.
set_names <- function(object = nm, nm) {
  renamed <- object
  names(renamed) <- nm
  renamed
}
# Apply `.f` to every element of `.x` and return a list.
#
# .x        vector or list to iterate over
# .f        one of:
#             - a function, called as .f(element, ...)
#             - a one-sided formula, turned into function(.x, . = .x) whose
#               body is the formula's right-hand side
#             - a numeric or character vector, used as element[[.f]]
#               (a failing extraction yields NULL)
# ...       extra arguments passed to `.f` when it is a function
# .default  optional; replaces any zero-length result
#
# Returns a list the same length as `.x`, carrying the names of `.x` if any.
# Stops if `.f` is none of the supported kinds.
map <- function(.x, .f, ..., .default) {

  default_exists <- !missing(.default)

  if (inherits(.f, "formula")) {
    # Use the right-hand side expression verbatim. Rebuilding it from
    # terms() labels mangles anything with operators (~ .x + 1 became .x).
    .formula <- .f
    .f <- function(.x, . = .x) {}
    body(.f) <- .formula[[length(.formula)]]
    # evaluate in the formula's environment so variables visible where the
    # formula was written can be used in it
    .formula_env <- environment(.formula)
    if (!is.null(.formula_env)) environment(.f) <- .formula_env
  }

  nm <- names(.x)

  if (is.function(.f)) {

    out <- lapply(.x, function(x) {
      res <- .f(x, ...)
      if ((length(res) == 0) && default_exists) res <- .default
      res
    })

  } else if (is.numeric(.f) || is.character(.f)) {

    out <- lapply(.x, function(x) {
      res <- tryCatch(x[[.f]], error = function(e) NULL)
      if ((length(res) == 0) && default_exists) res <- .default
      res
    })

  } else {
    stop(
      "`.f` must be a function, a formula, or a numeric/character extractor",
      call. = FALSE
    )
  }

  if (length(nm) > 0) out <- set_names(out, nm)

  out

}
# Apply `.f` to the elements of `.x` and `.y` in parallel and return an
# unnamed list.
#
# .x, .y    vectors or lists iterated together (via mapply)
# .f        a function called as .f(x, y, ...), or a one-sided formula that
#           is turned into function(.x, .y, . = .x) whose body is the
#           formula's right-hand side
# ...       passed on to mapply() and therefore iterated alongside .x/.y
# .default  optional; replaces any zero-length result
#
# When `.f` is neither a function nor a formula nothing is computed and
# NULL is returned invisibly.
map2 <- function(.x, .y, .f, ..., .default) {

  default_exists <- !missing(.default)

  if (inherits(.f, "formula")) {
    # Use the right-hand side expression verbatim. Rebuilding it from
    # terms() labels breaks formulas with operators (e.g. ~ .x * .y).
    .formula <- .f
    .f <- function(.x, .y, . = .x) {}
    body(.f) <- .formula[[length(.formula)]]
    # evaluate in the formula's environment so variables visible where the
    # formula was written can be used in it
    .formula_env <- environment(.formula)
    if (!is.null(.formula_env)) environment(.f) <- .formula_env
  }

  if (is.function(.f)) {
    mapply(
      function(x, ...) {
        res <- .f(x, ...)
        if ((length(res) == 0) && default_exists) res <- .default
        res
      },
      .x, .y,
      ...,
      SIMPLIFY = FALSE, USE.NAMES = FALSE
    )
  }

}
# map() variant returning a character vector (names of `.x` are kept).
#
# `.default` is now a real parameter: it is forwarded to map(), which treats
# it as absent when it is not supplied here. Previously `.default` was
# forwarded without being declared, so any zero-length result failed with
# "object '.default' not found".
map_chr <- function(.x, .f, ..., .default) {
  nm <- names(.x)
  out <- as.character(map(.x, .f, ..., .default = .default))
  if (length(nm) > 0) set_names(out, nm) else out
}
# map2() variant returning a character vector.
#
# `.default` is declared here so it can be forwarded to map2(); it was
# previously forwarded without being a parameter of this function.
map2_chr <- function(.x, .y, .f, ..., .default) {
  as.character(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
# map() variant returning a logical vector (names of `.x` are kept).
#
# `.default` is declared here so it can be forwarded to map(); it was
# previously forwarded without being a parameter of this function.
map_lgl <- function(.x, .f, ..., .default) {
  nm <- names(.x)
  out <- as.logical(unlist(map(.x, .f, ..., .default = .default)))
  if (length(nm) > 0) set_names(out, nm) else out
}
# map2() variant returning a logical vector.
#
# `.default` is declared here so it can be forwarded to map2(); it was
# previously forwarded without being a parameter of this function.
map2_lgl <- function(.x, .y, .f, ..., .default) {
  as.logical(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
# map() variant returning a double vector (names of `.x` are kept).
#
# `.default` is declared here so it can be forwarded to map(); it was
# previously forwarded without being a parameter of this function.
map_dbl <- function(.x, .f, ..., .default) {
  nm <- names(.x)
  out <- as.double(unlist(map(.x, .f, ..., .default = .default)))
  if (length(nm) > 0) set_names(out, nm) else out
}
# map2() variant returning a double vector.
#
# `.default` is declared here so it can be forwarded to map2(); it was
# previously forwarded without being a parameter of this function.
map2_dbl <- function(.x, .y, .f, ..., .default) {
  as.double(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
# map() variant returning an integer vector. The results of map() are
# flattened with unlist() and coerced with as.integer(); when `.x` has
# names they are re-applied to the result.
map_int <- function(.x, .f, ..., .default) {
  labels <- names(.x)
  values <- as.integer(unlist(map(.x, .f, ..., .default = .default)))
  if (length(labels) == 0) {
    return(values)
  }
  set_names(values, labels)
}
# map2() variant returning an integer vector.
#
# `.default` is declared here so it can be forwarded to map2(); it was
# previously forwarded without being a parameter of this function.
map2_int <- function(.x, .y, .f, ..., .default) {
  as.integer(unlist(map2(.x, .y, .f, ..., .default = .default)))
}
# Map `.f` over `.x` and row-bind the results with bind_rows(); `.id` is
# handed to bind_rows() unchanged. map_dfr is an alias for map_df.
map_df <- function(.x, .f, ..., .id = NULL) {
  pieces <- map(.x, .f, ...)
  bind_rows(pieces, .id = .id)
}
map_dfr <- map_df
# Map `.f` over `.x` and column-bind the results with bind_cols().
map_dfc <- function(.x, .f, ...) {
  pieces <- map(.x, .f, ...)
  bind_cols(pieces)
}
# Map `.f` over `.x` and `.y` in parallel and row-bind the results with
# bind_rows(); `.id` is handed to bind_rows() unchanged.
map2_df <- function(.x, .y, .f, ..., .id = NULL) {
  pieces <- map2(.x, .y, .f, ...)
  bind_rows(pieces, .id = .id)
}
# Map `.f` over `.x` and `.y` in parallel and column-bind the results with
# bind_cols().
map2_dfc <- function(.x, .y, .f, ...) {
  pieces <- map2(.x, .y, .f, ...)
  bind_cols(pieces)
}
# Row-bind data frames (or lists of equal-length columns).
#
# This has limitations and is more like 75% of dplyr::bind_rows().
# It is also orders of magnitude slower than dplyr::bind_rows().
#
# ...  the tables to bind, either as separate arguments or as one list
# .id  optional name of a column to add that identifies the source table:
#      the list names when present, otherwise the position. If a column of
#      that name already exists a unique variant (e.g. "id.1") is used.
#
# Columns missing from a table are filled with NA. The result has row names
# reset and class c("tbl_df", "tbl", "data.frame").
bind_rows <- function(..., .id = NULL) {

  res <- list(...)

  # a single argument that is not itself a table is taken as the list of tables
  if (length(res) == 1 && !is.data.frame(res[[1]])) res <- res[[1]]

  cols <- unique(unlist(lapply(res, names), use.names = FALSE))

  # avoid clobbering an existing column: derive a unique id column name
  if (!is.null(.id) && (.id %in% cols)) {
    .id <- make.unique(c(cols, .id))[length(cols) + 1L]
  }

  id_vals <- if (is.null(names(res))) seq_along(res) else names(res)

  # default.stringsAsFactors() is deprecated in current R, so save/restore
  # the option through options() itself
  old_opts <- options(stringsAsFactors = FALSE)
  on.exit(options(old_opts), add = TRUE)

  rows <- lapply(seq_along(res), function(i) {

    .x <- res[[i]]

    n_rows <- if (is.data.frame(.x)) {
      nrow(.x)
    } else if (length(.x) > 0) {
      length(.x[[1]])
    } else {
      0L
    }

    for (missing_col in setdiff(cols, names(.x))) {
      .x[[missing_col]] <- rep(NA, n_rows)
    }

    if (!is.null(.id)) .x[[.id]] <- rep(id_vals[i], n_rows)

    .x

  })
  names(rows) <- names(res)

  out <- do.call(rbind.data.frame, rows)

  rownames(out) <- NULL

  class(out) <- c("tbl_df", "tbl", "data.frame")

  out

}
# Column-bind data frames.
#
# ...  the tables to bind, either as separate arguments or as one list
#
# All tables must have the same number of rows; otherwise an error naming
# the first offending argument is raised. Duplicate column names are made
# unique by appending a number (x, x1, ...). The result has row names reset
# and class c("tbl_df", "tbl", "data.frame").
bind_cols <- function(...) {

  res <- list(...)

  # Unwrap a single list-of-tables argument *before* validating, otherwise
  # the row-count check looks at the wrapper list and never fires.
  if (length(res) == 1 && !is.data.frame(res[[1]])) res <- res[[1]]

  if (length(res) == 0) {
    out <- data.frame()
    class(out) <- c("tbl_df", "tbl", "data.frame")
    return(out)
  }

  row_counts <- vapply(res, NROW, integer(1))
  row_mismatch <- row_counts != row_counts[1]

  if (any(row_mismatch)) {
    first_mismatch_pos <- which(row_mismatch)[1]
    stop(paste0("Argument ", first_mismatch_pos,
                " must be length ", row_counts[1],
                ", not ", row_counts[first_mismatch_pos]))
  }

  col_names <- unlist(lapply(res, names), use.names = FALSE)
  col_names <- make.unique(col_names, sep = "")

  # default.stringsAsFactors() is deprecated in current R, so save/restore
  # the option through options() itself
  old_opts <- options(stringsAsFactors = FALSE)
  on.exit(options(old_opts), add = TRUE)

  out <- do.call(cbind.data.frame, res)

  names(out) <- col_names
  rownames(out) <- NULL

  class(out) <- c("tbl_df", "tbl", "data.frame")

  out

}
# set.seed(1)
# 1:10 %>%
# map(rnorm, n = 10) %>%
# map_dbl(mean)
#
# set.seed(1)
# 1:10 %>%
# purrr::map(rnorm, n = 10) %>%
# purrr::map_dbl(mean)
#
#
# # Or use an anonymous function
# set.seed(1)
# 1:10 %>%
# map(function(x) rnorm(10, x))
#
# set.seed(1)
# 1:10 %>%
# purrr::map(function(x) rnorm(10, x))
#
# # Or a formula
# set.seed(1)
# 1:10 %>%
# map(~ rnorm(10, .x))
#
# set.seed(1)
# 1:10 %>%
# purrr::map(~ rnorm(10, .x))
#
# # Extract by name or position
# # .default specifies value for elements that are missing or NULL
# l1 <- list(list(a = 1L), list(a = NULL, b = 2L), list(b = 3L))
# l1 %>% map("a", .default = "???")
# l1 %>% purrr::map("a", .default = "???")
#
# l1 %>% map_int("b", .default = NA)
# l1 %>% purrr::map_int("b", .default = NA)
#
# l1 %>% map_int(2, .default = NA)
# l1 %>% purrr::map_int(2, .default = NA)
#
# # Supply multiple values to index deeply into a list
# l2 <- list(
# list(num = 1:3, letters[1:3]),
# list(num = 101:103, letters[4:6]),
# list()
# )
# l2 %>% map(c(2, 2))
# l2 %>% purrr::map(c(2, 2))
#
#
# # A more realistic example: split a data frame into pieces, fit a
# # model to each piece, summarise and extract R^2
# mtcars %>%
# split(.$cyl) %>%
# map(~ lm(mpg ~ wt, data = .x)) %>%
# map(summary) %>%
# map_dbl("r.squared")
#
# mtcars %>%
# split(.$cyl) %>%
# purrr::map(~ lm(mpg ~ wt, data = .x)) %>%
# purrr::map(summary) %>%
# purrr::map_dbl("r.squared")
#
#
# # Use map_lgl(), map_dbl(), etc to reduce to a vector.
# # * list
# mtcars %>% map(sum)
# mtcars %>% purrr::map(sum)
# # * vector
# mtcars %>% map_dbl(sum)
# mtcars %>% purrr::map_dbl(sum)
#
# # If each element of the output is a data frame, use
# # map_dfr to row-bind them together:
# mtcars %>%
# split(.$cyl) %>%
# map(~ lm(mpg ~ wt, data = .x)) %>%
# map_dfr(~ as.data.frame(t(as.matrix(coef(.)))))
#
# mtcars %>%
# split(.$cyl) %>%
# purrr::map(~ lm(mpg ~ wt, data = .x)) %>%
# purrr::map_dfr(~ as.data.frame(t(as.matrix(coef(.)))))

+ 90
- 0
R/utils-safely.R View File

@ -0,0 +1,90 @@
# Less cool counterparts to purrr's side-effect capture-rs
#
# Most of the helper functions are 100% from output.R in purrr repo
#
# @param quiet Hide errors (`TRUE`, the default), or display them
# as they occur?
# @param otherwise Default value to use when an error occurs.
#
# @return `safely`: wrapped function instead returns a list with
# components `result` and `error`. One value is always `NULL`.
#
# `quietly`: wrapped function instead returns a list with components
# `result`, `output`, `messages` and `warnings`.
#
# `possibly`: wrapped function uses a default value (`otherwise`)
# whenever an error occurs.
# Wrap `.f` so that calling the wrapper hands `.f(...)` to capture_error()
# together with `otherwise` and `quiet`.
safely <- function(.f, otherwise = NULL, quiet = TRUE) {
  wrapped <- function(...) {
    capture_error(.f(...), otherwise, quiet)
  }
  wrapped
}
# Wrap `.f` so that calling the wrapper hands `.f(...)` to capture_output().
quietly <- function(.f) {
  wrapped <- function(...) {
    capture_output(.f(...))
  }
  wrapped
}
# Wrap `.f` so that an error yields `otherwise` instead of propagating.
#
# .f         function to wrap
# otherwise  value returned by the wrapper when `.f` signals an error
#            (forced immediately so it is evaluated at wrap time)
# quiet      when FALSE the error message is echoed via message()
#
# A user interrupt is turned into the error "Terminated by user".
possibly <- function(.f, otherwise, quiet = TRUE) {

  force(otherwise)

  on_error <- function(e) {
    if (!quiet) {
      message("Error: ", e$message)
    }
    otherwise
  }

  on_interrupt <- function(e) {
    stop("Terminated by user", call. = FALSE)
  }

  function(...) {
    tryCatch(.f(...), error = on_error, interrupt = on_interrupt)
  }

}
# Evaluate `code` and report the outcome as list(result, error).
#
# On success `result` is the value of `code` and `error` is NULL. On error
# `result` is `otherwise` and `error` is the condition object; the message
# is echoed via message() unless `quiet` is TRUE. A user interrupt is
# turned into the error "Terminated by user".
capture_error <- function(code, otherwise = NULL, quiet = TRUE) {

  on_error <- function(e) {
    if (!quiet) {
      message("Error: ", e$message)
    }
    list(result = otherwise, error = e)
  }

  on_interrupt <- function(e) {
    stop("Terminated by user", call. = FALSE)
  }

  tryCatch(
    list(result = code, error = NULL),
    error = on_error,
    interrupt = on_interrupt
  )

}
# Evaluate `code` while collecting what it prints, warns and messages.
#
# Printed output is diverted with sink() into an anonymous file connection;
# warnings and messages are recorded by calling handlers and then muffled.
# Returns list(result, output, warnings, messages) where `output` is the
# captured text joined with "\n".
capture_output <- function(code) {

  seen_messages <- character()
  seen_warnings <- character()

  collect_message <- function(m) {
    seen_messages <<- c(seen_messages, m$message)
    invokeRestart("muffleMessage")
  }

  collect_warning <- function(w) {
    seen_warnings <<- c(seen_warnings, w$message)
    invokeRestart("muffleWarning")
  }

  buffer <- file()
  sink(buffer)
  # always undo the diversion and release the connection
  on.exit({
    sink()
    close(buffer)
  })

  value <- withCallingHandlers(
    code,
    message = collect_message,
    warning = collect_warning
  )

  printed <- paste0(readLines(buffer, warn = FALSE), collapse = "\n")

  list(
    result = value,
    output = printed,
    warnings = seen_warnings,
    messages = seen_messages
  )

}

+ 40
- 0
R/utils.R View File

@ -0,0 +1,40 @@
# socketConnection() wrapper that returns "" instead of raising an error
open_socket <- possibly(.f = socketConnection, otherwise = "")
# close() wrapper that captures any error instead of raising it
close_socket <- safely(.f = close)
# Parse one block of "Key: value" response lines into a one-row data frame.
#
# resp  character vector of response lines
#
# Keys are lower-cased and "-" is replaced by "_" to form the column names.
# A few known columns are converted when present:
#   route_originated_ts, cache_date -> POSIXct (value read as epoch seconds)
#   latitude, longitude             -> numeric
#   modify_date, create_date        -> POSIXct ("%b %d %Y %H:%M:%S")
#   can_allocate                    -> logical
# Every other column stays character.
process_pwhois_response <- function(resp) {

  # split on the first ": " only so values that contain ": " stay intact
  tmp <- stri_split_fixed(resp, ": ", n = 2L, simplify = TRUE)

  cols <- stri_replace_all_fixed(stri_trans_tolower(tmp[, 1]), "-", "_")

  # when no line has a ": " separator there is no value column at all
  vals <- if (ncol(tmp) >= 2L) tmp[, 2] else rep("", nrow(tmp))

  # keep strings as strings: on R < 4.0 they would become factors and
  # as.numeric() below would return level codes instead of the values
  out <- as.data.frame(as.list(setNames(vals, cols)), stringsAsFactors = FALSE)

  # base-R conversion; avoids relying on a package that is not imported
  epoch_to_posixct <- function(x) {
    as.POSIXct(as.numeric(x), origin = "1970-01-01")
  }

  for (fld in c("route_originated_ts", "cache_date")) {
    if (hasName(out, fld)) out[[fld]] <- epoch_to_posixct(out[[fld]])
  }

  # each coordinate is converted from its own column
  for (fld in c("latitude", "longitude")) {
    if (hasName(out, fld)) out[[fld]] <- as.numeric(out[[fld]])
  }

  for (fld in c("modify_date", "create_date")) {
    if (hasName(out, fld)) {
      out[[fld]] <- as.POSIXct(out[[fld]], format = "%b %d %Y %H:%M:%S")
    }
  }

  if (hasName(out, "can_allocate")) {
    out[["can_allocate"]] <- as.logical(as.integer(out[["can_allocate"]]))
  }

  out

}

+ 85
- 0
R/whois.R View File

@ -0,0 +1,85 @@
#' Issue a 'WHOIS' query and retrieve the response
#'
#' WHOIS (pronounced as the phrase "who is") is a query and response protocol
#' that is widely used for querying databases that store the registered users
#' or assignees of an Internet resource, such as a domain name, an IP address
#' block or an autonomous system, but is also used for a wider range of other
#' information. The protocol stores and delivers database content in a
#' human-readable format. The current iteration of the WHOIS protocol was
#' drafted by the Internet Society, and is documented in RFC 3912.
#'
#' The default server — `whois.pwhois.org` — is [The Prefix WhoIs Project](https://pwhois.org/)
#' WHOIS server which provides a whois-compatible client and server framework for
#' disclosing various up-to-date routing information. Instead of using
#' registrar-originated network information (which is often unspecific or
#' inaccurate), Prefix WhoIs uses the Internet's global routing table as
#' gleaned from a number of routing peers around the world. Other sources of
#' information, such as imported data from ARIN are also supported.
#'
#' The pwhois service supports special query types including:
#'
#' - `registry key=value`: you can search the pwhois database for any registry
#'   field. The ones available at the time of the date on
#'   this package are: "`Origin-AS`", "`Prefix`", "`AS-Path`",
#'   "`AS-Org-Name`", "`Org-Name`", "`Net-Name`", "`Cache-Date`",
#'   "`Latitude`", "`Longitude`", "`City`", "`Region`",
#'   "`Country`", and "`Country-Code`"
#' - `type=cymru`: The record(s) will be returned in
#'   [Team Cymru format](https://team-cymru.com/community-services/ip-asn-mapping/)
#'   and [tidy_cymru()] can be used to post-process the response.
#' - `type=rpsl`: The record(s) will be returned in
#'   [Routing Policy Specification Language](https://tools.ietf.org/html/rfc2650)
#'   (RPSL) format. A 'tidy' post-processor is on the TODO list. PRs are welcome.
#'
#' This function can also be used to query the
#' [Team Cymru WHOIS](https://team-cymru.com/community-services/ip-asn-mapping/)
#' server which supports various enhanced queries. See the link for more
#' information. [tidy_cymru()] can be used to post-process these responses.
#'
#' @param query <chr> the text query to send to the destination whois `host`;
#'        only the first element is used and a trailing newline is added
#'        when missing
#' @param host <chr> the WHOIS host to query; This defaults to
#'        [The Prefix WhoIs Project](https://pwhois.org/) WHOIS server
#'        (a.k.a. "pwhois") as it allows for more robust queries to be performed.
#' @param port <int> TCP port the `host` WHOIS server is running on;
#'        defaults to `43` (the standard WHOIS port)
#' @param timeout connection timeout; see [connections]
#' @return raw character response from the `host` WHOIS server. Use
#'         the built-in tidying functions to post-process the response.
#'         If the connection cannot be opened a warning is issued and
#'         `NA_character_` is returned.
#' @export
#' @examples
#' whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois
#' whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois
#' whois('type=rpsl 17.253.144.10') # 'type=rpsl' is only supported on pwhois
#' whois('-v AS23028', "whois.cymru.com") # this only works with the Team Cymru WHOIS service
#' whois("17.253.144.10")
whois <- function(query, host = "whois.pwhois.org", port = 43L,
                  timeout = getOption("timeout")) {

  query <- as.character(query[1])
  host <- as.character(host[1])
  port <- as.integer(port[1])

  if (!endsWith(query, "\n")) query <- sprintf("%s\n", query)

  con <- open_socket(
    host = host,
    port = port,
    blocking = TRUE,
    open = "r+",
    timeout = timeout
  )

  # open_socket() yields a non-connection value when the connection fails
  if (!inherits(con, "sockconn")) {
    # report the host that could not be reached (not the query text)
    warning(sprintf("Error opening connection to %s:%s", host, port))
    return(NA_character_)
  }

  # only register the cleanup once there is a real connection to close
  on.exit(close_socket(con), add = TRUE)

  cat(query, file = con)

  readLines(con)

}

+ 39
- 0
README.Rmd View File

@ -39,6 +39,45 @@ packageVersion("pwhois")
```
A basic query:
```{r ex-01}
(res <- whois("17.253.144.10"))
str(tidy_pwhois(res), 1)
```
The Prefix WhoIs project supports some advanced queries including the ability to search on any of their WHOIS registry fields:
```{r ex-02}
(res <- whois("registry org-name=apple, inc"))
tibble::as_tibble(tidy_pwhois(res))
```
and can return results in different formats including Team Cymru tables:
```{r ex-03}
(res <- whois('type=cymru 17.253.144.10'))
str(tidy_cymru(res), 1)
```
That `tidy_cymru()` function can be used on responses from the Team Cymru
WHOIS service as well:
```{r ex-03a}
(res <- whois('-v AS23028', "whois.cymru.com"))
str(tidy_cymru(res))
```
The Prefix WhoIs project can also return responses in Routing Policy Specification Language (RPSL), but there is no 'tidy' function for this format yet:
```{r ex-04}
whois('type=rpsl 17.253.144.10')
```
## pwhois Metrics
```{r cloc, echo=FALSE}


+ 221
- 0
README.md View File

@ -0,0 +1,221 @@
[![Project Status: Active – The project has reached a stable, usable
state and is being actively
developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![Signed
by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr)
![Signed commit
%](https://img.shields.io/badge/Signed_Commits-100%25-lightgrey.svg)
[![Linux build
Status](https://travis-ci.org/hrbrmstr/pwhois.svg?branch=master)](https://travis-ci.org/hrbrmstr/pwhois)
![Minimal R
Version](https://img.shields.io/badge/R%3E%3D-3.5.0-blue.svg)
![License](https://img.shields.io/badge/License-MIT-blue.svg)
# pwhois
Issue ‘WHOIS’ Queries and Process Various ‘WHOIS’ Responses
## Description
WHOIS (pronounced as the phrase “who is”) is a query and response
protocol that is widely used for querying databases that store the
registered users or assignees of an Internet resource, such as a domain
name, an IP address block or an autonomous system, but is also used for
a wider range of other information. The protocol stores and delivers
database content in a human- readable format. The current iteration of
the WHOIS protocol was drafted by the Internet Society, and is
documented in RFC 3912. Tools are provided to issue query requests and
process query responses.
## What’s Inside The Tin
The following functions are implemented:
- `tidy_cymru`: Turn a Team Cymru server response into a data frame
- `tidy_pwhois`: Turn a Prefix WhoIs server response into a data frame
- `whois`: Issue a ‘WHOIS’ query and retrieve the response
## Installation
``` r
remotes::install_git("https://git.rud.is/hrbrmstr/pwhois.git")
# or
remotes::install_gitlab("hrbrmstr/pwhois")
# or
remotes::install_bitbucket("hrbrmstr/pwhois")
```
NOTE: To use the ‘remotes’ install options you will need to have the
[{remotes} package](https://github.com/r-lib/remotes) installed.
## Usage
``` r
library(pwhois)
# current version
packageVersion("pwhois")
## [1] '0.1.0'
```
A basic query:
``` {ex-01}
(res <- whois("17.253.144.10"))
str(tidy_pwhois(res), 1)
```
The Prefix WhoIs project supports some advanced queries including the
ability to search on any of their WHOIS registry fields:
``` r
(res <- whois("registry org-name=apple, inc"))
## [1] "Org-Record: 0" "Org-ID: GRNA"
## [3] "Org-Name: Green Apple, Inc." "Can-Allocate: 0"
## [5] "Street-1: 5222 33rd Street SE" "City: Grand Rapids"
## [7] "State: MI" "Postal-Code: 49512-2070"
## [9] "Country: US" "Register-Date: 1997-12-19"
## [11] "Update-Date: 2011-09-24" "Create-Date: Nov 23 2005 02:48:10"
## [13] "Modify-Date: Feb 13 2021 01:35:47" "Admin-0-Handle: INE-ARIN"
## [15] "NOC-0-Handle: INE-ARIN" "Abuse-0-Handle: INE-ARIN"
## [17] "Tech-0-Handle: INE-ARIN" "Referral-Server: rwhois://rwhois.iserv.net:4321"
## [19] "Comment: http://www.greenapple.com" "Org-Record: 1"
## [21] "Org-ID: Network of Apple, Inc." "Org-Name: Network of Apple, Inc."
## [23] "Can-Allocate: 1" "Register-Date: 2011-03-02"
## [25] "Update-Date: 2011-03-02" "Create-Date: Mar 01 2011 13:07:13"
## [27] "Modify-Date: Mar 01 2011 13:07:13" "Org-Record: 2"
## [29] "Org-ID: C03342993" "Org-Name: APPLE, INC - CORP PROJECT"
## [31] "Can-Allocate: 0" "Street-1: 1 INFINITE LOOP # MS60"
## [33] "City: CUPERTINO" "State: CA"
## [35] "Postal-Code: 95014-2083" "Country: US"
## [37] "Register-Date: 2013-03-14" "Update-Date: 2013-03-14"
## [39] "Create-Date: May 30 2013 16:03:51" "Modify-Date: Feb 13 2021 01:35:47"
## [41] "Org-Record: 3" "Org-ID: C03342938"
## [43] "Org-Name: APPLE, INC - CORP PROJECT" "Can-Allocate: 0"
## [45] "Street-1: 1 INFINITE LOOP # MS60" "City: CUPERTINO"
## [47] "State: CA" "Postal-Code: 95014-2083"
## [49] "Country: US" "Register-Date: 2013-03-14"
## [51] "Update-Date: 2013-03-14" "Create-Date: May 30 2013 16:03:51"
## [53] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 4"
## [55] "Org-ID: C03342954" "Org-Name: APPLE, INC - CORP PROJECT"
## [57] "Can-Allocate: 0" "Street-1: 1 INFINITE LOOP # MS60"
## [59] "City: CUPERTINO" "State: CA"
## [61] "Postal-Code: 95014-2083" "Country: US"
## [63] "Register-Date: 2013-03-14" "Update-Date: 2013-03-14"
## [65] "Create-Date: May 30 2013 16:03:51" "Modify-Date: Feb 13 2021 01:35:47"
## [67] "Org-Record: 5" "Org-ID: Network of Apple, Inc."
## [69] "Org-Name: Network of Apple, Inc." "Can-Allocate: 1"
## [71] "Register-Date: 2013-05-31" "Update-Date: 2013-05-31"
## [73] "Create-Date: May 30 2013 18:55:56" "Modify-Date: May 30 2013 18:55:56"
## [75] "Org-Record: 6" "Org-ID: C04676970"
## [77] "Org-Name: APPLE, INC - CORP PROJECT" "Can-Allocate: 0"
## [79] "Street-1: 1 INFINITE LOOP # MS60" "City: CUPERTINO"
## [81] "State: CA" "Postal-Code: 95014-2083"
## [83] "Country: US" "Register-Date: 2013-08-21"
## [85] "Update-Date: 2013-08-21" "Create-Date: Aug 23 2013 02:32:19"
## [87] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 7"
## [89] "Org-ID: C04716251" "Org-Name: APPLE, INC - RETAIL PROJECT"
## [91] "Can-Allocate: 0" "Street-1: 1500 POLARIS PKWY"
## [93] "City: COLUMBUS" "State: OH"
## [95] "Postal-Code: 43240" "Country: US"
## [97] "Register-Date: 2013-09-30" "Update-Date: 2013-09-30"
## [99] "Create-Date: Oct 02 2013 01:52:33" "Modify-Date: Mar 23 2015 08:01:17"
## [101] "Org-Record: 8" "Org-ID: Apple, Inc."
## [103] "Org-Name: Apple, Inc." "Can-Allocate: 1"
## [105] "Register-Date: 2014-10-19" "Update-Date: 2014-10-19"
## [107] "Create-Date: Oct 18 2014 05:49:09" "Modify-Date: Oct 18 2014 05:49:09"
## [109] "Org-Record: 9" "Org-ID: C06092907"
## [111] "Org-Name: Apple, Inc IP - MRC" "Can-Allocate: 0"
## [113] "Street-1: 21625 Gresham Dr" "City: Ashburn"
## [115] "State: VA" "Postal-Code: 20147"
## [117] "Country: US" "Register-Date: 2016-04-14"
## [119] "Update-Date: 2016-06-21" "Create-Date: Dec 13 2016 10:45:20"
## [121] "Modify-Date: Feb 13 2021 01:35:47" "Org-Record: 10"
## [123] "Org-ID: C07098191" "Org-Name: APPLE, INC.-COLOCATION - APPLE, INC."
## [125] "Can-Allocate: 0" "Street-1: 340 CUMBERLAND AVE"
## [127] "City: PORTLAND" "State: ME"
## [129] "Postal-Code: 04101" "Country: US"
## [131] "Register-Date: 2018-10-24" "Update-Date: 2018-10-24"
## [133] "Create-Date: Oct 26 2018 02:13:03" "Modify-Date: Feb 13 2021 01:35:47"
tibble::as_tibble(tidy_pwhois(res))
## # A tibble: 11 x 19
## org_record org_id org_name can_allocate street_1 city state postal_code country register_date update_date
## <chr> <chr> <chr> <lgl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 0 GRNA Green A… FALSE 5222 33… Gran… MI 49512-2070 US 1997-12-19 2011-09-24
## 2 1 Netwo… Network… TRUE <NA> <NA> <NA> <NA> <NA> 2011-03-02 2011-03-02
## 3 2 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14
## 4 3 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14
## 5 4 C0334… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-03-14 2013-03-14
## 6 5 Netwo… Network… TRUE <NA> <NA> <NA> <NA> <NA> 2013-05-31 2013-05-31
## 7 6 C0467… APPLE, … FALSE 1 INFIN… CUPE… CA 95014-2083 US 2013-08-21 2013-08-21
## 8 7 C0471… APPLE, … FALSE 1500 PO… COLU… OH 43240 US 2013-09-30 2013-09-30
## 9 8 Apple… Apple, … TRUE <NA> <NA> <NA> <NA> <NA> 2014-10-19 2014-10-19
## 10 9 C0609… Apple, … FALSE 21625 G… Ashb… VA 20147 US 2016-04-14 2016-06-21
## 11 10 C0709… APPLE, … FALSE 340 CUM… PORT… ME 04101 US 2018-10-24 2018-10-24
## # … with 8 more variables: create_date <dttm>, modify_date <dttm>, admin_0_handle <chr>, noc_0_handle <chr>,
## # abuse_0_handle <chr>, tech_0_handle <chr>, referral_server <chr>, comment <chr>
```
and can return results in different formats including Team Cymru tables:
``` r
(res <- whois('type=cymru 17.253.144.10'))
## [1] "AS | IP | ORG NAME | CC | NET NAME | AS ORG NAME"
## [2] "714 | 17.253.144.10 | Apple Inc. | US | APPLE-WWNET | Apple Inc."
str(tidy_cymru(res), 1)
## 'data.frame': 1 obs. of 6 variables:
## $ as : chr "714"
## $ ip : chr "17.253.144.10"
## $ org name : chr "Apple Inc."
## $ cc : chr "US"
## $ net name : chr "APPLE-WWNET"
## $ as org name: chr "Apple Inc."
```
That `tidy_cymru()` function can be used on responses from the Team
Cymru WHOIS service as well:
``` r
(res <- whois('-v AS23028', "whois.cymru.com"))
## [1] "AS | CC | Registry | Allocated | AS Name" "23028 | US | arin | 2002-01-04 | TEAM-CYMRU, US"
str(tidy_cymru(res))
## 'data.frame': 1 obs. of 5 variables:
## $ as : chr "23028"
## $ cc : chr "US"
## $ registry : chr "arin"
## $ allocated: chr "2002-01-04"
## $ as name : chr "TEAM-CYMRU, US"
```
The Prefix WhoIs project can also return responses in Routing Policy
Specification Language (RPSL), but there is no ‘tidy’ function for this
format yet:
``` r
whois('type=rpsl 17.253.144.10')
## [1] "Origin: AS714" "Route: 17.253.144.0/21"
## [3] "Date: 20210213" "AS-Org-Name: Apple Inc."
## [5] "Org-Name: Apple Inc." "Net-Name: APPLE-WWNET"
## [7] "Source: PWHOIS Server 208.74.248.120:43 at 20210213"
```
## pwhois Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
|:-----|---------:|-----:|----:|-----:|------------:|----:|---------:|-----:|
| R | 7 | 0.44 | 303 | 0.47 | 110 | 0.4 | 196 | 0.41 |
| Rmd | 1 | 0.06 | 17 | 0.03 | 29 | 0.1 | 44 | 0.09 |
| SUM | 8 | 0.50 | 320 | 0.50 | 139 | 0.5 | 240 | 0.50 |
clock Package Metrics for pwhois
## Code of Conduct
Please note that this project is released with a Contributor Code of
Conduct. By participating in this project you agree to abide by its
terms.

+ 9
- 2
man/pwhois.Rd View File

@ -4,9 +4,16 @@
\name{pwhois}
\alias{pwhois}
\alias{pwhois-package}
\title{...}
\title{pwhois}
\description{
A good description goes here otherwise CRAN checks fail.
WHOIS (pronounced as the phrase "who is") is a query and response protocol
that is widely used for querying databases that store the registered users
or assignees of an Internet resource, such as a domain name, an IP address
block or an autonomous system, but is also used for a wider range of other
information. The protocol stores and delivers database content in a human-
readable format. The current iteration of the WHOIS protocol was drafted
by the Internet Society, and is documented in RFC 3912. Tools are provided
to issue query requests and process query responses.
}
\seealso{
Useful links:


+ 21
- 0
man/tidy_cymru.Rd View File

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidy.R
\name{tidy_cymru}
\alias{tidy_cymru}
\title{Turn a Team Cymru server response into a data frame}
\usage{
tidy_cymru(cymru_raw_response)
}
\arguments{
\item{cymru_raw_response}{the raw response from a call to \code{\link[=whois]{whois()}}}
}
\value{
data frame
}
\description{
Turn a Team Cymru server response into a data frame
}
\examples{
res <- whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois
tidy_cymru(res)
}

+ 21
- 0
man/tidy_pwhois.Rd View File

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidy.R
\name{tidy_pwhois}
\alias{tidy_pwhois}
\title{Turn a Prefix WhoIs server response into a data frame}
\usage{
tidy_pwhois(pwhois_raw_response)
}
\arguments{
\item{pwhois_raw_response}{the raw response from a call to \code{\link[=whois]{whois()}}}
}
\value{
data frame
}
\description{
Turn a Prefix WhoIs server response into a data frame
}
\examples{
res <- whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois
tidy_pwhois(res)
}

+ 75
- 0
man/whois.Rd View File

@ -0,0 +1,75 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/whois.R
\name{whois}
\alias{whois}
\title{Issue a 'WHOIS' query and retrieve the response}
\usage{
whois(
query,
host = "whois.pwhois.org",
port = 43L,
timeout = getOption("timeout")
)
}
\arguments{
\item{query}{<chr> the text query to send to the destination whois \code{host}}
\item{host}{<chr> the WHOIS host to query; This defaults to
\href{https://pwhois.org/}{The Prefix WhoIs Project} WHOIS server
(a.k.a. "pwhois") as it allows for more robust queries to be performed.}
\item{port}{<int> TCP port the \code{host} WHOIS server is running on;
defaults to \code{43} (the standard WHOIS port)}
\item{timeout}{connection timeout; see \link{connections}}
}
\value{
raw character response from the \code{host} WHOIS server. Use
the built-in tidying functions to post-process the response.
}
\description{
WHOIS (pronounced as the phrase "who is") is a query and response protocol
that is widely used for querying databases that store the registered users
or assignees of an Internet resource, such as a domain name, an IP address
block or an autonomous system, but is also used for a wider range of other
information. The protocol stores and delivers database content in a human-
readable format. The current iteration of the WHOIS protocol was drafted
by the Internet Society, and is documented in RFC 3912.
}
\details{
The default server — \code{whois.pwhois.org} — is \href{https://pwhois.org/}{The Prefix WhoIs Project}
WHOIS server which provides a whois-compatible client and server framework for
disclosing various up-to-date routing information. Instead of using
registrar-originated network information (which is often unspecific or
inaccurate), Prefix WhoIs uses the Internet's global routing table as
gleaned from a number of routing peers around the world. Other sources of
information, such as imported data from ARIN are also supported.
The pwhois service supports special query types including:
\itemize{
\item \verb{registry key=value}: you can search the pwhois database for any registry
field. The ones available at the time of the date on
this package are: "\code{Origin-AS}", "\verb{Prefix"}, "\verb{AS-Path"},
"\code{AS-Org-Name}", "\code{Org-Name}", "\code{Net-Name}", "\code{Cache-Date}",
"\code{Latitude}", "\code{Longitude}", "\verb{City"}, "\verb{Region"} ,
"\verb{Country"} , and "\code{Country-Code}"
\item \code{type=cymru}: The record(s) will be returned in
\href{https://team-cymru.com/community-services/ip-asn-mapping/}{Team Cymru format}
and \code{\link[=tidy_cymru]{tidy_cymru()}} can be used to post-process the response.
\item \code{type=rpsl}: The record(s) will be returned in
\href{https://tools.ietf.org/html/rfc2650}{Routing Policy Specification Language}
(RPSL) format. A 'tidy' post-processor is on the TODO list. PRs are welcome.
}
This function can also be used to query the
\href{https://team-cymru.com/community-services/ip-asn-mapping/}{Team Cymru WHOIS}
server which supports various enhanced queries. See the link for more
information. \code{\link[=tidy_cymru]{tidy_cymru()}} can be used to post-process these responses.
}
\examples{
whois("registry org-name=apple, inc") # 'registry' is only supported on pwhois
whois('type=cymru 17.253.144.10') # 'type=cymru' is only supported on pwhois
whois('type=rpsl 17.253.144.10') # 'type=rpsl' is only supported on pwhois
whois('-v AS23028', "whois.cymru.com") # this only works with the Team Cymru WHOIS service
whois("17.253.144.10")
}

Loading…
Cancel
Save