
package rename as requested by CRAN

Branch: master
boB Rudis, 7 years ago
commit cb6f9b9989
No known key found for this signature in database. GPG Key ID: 1D7529BE14E2BBA9
Changed files (number of changed lines in parentheses):

 1. DESCRIPTION (10)
 2. NAMESPACE (2)
 3. R/RcppExports.R (10)
 4. R/can-fetch.r (2)
 5. R/crawl-delay.r (4)
 6. R/robxp.r (2)
 7. R/spiderbar-package.R (8)
 8. README.Rmd (16)
 9. README.md (19)
10. man/can_fetch.Rd (2)
11. man/crawl_delays.Rd (4)
12. man/rep.Rd (15)
13. man/robxp.Rd (2)
14. man/spiderbar.Rd (16)
15. spiderbar.Rproj (0)
16. src/RcppExports.cpp (22)
17. tests/test-all.R (2)
18. tests/testthat/test-spiderbar.R (10)

DESCRIPTION (10)

@@ -1,18 +1,18 @@
-Package: rep
+Package: spiderbar
 Type: Package
-Title: Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+Title: Parse and Test Robots Exclusion Protocol Files and Rules
 Version: 0.2.0
 Date: 2017-09-23
 Author: Bob Rudis (bob@rud.is) [aut, cre], SEOmoz, Inc [aut]
 Maintainer: Bob Rudis <bob@rud.is>
 Description: The 'Robots Exclusion Protocol' <http://www.robotstxt.org/orig.html> documents
 a set of standards for allowing or excluding robot/spider crawling of different areas of
-site content. Tools are provided which wrap The 'rep-cpp` <https://github.com/seomoz/rep-cpp>
+site content. Tools are provided which wrap The 'rep-cpp' <https://github.com/seomoz/rep-cpp>
 C++ library for processing these 'robots.txt' files.
 SystemRequirements: C++11
 NeedsCompilation: yes
-URL: https://github.com/hrbrmstr/rep
-BugReports: https://github.com/hrbrmstr/rep/issues
+URL: https://github.com/hrbrmstr/spiderbar
+BugReports: https://github.com/hrbrmstr/spiderbar/issues
 License: MIT + file LICENSE
 Suggests:
 testthat,
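
Once the renamed package is installed, the updated DESCRIPTION metadata can be spot-checked from R. A minimal sketch, assuming a local install of spiderbar:

    # Read the installed DESCRIPTION and confirm the new name and URLs
    desc <- utils::packageDescription("spiderbar")
    desc$Package      # "spiderbar"
    desc$URL          # "https://github.com/hrbrmstr/spiderbar"
    desc$BugReports   # "https://github.com/hrbrmstr/spiderbar/issues"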

NAMESPACE (2)

@@ -6,4 +6,4 @@ export(crawl_delays)
 export(robxp)
 export(sitemaps)
 importFrom(Rcpp,sourceCpp)
-useDynLib(rep, .registration=TRUE)
+useDynLib(spiderbar, .registration=TRUE)
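
NAMESPACE is generated by roxygen2, so the `useDynLib()` change flows from the `@useDynLib spiderbar` tag edited in R/spiderbar-package.R further down. A sketch of regenerating it from the package root, assuming devtools/roxygen2 are available:

    # Rebuild NAMESPACE and the man/*.Rd files from the roxygen tags
    devtools::document()   # equivalently: roxygen2::roxygenise()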

R/RcppExports.R (10)

@@ -6,7 +6,7 @@
 #' @noRd
 #'
 rep_parse <- function(content) {
-    .Call(`_rep_rep_parse`, content)
+    .Call(`_spiderbar_rep_parse`, content)
 }
 #' Get delays
@@ -14,7 +14,7 @@ rep_parse <- function(content) {
 #' @noRd
 #'
 rep_crawl_delays <- function(xp) {
-    .Call(`_rep_rep_crawl_delays`, xp)
+    .Call(`_spiderbar_rep_crawl_delays`, xp)
 }
 #' Retrieve a character vector of sitemaps from a parsed robots.txt object
@@ -28,7 +28,7 @@ rep_crawl_delays <- function(xp) {
 #' rt <- robxp(imdb)
 #' sitemaps(rt)
 sitemaps <- function(xp) {
-    .Call(`_rep_sitemaps`, xp)
+    .Call(`_spiderbar_sitemaps`, xp)
 }
 #' Retrieve a character vector of sitemaps from a parsed robots.txt object
@@ -36,7 +36,7 @@ sitemaps <- function(xp) {
 #' @noRd
 #'
 rep_as_string <- function(xp) {
-    .Call(`_rep_rep_as_string`, xp)
+    .Call(`_spiderbar_rep_as_string`, xp)
 }
 #' Path allowed
@@ -44,6 +44,6 @@ rep_as_string <- function(xp) {
 #' @noRd
 #'
 rep_path_allowed <- function(xp, path, agent = "*") {
-    .Call(`_rep_rep_path_allowed`, xp, path, agent)
+    .Call(`_spiderbar_rep_path_allowed`, xp, path, agent)
 }
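
R/RcppExports.R and src/RcppExports.cpp are generated files; the `_spiderbar_` prefixes on the native symbols come from regenerating the glue code after the Package field changed. A sketch, assuming Rcpp is installed and the working directory is the package root:

    # Regenerate the Rcpp attribute glue; native routine names are prefixed
    # with the package name, hence _rep_* becoming _spiderbar_*
    Rcpp::compileAttributes()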

R/can-fetch.r (2)

@@ -10,7 +10,7 @@
 #' @param user_agent user agent to test
 #' @export
 #' @examples
-#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
+#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
 #' gh_rt <- robxp(gh)
 #'
 #' can_fetch(gh_rt, "/humans.txt", "*") # TRUE
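
For reference, a short usage sketch assembled from the example above (assumes the renamed package is installed):

    library(spiderbar)
    gh <- paste0(readLines(system.file("extdata", "github-robots.txt",
                                       package = "spiderbar")), collapse = "\n")
    gh_rt <- robxp(gh)
    can_fetch(gh_rt, "/humans.txt", "*")   # TRUE for the wildcard agent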

R/crawl-delay.r (4)

@@ -6,11 +6,11 @@
 #' @note `-1` will be returned for any listed agent _without_ a crawl delay setting
 #' @export
 #' @examples
-#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
+#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
 #' gh_rt <- robxp(gh)
 #' crawl_delays(gh_rt)
 #'
-#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
 #' imdb_rt <- robxp(imdb)
 #' crawl_delays(imdb_rt)
 crawl_delays <- function(obj) {
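
As the note above says, `crawl_delays()` returns `-1` for any listed agent without a Crawl-delay rule. A short sketch based on the bundled IMDb file (assumes the package is installed):

    library(spiderbar)
    imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
                                         package = "spiderbar")), collapse = "\n")
    imdb_rt <- robxp(imdb)
    crawl_delays(imdb_rt)   # data frame of agents; -1 marks "no Crawl-delay set"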

R/robxp.r (2)

@@ -9,7 +9,7 @@
 #' will be concatenated into a single string and parsed and the connection will be closed.
 #' @export
 #' @examples
-#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
 #' rt <- robxp(imdb)
 robxp <- function(x) {
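
As the documentation above notes, `robxp()` also accepts a character vector or a connection (which is read, concatenated, and closed). A sketch of the three call forms, mirroring the test suite further down:

    library(spiderbar)
    path <- system.file("extdata", "imdb-robots.txt", package = "spiderbar")
    rt1  <- robxp(paste0(readLines(path), collapse = "\n"))  # single string
    rt2  <- robxp(readLines(path))                           # character vector
    rt3  <- robxp(file(path))                                # connection, closed after parsing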

R/rep-package.R → R/spiderbar-package.R (8)

@@ -1,14 +1,14 @@
-#' Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+#' Parse and Test Robots Exclusion Protocol Files and Rules
 #'
 #' The 'Robots Exclusion Protocol' (<http://www.robotstxt.org/orig.html>) documents a set
 #' of standards for allowing or excluding robot/spider crawling of different areas of
-#' site content. Tools are provided which wrap The 'rep-cpp` <https://github.com/seomoz/rep-cpp>
+#' site content. Tools are provided which wrap The `rep-cpp` <https://github.com/seomoz/rep-cpp>
 #' C++ library for processing these `robots.txt`` files.
 #'
 #' @md
-#' @name rep
+#' @name spiderbar
 #' @docType package
 #' @author Bob Rudis (bob@@rud.is)
-#' @useDynLib rep, .registration=TRUE
+#' @useDynLib spiderbar, .registration=TRUE
 #' @importFrom Rcpp sourceCpp
 NULL

README.Rmd (16)

@@ -2,13 +2,13 @@
 output: rmarkdown::github_document
 ---
-[![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep)
-[![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep)
-![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)
+<!-- [![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) -->
+<!-- [![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) -->
+<!-- ![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg) -->
-# rep
-Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+# spiderbar
+Parse and Test Robots Exclusion Protocol Files and Rules
 ## Description
@@ -29,7 +29,7 @@ The following functions are implemented:
 ## Installation
 ```{r eval=FALSE}
-devtools::install_github("hrbrmstr/rep")
+devtools::install_github("hrbrmstr/spiderbar")
 ```
 ```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
@@ -39,11 +39,11 @@ options(width=120)
 ## Usage
 ```{r message=FALSE, warning=FALSE, error=FALSE}
-library(rep)
+library(spiderbar)
 library(robotstxt)
 # current verison
-packageVersion("rep")
+packageVersion("spiderbar")
 # use helpers from the robotstxt package
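
README.md is knit from README.Rmd, so the rename only has to be edited here and then re-rendered. A minimal sketch, assuming rmarkdown is installed:

    # Re-knit the README; the YAML header already selects rmarkdown::github_document
    rmarkdown::render("README.Rmd")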

README.md (19)

@@ -1,10 +1,11 @@
-[![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) [![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) ![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)
-rep
-===
-Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+<!-- [![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) -->
+<!-- [![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) -->
+<!-- ![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg) -->
+spiderbar
+=========
+Parse and Test Robots Exclusion Protocol Files and Rules
 Description
 -----------
@@ -28,18 +29,18 @@ Installation
 ------------
 ``` r
-devtools::install_github("hrbrmstr/rep")
+devtools::install_github("hrbrmstr/spiderbar")
 ```
 Usage
 -----
 ``` r
-library(rep)
+library(spiderbar)
 library(robotstxt)
 # current verison
-packageVersion("rep")
+packageVersion("spiderbar")
 ```
 ## [1] '0.2.0'
@@ -154,7 +155,7 @@ library(testthat)
 date()
 ```
-## [1] "Sat Sep 23 13:07:16 2017"
+## [1] "Sun Sep 24 08:28:30 2017"
 ``` r
 test_dir("tests/")

man/can_fetch.Rd (2)

@@ -19,7 +19,7 @@ return a logical vector indicating whether you have permission to fetch the cont
 at the respective path.
 }
 \examples{
-gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n")
+gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\\n")
 gh_rt <- robxp(gh)
 can_fetch(gh_rt, "/humans.txt", "*") # TRUE

man/crawl_delays.Rd (4)

@@ -19,11 +19,11 @@ Retrive all agent crawl delay values in a \code{robxp} \code{robots.txt} object
 \code{-1} will be returned for any listed agent \emph{without} a crawl delay setting
 }
 \examples{
-gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n")
+gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\\n")
 gh_rt <- robxp(gh)
 crawl_delays(gh_rt)
-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\\n")
 imdb_rt <- robxp(imdb)
 crawl_delays(imdb_rt)
 }

man/rep.Rd (15, deleted)

@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rep-package.R
-\docType{package}
-\name{rep}
-\alias{rep}
-\alias{rep-package}
-\title{Tools to Parse and Test Robots Exclusion Protocol Files and Rules}
-\description{
-The 'Robots Exclusion Protocol' (\url{http://www.robotstxt.org/orig.html}) documents a set
-of standards for allowing or excluding robot/spider crawling of different areas of
-site content. Tools are provided which wrap The 'rep-cpp\code{<https://github.com/seomoz/rep-cpp> C++ library for processing these}robots.txt`` files.
-}
-\author{
-Bob Rudis (bob@rud.is)
-}

man/robxp.Rd (2)

@@ -17,6 +17,6 @@ This function takes in a single element character vector and parses it into
 a `robxp` object.
 }
 \examples{
-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\\n")
 rt <- robxp(imdb)
 }

man/spiderbar.Rd (16, new file)

@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/spiderbar-package.R
+\docType{package}
+\name{spiderbar}
+\alias{spiderbar}
+\alias{spiderbar-package}
+\title{Parse and Test Robots Exclusion Protocol Files and Rules}
+\description{
+The 'Robots Exclusion Protocol' (\url{http://www.robotstxt.org/orig.html}) documents a set
+of standards for allowing or excluding robot/spider crawling of different areas of
+site content. Tools are provided which wrap The \code{rep-cpp} \url{https://github.com/seomoz/rep-cpp}
+C++ library for processing these `robots.txt`` files.
+}
+\author{
+Bob Rudis (bob@rud.is)
+}
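
Both Rd changes (deleting man/rep.Rd, adding man/spiderbar.Rd) fall out of the roxygen `@name` switch. Once the renamed package is installed, the package-level help topic can be sanity-checked interactively; a sketch:

    library(spiderbar)
    ?spiderbar                    # package-level help via the new \alias{spiderbar}
    help("spiderbar-package")     # same topic via the -package alias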

rep.Rproj → spiderbar.Rproj (0, renamed)

src/RcppExports.cpp (22)

@@ -7,7 +7,7 @@ using namespace Rcpp;
 // rep_parse
 SEXP rep_parse(std::string content);
-RcppExport SEXP _rep_rep_parse(SEXP contentSEXP) {
+RcppExport SEXP _spiderbar_rep_parse(SEXP contentSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -18,7 +18,7 @@ END_RCPP
 }
 // rep_crawl_delays
 DataFrame rep_crawl_delays(SEXP xp);
-RcppExport SEXP _rep_rep_crawl_delays(SEXP xpSEXP) {
+RcppExport SEXP _spiderbar_rep_crawl_delays(SEXP xpSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -29,7 +29,7 @@ END_RCPP
 }
 // sitemaps
 std::vector<std::string> sitemaps(SEXP xp);
-RcppExport SEXP _rep_sitemaps(SEXP xpSEXP) {
+RcppExport SEXP _spiderbar_sitemaps(SEXP xpSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -40,7 +40,7 @@ END_RCPP
 }
 // rep_as_string
 std::string rep_as_string(SEXP xp);
-RcppExport SEXP _rep_rep_as_string(SEXP xpSEXP) {
+RcppExport SEXP _spiderbar_rep_as_string(SEXP xpSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -51,7 +51,7 @@ END_RCPP
 }
 // rep_path_allowed
 bool rep_path_allowed(SEXP xp, std::string path, std::string agent);
-RcppExport SEXP _rep_rep_path_allowed(SEXP xpSEXP, SEXP pathSEXP, SEXP agentSEXP) {
+RcppExport SEXP _spiderbar_rep_path_allowed(SEXP xpSEXP, SEXP pathSEXP, SEXP agentSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -64,15 +64,15 @@ END_RCPP
 }
 static const R_CallMethodDef CallEntries[] = {
-    {"_rep_rep_parse", (DL_FUNC) &_rep_rep_parse, 1},
-    {"_rep_rep_crawl_delays", (DL_FUNC) &_rep_rep_crawl_delays, 1},
-    {"_rep_sitemaps", (DL_FUNC) &_rep_sitemaps, 1},
-    {"_rep_rep_as_string", (DL_FUNC) &_rep_rep_as_string, 1},
-    {"_rep_rep_path_allowed", (DL_FUNC) &_rep_rep_path_allowed, 3},
+    {"_spiderbar_rep_parse", (DL_FUNC) &_spiderbar_rep_parse, 1},
+    {"_spiderbar_rep_crawl_delays", (DL_FUNC) &_spiderbar_rep_crawl_delays, 1},
+    {"_spiderbar_sitemaps", (DL_FUNC) &_spiderbar_sitemaps, 1},
+    {"_spiderbar_rep_as_string", (DL_FUNC) &_spiderbar_rep_as_string, 1},
+    {"_spiderbar_rep_path_allowed", (DL_FUNC) &_spiderbar_rep_path_allowed, 3},
     {NULL, NULL, 0}
 };
-RcppExport void R_init_rep(DllInfo *dll) {
+RcppExport void R_init_spiderbar(DllInfo *dll) {
     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
     R_useDynamicSymbols(dll, FALSE);
 }
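
When the shared library loads, R looks for an initializer named `R_init_<package>()`, which is why the init function and the registration table are renamed along with everything else. Once the new namespace is loaded, the registered `.Call` routines can be listed from R; a sketch:

    library(spiderbar)
    getDLLRegisteredRoutines("spiderbar")$.Call
    # expected entries: _spiderbar_rep_parse, _spiderbar_rep_crawl_delays,
    # _spiderbar_sitemaps, _spiderbar_rep_as_string, _spiderbar_rep_path_allowed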

tests/test-all.R (2)

@@ -1,3 +1,3 @@
 library(testthat)
 library(robotstxt)
-test_check("rep")
+test_check("spiderbar")
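
With the harness pointing at the new package name, the suite runs the same way as before; a sketch of the usual entry point, assuming devtools and testthat are available:

    devtools::test()   # from a source checkout
    # tests/test-all.R calls testthat::test_check("spiderbar") during R CMD check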

tests/testthat/test-rep.R → tests/testthat/test-spiderbar.R (10)

@@ -1,7 +1,7 @@
 context("basic functionality")
 test_that("parsing and fetch testing and sitemaps work", {
-  cdc <- paste0(readLines(system.file("extdata", "cdc-robots.txt", package="rep")), collapse="\n")
+  cdc <- paste0(readLines(system.file("extdata", "cdc-robots.txt", package="spiderbar")), collapse="\n")
   rt1 <- robxp(cdc)
   expect_that(rt1, is_a("robxp"))
@@ -9,20 +9,20 @@ test_that("parsing and fetch testing and sitemaps work", {
   expect_that(can_fetch(rt1, "/asthma/asthma_stats/default.htm", "*"), equals(TRUE))
   expect_that(can_fetch(rt1, "/_borders", "*"), equals(FALSE))
-  imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+  imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
   rt2 <- robxp(imdb)
   cd <- crawl_delays(rt2)
   expect_that(cd, is_a("data.frame"))
   expect_equal(cd$crawl_delay, c(0.1, 3.0, -1.0))
-  imdb <- readLines(system.file("extdata", "imdb-robots.txt", package="rep"))
+  imdb <- readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar"))
   rt2 <- robxp(imdb)
-  gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
+  gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
   rt3 <- robxp(gh)
-  rt3 <- robxp(file(system.file("extdata", "github-robots.txt", package="rep")))
+  rt3 <- robxp(file(system.file("extdata", "github-robots.txt", package="spiderbar")))
   expect_equal(sitemaps(rt1), "http://www.cdc.gov/niosh/sitemaps/sitemapsNIOSH.xml")
   expect_equal(sitemaps(rt2), "http://www.imdb.com/sitemap_US_index.xml.gz")