
package rename as requested by CRAN

Branch: master
Author: boB Rudis, 7 years ago
Commit: cb6f9b9989
GPG Key ID: 1D7529BE14E2BBA9 (no known key found for this signature in database)
18 changed files (changed-line counts in parentheses):

 1. DESCRIPTION (10)
 2. NAMESPACE (2)
 3. R/RcppExports.R (10)
 4. R/can-fetch.r (2)
 5. R/crawl-delay.r (4)
 6. R/robxp.r (2)
 7. R/spiderbar-package.R (8)
 8. README.Rmd (16)
 9. README.md (19)
10. man/can_fetch.Rd (2)
11. man/crawl_delays.Rd (4)
12. man/rep.Rd (15)
13. man/robxp.Rd (2)
14. man/spiderbar.Rd (16)
15. spiderbar.Rproj (0)
16. src/RcppExports.cpp (22)
17. tests/test-all.R (2)
18. tests/testthat/test-spiderbar.R (10)

DESCRIPTION (10)

@@ -1,18 +1,18 @@
-Package: rep
+Package: spiderbar
 Type: Package
-Title: Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+Title: Parse and Test Robots Exclusion Protocol Files and Rules
 Version: 0.2.0
 Date: 2017-09-23
 Author: Bob Rudis (bob@rud.is) [aut, cre], SEOmoz, Inc [aut]
 Maintainer: Bob Rudis <bob@rud.is>
 Description: The 'Robots Exclusion Protocol' <http://www.robotstxt.org/orig.html> documents
 a set of standards for allowing or excluding robot/spider crawling of different areas of
-site content. Tools are provided which wrap The 'rep-cpp` <https://github.com/seomoz/rep-cpp>
+site content. Tools are provided which wrap The 'rep-cpp' <https://github.com/seomoz/rep-cpp>
 C++ library for processing these 'robots.txt' files.
 SystemRequirements: C++11
 NeedsCompilation: yes
-URL: https://github.com/hrbrmstr/rep
-BugReports: https://github.com/hrbrmstr/rep/issues
+URL: https://github.com/hrbrmstr/spiderbar
+BugReports: https://github.com/hrbrmstr/spiderbar/issues
 License: MIT + file LICENSE
 Suggests:
 testthat,
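The Package field is the name the rest of the machinery keys off: the useDynLib() entry in NAMESPACE, the `_<pkg>_`-prefixed symbols in the Rcpp glue, the roxygen-generated man pages, and the test_check() call further down all have to agree with it, which is why this one-field rename fans out across the whole tree. A minimal sketch (not part of the commit) of reading the renamed metadata back as a sanity check, assuming the package root is the working directory:

``` r
# Read DESCRIPTION back and confirm the fields touched by the rename.
desc <- read.dcf("DESCRIPTION", fields = c("Package", "Title", "URL", "BugReports"))
stopifnot(desc[, "Package"] == "spiderbar")
print(desc)
```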

NAMESPACE (2)

@@ -6,4 +6,4 @@ export(crawl_delays)
 export(robxp)
 export(sitemaps)
 importFrom(Rcpp,sourceCpp)
-useDynLib(rep, .registration=TRUE)
+useDynLib(spiderbar, .registration=TRUE)

R/RcppExports.R (10)

@@ -6,7 +6,7 @@
 #' @noRd
 #'
 rep_parse <- function(content) {
-.Call(`_rep_rep_parse`, content)
+.Call(`_spiderbar_rep_parse`, content)
 }
 #' Get delays
@@ -14,7 +14,7 @@ rep_parse <- function(content) {
 #' @noRd
 #'
 rep_crawl_delays <- function(xp) {
-.Call(`_rep_rep_crawl_delays`, xp)
+.Call(`_spiderbar_rep_crawl_delays`, xp)
 }
 #' Retrieve a character vector of sitemaps from a parsed robots.txt object
@@ -28,7 +28,7 @@ rep_crawl_delays <- function(xp) {
 #' rt <- robxp(imdb)
 #' sitemaps(rt)
 sitemaps <- function(xp) {
-.Call(`_rep_sitemaps`, xp)
+.Call(`_spiderbar_sitemaps`, xp)
 }
 #' Retrieve a character vector of sitemaps from a parsed robots.txt object
@@ -36,7 +36,7 @@ sitemaps <- function(xp) {
 #' @noRd
 #'
 rep_as_string <- function(xp) {
-.Call(`_rep_rep_as_string`, xp)
+.Call(`_spiderbar_rep_as_string`, xp)
 }
 #' Path allowed
@@ -44,6 +44,6 @@ rep_as_string <- function(xp) {
 #' @noRd
 #'
 rep_path_allowed <- function(xp, path, agent = "*") {
-.Call(`_rep_rep_path_allowed`, xp, path, agent)
+.Call(`_spiderbar_rep_path_allowed`, xp, path, agent)
 }
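R/RcppExports.R and src/RcppExports.cpp are generated files, and the `_rep_*` to `_spiderbar_*` prefix change simply tracks the Package name in DESCRIPTION, so they do not need hand-editing. A sketch of regenerating them after the rename, assuming it is run from the package root:

``` r
# Rcpp::compileAttributes() rewrites R/RcppExports.R and src/RcppExports.cpp,
# prefixing each registered symbol with the package name from DESCRIPTION.
Rcpp::compileAttributes(pkgdir = ".", verbose = TRUE)
```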

R/can-fetch.r (2)

@@ -10,7 +10,7 @@
 #' @param user_agent user agent to test
 #' @export
 #' @examples
-#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
+#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
 #' gh_rt <- robxp(gh)
 #'
 #' can_fetch(gh_rt, "/humans.txt", "*") # TRUE

R/crawl-delay.r (4)

@@ -6,11 +6,11 @@
 #' @note `-1` will be returned for any listed agent _without_ a crawl delay setting
 #' @export
 #' @examples
-#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
+#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
 #' gh_rt <- robxp(gh)
 #' crawl_delays(gh_rt)
 #'
-#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
 #' imdb_rt <- robxp(imdb)
 #' crawl_delays(imdb_rt)
 crawl_delays <- function(obj) {

R/robxp.r (2)

@@ -9,7 +9,7 @@
 #' will be concatenated into a single string and parsed and the connection will be closed.
 #' @export
 #' @examples
-#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
 #' rt <- robxp(imdb)
 robxp <- function(x) {
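The examples above read the bundled robots.txt files, but robxp() also accepts robots.txt rules passed as a single string, which gives a self-contained way to exercise the exported functions touched by this rename. A sketch, with expected results as comments (assumes spiderbar is installed):

``` r
library(spiderbar)

# Parse an inline robots.txt (a single character string works)
rt <- robxp("User-agent: *\nCrawl-delay: 5\nDisallow: /private/\nSitemap: https://example.com/map.xml")

can_fetch(rt, "/index.html", "*")        # expected TRUE
can_fetch(rt, "/private/page.html", "*") # expected FALSE
crawl_delays(rt)                         # data.frame of agent / crawl_delay values
sitemaps(rt)                             # expected "https://example.com/map.xml"
```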

R/rep-package.R → R/spiderbar-package.R (8)

@@ -1,14 +1,14 @@
-#' Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+#' Parse and Test Robots Exclusion Protocol Files and Rules
 #'
 #' The 'Robots Exclusion Protocol' (<http://www.robotstxt.org/orig.html>) documents a set
 #' of standards for allowing or excluding robot/spider crawling of different areas of
-#' site content. Tools are provided which wrap The 'rep-cpp` <https://github.com/seomoz/rep-cpp>
+#' site content. Tools are provided which wrap The `rep-cpp` <https://github.com/seomoz/rep-cpp>
 #' C++ library for processing these `robots.txt`` files.
 #'
 #' @md
-#' @name rep
+#' @name spiderbar
 #' @docType package
 #' @author Bob Rudis (bob@@rud.is)
-#' @useDynLib rep, .registration=TRUE
+#' @useDynLib spiderbar, .registration=TRUE
 #' @importFrom Rcpp sourceCpp
 NULL

README.Rmd (16)

@@ -2,13 +2,13 @@
 output: rmarkdown::github_document
 ---
-[![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep)
-[![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep)
-![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)
+<!-- [![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) -->
+<!-- [![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) -->
+<!-- ![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg) -->
-# rep
+# spiderbar
-Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+Parse and Test Robots Exclusion Protocol Files and Rules
 ## Description
@@ -29,7 +29,7 @@ The following functions are implemented:
 ## Installation
 ```{r eval=FALSE}
-devtools::install_github("hrbrmstr/rep")
+devtools::install_github("hrbrmstr/spiderbar")
 ```
 ```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
@@ -39,11 +39,11 @@ options(width=120)
 ## Usage
 ```{r message=FALSE, warning=FALSE, error=FALSE}
-library(rep)
+library(spiderbar)
 library(robotstxt)
 # current verison
-packageVersion("rep")
+packageVersion("spiderbar")
 # use helpers from the robotstxt package
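README.md is knit from README.Rmd (note the rmarkdown::github_document output format at the top of the hunk), which is why the two files change in lockstep here and why the rendered date line in README.md below moves forward a day. Regenerating it is one call, assuming rmarkdown and the packages used in the chunks are installed:

``` r
# Re-knit the README; the github_document format writes README.md alongside it.
rmarkdown::render("README.Rmd")
```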

README.md (19)

@@ -1,10 +1,11 @@
-[![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) [![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) ![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)
+<!-- [![Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) -->
+<!-- [![Build status](https://ci.appveyor.com/api/projects/status/dakiw5y0xpq1m3bk?svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) -->
+<!-- ![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg) -->
+spiderbar
+=========
-rep
-===
-Tools to Parse and Test Robots Exclusion Protocol Files and Rules
+Parse and Test Robots Exclusion Protocol Files and Rules
 Description
 -----------
@@ -28,18 +29,18 @@ Installation
 ------------
 ``` r
-devtools::install_github("hrbrmstr/rep")
+devtools::install_github("hrbrmstr/spiderbar")
 ```
 Usage
 -----
 ``` r
-library(rep)
+library(spiderbar)
 library(robotstxt)
 # current verison
-packageVersion("rep")
+packageVersion("spiderbar")
 ```
 ## [1] '0.2.0'
@@ -154,7 +155,7 @@ library(testthat)
 date()
 ```
-## [1] "Sat Sep 23 13:07:16 2017"
+## [1] "Sun Sep 24 08:28:30 2017"
 ``` r
 test_dir("tests/")

man/can_fetch.Rd (2)

@@ -19,7 +19,7 @@ return a logical vector indicating whether you have permission to fetch the cont
 at the respective path.
 }
 \examples{
-gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n")
+gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\\n")
 gh_rt <- robxp(gh)
 can_fetch(gh_rt, "/humans.txt", "*") # TRUE

man/crawl_delays.Rd (4)

@@ -19,11 +19,11 @@ Retrive all agent crawl delay values in a \code{robxp} \code{robots.txt} object
 \code{-1} will be returned for any listed agent \emph{without} a crawl delay setting
 }
 \examples{
-gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n")
+gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\\n")
 gh_rt <- robxp(gh)
 crawl_delays(gh_rt)
-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\\n")
 imdb_rt <- robxp(imdb)
 crawl_delays(imdb_rt)
 }

man/rep.Rd (15, file deleted)

@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/rep-package.R
-\docType{package}
-\name{rep}
-\alias{rep}
-\alias{rep-package}
-\title{Tools to Parse and Test Robots Exclusion Protocol Files and Rules}
-\description{
-The 'Robots Exclusion Protocol' (\url{http://www.robotstxt.org/orig.html}) documents a set
-of standards for allowing or excluding robot/spider crawling of different areas of
-site content. Tools are provided which wrap The 'rep-cpp\code{<https://github.com/seomoz/rep-cpp> C++ library for processing these}robots.txt`` files.
-}
-\author{
-Bob Rudis (bob@rud.is)
-}

man/robxp.Rd (2)

@@ -17,6 +17,6 @@ This function takes in a single element character vector and parses it into
 a `robxp` object.
 }
 \examples{
-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\\n")
 rt <- robxp(imdb)
 }

man/spiderbar.Rd (16, new file)

@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/spiderbar-package.R
+\docType{package}
+\name{spiderbar}
+\alias{spiderbar}
+\alias{spiderbar-package}
+\title{Parse and Test Robots Exclusion Protocol Files and Rules}
+\description{
+The 'Robots Exclusion Protocol' (\url{http://www.robotstxt.org/orig.html}) documents a set
+of standards for allowing or excluding robot/spider crawling of different areas of
+site content. Tools are provided which wrap The \code{rep-cpp} \url{https://github.com/seomoz/rep-cpp}
+C++ library for processing these `robots.txt`` files.
+}
+\author{
+Bob Rudis (bob@rud.is)
+}
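As the "Generated by roxygen2: do not edit by hand" header notes, man/rep.Rd disappearing and man/spiderbar.Rd appearing is just the output of re-running roxygen after the @name and @useDynLib tags changed in R/spiderbar-package.R; the NAMESPACE change above comes from the same step. A sketch of that regeneration (either call works, assuming roxygen2 or devtools is installed):

``` r
# Rebuild NAMESPACE and the man/*.Rd files from the roxygen comments.
roxygen2::roxygenise()
# or, from a devtools workflow:
# devtools::document()
```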

rep.Rproj → spiderbar.Rproj (0, renamed)

src/RcppExports.cpp (22)

@@ -7,7 +7,7 @@ using namespace Rcpp;
 // rep_parse
 SEXP rep_parse(std::string content);
-RcppExport SEXP _rep_rep_parse(SEXP contentSEXP) {
+RcppExport SEXP _spiderbar_rep_parse(SEXP contentSEXP) {
 BEGIN_RCPP
 Rcpp::RObject rcpp_result_gen;
 Rcpp::RNGScope rcpp_rngScope_gen;
@@ -18,7 +18,7 @@ END_RCPP
 }
 // rep_crawl_delays
 DataFrame rep_crawl_delays(SEXP xp);
-RcppExport SEXP _rep_rep_crawl_delays(SEXP xpSEXP) {
+RcppExport SEXP _spiderbar_rep_crawl_delays(SEXP xpSEXP) {
 BEGIN_RCPP
 Rcpp::RObject rcpp_result_gen;
 Rcpp::RNGScope rcpp_rngScope_gen;
@@ -29,7 +29,7 @@ END_RCPP
 }
 // sitemaps
 std::vector<std::string> sitemaps(SEXP xp);
-RcppExport SEXP _rep_sitemaps(SEXP xpSEXP) {
+RcppExport SEXP _spiderbar_sitemaps(SEXP xpSEXP) {
 BEGIN_RCPP
 Rcpp::RObject rcpp_result_gen;
 Rcpp::RNGScope rcpp_rngScope_gen;
@@ -40,7 +40,7 @@ END_RCPP
 }
 // rep_as_string
 std::string rep_as_string(SEXP xp);
-RcppExport SEXP _rep_rep_as_string(SEXP xpSEXP) {
+RcppExport SEXP _spiderbar_rep_as_string(SEXP xpSEXP) {
 BEGIN_RCPP
 Rcpp::RObject rcpp_result_gen;
 Rcpp::RNGScope rcpp_rngScope_gen;
@@ -51,7 +51,7 @@ END_RCPP
 }
 // rep_path_allowed
 bool rep_path_allowed(SEXP xp, std::string path, std::string agent);
-RcppExport SEXP _rep_rep_path_allowed(SEXP xpSEXP, SEXP pathSEXP, SEXP agentSEXP) {
+RcppExport SEXP _spiderbar_rep_path_allowed(SEXP xpSEXP, SEXP pathSEXP, SEXP agentSEXP) {
 BEGIN_RCPP
 Rcpp::RObject rcpp_result_gen;
 Rcpp::RNGScope rcpp_rngScope_gen;
@@ -64,15 +64,15 @@ END_RCPP
 }
 static const R_CallMethodDef CallEntries[] = {
-{"_rep_rep_parse", (DL_FUNC) &_rep_rep_parse, 1},
-{"_rep_rep_crawl_delays", (DL_FUNC) &_rep_rep_crawl_delays, 1},
-{"_rep_sitemaps", (DL_FUNC) &_rep_sitemaps, 1},
-{"_rep_rep_as_string", (DL_FUNC) &_rep_rep_as_string, 1},
-{"_rep_rep_path_allowed", (DL_FUNC) &_rep_rep_path_allowed, 3},
+{"_spiderbar_rep_parse", (DL_FUNC) &_spiderbar_rep_parse, 1},
+{"_spiderbar_rep_crawl_delays", (DL_FUNC) &_spiderbar_rep_crawl_delays, 1},
+{"_spiderbar_sitemaps", (DL_FUNC) &_spiderbar_sitemaps, 1},
+{"_spiderbar_rep_as_string", (DL_FUNC) &_spiderbar_rep_as_string, 1},
+{"_spiderbar_rep_path_allowed", (DL_FUNC) &_spiderbar_rep_path_allowed, 3},
 {NULL, NULL, 0}
 };
-RcppExport void R_init_rep(DllInfo *dll) {
+RcppExport void R_init_spiderbar(DllInfo *dll) {
 R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
 R_useDynamicSymbols(dll, FALSE);
 }
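R locates the R_init_<package>() entry point by name when the shared object loads, so R_init_rep() had to become R_init_spiderbar() to match the new package and shared-object name; the CallEntries table is what the .registration=TRUE in useDynLib() refers to. A sketch of checking the registration once the package is reinstalled under its new name:

``` r
# Confirm the renamed native routines are registered after reinstalling.
library(spiderbar)
getDLLRegisteredRoutines("spiderbar")$.Call
# Expected to list "_spiderbar_rep_parse", "_spiderbar_sitemaps", and friends.
```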

tests/test-all.R (2)

@@ -1,3 +1,3 @@
 library(testthat)
 library(robotstxt)
-test_check("rep")
+test_check("spiderbar")
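The string passed to test_check() must match the installed package name, so the test driver needs the same rename as everything else. Running the suite locally, sketched under the assumption of a devtools-based workflow at the package root:

``` r
# Run the testthat suite from the source tree; R CMD check reaches the same
# tests through tests/test-all.R and test_check("spiderbar").
devtools::test()
```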

tests/testthat/test-rep.R → tests/testthat/test-spiderbar.R (10)

@@ -1,7 +1,7 @@
 context("basic functionality")
 test_that("parsing and fetch testing and sitemaps work", {
-cdc <- paste0(readLines(system.file("extdata", "cdc-robots.txt", package="rep")), collapse="\n")
+cdc <- paste0(readLines(system.file("extdata", "cdc-robots.txt", package="spiderbar")), collapse="\n")
 rt1 <- robxp(cdc)
 expect_that(rt1, is_a("robxp"))
@@ -9,20 +9,20 @@ test_that("parsing and fetch testing and sitemaps work", {
 expect_that(can_fetch(rt1, "/asthma/asthma_stats/default.htm", "*"), equals(TRUE))
 expect_that(can_fetch(rt1, "/_borders", "*"), equals(FALSE))
-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
 rt2 <- robxp(imdb)
 cd <- crawl_delays(rt2)
 expect_that(cd, is_a("data.frame"))
 expect_equal(cd$crawl_delay, c(0.1, 3.0, -1.0))
-imdb <- readLines(system.file("extdata", "imdb-robots.txt", package="rep"))
+imdb <- readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar"))
 rt2 <- robxp(imdb)
-gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
+gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
 rt3 <- robxp(gh)
-rt3 <- robxp(file(system.file("extdata", "github-robots.txt", package="rep")))
+rt3 <- robxp(file(system.file("extdata", "github-robots.txt", package="spiderbar")))
 expect_equal(sitemaps(rt1), "http://www.cdc.gov/niosh/sitemaps/sitemapsNIOSH.xml")
 expect_equal(sitemaps(rt2), "http://www.imdb.com/sitemap_US_index.xml.gz")