From 4277d704f7aae7ae01f3f9fd2bd27b1749d48085 Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Sat, 23 Sep 2017 09:15:41 -0400 Subject: [PATCH] Getting closer to CRAN (ref #1) --- DESCRIPTION | 4 +- NEWS.md | 4 + R/can-fetch.r | 22 + R/cd.r | 11 +- R/rep.r | 25 +- README.Rmd | 36 +- README.md | 95 ++- inst/extdata/cdc-robots.txt | 46 ++ inst/extdata/github-robots.txt | 1375 +++++++++++++++++++++++++++++++++++++ inst/extdata/imdb-robots.txt | 263 +++++++ inst/extdata/wikipedia-robots.txt | 703 +++++++++++++++++++ man/can_fetch.Rd | 10 +- man/crawl_delays.Rd | 19 +- man/robxp.Rd | 5 +- src/RcppExports.cpp | 2 +- src/repmain.cpp | 8 +- tests/testthat/test-rep.R | 10 +- 17 files changed, 2580 insertions(+), 58 deletions(-) create mode 100644 R/can-fetch.r create mode 100644 inst/extdata/cdc-robots.txt create mode 100644 inst/extdata/github-robots.txt create mode 100644 inst/extdata/imdb-robots.txt create mode 100644 inst/extdata/wikipedia-robots.txt diff --git a/DESCRIPTION b/DESCRIPTION index 2b400ee..6c2b1b2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: rep Type: Package Title: Tools to Parse and Test Robots Exclusion Protocol Files and Rules -Version: 0.1.0 -Date: 2017-08-14 +Version: 0.2.0 +Date: 2017-09-23 Author: Bob Rudis (bob@rud.is) [aut, cre], SEOmoz, Inc [aut] Maintainer: Bob Rudis Description: The 'Robots Exclusion Protocol' documents diff --git a/NEWS.md b/NEWS.md index 9b4679b..2383281 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,2 +1,6 @@ +0.2.0 +* Added crawl delay extraction +* Made all examples local so CRAN doesn't have to hit actual websites + 0.1.0 * Initial release diff --git a/R/can-fetch.r b/R/can-fetch.r new file mode 100644 index 0000000..9b93e4b --- /dev/null +++ b/R/can-fetch.r @@ -0,0 +1,22 @@ +#' Test URL path against robots.txt +#' +#' @md +#' @param obj `robxp` object +#' @param path path to test +#' @param user_agent user agent to test +#' @export +#' @examples +#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n") +#' gh_rt <- robxp(gh) +#' can_fetch(gh_rt, "/humans.txt", "*") # TRUE +#' can_fetch(gh_rt, "/login", "*") # FALSE +#' can_fetch(gh_rt, "/oembed", "CCBot") # FALSE +can_fetch <- function(obj, path="/", user_agent="*") { + + if (inherits(obj, "robxp")) { + rep_path_allowed(obj, path, user_agent) + } else { + return(NULL) + } + +} \ No newline at end of file diff --git a/R/cd.r b/R/cd.r index 1f64a82..69c7ae7 100644 --- a/R/cd.r +++ b/R/cd.r @@ -1,9 +1,18 @@ -#' Get all crawl_delay +#' Get all agent crawl delay values #' #' @md #' @param obj `robxp` object +#' @return data frame of agents and their crawl delays +#' @note `-1` will be returned for any listed agent without a crawl delay setting #' @export #' @examples +#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n") +#' gh_rt <- robxp(gh) +#' crawl_delays(gh_rt) +#' +#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n") +#' imdb_rt <- robxp(imdb) +#' crawl_delays(imdb_rt) crawl_delays <- function(obj) { if (inherits(obj, "robxp")) { diff --git a/R/rep.r b/R/rep.r index 490dbe7..0a6cd8e 100644 --- a/R/rep.r +++ b/R/rep.r @@ -3,9 +3,8 @@ #' @param x atomic character vector containing a complete robots.txt file #' @export #' @examples -#' library(robotstxt) -#' can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE -#' can_fetch(rt, "/_borders", "*") # FALSE +#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n") +#' rt <- robxp(imdb) robxp <- function(x) { robxp <- rep_parse(x) @@ -15,26 +14,6 @@ robxp <- function(x) { } -#' Test URL path against robots.txt -#' -#' @md -#' @param obj `robxp` object -#' @param path path to test -#' @param user_agent user agent to test -#' @export -#' @examples -#' library(robotstxt) -#' can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE -#' can_fetch(rt, "/_borders", "*") # FALSE -can_fetch <- function(obj, path="/", user_agent="*") { - - if (inherits(obj, "robxp")) { - rep_path_allowed(obj, path, user_agent) - } else { - return(NULL) - } - -} #' Custom printer for 'robexp' objects #' diff --git a/README.Rmd b/README.Rmd index 57e060e..77b7cff 100644 --- a/README.Rmd +++ b/README.Rmd @@ -2,23 +2,31 @@ output: rmarkdown::github_document --- -[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) -[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) -[![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)](https://codecov.io/github/hrbrmstr/rep?branch=master) +[Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master) | +[AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true) | +[Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg) -`rep` : Tools to Parse and Test Robots Exclusion Protocol Files and Rules +# rep + +Tools to Parse and Test Robots Exclusion Protocol Files and Rules + +## Description The 'Robots Exclusion Protocol' documents a set of standards for allowing or excluding robot/spider crawling of different areas of site content. Tools are provided which wrap The 'rep-cpp` C++ library for processing these 'robots.txt' files. - [`rep-cpp`](https://github.com/seomoz/rep-cpp) - [`url-cpp`](https://github.com/seomoz/url-cpp) +## Tools + The following functions are implemented: -- `robxp`: Create a robots.txt object - `can_fetch`: Test URL path against robots.txt +- `crawl_delays`: Get all agent crawl delay values +- `print.robxp`: Custom printer for 'robexp' objects +- `robxp`: Create a robots.txt object -### Installation +## Installation ```{r eval=FALSE} devtools::install_github("hrbrmstr/rep") @@ -28,7 +36,7 @@ devtools::install_github("hrbrmstr/rep") options(width=120) ``` -### Usage +## Usage ```{r message=FALSE, warning=FALSE, error=FALSE} library(rep) @@ -44,9 +52,19 @@ print(rt) can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") can_fetch(rt, "/_borders", "*") + +gh_rt <- robxp(robotstxt::get_robotstxt("github.com")) +can_fetch(gh_rt, "/humans.txt", "*") # TRUE +can_fetch(gh_rt, "/login", "*") # FALSE +can_fetch(gh_rt, "/oembed", "CCBot") # FALSE + +crawl_delays(gh_rt) + +imdb_rt <- robxp(robotstxt::get_robotstxt("imdb.com")) +crawl_delays(imdb_rt) ``` -### Test Results +## Test Results ```{r message=FALSE, warning=FALSE, error=FALSE} library(rep) @@ -57,6 +75,6 @@ date() test_dir("tests/") ``` -### Code of Conduct +## Code of Conduct Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. \ No newline at end of file diff --git a/README.md b/README.md index eddf854..cf01c55 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,38 @@ -[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) [![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)](https://codecov.io/github/hrbrmstr/rep?branch=master) +[Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master) | [AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true) | [Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg) -`rep` : Tools to Parse and Test Robots Exclusion Protocol Files and Rules +rep +=== + +Tools to Parse and Test Robots Exclusion Protocol Files and Rules + +Description +----------- The 'Robots Exclusion Protocol' documents a set of standards for allowing or excluding robot/spider crawling of different areas of site content. Tools are provided which wrap The 'rep-cpp\` C++ library for processing these 'robots.txt' files. - [`rep-cpp`](https://github.com/seomoz/rep-cpp) - [`url-cpp`](https://github.com/seomoz/url-cpp) +Tools +----- + The following functions are implemented: -- `robxp`: Create a robots.txt object - `can_fetch`: Test URL path against robots.txt +- `crawl_delays`: Get all agent crawl delay values +- `print.robxp`: Custom printer for 'robexp' objects +- `robxp`: Create a robots.txt object -### Installation +Installation +------------ ``` r devtools::install_github("hrbrmstr/rep") ``` -### Usage +Usage +----- ``` r library(rep) @@ -29,7 +42,7 @@ library(robotstxt) packageVersion("rep") ``` - ## [1] '0.1.0' + ## [1] '0.2.0' ``` r rt <- robxp(get_robotstxt("https://cdc.gov")) @@ -51,7 +64,68 @@ can_fetch(rt, "/_borders", "*") ## [1] FALSE -### Test Results +``` r +gh_rt <- robxp(robotstxt::get_robotstxt("github.com")) +can_fetch(gh_rt, "/humans.txt", "*") # TRUE +``` + + ## [1] TRUE + +``` r +can_fetch(gh_rt, "/login", "*") # FALSE +``` + + ## [1] FALSE + +``` r +can_fetch(gh_rt, "/oembed", "CCBot") # FALSE +``` + + ## [1] FALSE + +``` r +crawl_delays(gh_rt) +``` + + ## agent crawl_delay + ## 1 yandex -1 + ## 2 twitterbot -1 + ## 3 ccbot -1 + ## 4 mail.ru_bot -1 + ## 5 telefonica -1 + ## 6 slurp -1 + ## 7 seznambot -1 + ## 8 sanddollar -1 + ## 9 coccoc -1 + ## 10 ia_archiver -1 + ## 11 swiftbot -1 + ## 12 red-app-gsa-p-one -1 + ## 13 naverbot -1 + ## 14 msnbot -1 + ## 15 teoma -1 + ## 16 * -1 + ## 17 intuitgsacrawler -1 + ## 18 bingbot -1 + ## 19 daumoa -1 + ## 20 googlebot -1 + ## 21 httrack -1 + ## 22 duckduckbot -1 + ## 23 etaospider -1 + ## 24 rogerbot -1 + ## 25 dotbot -1 + +``` r +imdb_rt <- robxp(robotstxt::get_robotstxt("imdb.com")) +crawl_delays(imdb_rt) +``` + + ## agent crawl_delay + ## 1 slurp 0.1 + ## 2 scoutjet 3.0 + ## 3 * -1.0 + +Test Results +------------ ``` r library(rep) @@ -60,17 +134,18 @@ library(testthat) date() ``` - ## [1] "Mon Aug 14 16:35:08 2017" + ## [1] "Sat Sep 23 09:14:02 2017" ``` r test_dir("tests/") ``` ## testthat results ======================================================================================================== - ## OK: 3 SKIPPED: 0 FAILED: 0 + ## OK: 5 SKIPPED: 0 FAILED: 0 ## ## DONE =================================================================================================================== -### Code of Conduct +Code of Conduct +--------------- Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. diff --git a/inst/extdata/cdc-robots.txt b/inst/extdata/cdc-robots.txt new file mode 100644 index 0000000..a09b60c --- /dev/null +++ b/inst/extdata/cdc-robots.txt @@ -0,0 +1,46 @@ +# Ignore FrontPage files +User-agent: * +Disallow: /_borders +Disallow: /_derived +Disallow: /_fpclass +Disallow: /_overlay +Disallow: /_private +Disallow: /_themes +Disallow: /_vti_bin +Disallow: /_vti_cnf +Disallow: /_vti_log +Disallow: /_vti_map +Disallow: /_vti_pvt +Disallow: /_vti_txt + +# Do not index the following URLs +Disallow: /travel/ +Disallow: /flu/espanol/ +Disallow: /migration/ +Disallow: /Features/SpinaBifidaProgram/ +Disallow: /concussion/HeadsUp/training/ + +# Don't spider search pages +Disallow: /search.do + +# Don't spider email-this-page pages +Disallow: /email.do + +# Don't spider printer-friendly versions of pages +Disallow: /print.do + +# Rover is a bad dog +User-agent: Roverbot +Disallow: / + +# EmailSiphon is a hunter/gatherer which extracts email addresses for spam-mailers to use +User-agent: EmailSiphon +Disallow: / + +# Exclude MindSpider since it appears to be ill-behaved +User-agent: MindSpider +Disallow: / + +# Sitemap link per CR14586 +Sitemap: http://www.cdc.gov/niosh/sitemaps/sitemapsNIOSH.xml + diff --git a/inst/extdata/github-robots.txt b/inst/extdata/github-robots.txt new file mode 100644 index 0000000..6ec030e --- /dev/null +++ b/inst/extdata/github-robots.txt @@ -0,0 +1,1375 @@ +# If you would like to crawl GitHub contact us at support@github.com. +# We also provide an extensive API: https://developer.github.com/ + +User-agent: CCBot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: coccoc +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Daumoa +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: dotbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: duckduckbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: EtaoSpider +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Googlebot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: HTTrack +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: ia_archiver +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: IntuitGSACrawler +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Mail.RU_Bot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: msnbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Bingbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: naverbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: red-app-gsa-p-one +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: rogerbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: SandDollar +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: seznambot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Slurp +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Swiftbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Telefonica +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: teoma +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Twitterbot +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + +User-agent: Yandex +Allow: /*/*/tree/master +Allow: /*/*/blob/master +Disallow: /ekansa/Open-Context-Data +Disallow: /ekansa/opencontext-* +Disallow: /*/*/pulse +Disallow: /*/*/tree/* +Disallow: /*/*/blob/* +Disallow: /*/*/wiki/*/* +Disallow: /gist/*/*/* +Disallow: /oembed +Disallow: /*/forks +Disallow: /*/stars +Disallow: /*/download +Disallow: /*/revisions +Disallow: /*/*/issues/new +Disallow: /*/*/issues/search +Disallow: /*/*/commits/*/* +Disallow: /*/*/commits/*?author +Disallow: /*/*/commits/*?path +Disallow: /*/*/branches +Disallow: /*/*/tags +Disallow: /*/*/contributors +Disallow: /*/*/comments +Disallow: /*/*/stargazers +Disallow: /*/*/search +Disallow: /*/tarball/ +Disallow: /*/zipball/ +Disallow: /*/*/archive/ +Disallow: /raw/* +Disallow: /*/followers +Disallow: /*/following +Disallow: /stars/* +Disallow: /*/blame/ +Disallow: /*/watchers +Disallow: /*/network +Disallow: /*/graphs +Disallow: /*/raw/ +Disallow: /*/compare/ +Disallow: /*/cache/ +Disallow: /*/*/blame/ +Disallow: /*/*/watchers +Disallow: /*/*/network +Disallow: /*/*/graphs +Disallow: /*/*/raw/ +Disallow: /*/*/compare/ +Disallow: /*/*/cache/ +Disallow: /.git/ +Disallow: /*/.git/ +Disallow: /*.git$ +Disallow: /*/sitemap.xml +Disallow: /search/advanced +Disallow: /search +Disallow: /*q= +Disallow: /*.atom +Disallow: /login + + +User-agent: * +Allow: /humans.txt +Disallow: / diff --git a/inst/extdata/imdb-robots.txt b/inst/extdata/imdb-robots.txt new file mode 100644 index 0000000..f250670 --- /dev/null +++ b/inst/extdata/imdb-robots.txt @@ -0,0 +1,263 @@ +# robots.txt for IMDb properties +# +# +# Limit ScoutJet's crawl rate +# +User-agent: ScoutJet +Crawl-delay: 3 +# +# +# Yahoo! +User-agent: Slurp +Crawl-delay: .1 +Disallow: /tvschedule +Disallow: /ActorSearch +Disallow: /ActressSearch +Disallow: /AddRecommendation +Disallow: /ads/ +Disallow: /AlternateVersions +Disallow: /AName +Disallow: /Awards +Disallow: /BAgent +Disallow: /Ballot/ +Disallow: /BornInYear +Disallow: /BornWhere +Disallow: /BPublicity +Disallow: /BQuotes +Disallow: /BTrivia +Disallow: /BusinessThisDay +Disallow: /BWorks +Disallow: /careers +Disallow: /help/show_leaf?careeratimdb +Disallow: /CommentsAuthor +Disallow: /CommentsEnter +Disallow: /CommentsIndex +Disallow: /Companies +Disallow: /CrazyCredits +Disallow: /Credits +Disallow: /DiedInYear +Disallow: /DiedWhere +Disallow: /DVD +Disallow: /ExciteTitle +Disallow: /Find +Disallow: /FName +Disallow: /GName +Disallow: /Guests +Disallow: /harvest_me +Disallow: /HelpPage +Disallow: /Icons/ +Disallow: /JointVentures +Disallow: /Laserdisc +Disallow: /List +Disallow: /Literature +Disallow: /Locations +Disallow: /LocationTree +Disallow: /Lookup +Disallow: /M/ +Disallow: /Maltin +Disallow: /MarriedInYear +Disallow: /MetaSearch +Disallow: /Mlinks +Disallow: /More +Disallow: /Movies +Disallow: /Movies/ +Disallow: /MyMovies +Disallow: /mymovies/ +Disallow: /name_pick_n_mix +Disallow: /Nsearch +Disallow: /NUrls +Disallow: /OnThisDay +Disallow: /Ontv +Disallow: /OnTV +Disallow: /Overlap +Disallow: /Pawards +Disallow: /pick_n_mix +Disallow: /PName +Disallow: /Posters +Disallow: /prepare_data +Disallow: /Psales +Disallow: /Quiz +Disallow: /r/ +Disallow: /ra/ +Disallow: /Ratings +Disallow: /rd/ +Disallow: /Recommendations +Disallow: /register +Disallow: /ReleaseDates +Disallow: /ReleasedInYear +Disallow: /Reviews +Disallow: /rg/ +Disallow: /ri/ +Disallow: /RName +Disallow: /Sales +Disallow: /SearchAwards +Disallow: /SearchBios +Disallow: /SearchBusiness +Disallow: /SearchCrazy +Disallow: /SearchDVD +Disallow: /SearchGoofs +Disallow: /SearchLaserdisc +Disallow: /SearchLiterature +Disallow: /SearchPlots +Disallow: /SearchPlotWriters +Disallow: /SearchQuotes +Disallow: /SearchRatios +Disallow: /SearchSongs +Disallow: /SearchTaglines +Disallow: /SearchTechnical +Disallow: /SearchTrivia +Disallow: /SearchVersions +Disallow: /ShowAll +Disallow: /Showing +Disallow: /SName +Disallow: /Soundtracks +Disallow: /Taglines +Disallow: /Tawards +Disallow: /Technical +Disallow: /tiger_redirect +Disallow: /Title/ASIN +Disallow: /TitleBrowse +Disallow: /Trailers +Disallow: /Tsearch +Disallow: /TUrls +Disallow: /VName +Disallow: /Vote +Disallow: /WorkedWith +Disallow: /updates +Disallow: /board +Disallow: /boards +Disallow: /name/*/board +Disallow: /title/*/board +Disallow: /character/*/select-* +Disallow: /character/*/update +# +# Everyone else +# +User-agent: * +Disallow: /tvschedule +Disallow: /ActorSearch +Disallow: /ActressSearch +Disallow: /AddRecommendation +Disallow: /ads/ +Disallow: /AlternateVersions +Disallow: /AName +Disallow: /Awards +Disallow: /BAgent +Disallow: /Ballot/ +Disallow: /BornInYear +Disallow: /BornWhere +Disallow: /BPublicity +Disallow: /BQuotes +Disallow: /BTrivia +Disallow: /BusinessThisDay +Disallow: /BWorks +Disallow: /careers +Disallow: /help/show_leaf?careeratimdb +Disallow: /CommentsAuthor +Disallow: /CommentsEnter +Disallow: /CommentsIndex +Disallow: /Companies +Disallow: /CrazyCredits +Disallow: /Credits +Disallow: /DiedInYear +Disallow: /DiedWhere +Disallow: /DVD +Disallow: /ExciteTitle +Disallow: /Find +Disallow: /FName +Disallow: /GName +Disallow: /Guests +Disallow: /harvest_me +Disallow: /HelpPage +Disallow: /Icons/ +Disallow: /JointVentures +Disallow: /Laserdisc +Disallow: /List +Disallow: /Literature +Disallow: /Locations +Disallow: /LocationTree +Disallow: /Lookup +Disallow: /M/ +Disallow: /Maltin +Disallow: /MarriedInYear +Disallow: /MetaSearch +Disallow: /Mlinks +Disallow: /More +Disallow: /Movies +Disallow: /Movies/ +Disallow: /MyMovies +Disallow: /mymovies/ +Disallow: /name_pick_n_mix +Disallow: /Nsearch +Disallow: /NUrls +Disallow: /OnThisDay +Disallow: /Ontv +Disallow: /OnTV +Disallow: /Overlap +Disallow: /Pawards +Disallow: /pick_n_mix +Disallow: /PName +Disallow: /Posters +Disallow: /prepare_data +Disallow: /Psales +Disallow: /Quiz +Disallow: /r/ +Disallow: /ra/ +Disallow: /Ratings +Disallow: /rd/ +Disallow: /Recommendations +Disallow: /register +Disallow: /ReleaseDates +Disallow: /ReleasedInYear +Disallow: /Reviews +Disallow: /rg/ +Disallow: /ri/ +Disallow: /RName +Disallow: /Sales +Disallow: /SearchAwards +Disallow: /SearchBios +Disallow: /SearchBusiness +Disallow: /SearchCrazy +Disallow: /SearchDVD +Disallow: /SearchGoofs +Disallow: /SearchLaserdisc +Disallow: /SearchLiterature +Disallow: /SearchPlots +Disallow: /SearchPlotWriters +Disallow: /SearchQuotes +Disallow: /SearchRatios +Disallow: /SearchSongs +Disallow: /SearchTaglines +Disallow: /SearchTechnical +Disallow: /SearchTrivia +Disallow: /SearchVersions +Disallow: /ShowAll +Disallow: /Showing +Disallow: /SName +Disallow: /Soundtracks +Disallow: /Taglines +Disallow: /Tawards +Disallow: /Technical +Disallow: /tiger_redirect +Disallow: /Title/ASIN +Disallow: /TitleBrowse +Disallow: /Trailers +Disallow: /Tsearch +Disallow: /TUrls +Disallow: /VName +Disallow: /Vote +Disallow: /WorkedWith +Disallow: /updates +Disallow: /board +Disallow: /boards +Disallow: /name/*/board +Disallow: /title/*/board +Disallow: /user/*/boards +Disallow: /user/*/boards/ +Disallow: /lists/tt* +Disallow: /lists/nm* +Disallow: /character/*/select-* +Disallow: /character/*/update +# +# +Sitemap: http://www.imdb.com/sitemap_US_index.xml.gz diff --git a/inst/extdata/wikipedia-robots.txt b/inst/extdata/wikipedia-robots.txt new file mode 100644 index 0000000..f499fd9 --- /dev/null +++ b/inst/extdata/wikipedia-robots.txt @@ -0,0 +1,703 @@ +# robots.txt for http://www.wikipedia.org/ and friends +# +# Please note: There are a lot of pages on this site, and there are +# some misbehaved spiders out there that go _way_ too fast. If you're +# irresponsible, your access to the site may be blocked. +# + +# advertising-related bots: +User-agent: Mediapartners-Google* +Disallow: / + +# Wikipedia work bots: +User-agent: IsraBot +Disallow: + +User-agent: Orthogaffe +Disallow: + +# Crawlers that are kind enough to obey, but which we'd rather not have +# unless they're feeding search engines. +User-agent: UbiCrawler +Disallow: / + +User-agent: DOC +Disallow: / + +User-agent: Zao +Disallow: / + +# Some bots are known to be trouble, particularly those designed to copy +# entire sites. Please obey robots.txt. +User-agent: sitecheck.internetseer.com +Disallow: / + +User-agent: Zealbot +Disallow: / + +User-agent: MSIECrawler +Disallow: / + +User-agent: SiteSnagger +Disallow: / + +User-agent: WebStripper +Disallow: / + +User-agent: WebCopier +Disallow: / + +User-agent: Fetch +Disallow: / + +User-agent: Offline Explorer +Disallow: / + +User-agent: Teleport +Disallow: / + +User-agent: TeleportPro +Disallow: / + +User-agent: WebZIP +Disallow: / + +User-agent: linko +Disallow: / + +User-agent: HTTrack +Disallow: / + +User-agent: Microsoft.URL.Control +Disallow: / + +User-agent: Xenu +Disallow: / + +User-agent: larbin +Disallow: / + +User-agent: libwww +Disallow: / + +User-agent: ZyBORG +Disallow: / + +User-agent: Download Ninja +Disallow: / + +# Misbehaving: requests much too fast: +User-agent: fast +Disallow: / + +# +# Sorry, wget in its recursive mode is a frequent problem. +# Please read the man page and use it properly; there is a +# --wait option you can use to set the delay between hits, +# for instance. +# +User-agent: wget +Disallow: / + +# +# The 'grub' distributed client has been *very* poorly behaved. +# +User-agent: grub-client +Disallow: / + +# +# Doesn't follow robots.txt anyway, but... +# +User-agent: k2spider +Disallow: / + +# +# Hits many times per second, not acceptable +# http://www.nameprotect.com/botinfo.html +User-agent: NPBot +Disallow: / + +# A capture bot, downloads gazillions of pages with no public benefit +# http://www.webreaper.net/ +User-agent: WebReaper +Disallow: / + +# Wayback Machine: defaults and whether to index user-pages +# FIXME: Complete the removal of this block, per T7582. +# User-agent: archive.org_bot +# Allow: / + + +# +# Friendly, low-speed bots are welcome viewing article pages, but not +# dynamically-generated pages please. +# +# Inktomi's "Slurp" can read a minimum delay between hits; if your +# bot supports such a thing using the 'Crawl-delay' or another +# instruction, please let us know. +# +# There is a special exception for API mobileview to allow dynamic +# mobile web & app views to load section content. +# These views aren't HTTP-cached but use parser cache aggressively +# and don't expose special: pages etc. +# +# Another exception is for REST API documentation, located at +# /api/rest_v1/?doc. +# +User-agent: * +Allow: /w/api.php?action=mobileview& +Allow: /w/load.php? +Allow: /api/rest_v1/?doc +Disallow: /w/ +Disallow: /api/ +Disallow: /trap/ +# +# ar: +Disallow: /wiki/%D8%AE%D8%A7%D8%B5:Search +Disallow: /wiki/%D8%AE%D8%A7%D8%B5%3ASearch +# +# dewiki: +# T6937 +# sensible deletion and meta user discussion pages: +Disallow: /wiki/Wikipedia:L%C3%B6schkandidaten/ +Disallow: /wiki/Wikipedia:Löschkandidaten/ +Disallow: /wiki/Wikipedia:Vandalensperrung/ +Disallow: /wiki/Wikipedia:Benutzersperrung/ +Disallow: /wiki/Wikipedia:Vermittlungsausschuss/ +Disallow: /wiki/Wikipedia:Administratoren/Probleme/ +Disallow: /wiki/Wikipedia:Adminkandidaturen/ +Disallow: /wiki/Wikipedia:Qualitätssicherung/ +Disallow: /wiki/Wikipedia:Qualit%C3%A4tssicherung/ +# Search- and random-page +Disallow: /wiki/Spezial:Suche +Disallow: /wiki/Special:Suche +Disallow: /wiki/Spezial:Zufällige_Seite +Disallow: /wiki/Spezial:Zuf%C3%A4llige_Seite +Disallow: /wiki/Special:Zufällige_Seite +Disallow: /wiki/Special:Zuf%C3%A4llige_Seite +# 4937#5 +Disallow: /wiki/Wikipedia:Vandalismusmeldung/ +Disallow: /wiki/Wikipedia:Gesperrte_Lemmata/ +Disallow: /wiki/Wikipedia:Löschprüfung/ +Disallow: /wiki/Wikipedia:L%C3%B6schprüfung/ +Disallow: /wiki/Wikipedia:Administratoren/Notizen/ +Disallow: /wiki/Wikipedia:Schiedsgericht/Anfragen/ +Disallow: /wiki/Wikipedia:L%C3%B6schpr%C3%BCfung/ +# T14111 +Disallow: /wiki/Wikipedia:Checkuser/ +Disallow: /wiki/Wikipedia_Diskussion:Checkuser/ +Disallow: /wiki/Wikipedia_Diskussion:Adminkandidaturen/ +# T15961 +Disallow: /wiki/Wikipedia:Spam-Blacklist-Log +Disallow: /wiki/Wikipedia%3ASpam-Blacklist-Log +Disallow: /wiki/Wikipedia_Diskussion:Spam-Blacklist-Log +Disallow: /wiki/Wikipedia_Diskussion%3ASpam-Blacklist-Log +# +# enwiki: +# Folks get annoyed when VfD discussions end up the number 1 google hit for +# their name. See T6776 +Disallow: /wiki/Wikipedia:Articles_for_deletion/ +Disallow: /wiki/Wikipedia%3AArticles_for_deletion/ +Disallow: /wiki/Wikipedia:Votes_for_deletion/ +Disallow: /wiki/Wikipedia%3AVotes_for_deletion/ +Disallow: /wiki/Wikipedia:Pages_for_deletion/ +Disallow: /wiki/Wikipedia%3APages_for_deletion/ +Disallow: /wiki/Wikipedia:Miscellany_for_deletion/ +Disallow: /wiki/Wikipedia%3AMiscellany_for_deletion/ +Disallow: /wiki/Wikipedia:Miscellaneous_deletion/ +Disallow: /wiki/Wikipedia%3AMiscellaneous_deletion/ +Disallow: /wiki/Wikipedia:Copyright_problems +Disallow: /wiki/Wikipedia%3ACopyright_problems +Disallow: /wiki/Wikipedia:Protected_titles/ +Disallow: /wiki/Wikipedia%3AProtected_titles/ +# T15398 +Disallow: /wiki/Wikipedia:WikiProject_Spam/ +Disallow: /wiki/Wikipedia%3AWikiProject_Spam/ +# T16075 +Disallow: /wiki/MediaWiki:Spam-blacklist +Disallow: /wiki/MediaWiki%3ASpam-blacklist +Disallow: /wiki/MediaWiki_talk:Spam-blacklist +Disallow: /wiki/MediaWiki_talk%3ASpam-blacklist +# T13261 +Disallow: /wiki/Wikipedia:Requests_for_arbitration/ +Disallow: /wiki/Wikipedia%3ARequests_for_arbitration/ +Disallow: /wiki/Wikipedia:Requests_for_comment/ +Disallow: /wiki/Wikipedia%3ARequests_for_comment/ +Disallow: /wiki/Wikipedia:Requests_for_adminship/ +Disallow: /wiki/Wikipedia%3ARequests_for_adminship/ +# T12288 +Disallow: /wiki/Wikipedia_talk:Articles_for_deletion/ +Disallow: /wiki/Wikipedia_talk%3AArticles_for_deletion/ +Disallow: /wiki/Wikipedia_talk:Votes_for_deletion/ +Disallow: /wiki/Wikipedia_talk%3AVotes_for_deletion/ +Disallow: /wiki/Wikipedia_talk:Pages_for_deletion/ +Disallow: /wiki/Wikipedia_talk%3APages_for_deletion/ +Disallow: /wiki/Wikipedia_talk:Miscellany_for_deletion/ +Disallow: /wiki/Wikipedia_talk%3AMiscellany_for_deletion/ +Disallow: /wiki/Wikipedia_talk:Miscellaneous_deletion/ +Disallow: /wiki/Wikipedia_talk%3AMiscellaneous_deletion/ +# T16793 +Disallow: /wiki/Wikipedia:Changing_username +Disallow: /wiki/Wikipedia%3AChanging_username +Disallow: /wiki/Wikipedia:Changing_username/ +Disallow: /wiki/Wikipedia%3AChanging_username/ +Disallow: /wiki/Wikipedia_talk:Changing_username +Disallow: /wiki/Wikipedia_talk%3AChanging_username +Disallow: /wiki/Wikipedia_talk:Changing_username/ +Disallow: /wiki/Wikipedia_talk%3AChanging_username/ +# +# eswiki: +# T8746 +Disallow: /wiki/Wikipedia:Consultas_de_borrado/ +Disallow: /wiki/Wikipedia%3AConsultas_de_borrado/ +# +# fiwiki: +# T10695 +Disallow: /wiki/Wikipedia:Poistettavat_sivut +Disallow: /wiki/K%C3%A4ytt%C3%A4j%C3%A4: +Disallow: /wiki/Käyttäjä: +Disallow: /wiki/Keskustelu_k%C3%A4ytt%C3%A4j%C3%A4st%C3%A4: +Disallow: /wiki/Keskustelu_käyttäjästä: +Disallow: /wiki/Wikipedia:Yll%C3%A4pit%C3%A4j%C3%A4t/ +Disallow: /wiki/Wikipedia:Ylläpitäjät/ +# +# frwiki: +Disallow: /wiki/Wikip%C3%A9dia:Pages_%C3%A0_supprimer/ +Disallow: /wiki/Wikip%C3%A9dia:Pages_soup%C3%A7onn%C3%A9es_de_violation_de_copyright/ +# +# hewiki: +Disallow: /wiki/%D7%9E%D7%99%D7%95%D7%97%D7%93:Search +Disallow: /wiki/%D7%9E%D7%99%D7%95%D7%97%D7%93%3ASearch +#T11517 +Disallow: /wiki/ויקיפדיה:רשימת_מועמדים_למחיקה/ +Disallow: /wiki/ויקיפדיה%3Aרשימת_מועמדים_למחיקה/ +Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%A8%D7%A9%D7%99%D7%9E%D7%AA_%D7%9E%D7%95%D7%A2%D7%9E%D7%93%D7%99%D7%9D_%D7%9C%D7%9E%D7%97%D7%99%D7%A7%D7%94/ +Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%A8%D7%A9%D7%99%D7%9E%D7%AA_%D7%9E%D7%95%D7%A2%D7%9E%D7%93%D7%99%D7%9D_%D7%9C%D7%9E%D7%97%D7%99%D7%A7%D7%94/ +Disallow: /wiki/ויקיפדיה:ערכים_לא_קיימים_ומוגנים +Disallow: /wiki/ויקיפדיה%3Aערכים_לא_קיימים_ומוגנים +Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%A2%D7%A8%D7%9B%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D +Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%A2%D7%A8%D7%9B%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D +Disallow: /wiki/ויקיפדיה:דפים_לא_קיימים_ומוגנים +Disallow: /wiki/ויקיפדיה%3Aדפים_לא_קיימים_ומוגנים +Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%93%D7%A4%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D +Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%93%D7%A4%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D +# +# huwiki: +Disallow: /wiki/Speci%C3%A1lis:Search +Disallow: /wiki/Speci%C3%A1lis%3ASearch +# +# itwiki: +# T7545 +Disallow: /wiki/Wikipedia:Pagine_da_cancellare +Disallow: /wiki/Wikipedia%3APagine_da_cancellare +Disallow: /wiki/Wikipedia:Utenti_problematici +Disallow: /wiki/Wikipedia%3AUtenti_problematici +Disallow: /wiki/Wikipedia:Vandalismi_in_corso +Disallow: /wiki/Wikipedia%3AVandalismi_in_corso +Disallow: /wiki/Wikipedia:Amministratori +Disallow: /wiki/Wikipedia%3AAmministratori +Disallow: /wiki/Wikipedia:Proposte_di_cancellazione_semplificata +Disallow: /wiki/Wikipedia%3AProposte_di_cancellazione_semplificata +Disallow: /wiki/Categoria:Da_cancellare_subito +Disallow: /wiki/Categoria%3ADa_cancellare_subito +Disallow: /wiki/Wikipedia:Sospette_violazioni_di_copyright +Disallow: /wiki/Wikipedia%3ASospette_violazioni_di_copyright +Disallow: /wiki/Categoria:Da_controllare_per_copyright +Disallow: /wiki/Categoria%3ADa_controllare_per_copyright +Disallow: /wiki/Progetto:Rimozione_contributi_sospetti +Disallow: /wiki/Progetto%3ARimozione_contributi_sospetti +Disallow: /wiki/Categoria:Da_cancellare_subito_per_violazione_integrale_copyright +Disallow: /wiki/Categoria%3ADa_cancellare_subito_per_violazione_integrale_copyright +Disallow: /wiki/Progetto:Cococo +Disallow: /wiki/Progetto%3ACococo +Disallow: /wiki/Discussioni_progetto:Cococo +Disallow: /wiki/Discussioni_progetto%3ACococo +# +# jawiki +Disallow: /wiki/%E7%89%B9%E5%88%A5:Search +Disallow: /wiki/%E7%89%B9%E5%88%A5%3ASearch +# T7239 +Disallow: /wiki/Wikipedia:%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC/ +Disallow: /wiki/Wikipedia%3A%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC/ +Disallow: /wiki/Wikipedia:%E5%88%A9%E7%94%A8%E8%80%85%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AE%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC +Disallow: /wiki/Wikipedia%3A%E5%88%A9%E7%94%A8%E8%80%85%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AE%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC +# nowiki +# T13432 +Disallow: /wiki/Bruker: +Disallow: /wiki/Bruker%3A +Disallow: /wiki/Brukerdiskusjon +Disallow: /wiki/Wikipedia:Administratorer +Disallow: /wiki/Wikipedia%3AAdministratorer +Disallow: /wiki/Wikipedia-diskusjon:Administratorer +Disallow: /wiki/Wikipedia-diskusjon%3AAdministratorer +Disallow: /wiki/Wikipedia:Sletting +Disallow: /wiki/Wikipedia%3ASletting +Disallow: /wiki/Wikipedia-diskusjon:Sletting +Disallow: /wiki/Wikipedia-diskusjon%3ASletting +Disallow: /wiki/Spesial: +Disallow: /wiki/Spesial%3A +# +# plwiki +# T10067 +Disallow: /wiki/Wikipedia:Strony_do_usuni%C4%99cia +Disallow: /wiki/Wikipedia%3AStrony_do_usuni%C4%99cia +Disallow: /wiki/Wikipedia:Do_usuni%C4%99cia +Disallow: /wiki/Wikipedia%3ADo_usuni%C4%99cia +Disallow: /wiki/Wikipedia:SDU/ +Disallow: /wiki/Wikipedia%3ASDU/ +Disallow: /wiki/Wikipedia:Strony_podejrzane_o_naruszenie_praw_autorskich +Disallow: /wiki/Wikipedia%3AStrony_podejrzane_o_naruszenie_praw_autorskich +# +# ptwiki: +# T7394 +Disallow: /wiki/Wikipedia:Páginas_para_eliminar/ +Disallow: /wiki/Wikipedia:P%C3%A1ginas_para_eliminar/ +Disallow: /wiki/Wikipedia%3AP%C3%A1ginas_para_eliminar/ +Disallow: /wiki/Wikipedia_Discussão:Páginas_para_eliminar/ +Disallow: /wiki/Wikipedia_Discuss%C3%A3o:P%C3%A1ginas_para_eliminar/ +Disallow: /wiki/Wikipedia_Discuss%C3%A3o%3AP%C3%A1ginas_para_eliminar/ +# +# rowiki: +# T14546 +Disallow: /wiki/Wikipedia:Pagini_de_%C5%9Fters +Disallow: /wiki/Wikipedia%3APagini_de_%C5%9Fters +Disallow: /wiki/Discu%C5%A3ie_Wikipedia:Pagini_de_%C5%9Fters +Disallow: /wiki/Discu%C5%A3ie_Wikipedia%3APagini_de_%C5%9Fters +# +# ruwiki: +Disallow: /wiki/%D0%A1%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D1%8C%D0%BD%D1%8B%D0%B5:Search +Disallow: /wiki/%D0%A1%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D1%8C%D0%BD%D1%8B%D0%B5%3ASearch +# +# svwiki: +# T12229 +Disallow: /wiki/Wikipedia%3ASidor_f%C3%B6reslagna_f%C3%B6r_radering +Disallow: /wiki/Wikipedia:Sidor_f%C3%B6reslagna_f%C3%B6r_radering +Disallow: /wiki/Wikipedia:Sidor_föreslagna_för_radering +Disallow: /wiki/Användare +Disallow: /wiki/Anv%C3%A4ndare +Disallow: /wiki/Användardiskussion +Disallow: /wiki/Anv%C3%A4ndardiskussion +Disallow: /wiki/Wikipedia:Skyddade_sidnamn +Disallow: /wiki/Wikipedia%3ASkyddade_sidnamn +# T13291 +Disallow: /wiki/Wikipedia:Sidor_som_bör_raderas +Disallow: /wiki/Wikipedia:Sidor_som_b%C3%B6r_raderas +Disallow: /wiki/Wikipedia%3ASidor_som_b%C3%B6r_raderas +# +# zhwiki: +# T7104 +Disallow: /wiki/Wikipedia:删除投票/侵权 +Disallow: /wiki/Wikipedia:%E5%88%A0%E9%99%A4%E6%8A%95%E7%A5%A8/%E4%BE%B5%E6%9D%83 +Disallow: /wiki/Wikipedia:删除投票和请求 +Disallow: /wiki/Wikipedia:%E5%88%A0%E9%99%A4%E6%8A%95%E7%A5%A8%E5%92%8C%E8%AF%B7%E6%B1%82 +Disallow: /wiki/Category:快速删除候选 +Disallow: /wiki/Category:%E5%BF%AB%E9%80%9F%E5%88%A0%E9%99%A4%E5%80%99%E9%80%89 +Disallow: /wiki/Category:维基百科需要翻译的文章 +Disallow: /wiki/Category:%E7%BB%B4%E5%9F%BA%E7%99%BE%E7%A7%91%E9%9C%80%E8%A6%81%E7%BF%BB%E8%AF%91%E7%9A%84%E6%96%87%E7%AB%A0 +# +# sister projects +# +# enwikinews: +# T7340 +Disallow: /wiki/Portal:Prepared_stories/ +Disallow: /wiki/Portal%3APrepared_stories/ +# +# itwikinews +# T11138 +Disallow: /wiki/Wikinotizie:Richieste_di_cancellazione +Disallow: /wiki/Wikinotizie:Sospette_violazioni_di_copyright +Disallow: /wiki/Categoria:Da_cancellare_subito +Disallow: /wiki/Categoria:Da_cancellare_subito_per_violazione_integrale_copyright +Disallow: /wiki/Wikinotizie:Storie_in_preparazione +# +# enwikiquote: +# T17095 +Disallow: /wiki/Wikiquote:Votes_for_deletion/ +Disallow: /wiki/Wikiquote%3AVotes_for_deletion/ +Disallow: /wiki/Wikiquote_talk:Votes_for_deletion/ +Disallow: /wiki/Wikiquote_talk%3AVotes_for_deletion/ +Disallow: /wiki/Wikiquote:Votes_for_deletion_archive/ +Disallow: /wiki/Wikiquote%3AVotes_for_deletion_archive/ +Disallow: /wiki/Wikiquote_talk:Votes_for_deletion_archive/ +Disallow: /wiki/Wikiquote_talk%3AVotes_for_deletion_archive/ +# +# enwikibooks +Disallow: /wiki/Wikibooks:Votes_for_deletion +# +# working... +Disallow: /wiki/Fundraising_2007/comments +# +Disallow: /wiki/Special:Maintenance +# Do not show banner content or record hits +Disallow: /wiki/Special:BannerLoader +Disallow: /wiki/Special:RecordImpression +# +# +#----------------------------------------------------------# +# +# +# + #
+#
+# Localisable part of robots.txt for en.wikipedia.org
+#
+# Edit at https://en.wikipedia.org/w/index.php?title=MediaWiki:Robots.txt&action=edit
+# Don't add newlines here. All rules set here are active for every user-agent.
+#
+# Please check any changes using a syntax validator such as http://tool.motoricerca.info/robots-checker.phtml
+# Enter https://en.wikipedia.org/robots.txt as the URL to check.
+#
+# https://bugzilla.wikimedia.org/show_bug.cgi?id=14075
+Disallow: /wiki/MediaWiki:Spam-blacklist
+Disallow: /wiki/MediaWiki%3ASpam-blacklist
+Disallow: /wiki/MediaWiki_talk:Spam-blacklist
+Disallow: /wiki/MediaWiki_talk%3ASpam-blacklist
+Disallow: /wiki/Wikipedia:WikiProject_Spam
+Disallow: /wiki/Wikipedia_talk:WikiProject_Spam
+#
+# Folks get annoyed when XfD discussions end up the number 1 google hit for
+# their name.
+# https://phabricator.wikimedia.org/T16075
+Disallow: /wiki/Wikipedia:Articles_for_deletion
+Disallow: /wiki/Wikipedia%3AArticles_for_deletion
+Disallow: /wiki/Wikipedia:Votes_for_deletion
+Disallow: /wiki/Wikipedia%3AVotes_for_deletion
+Disallow: /wiki/Wikipedia:Pages_for_deletion
+Disallow: /wiki/Wikipedia%3APages_for_deletion
+Disallow: /wiki/Wikipedia:Miscellany_for_deletion
+Disallow: /wiki/Wikipedia%3AMiscellany_for_deletion
+Disallow: /wiki/Wikipedia:Miscellaneous_deletion
+Disallow: /wiki/Wikipedia%3AMiscellaneous_deletion
+Disallow: /wiki/Wikipedia:Categories_for_discussion
+Disallow: /wiki/Wikipedia%3ACategories_for_discussion
+Disallow: /wiki/Wikipedia:Templates_for_deletion
+Disallow: /wiki/Wikipedia%3ATemplates_for_deletion
+Disallow: /wiki/Wikipedia:Redirects_for_discussion
+Disallow: /wiki/Wikipedia%3ARedirects_for_discussion
+Disallow: /wiki/Wikipedia:Deletion_review
+Disallow: /wiki/Wikipedia%3ADeletion_review
+Disallow: /wiki/Wikipedia:WikiProject_Deletion_sorting
+Disallow: /wiki/Wikipedia%3AWikiProject_Deletion_sorting
+Disallow: /wiki/Wikipedia:Files_for_deletion
+Disallow: /wiki/Wikipedia%3AFiles_for_deletion
+Disallow: /wiki/Wikipedia:Files_for_discussion
+Disallow: /wiki/Wikipedia%3AFiles_for_discussion
+Disallow: /wiki/Wikipedia:Possibly_unfree_files
+Disallow: /wiki/Wikipedia%3APossibly_unfree_files
+#
+# https://phabricator.wikimedia.org/T12288
+Disallow: /wiki/Wikipedia_talk:Articles_for_deletion
+Disallow: /wiki/Wikipedia_talk%3AArticles_for_deletion
+Disallow: /wiki/Wikipedia_talk:Votes_for_deletion
+Disallow: /wiki/Wikipedia_talk%3AVotes_for_deletion
+Disallow: /wiki/Wikipedia_talk:Pages_for_deletion
+Disallow: /wiki/Wikipedia_talk%3APages_for_deletion
+Disallow: /wiki/Wikipedia_talk:Miscellany_for_deletion
+Disallow: /wiki/Wikipedia_talk%3AMiscellany_for_deletion
+Disallow: /wiki/Wikipedia_talk:Miscellaneous_deletion
+Disallow: /wiki/Wikipedia_talk%3AMiscellaneous_deletion
+Disallow: /wiki/Wikipedia_talk:Templates_for_deletion
+Disallow: /wiki/Wikipedia_talk%3ATemplates_for_deletion
+Disallow: /wiki/Wikipedia_talk:Categories_for_discussion
+Disallow: /wiki/Wikipedia_talk%3ACategories_for_discussion
+Disallow: /wiki/Wikipedia_talk:Deletion_review
+Disallow: /wiki/Wikipedia_talk%3ADeletion_review
+Disallow: /wiki/Wikipedia_talk:WikiProject_Deletion_sorting
+Disallow: /wiki/Wikipedia_talk%3AWikiProject_Deletion_sorting
+Disallow: /wiki/Wikipedia_talk:Files_for_deletion
+Disallow: /wiki/Wikipedia_talk%3AFiles_for_deletion
+Disallow: /wiki/Wikipedia_talk:Files_for_discussion
+Disallow: /wiki/Wikipedia_talk%3AFiles_for_discussion
+Disallow: /wiki/Wikipedia_talk:Possibly_unfree_files
+Disallow: /wiki/Wikipedia_talk%3APossibly_unfree_files
+#
+Disallow: /wiki/Wikipedia:Copyright_problems
+Disallow: /wiki/Wikipedia%3ACopyright_problems
+Disallow: /wiki/Wikipedia_talk:Copyright_problems
+Disallow: /wiki/Wikipedia_talk%3ACopyright_problems
+Disallow: /wiki/Wikipedia:Suspected_copyright_violations
+Disallow: /wiki/Wikipedia%3ASuspected_copyright_violations
+Disallow: /wiki/Wikipedia_talk:Suspected_copyright_violations
+Disallow: /wiki/Wikipedia_talk%3ASuspected_copyright_violations
+Disallow: /wiki/Wikipedia:Contributor_copyright_investigations
+Disallow: /wiki/Wikipedia%3AContributor_copyright_investigations
+Disallow: /wiki/Wikipedia:Contributor_copyright_investigations
+Disallow: /wiki/Wikipedia%3AContributor_copyright_investigations
+Disallow: /wiki/Wikipedia_talk:Contributor_copyright_investigations
+Disallow: /wiki/Wikipedia_talk%3AContributor_copyright_investigations
+Disallow: /wiki/Wikipedia_talk:Contributor_copyright_investigations
+Disallow: /wiki/Wikipedia_talk%3AContributor_copyright_investigations
+Disallow: /wiki/Wikipedia:Protected_titles
+Disallow: /wiki/Wikipedia%3AProtected_titles
+Disallow: /wiki/Wikipedia_talk:Protected_titles
+Disallow: /wiki/Wikipedia_talk%3AProtected_titles
+Disallow: /wiki/Wikipedia:Articles_for_creation
+Disallow: /wiki/Wikipedia%3AArticles_for_creation
+Disallow: /wiki/Wikipedia_talk:Articles_for_creation
+Disallow: /wiki/Wikipedia_talk%3AArticles_for_creation
+Disallow: /wiki/Wikipedia_talk:Article_wizard
+Disallow: /wiki/Wikipedia_talk%3AArticle_wizard
+#
+# https://phabricator.wikimedia.org/T13261
+Disallow: /wiki/Wikipedia:Requests_for_arbitration
+Disallow: /wiki/Wikipedia%3ARequests_for_arbitration
+Disallow: /wiki/Wikipedia_talk:Requests_for_arbitration
+Disallow: /wiki/Wikipedia_talk%3ARequests_for_arbitration
+Disallow: /wiki/Wikipedia:Requests_for_comment
+Disallow: /wiki/Wikipedia%3ARequests_for_comment
+Disallow: /wiki/Wikipedia_talk:Requests_for_comment
+Disallow: /wiki/Wikipedia_talk%3ARequests_for_comment
+Disallow: /wiki/Wikipedia:Requests_for_adminship
+Disallow: /wiki/Wikipedia%3ARequests_for_adminship
+Disallow: /wiki/Wikipedia_talk:Requests_for_adminship
+Disallow: /wiki/Wikipedia_talk%3ARequests_for_adminship
+#
+# https://phabricator.wikimedia.org/T14111
+Disallow: /wiki/Wikipedia:Requests_for_checkuser
+Disallow: /wiki/Wikipedia%3ARequests_for_checkuser
+Disallow: /wiki/Wikipedia_talk:Requests_for_checkuser
+Disallow: /wiki/Wikipedia_talk%3ARequests_for_checkuser
+#
+# https://phabricator.wikimedia.org/T15398
+Disallow: /wiki/Wikipedia:WikiProject_Spam
+Disallow: /wiki/Wikipedia%3AWikiProject_Spam
+#
+# https://phabricator.wikimedia.org/T16793
+Disallow: /wiki/Wikipedia:Changing_username
+Disallow: /wiki/Wikipedia%3AChanging_username
+Disallow: /wiki/Wikipedia:Changing_username
+Disallow: /wiki/Wikipedia%3AChanging_username
+Disallow: /wiki/Wikipedia_talk:Changing_username
+Disallow: /wiki/Wikipedia_talk%3AChanging_username
+Disallow: /wiki/Wikipedia_talk:Changing_username
+Disallow: /wiki/Wikipedia_talk%3AChanging_username
+#
+Disallow: /wiki/Wikipedia:Administrators%27_noticeboard
+Disallow: /wiki/Wikipedia%3AAdministrators%27_noticeboard
+Disallow: /wiki/Wikipedia_talk:Administrators%27_noticeboard
+Disallow: /wiki/Wikipedia_talk%3AAdministrators%27_noticeboard
+Disallow: /wiki/Wikipedia:Community_sanction_noticeboard
+Disallow: /wiki/Wikipedia%3ACommunity_sanction_noticeboard
+Disallow: /wiki/Wikipedia_talk:Community_sanction_noticeboard
+Disallow: /wiki/Wikipedia_talk%3ACommunity_sanction_noticeboard
+Disallow: /wiki/Wikipedia:Bureaucrats%27_noticeboard
+Disallow: /wiki/Wikipedia%3ABureaucrats%27_noticeboard
+Disallow: /wiki/Wikipedia_talk:Bureaucrats%27_noticeboard
+Disallow: /wiki/Wikipedia_talk%3ABureaucrats%27_noticeboard
+#
+Disallow: /wiki/Wikipedia:Sockpuppet_investigations
+Disallow: /wiki/Wikipedia%3ASockpuppet_investigations
+Disallow: /wiki/Wikipedia_talk:Sockpuppet_investigations
+Disallow: /wiki/Wikipedia_talk%3ASockpuppet_investigations
+#
+Disallow: /wiki/Wikipedia:Neutral_point_of_view/Noticeboard
+Disallow: /wiki/Wikipedia%3ANeutral_point_of_view/Noticeboard
+Disallow: /wiki/Wikipedia_talk:Neutral_point_of_view/Noticeboard
+Disallow: /wiki/Wikipedia_talk%3ANeutral_point_of_view/Noticeboard
+#
+Disallow: /wiki/Wikipedia:No_original_research/noticeboard
+Disallow: /wiki/Wikipedia%3ANo_original_research/noticeboard
+Disallow: /wiki/Wikipedia_talk:No_original_research/noticeboard
+Disallow: /wiki/Wikipedia_talk%3ANo_original_research/noticeboard
+#
+Disallow: /wiki/Wikipedia:Fringe_theories/Noticeboard
+Disallow: /wiki/Wikipedia%3AFringe_theories/Noticeboard
+Disallow: /wiki/Wikipedia_talk:Fringe_theories/Noticeboard
+Disallow: /wiki/Wikipedia_talk%3AFringe_theories/Noticeboard
+#
+Disallow: /wiki/Wikipedia:Conflict_of_interest/Noticeboard
+Disallow: /wiki/Wikipedia%3AConflict_of_interest/Noticeboard
+Disallow: /wiki/Wikipedia_talk:Conflict_of_interest/Noticeboard
+Disallow: /wiki/Wikipedia_talk%3AConflict_of_interest/Noticeboard
+#
+Disallow: /wiki/Wikipedia:Long-term_abuse
+Disallow: /wiki/Wikipedia%3ALong-term_abuse
+Disallow: /wiki/Wikipedia_talk:Long-term_abuse
+Disallow: /wiki/Wikipedia_talk%3ALong-term_abuse
+Disallow: /wiki/Wikipedia:Long_term_abuse
+Disallow: /wiki/Wikipedia%3ALong_term_abuse
+Disallow: /wiki/Wikipedia_talk:Long_term_abuse
+Disallow: /wiki/Wikipedia_talk%3ALong_term_abuse
+#
+Disallow: /wiki/Wikipedia:Wikiquette_assistance
+Disallow: /wiki/Wikipedia%3AWikiquette_assistance
+#
+Disallow: /wiki/Wikipedia:Abuse_reports
+Disallow: /wiki/Wikipedia%3AAbuse_reports
+Disallow: /wiki/Wikipedia_talk:Abuse_reports
+Disallow: /wiki/Wikipedia_talk%3AAbuse_reports
+Disallow: /wiki/Wikipedia:Abuse_response
+Disallow: /wiki/Wikipedia%3AAbuse_response
+Disallow: /wiki/Wikipedia_talk:Abuse_response
+Disallow: /wiki/Wikipedia_talk%3AAbuse_response
+#
+Disallow: /wiki/Wikipedia:Reliable_sources/Noticeboard
+Disallow: /wiki/Wikipedia%3AReliable_sources/Noticeboard
+Disallow: /wiki/Wikipedia_talk:Reliable_sources/Noticeboard
+Disallow: /wiki/Wikipedia_talk%3AReliable_sources/Noticeboard
+#
+Disallow: /wiki/Wikipedia:Suspected_sock_puppets
+Disallow: /wiki/Wikipedia%3ASuspected_sock_puppets
+Disallow: /wiki/Wikipedia_talk:Suspected_sock_puppets
+Disallow: /wiki/Wikipedia_talk%3ASuspected_sock_puppets
+#
+Disallow: /wiki/Wikipedia:Biographies_of_living_persons/Noticeboard
+Disallow: /wiki/Wikipedia%3ABiographies_of_living_persons/Noticeboard
+Disallow: /wiki/Wikipedia_talk:Biographies_of_living_persons/Noticeboard
+Disallow: /wiki/Wikipedia_talk%3ABiographies_of_living_persons/Noticeboard
+#
+Disallow: /wiki/Wikipedia:Content_noticeboard
+Disallow: /wiki/Wikipedia%3AContent_noticeboard
+Disallow: /wiki/Wikipedia_talk:Content_noticeboard
+Disallow: /wiki/Wikipedia_talk%3AContent_noticeboard
+#
+Disallow: /wiki/Template:Editnotices
+Disallow: /wiki/Template%3AEditnotices
+#
+Disallow: /wiki/Wikipedia:Arbitration
+Disallow: /wiki/Wikipedia%3AArbitration
+Disallow: /wiki/Wikipedia_talk:Arbitration
+Disallow: /wiki/Wikipedia_talk%3AArbitration
+#
+Disallow: /wiki/Wikipedia:Arbitration_Committee
+Disallow: /wiki/Wikipedia%3AArbitration_Committee
+Disallow: /wiki/Wikipedia_talk:Arbitration_Committee
+Disallow: /wiki/Wikipedia_talk%3AArbitration_Committee
+#
+Disallow: /wiki/Wikipedia:Arbitration_Committee_Elections
+Disallow: /wiki/Wikipedia%3AArbitration_Committee_Elections
+Disallow: /wiki/Wikipedia_talk:Arbitration_Committee_Elections
+Disallow: /wiki/Wikipedia_talk%3AArbitration_Committee_Elections
+#
+Disallow: /wiki/Wikipedia:Mediation_Committee
+Disallow: /wiki/Wikipedia%3AMediation_Committee
+Disallow: /wiki/Wikipedia_talk:Mediation_Committee
+Disallow: /wiki/Wikipedia_talk%3AMediation_Committee
+#
+Disallow: /wiki/Wikipedia:Mediation_Cabal/Cases
+Disallow: /wiki/Wikipedia%3AMediation_Cabal/Cases
+#
+Disallow: /wiki/Wikipedia:Requests_for_bureaucratship
+Disallow: /wiki/Wikipedia%3ARequests_for_bureaucratship
+Disallow: /wiki/Wikipedia_talk:Requests_for_bureaucratship
+Disallow: /wiki/Wikipedia_talk%3ARequests_for_bureaucratship
+#
+Disallow: /wiki/Wikipedia:Administrator_review
+Disallow: /wiki/Wikipedia%3AAdministrator_review
+Disallow: /wiki/Wikipedia_talk:Administrator_review
+Disallow: /wiki/Wikipedia_talk%3AAdministrator_review
+#
+Disallow: /wiki/Wikipedia:Editor_review
+Disallow: /wiki/Wikipedia%3AEditor_review
+Disallow: /wiki/Wikipedia_talk:Editor_review
+Disallow: /wiki/Wikipedia_talk%3AEditor_review
+#
+Disallow: /wiki/Wikipedia:Article_Incubator
+Disallow: /wiki/Wikipedia%3AArticle_Incubator
+Disallow: /wiki/Wikipedia_talk:Article_Incubator
+Disallow: /wiki/Wikipedia_talk%3AArticle_Incubator
+#
+Disallow: /wiki/Category:Noindexed_pages
+Disallow: /wiki/Category%3ANoindexed_pages
+#
+# 
diff --git a/man/can_fetch.Rd b/man/can_fetch.Rd index e440838..9b3538f 100644 --- a/man/can_fetch.Rd +++ b/man/can_fetch.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rep.r +% Please edit documentation in R/can-fetch.r \name{can_fetch} \alias{can_fetch} \title{Test URL path against robots.txt} @@ -17,7 +17,9 @@ can_fetch(obj, path = "/", user_agent = "*") Test URL path against robots.txt } \examples{ -library(robotstxt) -can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE -can_fetch(rt, "/_borders", "*") # FALSE +gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n") +gh_rt <- robxp(gh) +can_fetch(gh_rt, "/humans.txt", "*") # TRUE +can_fetch(gh_rt, "/login", "*") # FALSE +can_fetch(gh_rt, "/oembed", "CCBot") # FALSE } diff --git a/man/crawl_delays.Rd b/man/crawl_delays.Rd index fb63576..b5833be 100644 --- a/man/crawl_delays.Rd +++ b/man/crawl_delays.Rd @@ -2,13 +2,28 @@ % Please edit documentation in R/cd.r \name{crawl_delays} \alias{crawl_delays} -\title{Get all crawl_delay} +\title{Get all agent crawl delay values} \usage{ crawl_delays(obj) } \arguments{ \item{obj}{\code{robxp} object} } +\value{ +data frame of agents and their crawl delays +} \description{ -Get all crawl_delay +Get all agent crawl delay values +} +\note{ +\code{-1} will be returned for any listed agent without a crawl delay setting +} +\examples{ +gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n") +gh_rt <- robxp(gh) +crawl_delays(gh_rt) + +imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n") +imdb_rt <- robxp(imdb) +crawl_delays(imdb_rt) } diff --git a/man/robxp.Rd b/man/robxp.Rd index edc787f..ee6dad2 100644 --- a/man/robxp.Rd +++ b/man/robxp.Rd @@ -13,7 +13,6 @@ robxp(x) Create a robots.txt object } \examples{ -library(robotstxt) -can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE -can_fetch(rt, "/_borders", "*") # FALSE +imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n") +rt <- robxp(imdb) } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index b6a6090..830ae93 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -17,7 +17,7 @@ BEGIN_RCPP END_RCPP } // rep_crawl_delays -std::vector rep_crawl_delays(SEXP xp); +DataFrame rep_crawl_delays(SEXP xp); RcppExport SEXP _rep_rep_crawl_delays(SEXP xpSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; diff --git a/src/repmain.cpp b/src/repmain.cpp index 71be6ae..3ca71cd 100644 --- a/src/repmain.cpp +++ b/src/repmain.cpp @@ -21,18 +21,22 @@ SEXP rep_parse(std::string content) { //' @noRd //' // [[Rcpp::export]] -std::vector rep_crawl_delays(SEXP xp) { +DataFrame rep_crawl_delays(SEXP xp) { Rcpp::XPtr ptr(xp); + std::vector agents; std::vector vals; + + agents.reserve(ptr->agents_.size()); vals.reserve(ptr->agents_.size()); for(auto kv : ptr->agents_) { + agents.push_back(kv.first); vals.push_back(kv.second.delay()); } - return(vals); + return(DataFrame::create(_["agent"] = agents, _["crawl_delay"] = vals)); } diff --git a/tests/testthat/test-rep.R b/tests/testthat/test-rep.R index 38aa2f7..07382d3 100644 --- a/tests/testthat/test-rep.R +++ b/tests/testthat/test-rep.R @@ -1,11 +1,19 @@ context("basic functionality") test_that("parsing and testing works", { - rt <- robxp(robotstxt::get_robotstxt("https://cdc.gov")) + cdc <- paste0(readLines(system.file("extdata", "cdc-robots.txt", package="rep")), collapse="\n") + rt <- robxp(cdc) expect_that(rt, is_a("robxp")) expect_that(can_fetch(rt, "/asthma/asthma_stats/default.htm", "*"), equals(TRUE)) expect_that(can_fetch(rt, "/_borders", "*"), equals(FALSE)) + imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n") + rt <- robxp(imdb) + cd <- crawl_delays(rt) + + expect_that(cd, is_a("data.frame")) + expect_equal(cd$crawl_delay, c(0.1, 3.0, -1.0)) + })