diff --git a/.Rbuildignore b/.Rbuildignore index c9a5c92..19cdbd8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 +19,4 @@ ^CRAN-RELEASE$ ^appveyor\.yml$ ^tools$ +^LICENSE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 248a775..bf853dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unjuris Type: Package -Title: unjuris title goes here otherwise CRAN checks fail +Title: Search the U.N. Jurisprudence Database Version: 0.1.0 Date: 2020-03-05 Authors@R: c( @@ -8,17 +8,27 @@ Authors@R: c( comment = c(ORCID = "0000-0001-5670-2640")) ) Maintainer: Bob Rudis -Description: A good description goes here otherwise CRAN checks fail. +Description: The jurisprudence database is intended to be a single source of the human rights recommendations + and findings issued by all above committees in their work on individual cases. It enables the general + public, governments, civil society organizations, United Nations partners and international regional + mechanisms to research the vast body of legal interpretation of international human rights law as + it has evolved over the past years. Tools are provided to search and retrieve search results from + the database. 
URL: https://git.rud.is/hrbrmstr/unjuris BugReports: https://git.rud.is/hrbrmstr/unjuris/issues Encoding: UTF-8 -License: AGPL +License: MIT + file LICENSE Suggests: covr, tinytest Depends: R (>= 3.2.0) Imports: + xml2, httr, - jsonlite + jsonlite, + rvest, + stringi, + magrittr, + stats Roxygen: list(markdown = TRUE) RoxygenNote: 7.0.2 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..49d554e --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2020 +COPYRIGHT HOLDER: Bob Rudis diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..524f6aa --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2020 Bob Rudis + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/NAMESPACE b/NAMESPACE
index 5b4b9ae..969c12d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,4 +1,16 @@
 # Generated by roxygen2: do not edit by hand
 
+export("%>%")
+export(get_details)
+export(juris_search)
 import(httr)
+import(stringi)
 importFrom(jsonlite,fromJSON)
+importFrom(magrittr,"%>%")
+importFrom(rvest,html_attr)
+importFrom(rvest,html_node)
+importFrom(rvest,html_nodes)
+importFrom(rvest,html_table)
+importFrom(rvest,html_text)
+importFrom(stats,setNames)
+importFrom(xml2,read_html)
diff --git a/R/aaa.R b/R/aaa.R
new file mode 100644
index 0000000..5a932ad
--- /dev/null
+++ b/R/aaa.R
@@ -0,0 +1,4 @@
+# User agent object passed along with the HTTP requests the package makes
+.UNJURIS_UA <- httr::user_agent(
+  sprintf("unjuris R package %s; ()", utils::packageVersion("unjuris"))
+)
\ No newline at end of file
diff --git a/R/get-details.R b/R/get-details.R
new file mode 100644
index 0000000..f82f362
--- /dev/null
+++ b/R/get-details.R
@@ -0,0 +1,71 @@
+#' Retrieve details (document links) from a jurisprudence record entry
+#'
+#' @param detail_id_or_url either a number (e.g. `2606`) or a full URL
+#' (i.e.
the `detail_url` from a [juris_search()] query result)
+#' @return data frame (classed as a tibble) with one row per row of the
+#'   page's document table: the first column holds the text of the row's
+#'   first cell, the remaining columns hold link targets (`NA` when a cell
+#'   has no link). Column names are the lower-cased table headers. When the
+#'   page has no document table a message is shown and an empty data frame
+#'   is returned.
+#' @export
+#' @examples
+#' xdf <- juris_search(year_start = 2019, year_end = 2020)
+#' get_details(xdf$detail_url[1])
+#' get_details(2606)
+get_details <- function(detail_id_or_url) {
+
+  detail_id_or_url <- detail_id_or_url[1]
+  if (length(detail_id_or_url) == 0 || is.na(detail_id_or_url)) {
+    stop("`detail_id_or_url` must be a record id or a details URL.", call. = FALSE)
+  }
+
+  # bare ids become a details URL; format() keeps large numeric ids out of
+  # scientific notation (sprintf("%s", 1e5) gives "1e+05")
+  if (!grepl("^http", detail_id_or_url)) {
+    if (is.numeric(detail_id_or_url)) {
+      detail_id_or_url <- format(detail_id_or_url, scientific = FALSE, trim = TRUE)
+    }
+    detail_id_or_url <- sprintf("https://juris.ohchr.org/Search/Details/%s", detail_id_or_url)
+  }
+
+  res <- httr::GET(url = detail_id_or_url, .UNJURIS_UA)
+  httr::stop_for_status(res)
+
+  pg <- read_html(httr::content(res, as = "text", encoding = "UTF-8"))
+
+  # the check relies on html_node() giving a zero-length result on no match
+  table_exists <- html_node(pg, "section#download-listings > table")
+  if (length(table_exists) == 0) {
+    message(
+      sprintf(
+        "Please visit %s in your browser as there is no table of documents to return.",
+        detail_id_or_url
+      )
+    )
+    return(data.frame(stringsAsFactors = FALSE))
+  }
+
+  detail_cols <- trimws(tolower(colnames(html_table(table_exists))))
+
+  html_nodes(pg, "section#download-listings > table > tbody > tr") %>%
+    lapply(function(.x) {
+
+      cols <- html_nodes(.x, "td")
+
+      # one href per cell (NA if the cell has no link) so that values stay
+      # aligned with `detail_cols` even when a format is unavailable
+      links <- html_attr(html_node(cols[-1], "a"), "href")
+
+      as.data.frame(
+        as.list(stats::setNames(c(html_text(cols[1]), links), detail_cols)),
+        stringsAsFactors = FALSE
+      )
+
+    }) -> detail_entries
+
+  detail_entries <- do.call(rbind.data.frame, detail_entries)
+  class(detail_entries) <- c("tbl_df", "tbl", "data.frame")
+  detail_entries
+
+}
diff --git a/R/juris-search.R b/R/juris-search.R
new file mode 100644
index 0000000..9a140fc
--- /dev/null
+++ b/R/juris-search.R
@@ -0,0 +1,131 @@
+#' Search the U.N.
Jurisprudence Database
+#'
+#' Submits the search form and then follows the result pagination, combining
+#' every page of results into a single data frame. Only the first element of
+#' each argument is used.
+#'
+#' @param keyword keyword
+#' @param search_type all or any
+#' @param year_start,year_end year start/end
+#' @param symbol symbol
+#' @param communication_number comm #
+#' @param session session
+#' @param decision_type decision type
+#' @return data frame (classed as a tibble) with one row per result and a
+#'   `detail_url` column that can be passed to [get_details()]. Column names
+#'   are the lower-cased, underscore-separated headers of the results table.
+#'   When nothing is found a message is shown and `NULL` is returned invisibly.
+#' @references <https://juris.ohchr.org/search/documents>
+#' @export
+#' @examples
+#' juris_search(year_start = 2019, year_end = 2020)
+juris_search <- function(keyword = "",
+                         search_type = c("all", "any"),
+                         year_start = format(Sys.Date(), "%Y"),
+                         year_end = format(Sys.Date(), "%Y"),
+                         symbol = "",
+                         communication_number = "",
+                         session = "",
+                         decision_type = c("all", "admissibility", "adoption", "discontinuance",
+                                           "inadmissibility", "opinion", "revised")
+) {
+
+  # Form codes keyed by their user-facing label, so that `x[[label]]` yields
+  # the code to submit (the labels must be the names, not the values).
+  srch_trans <- stats::setNames(c(0, 1), c("all", "any"))
+
+  stats::setNames(
+    c(0, 6, 3, 7, 4, 2, 5),
+    c("all", "admissibility", "adoption", "discontinuance", "inadmissibility", "opinion", "revised")
+  ) -> dec_trans
+
+  search_type <- match.arg(tolower(trimws(search_type))[1], names(srch_trans))
+  decision_type <- match.arg(tolower(trimws(decision_type))[1], names(dec_trans))
+
+  # Turn one results page into a data frame: normalised column names plus the
+  # details URL built from each row's `data-id` attribute.
+  parse_results <- function(pg) {
+    tbl <- html_table(html_node(pg, "table.results"))
+
+    html_nodes(pg, "table.results > tbody > tr") %>%
+      html_attr("data-id") %>%
+      sprintf("https://juris.ohchr.org/Search/Details/%s", .) -> data_url
+
+    colnames(tbl) <- gsub(" +", "_", tolower(trimws(colnames(tbl))))
+    tbl[["detail_url"]] <- data_url
+
+    tbl
+  }
+
+  httr::POST(
+    url = "https://juris.ohchr.org/search/results",
+    body = list(
+      Keyword = keyword[1],
+      SearchOperatorType = srch_trans[[search_type]],
+      AdoptionOfViewYear = year_start[1],
+      EndAdoptionOfViewYear = year_end[1],
+      Symbol = symbol[1],
+      Communication = communication_number[1],
+      Session = session[1],
+      TypeOfDecision = dec_trans[[decision_type]]
+    ),
+    encode = "form",
+    .UNJURIS_UA
+  ) -> res
+
+  httr::stop_for_status(res)
+
+  pg <- xml2::read_html(httr::content(res, as = "text", encoding = "UTF-8"))
+
+  html_nodes(pg, "section.content") %>%
+    html_text() -> content_section
+
+  # `[1]` turns "no section found" into NA instead of a zero-length value
+  content_section <- content_section[1]
+
+  stri_match_first_regex(
+    content_section,
+    "([[:digit:]]+)[[:space:]]+results found"
+  )[, 2] -> n_results
+
+  # NA (count not found on the page) is treated like zero results rather than
+  # being allowed to break the `if` condition
+  if (is.na(n_results) || as.integer(n_results) == 0L) {
+    message("No results found.")
+    return(invisible(NULL))
+  }
+
+  first <- parse_results(pg)
+
+  stri_match_first_regex(
+    content_section,
+    "([[:digit:]]+) results found page ([[:digit:]]+) of ([[:digit:]]+)"
+  )[, 4] -> last_pg
+
+  # a missing "page x of y" marker means there is nothing more to fetch
+  last_pg <- if (is.na(last_pg)) 1L else as.integer(last_pg)
+
+  html_node(pg, "ul.pagination > li > a[href]") %>%
+    html_attr("href") -> results_pattern
+
+  # seq_len(n)[-1] is empty for a single page (2:1 would count down and
+  # re-fetch pages); the page number is substituted directly, not through
+  # sprintf(), so a literal "%" in the scraped href cannot break formatting
+  lapply(seq_len(last_pg)[-1], function(.x) {
+
+    stri_replace_first_regex(
+      results_pattern, "/([[:digit:]]+)\\?", sprintf("/%d?", .x)
+    ) -> pg_path
+
+    res <- httr::GET(url = paste0("https://juris.ohchr.org", pg_path), .UNJURIS_UA)
+    httr::stop_for_status(res)
+
+    parse_results(xml2::read_html(httr::content(res, as = "text", encoding = "UTF-8")))
+
+  }) -> remaining_tbls
+
+  out <- do.call(rbind.data.frame, c(list(first), remaining_tbls))
+  class(out) <- c("tbl_df", "tbl", "data.frame")
+  out
+
+}
diff --git a/R/unjuris-package.R b/R/unjuris-package.R
index 01863ab..26000f2 100644
--- a/R/unjuris-package.R
+++ b/R/unjuris-package.R
@@ -1,9 +1,19 @@
-#' ...
-#'
+#' Search the U.N. Jurisprudence Database
+#'
+#' The jurisprudence database is intended to be a single source of the human rights recommendations
+#' and findings issued by all above committees in their work on individual cases.
It enables the general +#' public, governments, civil society organizations, United Nations partners and international regional +#' mechanisms to research the vast body of legal interpretation of international human rights law as +#' it has evolved over the past years. Tools are provided to search and retrieve search results from +#' the database. +#' #' @md #' @name unjuris #' @keywords internal #' @author Bob Rudis (bob@@rud.is) -#' @import httr +#' @import httr stringi +#' @importFrom xml2 read_html +#' @importFrom rvest html_node html_nodes html_text html_attr html_table #' @importFrom jsonlite fromJSON +#' @importFrom stats setNames "_PACKAGE" diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 0000000..e79f3d8 --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,11 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +NULL diff --git a/README.Rmd b/README.Rmd index e663ffc..805d596 100644 --- a/README.Rmd +++ b/README.Rmd @@ -39,6 +39,16 @@ packageVersion("unjuris") ``` +```{r u-01} +library(tibble) # for pretty printing + +(xdf <- juris_search(year_start = 2019, year_end = 2020)) + +get_details(xdf$detail_url[10]) + +get_details(2606) +``` + ## unjuris Metrics ```{r cloc, echo=FALSE} diff --git a/README.md b/README.md new file mode 100644 index 0000000..6baf20a --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ + +[![Project Status: Active – The project has reached a stable, usable +state and is being actively +developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) +[![Signed +by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr) +![Signed commit +%](https://img.shields.io/badge/Signed_Commits-100%25-lightgrey.svg) +[![Linux build 
+Status](https://travis-ci.org/hrbrmstr/unjuris.svg?branch=master)](https://travis-ci.org/hrbrmstr/unjuris) +![Minimal R +Version](https://img.shields.io/badge/R%3E%3D-3.2.0-blue.svg) +![License](https://img.shields.io/badge/License-MIT-blue.svg) + +# unjuris + +Search the U.N. Jurisprudence Database + +## Description + +The jurisprudence database is intended to be a single source of the +human rights recommendations and findings issued by all above committees +in their work on individual cases. It enables the general public, +governments, civil society organizations, United Nations partners and +international regional mechanisms to research the vast body of legal +interpretation of international human rights law as it has evolved over +the past years. Tools are provided to search and retrieve search results +from the database. + +## What’s Inside The Tin + +The following functions are implemented: + + - `get_details`: Retrieve details (document links) from a + jurisprudence record entry + - `juris_search`: Search the U.N. Jurisprudence Database + +## Installation + +``` r +remotes::install_git("https://git.rud.is/hrbrmstr/unjuris.git") +# or +remotes::install_git("https://git.sr.ht/~hrbrmstr/unjuris") +# or +remotes::install_gitlab("hrbrmstr/unjuris") +# or +remotes::install_bitbucket("hrbrmstr/unjuris") +``` + +NOTE: To use the ‘remotes’ install options you will need to have the +[{remotes} package](https://github.com/r-lib/remotes) installed. + +## Usage + +``` r +library(unjuris) + +# current version +packageVersion("unjuris") +## [1] '0.1.0' +``` + +``` r +library(tibble) # for pretty printing + +(xdf <- juris_search(year_start = 2019, year_end = 2020)) +## # A tibble: 61 x 10 +## display_name treaties countries symbols date_of_adoptio… issues articles communications type_of_decisio… detail_url +## +## 1 A.B. CRC Spain CRC/C/… 07 Feb 2020 "admi… CRC-12C… 024/2017 Adoption of vie… https://j… +## 2 N.R. 
CRC Paraguay CRC/C/… 03 Feb 2020 "admi… CRC-10-… 030/2017 Adoption of vie… https://j… +## 3 Natalia Ciob… CEDAW Republic … CEDAW/… 04 Nov 2019 "disc… 11(1)(E… 104/2016 Adoption of vie… https://j… +## 4 El Hasnaoui … CESCR Spain E/C.12… 22 Oct 2019 "hous… CESCR-1… 060/2018 Discontinuance … https://j… +## 5 López Albán … CESCR Spain E/C.12… 11 Oct 2019 "admi… CESCR-1… 037/2018 Adoption of vie… https://j… +## 6 S. S. R. CESCR Spain E/C.12… 11 Oct 2019 "admi… CESCR-1… 051/2018 Inadmissibility… https://j… +## 7 M. L. B. CESCR Luxembourg E/C.12… 11 Oct 2019 "admi… CESCR-8… 020/2017 Inadmissibility… https://j… +## 8 M. T. et al CESCR Spain E/C.12… 11 Oct 2019 "" CESCR-1… 110/2019 Discontinuance … https://j… +## 9 M. P. y otros CESCR Spain E/C.12… 11 Oct 2019 "hous… CESCR-1… 096/2019 Discontinuance … https://j… +## 10 Z. P. y otros CESCR Spain E/C.12… 11 Oct 2019 "hous… CESCR-1… 043/2018 Discontinuance … https://j… +## # … with 51 more rows + +get_details(xdf$detail_url[10]) +## # A tibble: 6 x 5 +## language doc docx pdf html +## +## 1 English http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +## 2 Français http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +## 3 Español http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +## 4 العربية http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +## 5 中文 http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +## 6 русский http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… + +get_details(2606) +## # A tibble: 3 x 5 +## language doc docx pdf html +## +## 1 English http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +## 2 Español 
http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +## 3 中文 http://docstore.ohchr.org/… http://docstore.ohchr.org/… http://docstore.ohchr.org… http://docstore.ohchr.org… +``` + +## unjuris Metrics + +| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | +| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: | +| R | 6 | 0.86 | 134 | 0.92 | 56 | 0.75 | 51 | 0.63 | +| Rmd | 1 | 0.14 | 12 | 0.08 | 19 | 0.25 | 30 | 0.37 | + +## Code of Conduct + +Please note that this project is released with a Contributor Code of +Conduct. By participating in this project you agree to abide by its +terms. diff --git a/man/get_details.Rd b/man/get_details.Rd new file mode 100644 index 0000000..c8a2a08 --- /dev/null +++ b/man/get_details.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-details.R +\name{get_details} +\alias{get_details} +\title{Retrieve details (document links) from a jurisprudence record entry} +\usage{ +get_details(detail_id_or_url) +} +\arguments{ +\item{detail_id_or_url}{either a number (e.g. \code{2606}) or a full URL +(i.e. the \code{detail_url} from a \code{\link[=juris_search]{juris_search()}} query result)} +} +\value{ +data frame of document links +} +\description{ +Retrieve details (document links) from a jurisprudence record entry +} +\examples{ +xdf <- juris_search(year_start = 2019, year_end = 2020) +get_details(xdf$detail_url[1]) +get_details(2606) +} diff --git a/man/juris_search.Rd b/man/juris_search.Rd new file mode 100644 index 0000000..d0e5f47 --- /dev/null +++ b/man/juris_search.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/juris-search.R +\name{juris_search} +\alias{juris_search} +\title{Search the U.N. 
Jurisprudence Database} +\usage{ +juris_search( + keyword = "", + search_type = c("all", "any"), + year_start = format(Sys.Date(), "\%Y"), + year_end = format(Sys.Date(), "\%Y"), + symbol = "", + communication_number = "", + session = "", + decision_type = c("all", "admissibility", "adoption", "discontinuance", + "inadmissibility", "opinion", "revised") +) +} +\arguments{ +\item{keyword}{keyword} + +\item{search_type}{all or any} + +\item{year_start, year_end}{year start/end} + +\item{symbol}{symbol} + +\item{communication_number}{comm #} + +\item{session}{session} + +\item{decision_type}{decision type} +} +\description{ +Search the U.N. Jurisprudence Database +} +\examples{ +juris_search(year_start = 2019, year_end = 2020) +} +\references{ +\url{https://juris.ohchr.org/search/documents} +} diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..0eec752 --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal} diff --git a/man/unjuris.Rd b/man/unjuris.Rd index d3ac31c..a7f61fa 100644 --- a/man/unjuris.Rd +++ b/man/unjuris.Rd @@ -4,9 +4,14 @@ \name{unjuris} \alias{unjuris} \alias{unjuris-package} -\title{...} +\title{Search the U.N. Jurisprudence Database} \description{ -A good description goes here otherwise CRAN checks fail. +The jurisprudence database is intended to be a single source of the human rights recommendations +and findings issued by all above committees in their work on individual cases. It enables the general +public, governments, civil society organizations, United Nations partners and international regional +mechanisms to research the vast body of legal interpretation of international human rights law as +it has evolved over the past years. 
Tools are provided to search and retrieve search results from +the database. } \seealso{ Useful links: