Browse Source
Fix errors introduced by the tidyverse upgrade
tags/v0.6.1
No known key found for this signature in database
GPG Key ID: 1D7529BE14E2BBA9
10 changed files with
44 additions and
28 deletions
-
DESCRIPTION
-
NAMESPACE
-
NEWS.md
-
R/comments.R
-
R/describe.r
-
R/docx-extract-tbl.r
-
R/docxtractr-package.r
-
cran-comments.md
-
man/docx_extract_all.Rd
-
man/docx_extract_tbl.Rd
|
|
@ -1,6 +1,6 @@ |
|
|
|
Package: docxtractr |
|
|
|
Title: Extract Data Tables and Comments from 'Microsoft' 'Word' Documents |
|
|
|
Version: 0.6.0 |
|
|
|
Version: 0.6.1 |
|
|
|
Authors@R: c( |
|
|
|
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), |
|
|
|
comment = c(ORCID = "0000-0001-5670-2640")), |
|
|
@ -14,6 +14,8 @@ Description: 'Microsoft Word' 'docx' files provide an 'XML' structure that is fa |
|
|
|
comments. Tools are provided to determine table count/structure, comment count |
|
|
|
and also to extract/clean tables and comments from 'Microsoft Word' 'docx' documents. |
|
|
|
There is also nascent support for '.doc' files. |
|
|
|
SystemRequirements: LibreOffice (<https://www.libreoffice.org/>) required to extract |
|
|
|
data from .doc files. |
|
|
|
URL: http://gitlab.com/hrbrmstr/docxtractr |
|
|
|
BugReports: https://gitlab.com/hrbrmstr/docxtractr/issues |
|
|
|
Encoding: UTF-8 |
|
|
@ -27,12 +29,9 @@ Suggests: |
|
|
|
Imports: |
|
|
|
tools, |
|
|
|
xml2, |
|
|
|
tibble, |
|
|
|
purrr, |
|
|
|
dplyr, |
|
|
|
utils, |
|
|
|
httr, |
|
|
|
magrittr |
|
|
|
RoxygenNote: 6.0.1.9000 |
|
|
|
SystemRequirements: LibreOffice (<https://www.libreoffice.org/>) required to extract |
|
|
|
data from .doc files. |
|
|
|
RoxygenNote: 6.1.1 |
|
|
|
|
|
@ -27,9 +27,6 @@ importFrom(purrr,map_chr) |
|
|
|
importFrom(purrr,map_df) |
|
|
|
importFrom(purrr,map_int) |
|
|
|
importFrom(purrr,map_lgl) |
|
|
|
importFrom(tibble,as_data_frame) |
|
|
|
importFrom(tibble,as_tibble) |
|
|
|
importFrom(tibble,data_frame) |
|
|
|
importFrom(tools,file_ext) |
|
|
|
importFrom(utils,globalVariables) |
|
|
|
importFrom(utils,unzip) |
|
|
|
|
|
@ -1,3 +1,7 @@ |
|
|
|
# 0.6.1 |
|
|
|
|
|
|
|
- Fix for errors introduced by an update of the tidyverse |
|
|
|
|
|
|
|
# 0.6.0 |
|
|
|
|
|
|
|
- Enable support for accepting or rejecting tracked changes when |
|
|
|
|
|
@ -14,14 +14,21 @@ |
|
|
|
docx_extract_all_cmnts <- function(docx, include_text=FALSE) { |
|
|
|
|
|
|
|
ensure_docx(docx) |
|
|
|
if (docx_cmnt_count(docx) < 1) return(tibble::data_frame()) |
|
|
|
if (docx_cmnt_count(docx) < 1) { |
|
|
|
out <- data.frame(stringsAsFactors=FALSE) |
|
|
|
class(out) <- c("tbl_df", "tbl", "data.frame") |
|
|
|
return(out) |
|
|
|
} |
|
|
|
|
|
|
|
ns <- docx$ns |
|
|
|
|
|
|
|
comments <- docx$cmnts |
|
|
|
|
|
|
|
purrr::map_df(xml2::xml_attrs(comments), function(x) { |
|
|
|
tibble::as_data_frame(t(cbind.data.frame(x, stringsAsFactors=FALSE))) |
|
|
|
as.data.frame( |
|
|
|
t(cbind.data.frame(x, stringsAsFactors=FALSE)), |
|
|
|
stringsAsFactors=FALSE |
|
|
|
) |
|
|
|
}) -> meta |
|
|
|
|
|
|
|
dplyr::bind_cols( |
|
|
@ -49,6 +56,7 @@ docx_extract_all_cmnts <- function(docx, include_text=FALSE) { |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
tibble::as_tibble(out) |
|
|
|
class(out) <- c("tbl_df", "tbl", "data.frame") |
|
|
|
out |
|
|
|
|
|
|
|
} |
|
|
|
|
|
@ -85,7 +85,10 @@ docx_describe_cmnts <- function(docx) { |
|
|
|
cat(sprintf("Found %d comments.\n", length(cmnts))) |
|
|
|
|
|
|
|
purrr::map_df(xml_attrs(cmnts), function(x) { |
|
|
|
as_data_frame(t(cbind.data.frame(x, stringsAsFactors=FALSE))) |
|
|
|
as.data.frame( |
|
|
|
t(cbind.data.frame(x, stringsAsFactors=FALSE)), |
|
|
|
stringsAsFactors = FALSE |
|
|
|
) |
|
|
|
}) -> meta |
|
|
|
|
|
|
|
cmnt_df <- dplyr::bind_cols(meta, |
|
|
|
|
|
@ -52,7 +52,8 @@ docx_extract_tbl <- function(docx, tbl_number=1, header=TRUE, preserve=FALSE, tr |
|
|
|
}) -> dat |
|
|
|
|
|
|
|
if (header) { |
|
|
|
colnames(dat) <- dat[1,] |
|
|
|
hopeful_names <- make.names(dat[1,]) |
|
|
|
colnames(dat) <- hopeful_names |
|
|
|
dat <- dat[-1,] |
|
|
|
} else { |
|
|
|
hdr <- has_header(tbl, rows, ns) |
|
|
@ -63,6 +64,7 @@ docx_extract_tbl <- function(docx, tbl_number=1, header=TRUE, preserve=FALSE, tr |
|
|
|
|
|
|
|
rownames(dat) <- NULL |
|
|
|
|
|
|
|
tibble::as_tibble(dat) |
|
|
|
class(dat) <- c("tbl_df", "tbl", "data.frame") |
|
|
|
dat |
|
|
|
|
|
|
|
} |
|
|
|
|
|
@ -11,7 +11,6 @@ |
|
|
|
#' |
|
|
|
#' @author Bob Rudis (bob@@rud.is) |
|
|
|
#' @importFrom xml2 xml_find_all xml_text xml_ns xml_find_first xml_attrs read_xml |
|
|
|
#' @importFrom tibble data_frame as_data_frame as_tibble |
|
|
|
#' @importFrom dplyr bind_cols count arrange select |
|
|
|
#' @importFrom tools file_ext |
|
|
|
#' @importFrom utils unzip globalVariables |
|
|
|
|
|
@ -1,19 +1,22 @@ |
|
|
|
## Test environments |
|
|
|
* local OS X install, R 3.5.1 |
|
|
|
* ubuntu 14.04 (on travis-ci), R 3.5.1 |
|
|
|
|
|
|
|
* local OS X install, R 3.5.2 |
|
|
|
* ubuntu 14.04 (on travis-ci), R 3.5.2 |
|
|
|
* r-hub (fedora & windows) |
|
|
|
* win-builder (devel and release) |
|
|
|
|
|
|
|
## R CMD check results |
|
|
|
|
|
|
|
0 errors | 0 warnings | 0 note |
|
|
|
|
|
|
|
* This is a maintenance release |
|
|
|
0 errors | 0 warnings | 1 note |
|
|
|
|
|
|
|
## Reverse dependencies |
|
|
|
|
|
|
|
None. |
|
|
|
* checking CRAN incoming feasibility ... NOTE |
|
|
|
|
|
|
|
--- |
|
|
|
|
|
|
|
This is an update with a few new API calls and support |
|
|
|
for .doc files. |
|
|
|
This is an update to fix the errors introduced by the |
|
|
|
recent tidyverse update as noted by Kurt on 2019-01-05 |
|
|
|
(https://cran.r-project.org/web/checks/check_results_docxtractr.html) |
|
|
|
|
|
|
|
The code has been modified to account for the new |
|
|
|
behavior of the tidyverse. All tests and examples have been |
|
|
|
left intact. |
|
|
|
|
|
@ -4,7 +4,8 @@ |
|
|
|
\alias{docx_extract_all} |
|
|
|
\title{Extract all tables from a Word document} |
|
|
|
\usage{ |
|
|
|
docx_extract_all(docx, guess_header = TRUE, preserve = FALSE, trim = TRUE) |
|
|
|
docx_extract_all(docx, guess_header = TRUE, preserve = FALSE, |
|
|
|
trim = TRUE) |
|
|
|
} |
|
|
|
\arguments{ |
|
|
|
\item{docx}{\code{docx} object read with \code{read_docx}} |
|
|
|
|
|
@ -4,8 +4,8 @@ |
|
|
|
\alias{docx_extract_tbl} |
|
|
|
\title{Extract a table from a Word document} |
|
|
|
\usage{ |
|
|
|
docx_extract_tbl(docx, tbl_number = 1, header = TRUE, preserve = FALSE, |
|
|
|
trim = TRUE) |
|
|
|
docx_extract_tbl(docx, tbl_number = 1, header = TRUE, |
|
|
|
preserve = FALSE, trim = TRUE) |
|
|
|
} |
|
|
|
\arguments{ |
|
|
|
\item{docx}{\code{docx} object read with \code{read_docx}} |
|
|
|