Browse Source

initial commit

pull/2/head
Bob Rudis 9 years ago
parent
commit
1154ffc54d
  1. 6
      .Rbuildignore
  2. 14
      .travis.yml
  3. 25
      CONDUCT.md
  4. 11
      DESCRIPTION
  5. 16
      NAMESPACE
  6. 65
      R/describe.r
  7. 62
      R/docx_find_tbls.r
  8. 14
      R/docxtractr-package.r
  9. 49
      R/read_docs.r
  10. 30
      R/utils.r
  11. 122
      README.Rmd
  12. 219
      README.md
  13. BIN
      docxtractr-logo.png
  14. 9
      docxtractr.Rproj
  15. BIN
      inst/examples/complex.docx
  16. BIN
      inst/examples/data.docx
  17. BIN
      inst/examples/data3.docx
  18. BIN
      inst/examples/none.docx
  19. 21
      man/docx_describe_tbls.Rd
  20. 30
      man/docx_extract_tbl.Rd
  21. 22
      man/docx_tbl_count.Rd
  22. 17
      man/docxtractr.Rd
  23. 17
      man/print.docx.Rd
  24. 19
      man/read_docx.Rd
  25. 4
      tests/testthat.R
  26. 6
      tests/testthat/test-docxtractr.R

6
.Rbuildignore

@ -1,2 +1,8 @@
^.*\.Rproj$
^\.Rproj\.user$
^README\.Rmd$
^README-.*\.png$
^\.travis\.yml$
^CONDUCT\.md$
^README\.md$
^docxtractr-logo\.png$

14
.travis.yml

@ -0,0 +1,14 @@
# Sample .travis.yml for R projects
language: r
warnings_are_errors: true
sudo: required
env:
  global:
    - CRAN: http://cran.rstudio.com
notifications:
  email:
    on_success: change
    on_failure: change

25
CONDUCT.md

@ -0,0 +1,25 @@
# Contributor Code of Conduct
As contributors and maintainers of this project, we pledge to respect all people who
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.
We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.
Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed
from the project team.
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
opening an issue or contacting one or more of the project maintainers.
This Code of Conduct is adapted from the Contributor Covenant
(http://contributor-covenant.org), version 1.0.0, available at
http://contributor-covenant.org/version/1/0/0/

11
DESCRIPTION

@ -1,8 +1,13 @@
Package: docxtractr
Title: What the Package Does (one line, title case)
Title: Extract Tables from Microsoft Word Documents
Version: 0.0.0.9000
Authors@R: c(person("Bob", "Rudis", email = "bob@rudis.net", role = c("aut", "cre")))
Description: What the package does (one paragraph).
Depends: R (>= 3.2.2)
Description: Microsoft Word docx files provide an XML structure that is fairly
straightforward to navigate, especially when it applies to Word tables. The
docxtractr package provides tools to determine table count, table structure and
extract tables from Microsoft Word docx documents.
Depends: R (>= 3.0.0)
License: MIT + file LICENSE
LazyData: true
Suggests: testthat
Imports: tools, xml2, dplyr

16
NAMESPACE

@ -1 +1,15 @@
exportPattern("^[^\\.]")
# Generated by roxygen2 (4.1.1): do not edit by hand
S3method(print,docx)
export(docx_describe_tbls)
export(docx_extract_tbl)
export(docx_tbl_count)
export(read_docx)
importFrom(dplyr,bind_rows)
importFrom(tools,file_ext)
importFrom(xml2,read_xml)
importFrom(xml2,xml_attrs)
importFrom(xml2,xml_find_all)
importFrom(xml2,xml_find_one)
importFrom(xml2,xml_ns)
importFrom(xml2,xml_text)

65
R/describe.r

@ -0,0 +1,65 @@
#' Returns a description of all the tables in the Word document
#'
#' This function will attempt to discern the structure of each of the tables
#' in \code{docx} and print this information
#'
#' For every table the total cell count, the row count, a guess at whether
#' every row has the same number of cells and a guess at whether the first row
#' is a header are written to the console.
#'
#' @param docx \code{docx} object read with \code{read_docx}
#' @return \code{NULL}, invisibly; the function is called for its printed
#'   output. An error is raised when the document contains no tables.
#' @export
#' @examples
#' complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
#' docx_tbl_count(complx)
#' docx_describe_tbls(complx)
docx_describe_tbls <- function(docx) {

  ensure_docx(docx)
  if (!docx_tbl_count(docx) > 0) stop("No tables in document", call.=FALSE)

  ns <- docx$ns
  tbls <- docx$tbls

  cat(sprintf("Word document [%s]\n\n", docx$path))

  for (i in seq_along(tbls)) {

    tbl <- tbls[[i]]

    cells <- xml_find_all(tbl, "./w:tr/w:tc", ns=ns)
    rows <- xml_find_all(tbl, "./w:tr", ns=ns)

    # vapply() always returns an integer vector, even for a table without rows
    # (sapply() would return an empty list there and break max() below)
    cell_count_by_row <- vapply(rows, function(row) {
      length(xml_find_all(row, "./w:tc", ns))
    }, integer(1))

    row_counts <- paste0(unique(cell_count_by_row), collapse=", ")

    # a table without rows has no widest row; treat its width as 0
    max_cell_count <- if (length(cell_count_by_row) > 0) max(cell_count_by_row) else 0L

    cat(sprintf("Table %d\n total cells: %d\n row count : %d\n", i, length(cells), length(rows)))

    # simplistic test for whether table is uniform rows x cells == cell count
    if ((max_cell_count * length(rows)) == length(cells)) {
      cat(" uniform : likely!\n")
    } else {
      cat(sprintf(" uniform : unlikely => found differing cell counts (%s) across some rows \n", row_counts))
    }

    # microsoft has a tag for some table structure info. examine it to
    # see if the creator of the table made the first row special which
    # will likely mean it's a header candidate
    hdr <- has_header(tbl, rows, ns)
    if (is.na(hdr)) {
      cat(" has header : unlikely\n")
    } else {
      cat(sprintf(" has header : likely! => possibly [%s]\n", hdr))
    }

    cat("\n")

  }

  invisible(NULL)

}
#' Display information about the document
#'
#' Prints the per-table summary produced by \code{docx_describe_tbls}.
#'
#' @param x \code{docx} object
#' @param ... ignored
#' @return \code{x}, invisibly (the usual contract for \code{print} methods)
#' @export
print.docx <- function(x, ...) {
  docx_describe_tbls(x)
  # hand the object back so print() can be used inside pipelines/assignments
  invisible(x)
}

62
R/docx_find_tbls.r

@ -0,0 +1,62 @@
#' Extract a table from a Word document
#'
#' Given a document read with \code{read_docx} and a table to extract (optionally
#' indicating whether there was a header or not and if cell whitespace trimming is
#' desired) extract the contents of the table to a \code{data.frame}.
#'
#' All cell values are returned as character strings. When \code{header} is
#' \code{FALSE} the columns are named \code{V1}, \code{V2}, ... and a message
#' is emitted if the Word document marks the first row as a header.
#'
#' @param docx \code{docx} object read with \code{read_docx}
#' @param tbl_number which table to extract (defaults to \code{1})
#' @param header assume first row of table is a header row? (default: \code{TRUE})
#' @param trim trim leading/trailing whitespace (if any) in cells? (default: \code{TRUE})
#' @return \code{data.frame}
#' @export
#' @examples
#' doc3 <- read_docx(system.file("examples/data3.docx", package="docxtractr"))
#' docx_extract_tbl(doc3, 3)
docx_extract_tbl <- function(docx, tbl_number=1, header=TRUE, trim=TRUE) {

  ensure_docx(docx)

  # tbl_number must be a single, non-missing number within 1..table count;
  # `||` short-circuits so the range tests only ever see a scalar
  if (!is.numeric(tbl_number) || length(tbl_number) != 1 || is.na(tbl_number) ||
      tbl_number < 1 || tbl_number > docx_tbl_count(docx)) {
    stop("'tbl_number' is invalid.", call.=FALSE)
  }

  ns <- docx$ns
  tbl <- docx$tbls[[tbl_number]]

  rows <- xml_find_all(tbl, "./w:tr", ns=ns)

  # one single-row data.frame per table row; bind_rows() pads short rows with NA
  dat <- bind_rows(lapply(rows, function(row) {
    vals <- xml_text(xml_find_all(row, "./w:tc", ns=ns), trim=trim)
    # seq_along() keeps this correct for a row without any cells
    names(vals) <- sprintf("V%d", seq_along(vals))
    data.frame(as.list(vals), stringsAsFactors=FALSE)
  }))

  if (header) {
    if (nrow(dat) > 0) {
      # use a plain character vector (not a one-row data frame) as column names
      colnames(dat) <- as.character(unlist(dat[1, ], use.names=FALSE))
      # drop=FALSE keeps a one-column table a data.frame
      dat <- dat[-1, , drop=FALSE]
    }
  } else {
    hdr <- has_header(tbl, rows, ns)
    if (!is.na(hdr)) message("NOTE: header=FALSE but table has a marked header row in the Word document")
  }

  rownames(dat) <- NULL

  dat

}
#' Get number of tables in a Word document
#'
#' Counts the table nodes that \code{read_docx} stored in the \code{docx}
#' object; the result is \code{0} when the document has no tables.
#'
#' @param docx \code{docx} object read with \code{read_docx}
#' @return numeric
#' @export
#' @examples
#' complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
#' docx_tbl_count(complx)
docx_tbl_count <- function(docx) {

  # refuse anything that is not a well-formed docx object
  ensure_docx(docx)

  tables <- docx[["tbls"]]
  length(tables)

}

14
R/docxtractr-package.r

@ -0,0 +1,14 @@
#' docxtractr is an R package for extracting tables out of Word documents (docx)
#'
#' Microsoft Word docx files provide an XML structure that is fairly
#' straightforward to navigate, especially when it applies to Word tables. The
#' docxtractr package provides tools to determine table count, table structure and
#' extract tables from Microsoft Word docx documents.
#'
#' @name docxtractr
#' @docType package
#'
#' @author Bob Rudis (@@hrbrmstr)
#' @importFrom xml2 xml_find_all xml_text xml_ns xml_find_one xml_attrs
#' @importFrom dplyr bind_rows
NULL

49
R/read_docs.r

@ -0,0 +1,49 @@
#' Read in a Word document for table extraction
#'
#' Path must be local (i.e. not a URL)
#'
#' The main document part (\code{word/document.xml}) is extracted from the
#' docx archive into a private temporary directory and parsed. The temporary
#' directory is removed when the function exits, whether or not it succeeded.
#'
#' @param path path to the Word document; the file extension must be
#'   \code{docx} (upper or lower case)
#' @return an object of class \code{docx}: a list holding the parsed XML
#'   document (\code{docx}), its namespaces (\code{ns}), the \code{w:tbl}
#'   nodes found in it (\code{tbls}) and the expanded \code{path}
#' @importFrom xml2 read_xml
#' @importFrom tools file_ext
#' @export
#' @examples
#' doc <- read_docx(system.file("examples/data.docx", package="docxtractr"))
#' class(doc)
read_docx <- function(path) {

  path <- path.expand(path)

  # compare case-insensitively so "REPORT.DOCX" is accepted as well
  if (tolower(file_ext(path)) != "docx") stop("read_docx only works with '.docx' files", call.=FALSE)
  if (!file.exists(path)) stop(sprintf("Cannot find '%s'", path), call.=FALSE)

  # a fresh directory per call: nothing left over from an earlier (possibly
  # failed) call can be picked up, and on.exit() removes it even on error
  tmpd <- tempfile(pattern="docdata")
  dir.create(tmpd)
  on.exit(unlink(tmpd, recursive=TRUE), add=TRUE)

  # a docx file is a zip archive; only the main document part is needed
  unzip(path, files="word/document.xml", exdir=tmpd)

  doc_file <- file.path(tmpd, "word", "document.xml")
  if (!file.exists(doc_file)) {
    stop(sprintf("'%s' does not appear to be a valid Word document", path), call.=FALSE)
  }

  # read the actual XML document
  doc <- read_xml(doc_file)

  # extract the namespace
  ns <- xml_ns(doc)

  # get the tables
  tbls <- xml_find_all(doc, ".//w:tbl", ns=ns)

  # make an object for other functions to work with
  docx <- list(docx=doc, ns=ns, tbls=tbls, path=path)

  # special class helps us work with these things
  class(docx) <- "docx"

  docx

}

30
R/utils.r

@ -0,0 +1,30 @@
# used by functions to make sure they are working with a well-formed docx object
#
# Stops with an error unless `docx` inherits from class "docx" and carries
# elements named "docx", "ns", "tbls" and "path"; returns NULL invisibly when
# the object passes both checks.
ensure_docx <- function(docx) {

  if (!inherits(docx, "docx")) stop("Must pass in a 'docx' object", call.=FALSE)

  # look at the object's own names: exists(where=) would first coerce the
  # object to an environment, which fails with an unrelated error for unnamed
  # lists and, for a numeric object, inspects the search path instead
  required <- c("docx", "ns", "tbls", "path")
  if (!all(required %in% names(docx))) {
    stop("'docx' object missing necessary components", call.=FALSE)
  }

  invisible(NULL)

}
# decide whether a w:tbl looks like it has a header row
#
# Returns NA when no header is indicated, otherwise the text of the cells in
# the first row pasted together with ", ".
has_header <- function(tbl, rows, ns) {

  # Word keeps some table structure info in w:tblPr/w:tblLook; a "firstRow"
  # attribute that is not "0" suggests the first row was given special
  # treatment, which makes it a header candidate
  tbl_look <- try(xml_find_one(tbl, "./w:tblPr/w:tblLook", ns), silent=TRUE)

  # lookup failed => nothing to go on
  if (inherits(tbl_look, "try-error")) return(NA)

  attrs <- xml_attrs(tbl_look)

  # attribute absent => nothing to go on
  if (!("firstRow" %in% names(attrs))) return(NA)

  # attribute explicitly switched off
  if (attrs["firstRow"] == "0") return(NA)

  first_row_cells <- xml_find_all(rows[[1]], "./w:tc", ns)
  paste0(xml_text(first_row_cells), collapse=", ")

}

122
README.Rmd

@ -0,0 +1,122 @@
---
output:
md_document:
variant: markdown_github
---
<!-- README.md is generated from README.Rmd. Please edit that file -->
```{r, echo = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "README-"
)
```
![](docxtractr-logo.png)
docxtractr is an R package for extracting tables out of Word documents (docx)
Microsoft Word docx files provide an XML structure that is fairly
straightforward to navigate, especially when it applies to Word tables. The
docxtractr package provides tools to determine table count, table structure and
extract tables from Microsoft Word docx documents.
The following functions are implemented:
- `read_docx`: Read in a Word document for table extraction
- `docx_describe_tbls`: Returns a description of all the tables in the Word document
- `docx_extract_tbl`: Extract a table from a Word document
- `docx_tbl_count`: Get number of tables in a Word document
The following data files are included:
- `system.file("examples/data.docx", package="docxtractr")`: Word docx with 1 table
- `system.file("examples/data3.docx", package="docxtractr")`: Word docx with 3 tables
- `system.file("examples/none.docx", package="docxtractr")`: Word docx with 0 tables
- `system.file("examples/complex.docx", package="docxtractr")`: Word docx with non-uniform tables
### News
- Version 0.0.0.9000 released
### Installation
```{r eval=FALSE}
devtools::install_github("hrbrmstr/docxtractr")
```
```{r echo=FALSE}
options(width=120)
```
### Usage
```{r sample}
library(docxtractr)
# current version
packageVersion("docxtractr")
# one table
doc <- read_docx(system.file("examples/data.docx", package="docxtractr"))
docx_tbl_count(doc)
docx_describe_tbls(doc)
docx_extract_tbl(doc, 1)
docx_extract_tbl(doc)
docx_extract_tbl(doc, header=FALSE)
# three tables
doc3 <- read_docx(system.file("examples/data3.docx", package="docxtractr"))
docx_extract_tbl(doc3, 3)
docx_tbl_count(doc3)
docx_describe_tbls(doc3)
# no tables
none <- read_docx(system.file("examples/none.docx", package="docxtractr"))
docx_tbl_count(none)
# wrapping in try since it will return an error
# use docx_tbl_count before trying to extract in scripts/production
try(docx_describe_tbls(none))
try(docx_extract_tbl(none, 2))
# 5 tables, with two in sketchy formats
complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
docx_tbl_count(complx)
docx_describe_tbls(complx)
docx_extract_tbl(complx, 3, header=TRUE)
docx_extract_tbl(complx, 4, header=TRUE)
docx_extract_tbl(complx, 5, header=TRUE)
```
### Test Results
```{r}
library(docxtractr)
library(testthat)
date()
test_dir("tests/")
```
### Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md).
By participating in this project you agree to abide by its terms.

219
README.md

@ -0,0 +1,219 @@
<!-- README.md is generated from README.Rmd. Please edit that file -->
![](docxtractr-logo.png)
docxtractr is an R package for extracting tables out of Word documents (docx)
Microsoft Word docx files provide an XML structure that is fairly straightforward to navigate, especially when it applies to Word tables. The docxtractr package provides tools to determine table count, table structure and extract tables from Microsoft Word docx documents.
The following functions are implemented:
- `read_docx`: Read in a Word document for table extraction
- `docx_describe_tbls`: Returns a description of all the tables in the Word document
- `docx_extract_tbl`: Extract a table from a Word document
- `docx_tbl_count`: Get number of tables in a Word document
The following data files are included:
- `system.file("examples/data.docx", package="docxtractr")`: Word docx with 1 table
- `system.file("examples/data3.docx", package="docxtractr")`: Word docx with 3 tables
- `system.file("examples/none.docx", package="docxtractr")`: Word docx with 0 tables
- `system.file("examples/complex.docx", package="docxtractr")`: Word docx with non-uniform tables
### News
- Version 0.0.0.9000 released
### Installation
``` r
devtools::install_github("hrbrmstr/docxtractr")
```
### Usage
``` r
library(docxtractr)
# current version
packageVersion("docxtractr")
#> [1] '0.0.0.9000'
# one table
doc <- read_docx(system.file("examples/data.docx", package="docxtractr"))
docx_tbl_count(doc)
#> [1] 1
docx_describe_tbls(doc)
#> Word document [/Library/Frameworks/R.framework/Versions/3.2/Resources/library/docxtractr/examples/data.docx]
#>
#> Table 1
#> total cells: 16
#> row count : 4
#> uniform : likely!
#> has header : likely! => possibly [This, Is, A, Column]
docx_extract_tbl(doc, 1)
#> Source: local data frame [3 x 4]
#>
#> This Is A Column
#> 1 1 Cat 3.4 Dog
#> 2 3 Fish 100.3 Bird
#> 3 5 Pelican -99 Kangaroo
docx_extract_tbl(doc)
#> Source: local data frame [3 x 4]
#>
#> This Is A Column
#> 1 1 Cat 3.4 Dog
#> 2 3 Fish 100.3 Bird
#> 3 5 Pelican -99 Kangaroo
docx_extract_tbl(doc, header=FALSE)
#> NOTE: header=FALSE but table has a marked header row in the Word document
#> Source: local data frame [4 x 4]
#>
#> V1 V2 V3 V4
#> 1 This Is A Column
#> 2 1 Cat 3.4 Dog
#> 3 3 Fish 100.3 Bird
#> 4 5 Pelican -99 Kangaroo
# three tables
doc3 <- read_docx(system.file("examples/data3.docx", package="docxtractr"))
docx_extract_tbl(doc3, 3)
#> Source: local data frame [6 x 2]
#>
#> Foo Bar
#> 1 Aa Bb
#> 2 Dd Ee
#> 3 Gg Hh
#> 4 1 2
#> 5 Zz Jj
#> 6 Tt ii
docx_tbl_count(doc3)
#> [1] 3
docx_describe_tbls(doc3)
#> Word document [/Library/Frameworks/R.framework/Versions/3.2/Resources/library/docxtractr/examples/data3.docx]
#>
#> Table 1
#> total cells: 16
#> row count : 4
#> uniform : likely!
#> has header : likely! => possibly [This, Is, A, Column]
#>
#> Table 2
#> total cells: 12
#> row count : 4
#> uniform : likely!
#> has header : likely! => possibly [Foo, Bar, Baz]
#>
#> Table 3
#> total cells: 14
#> row count : 7
#> uniform : likely!
#> has header : likely! => possibly [Foo, Bar]
# no tables
none <- read_docx(system.file("examples/none.docx", package="docxtractr"))
docx_tbl_count(none)
#> [1] 0
# wrapping in try since it will return an error
# use docx_tbl_count before trying to extract in scripts/production
try(docx_describe_tbls(none))
try(docx_extract_tbl(none, 2))
# 5 tables, with two in sketchy formats
complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
docx_tbl_count(complx)
#> [1] 5
docx_describe_tbls(complx)
#> Word document [/Library/Frameworks/R.framework/Versions/3.2/Resources/library/docxtractr/examples/complex.docx]
#>
#> Table 1
#> total cells: 16
#> row count : 4
#> uniform : likely!
#> has header : likely! => possibly [This, Is, A, Column]
#>
#> Table 2
#> total cells: 12
#> row count : 4
#> uniform : likely!
#> has header : likely! => possibly [Foo, Bar, Baz]
#>
#> Table 3
#> total cells: 14
#> row count : 7
#> uniform : likely!
#> has header : likely! => possibly [Foo, Bar]
#>
#> Table 4
#> total cells: 11
#> row count : 4
#> uniform : unlikely => found differing cell counts (3, 2) across some rows
#> has header : likely! => possibly [Foo, Bar, Baz]
#>
#> Table 5
#> total cells: 21
#> row count : 7
#> uniform : likely!
#> has header : unlikely
docx_extract_tbl(complx, 3, header=TRUE)
#> Source: local data frame [6 x 2]
#>
#> Foo Bar
#> 1 Aa Bb
#> 2 Dd Ee
#> 3 Gg Hh
#> 4 1 2
#> 5 Zz Jj
#> 6 Tt ii
docx_extract_tbl(complx, 4, header=TRUE)
#> Source: local data frame [3 x 3]
#>
#> Foo Bar Baz
#> 1 Aa BbCc NA
#> 2 Dd Ee Ff
#> 3 Gg Hh ii
docx_extract_tbl(complx, 5, header=TRUE)
#> Source: local data frame [6 x 3]
#>
#> Foo Bar Baz
#> 1 Aa Bb Cc
#> 2 Dd Ee Ff
#> 3 Gg Hh Ii
#> 4 Jj88 Kk Ll
#> 5 Uu Ii
#> 6 Hh Ii h
```
### Test Results
``` r
library(docxtractr)
library(testthat)
date()
#> [1] "Mon Aug 24 13:36:23 2015"
test_dir("tests/")
#> testthat results ========================================================================================================
#> OK: 0 SKIPPED: 0 FAILED: 0
#>
#> DONE
```
### Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.

BIN
docxtractr-logo.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

9
docxtractr.Rproj

@ -14,3 +14,12 @@ BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
UseSpacesForTab: Yes
NumSpacesForTab: 2
RnwWeave: Sweave
LaTeX: pdfLaTeX
PackageBuildArgs: --resave-data
PackageCheckArgs: --as-cran

BIN
inst/examples/complex.docx

Binary file not shown.

BIN
inst/examples/data.docx

Binary file not shown.

BIN
inst/examples/data3.docx

Binary file not shown.

BIN
inst/examples/none.docx

Binary file not shown.

21
man/docx_describe_tbls.Rd

@ -0,0 +1,21 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/describe.r
\name{docx_describe_tbls}
\alias{docx_describe_tbls}
\title{Returns a description of all the tables in the Word document}
\usage{
docx_describe_tbls(docx)
}
\arguments{
\item{docx}{\code{docx} object read with \code{read_docx}}
}
\description{
This function will attempt to discern the structure of each of the tables
in \code{docx} and print this information
}
\examples{
complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
docx_tbl_count(complx)
docx_describe_tbls(complx)
}

30
man/docx_extract_tbl.Rd

@ -0,0 +1,30 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/docx_find_tbls.r
\name{docx_extract_tbl}
\alias{docx_extract_tbl}
\title{Extract a table from a Word document}
\usage{
docx_extract_tbl(docx, tbl_number = 1, header = TRUE, trim = TRUE)
}
\arguments{
\item{docx}{\code{docx} object read with \code{read_docx}}
\item{tbl_number}{which table to extract (defaults to \code{1})}
\item{header}{assume first row of table is a header row? (default: \code{TRUE})}
\item{trim}{trim leading/trailing whitespace (if any) in cells? (default: \code{TRUE})}
}
\value{
\code{data.frame}
}
\description{
Given a document read with \code{read_docx} and a table to extract (optionally
indicating whether there was a header or not and if cell whitespace trimming is
desired) extract the contents of the table to a \code{data.frame}.
}
\examples{
doc3 <- read_docx(system.file("examples/data3.docx", package="docxtractr"))
docx_extract_tbl(doc3, 3)
}

22
man/docx_tbl_count.Rd

@ -0,0 +1,22 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/docx_find_tbls.r
\name{docx_tbl_count}
\alias{docx_tbl_count}
\title{Get number of tables in a Word document}
\usage{
docx_tbl_count(docx)
}
\arguments{
\item{docx}{\code{docx} object read with \code{read_docx}}
}
\value{
numeric
}
\description{
Get number of tables in a Word document
}
\examples{
complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
docx_tbl_count(complx)
}

17
man/docxtractr.Rd

@ -0,0 +1,17 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/docxtractr-package.r
\docType{package}
\name{docxtractr}
\alias{docxtractr}
\alias{docxtractr-package}
\title{docxtractr is an R package for extracting tables out of Word documents (docx)}
\description{
Microsoft Word docx files provide an XML structure that is fairly
straightforward to navigate, especially when it applies to Word tables. The
docxtractr package provides tools to determine table count, table structure and
extract tables from Microsoft Word docx documents.
}
\author{
Bob Rudis (@hrbrmstr)
}

17
man/print.docx.Rd

@ -0,0 +1,17 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/describe.r
\name{print.docx}
\alias{print.docx}
\title{Display information about the document}
\usage{
\method{print}{docx}(x, ...)
}
\arguments{
\item{x}{\code{docx} object}
\item{...}{ignored}
}
\description{
Display information about the document
}

19
man/read_docx.Rd

@ -0,0 +1,19 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/read_docs.r
\name{read_docx}
\alias{read_docx}
\title{Read in a Word document for table extraction}
\usage{
read_docx(path)
}
\arguments{
\item{path}{path to the Word document}
}
\description{
Path must be local (i.e. not a URL)
}
\examples{
doc <- read_docx(system.file("examples/data.docx", package="docxtractr"))
class(doc)
}

4
tests/testthat.R

@ -0,0 +1,4 @@
# Test runner script: attaches testthat and the package under test, then
# hands over to test_check() for the docxtractr package.
library(testthat)
library(docxtractr)
test_check("docxtractr")

6
tests/testthat/test-docxtractr.R

@ -0,0 +1,6 @@
context("basic functionality")

# Expected values are the ones shown in the package README for the example
# documents bundled under inst/examples.
test_that("we can do something", {

  # a document with exactly one 4-row, 4-column table (first row is the header)
  doc <- read_docx(system.file("examples/data.docx", package="docxtractr"))
  expect_true(inherits(doc, "docx"))
  expect_equal(docx_tbl_count(doc), 1)

  tbl <- docx_extract_tbl(doc, 1)
  expect_true(inherits(tbl, "data.frame"))
  expect_equal(dim(tbl), c(3, 4))
  expect_equal(colnames(tbl), c("This", "Is", "A", "Column"))

  # header=FALSE keeps the first row as data (and emits a note we do not need)
  raw <- suppressMessages(docx_extract_tbl(doc, header=FALSE))
  expect_equal(nrow(raw), 4)

  # a document without tables: counting works, describing/extracting errors
  none <- read_docx(system.file("examples/none.docx", package="docxtractr"))
  expect_equal(docx_tbl_count(none), 0)
  expect_error(docx_describe_tbls(none), "No tables in document")
  expect_error(docx_extract_tbl(none, 2), "'tbl_number' is invalid")

})
Loading…
Cancel
Save