From 7829003217382327d11e303292823d0bc18e6529 Mon Sep 17 00:00:00 2001
From: boB Rudis <bob@rud.is>
Date: Mon, 19 Jun 2017 05:53:49 -0400
Subject: [PATCH] cleanup + fix #7

---
 DESCRIPTION                      |  14 +--
 NAMESPACE                        |   8 +-
 NEWS.md                          |   9 ++
 R/aaa.r                          |   2 +-
 R/assign_colnames.r              |   7 +-
 R/describe.r                     |  11 ++-
 R/docx_find_tbls.r               |  17 ++--
 R/docxtractr-package.r           |   8 +-
 R/extract_all.r                  |  10 +-
 R/mcga.r                         |  26 ++++++
 R/read_docs.r                    |  10 +-
 R/utils.r                        |   8 +-
 README.Rmd                       |   6 +-
 README.md                        | 194 ++++++++++++++++++++++-----------------
 docxtractr.Rproj                 |  14 ++-
 man/assign_colnames.Rd           |   3 +-
 man/docx_cmnt_count.Rd           |   1 -
 man/docx_describe_cmnts.Rd       |   1 -
 man/docx_describe_tbls.Rd        |   1 -
 man/docx_extract_all.Rd          |   1 -
 man/docx_extract_all_cmnts.Rd    |   1 -
 man/docx_extract_all_tbls.Rd     |   1 -
 man/docx_extract_tbl.Rd          |   1 -
 man/docx_tbl_count.Rd            |   1 -
 man/docxtractr.Rd                |   1 -
 man/mcga.Rd                      |  23 +++++
 man/print.docx.Rd                |   1 -
 man/read_docx.Rd                 |   1 -
 tests/testthat/test-docxtractr.R |  10 +-
 29 files changed, 237 insertions(+), 154 deletions(-)
 create mode 100644 R/mcga.r
 create mode 100644 man/mcga.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 7aa074d..336f769 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,16 +1,16 @@
 Package: docxtractr
-Title: Extract Data Tables and Comments from Microsoft Word Documents
-Version: 0.2.1
+Title: Extract Data Tables and Comments from 'Microsoft' 'Word' Documents
+Version: 0.3.0
 Authors@R: c(person("Bob", "Rudis", email = "bob@rudis.net", role = c("aut", "cre")))
 Maintainer: Bob Rudis <bob@rudis.net>
-Description: Microsoft Word docx files provide an XML structure that is fairly
-    straightforward to navigate, especially when it applies to Word tables and
+Description: 'Microsoft Word' 'docx' files provide an 'XML' structure that is fairly
+    straightforward to navigate, especially when it applies to 'Word' tables and
     comments. Tools are provided to determine table count/structure, comment count
-    and also to extract/clean tables and comments from Microsoft Word docx documents.
+    and also to extract/clean tables and comments from 'Microsoft Word' 'docx' documents.
 URL: http://github.com/hrbrmstr/docxtractr
 BugReports: https://github.com/hrbrmstr/docxtractr/issues
 Depends:
-    R (>= 3.0.0)
+    R (>= 3.1.2)
 License: MIT + file LICENSE
 LazyData: true
 Suggests:
@@ -22,4 +22,4 @@ Imports:
     purrr,
     dplyr,
     utils
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
diff --git a/NAMESPACE b/NAMESPACE
index 39de290..82bd419 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,17 +10,23 @@ export(docx_extract_all_cmnts)
 export(docx_extract_all_tbls)
 export(docx_extract_tbl)
 export(docx_tbl_count)
+export(mcga)
 export(read_docx)
 importFrom(dplyr,arrange)
 importFrom(dplyr,bind_cols)
-importFrom(dplyr,bind_rows)
 importFrom(dplyr,count)
 importFrom(dplyr,select)
+importFrom(purrr,map)
+importFrom(purrr,map_chr)
 importFrom(purrr,map_df)
+importFrom(purrr,map_int)
+importFrom(purrr,map_lgl)
 importFrom(tibble,as_data_frame)
+importFrom(tibble,as_tibble)
 importFrom(tibble,data_frame)
 importFrom(tools,file_ext)
 importFrom(utils,download.file)
+importFrom(utils,globalVariables)
 importFrom(utils,unzip)
 importFrom(xml2,read_xml)
 importFrom(xml2,xml_attrs)
diff --git a/NEWS.md b/NEWS.md
index 8e07745..c2a49ab 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,12 @@
+# 0.3.0 WIP
+
+- return tibbles where possible & not stomping on input type (#7)
+- change tests to test for `tbl` vs `data.frame` (related to #7)
+- don't stomp on data frame-ish input type in `assign_colnames()`
+- prefix `::` (non-user facing tweak)
+- switch all `*apply()` to `purrr` calls since we bother to import `purrr`  (non-user facing tweak)
+- add `mcga()` helper to clean up column names (lower case, underscore-separated)
+
 # 0.2.0 released
 
 - update for new xml2 pkg compatibility
diff --git a/R/aaa.r b/R/aaa.r
index ee6fa33..e3396d0 100644
--- a/R/aaa.r
+++ b/R/aaa.r
@@ -1 +1 @@
-n <- author <- meta <- NULL
+utils::globalVariables(c("n", "author", "meta"))
diff --git a/R/assign_colnames.r b/R/assign_colnames.r
index 744281f..ac30abc 100644
--- a/R/assign_colnames.r
+++ b/R/assign_colnames.r
@@ -25,7 +25,7 @@
 #' docx_tbl_count(real_world)
 #'
 #' # get all the tables
-#' tbls <- docx_extract_all(real_world)
+#' tbls <- docx_extract_all_tbls(real_world)
 #'
 #' # make table 1 better
 #' assign_colnames(tbls[[1]], 2)
@@ -36,6 +36,8 @@ assign_colnames <- function(dat, row, remove=TRUE, remove_previous=remove) {
 
   if ((row > nrow(dat)) | (row < 1)) return(dat)
 
+  d_class <- class(dat)
+
   # just in case someone shoots us a data.table or other stranger things
   dat <- data.frame(dat, stringsAsFactors=FALSE)
 
@@ -47,6 +49,9 @@ assign_colnames <- function(dat, row, remove=TRUE, remove_previous=remove) {
   dat <- dat[-(start:end),]
   rownames(dat) <- NULL
 
+  # give them back what they passed in
+  class(dat) <- d_class
+
   dat
 
 }
diff --git a/R/describe.r b/R/describe.r
index ca77285..1159ddb 100644
--- a/R/describe.r
+++ b/R/describe.r
@@ -26,10 +26,10 @@ docx_describe_tbls <- function(docx) {
 
     tbl <- tbls[[i]]
 
-    cells <- xml_find_all(tbl, "./w:tr/w:tc", ns=ns)
-    rows <- xml_find_all(tbl, "./w:tr", ns=ns)
+    cells <- xml2::xml_find_all(tbl, "./w:tr/w:tc", ns=ns)
+    rows <- xml2::xml_find_all(tbl, "./w:tr", ns=ns)
 
-    cell_count_by_row <- sapply(rows, function(row) { length(xml_find_all(row, "./w:tc", ns)) })
+    cell_count_by_row <- purrr::map_int(rows, ~{ length(xml2::xml_find_all(.x, "./w:tc", ns)) })
     row_counts <- paste0(unique(cell_count_by_row), collapse=", ")
     max_cell_count <- max(cell_count_by_row)
 
@@ -84,16 +84,17 @@ docx_describe_cmnts <- function(docx) {
 
   cat(sprintf("Found %d comments.\n", length(cmnts)))
 
-  map_df(xml_attrs(cmnts), function(x) {
+  purrr::map_df(xml_attrs(cmnts), function(x) {
     as_data_frame(t(cbind.data.frame(x, stringsAsFactors=FALSE)))
   }) -> meta
 
   cmnt_df <- dplyr::bind_cols(meta,
-                       cbind.data.frame(comment_text=xml_text(cmnts),
+                       cbind.data.frame(comment_text=xml2::xml_text(cmnts),
                                         stringsAsFactors=FALSE))
 
   aut_df <- dplyr::count(cmnt_df, author)
   aut_df <- dplyr::arrange(aut_df, -n)
+
   print(select(aut_df, author, `# Comments`=n))
 
 }
diff --git a/R/docx_find_tbls.r b/R/docx_find_tbls.r
index f34442f..64e205d 100644
--- a/R/docx_find_tbls.r
+++ b/R/docx_find_tbls.r
@@ -25,16 +25,15 @@ docx_extract_tbl <- function(docx, tbl_number=1, header=TRUE, trim=TRUE) {
   ns <- docx$ns
   tbl <- docx$tbls[[tbl_number]]
 
-  cells <- xml_find_all(tbl, "./w:tr/w:tc", ns=ns)
-  rows <- xml_find_all(tbl, "./w:tr", ns=ns)
+  cells <- xml2::xml_find_all(tbl, "./w:tr/w:tc", ns=ns)
+  rows <- xml2::xml_find_all(tbl, "./w:tr", ns=ns)
 
-  bind_rows(lapply(rows, function(row) {
-
-    vals <- xml_text(xml_find_all(row, "./w:tc", ns=ns), trim=trim)
+  purrr::map_df(rows, ~{
+    vals <- xml2::xml_text(xml2::xml_find_all(.x, "./w:tc", ns=ns), trim=trim)
     names(vals) <- sprintf("V%d", 1:length(vals))
-    data.frame(as.list(vals), stringsAsFactors=FALSE)
-
-  })) -> dat
+    as.list(vals)
+    # data.frame(as.list(vals), stringsAsFactors=FALSE)
+  }) -> dat
 
   if (header) {
     colnames(dat) <- dat[1,]
@@ -48,7 +47,7 @@ docx_extract_tbl <- function(docx, tbl_number=1, header=TRUE, trim=TRUE) {
 
   rownames(dat) <- NULL
 
-  dat
+  tibble::as_tibble(dat)
 
 }
 
diff --git a/R/docxtractr-package.r b/R/docxtractr-package.r
index aeebe1c..84ceaa7 100644
--- a/R/docxtractr-package.r
+++ b/R/docxtractr-package.r
@@ -11,9 +11,9 @@
 #'
 #' @author Bob Rudis (@@hrbrmstr)
 #' @importFrom xml2 xml_find_all xml_text xml_ns xml_find_first xml_attrs
-#' @importFrom tibble data_frame as_data_frame
-#' @importFrom dplyr bind_rows bind_cols count arrange select
+#' @importFrom tibble data_frame as_data_frame as_tibble
+#' @importFrom dplyr bind_cols count arrange select
 #' @importFrom tools file_ext
-#' @importFrom utils download.file unzip
-#' @importFrom purrr map_df
+#' @importFrom utils download.file unzip globalVariables
+#' @importFrom purrr map_df map map_int map_chr map_lgl
 NULL
diff --git a/R/extract_all.r b/R/extract_all.r
index f7ced3d..23a0d65 100644
--- a/R/extract_all.r
+++ b/R/extract_all.r
@@ -27,7 +27,7 @@ docx_extract_all_tbls <- function(docx, guess_header=TRUE, trim=TRUE) {
     hdr <- FALSE
     if (guess_header) {
       tbl <- docx$tbls[[i]]
-      rows <- xml_find_all(tbl, "./w:tr", ns=ns)
+      rows <- xml2::xml_find_all(tbl, "./w:tr", ns=ns)
       hdr <- !is.na(has_header(tbl, rows, ns))
     }
     docx_extract_tbl(docx, i, hdr, trim)
@@ -77,12 +77,14 @@ docx_extract_all_cmnts <- function(docx) {
 
   comments <- docx$cmnts
 
-  map_df(xml_attrs(comments), function(x) {
+  purrr::map_df(xml2::xml_attrs(comments), function(x) {
     as_data_frame(t(cbind.data.frame(x, stringsAsFactors=FALSE)))
   }) -> meta
 
   bind_cols(meta,
-            cbind.data.frame(comment_text=xml_text(comments),
-                             stringsAsFactors=FALSE))
+            cbind.data.frame(comment_text=xml2::xml_text(comments),
+                             stringsAsFactors=FALSE)) -> out
+
+  as_tibble(out)
 
 }
diff --git a/R/mcga.r b/R/mcga.r
new file mode 100644
index 0000000..15183f0
--- /dev/null
+++ b/R/mcga.r
@@ -0,0 +1,26 @@
+#' Make Column Names Great Again
+#'
+#' Remove punctuation and spaces and turn them to underscores plus convert to lower case.
+#'
+#' @md
+#' @param tbl a `data.frame`-like object
+#' @return whatever class `tbl` was but with truly great, really great column names. They're amazing.
+#'     Trust me. They'll be incredible column names once we're done.
+#' @export
+#' @examples
+#' real_world <- read_docx(system.file("examples/realworld.docx", package="docxtractr"))
+#' tbls <- docx_extract_all_tbls(real_world)
+#' mcga(assign_colnames(tbls[[1]], 2))
+mcga <- function(tbl) {
+
+  x <- colnames(tbl)
+  x <- tolower(x)
+  x <- gsub("[[:punct:][:space:]]+", "_", x) # each run of punctuation/whitespace becomes one "_"
+  x <- gsub("_+", "_", x) # collapse any repeated underscores
+  x <- gsub("(^_|_$)", "", x) # drop a leading and/or trailing underscore
+
+  colnames(tbl) <- x
+
+  tbl
+
+}
diff --git a/R/read_docs.r b/R/read_docs.r
index 6aea6f8..4c7bbec 100644
--- a/R/read_docs.r
+++ b/R/read_docs.r
@@ -36,20 +36,20 @@ read_docx <- function(path) {
   unzip(tmpf, exdir=sprintf("%s/docdata", tmpd))
 
   # read the actual XML document
-  doc <- read_xml(sprintf("%s/docdata/word/document.xml", tmpd))
+  doc <- xml2::read_xml(sprintf("%s/docdata/word/document.xml", tmpd))
 
   # extract the namespace
-  ns <- xml_ns(doc)
+  ns <- xml2::xml_ns(doc)
 
   # get the tables
-  tbls <- xml_find_all(doc, ".//w:tbl", ns=ns)
+  tbls <- xml2::xml_find_all(doc, ".//w:tbl", ns=ns)
 
   if (file.exists(sprintf("%s/docdata/word/comments.xml", tmpd))) {
     docmnt <- read_xml(sprintf("%s/docdata/word/comments.xml", tmpd))
     # get the comments
-    cmnts <- xml_find_all(docmnt, ".//w:comment", ns=ns)
+    cmnts <- xml2::xml_find_all(docmnt, ".//w:comment", ns=ns)
   } else {
-    cmnts <- xml_find_all(doc, ".//w:comment", ns=ns)
+    cmnts <- xml2::xml_find_all(doc, ".//w:comment", ns=ns)
   }
 
   # make an object for other functions to work with
diff --git a/R/utils.r b/R/utils.r
index 85861c6..2fd3210 100644
--- a/R/utils.r
+++ b/R/utils.r
@@ -1,7 +1,7 @@
 # used by cuntions to make sure they are working with a well-formed docx object
 ensure_docx <- function(docx) {
   if (!inherits(docx, "docx")) stop("Must pass in a 'docx' object", call.=FALSE)
-  if (!(all(sapply(c("docx", "ns", "tbls", "path"), exists, where=docx))))
+  if (!(all(purrr::map_lgl(c("docx", "ns", "tbls", "path"), exists, where=docx))))
     stop("'docx' object missing necessary components", call.=FALSE)
 }
 
@@ -11,16 +11,16 @@ has_header <- function(tbl, rows, ns) {
   # microsoft has a tag for some table structure info. examine it to
   # see if the creator of the header made the first row special which
   # will likely mean it's a header candidate
-  look <- try(xml_find_first(tbl, "./w:tblPr/w:tblLook", ns), silent=TRUE)
+  look <- try(xml2::xml_find_first(tbl, "./w:tblPr/w:tblLook", ns), silent=TRUE)
   if (inherits(look, "try-error")) {
     return(NA)
   } else {
-    look_attr <- xml_attrs(look)
+    look_attr <- xml2::xml_attrs(look)
     if ("firstRow" %in% names(look_attr)) {
       if (look_attr["firstRow"] == "0") {
         return(NA)
       } else {
-        return(paste0(xml_text(xml_find_all(rows[[1]], "./w:tc", ns)), collapse=", "))
+        return(paste0(xml2::xml_text(xml_find_all(rows[[1]], "./w:tc", ns)), collapse=", "))
       }
     } else {
       return(NA)
diff --git a/README.Rmd b/README.Rmd
index 4b92022..d28f0b6 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -34,6 +34,7 @@ The following functions are implemented:
 - `docx_tbl_count`:	Get number of tables in a Word document
 - `docx_cmnt_count`:	Get number of comments in a Word document
 - `assign_colnames`:	Make a specific row the column names for the specified data.frame
+- `mcga`: Make column names great again
 
 The following data file are included:
 
@@ -134,9 +135,12 @@ tbls <- docx_extract_all(real_world)
 # see table 1
 tbls[[1]]
 
-#' # make table 1 better
+# make table 1 better
 assign_colnames(tbls[[1]], 2)
 
+# make table 1's column names great again 
+mcga(assign_colnames(tbls[[1]], 2))
+
 # see table 5
 tbls[[5]]
 
diff --git a/README.md b/README.md
index 597e4a7..f97db7c 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ The following functions are implemented:
 -   `docx_tbl_count`: Get number of tables in a Word document
 -   `docx_cmnt_count`: Get number of comments in a Word document
 -   `assign_colnames`: Make a specific row the column names for the specified data.frame
+-   `mcga`: Make column names great again
 
 The following data file are included:
 
@@ -56,7 +57,7 @@ library(dplyr)
 
 # current verison
 packageVersion("docxtractr")
-#> [1] '0.2.0'
+#> [1] '0.3.0'
 
 # one table
 doc <- read_docx(system.file("examples/data.docx", package="docxtractr"))
@@ -65,7 +66,7 @@ docx_tbl_count(doc)
 #> [1] 1
 
 docx_describe_tbls(doc)
-#> Word document [/Library/Frameworks/R.framework/Versions/3.3/Resources/library/docxtractr/examples/data.docx]
+#> Word document [/Library/Frameworks/R.framework/Versions/3.4/Resources/library/docxtractr/examples/data.docx]
 #> 
 #> Table 1
 #>   total cells: 16
@@ -74,24 +75,30 @@ docx_describe_tbls(doc)
 #>   has header : likely! => possibly [This, Is, A, Column]
 
 docx_extract_tbl(doc, 1)
-#>   This      Is     A   Column
-#> 1    1     Cat   3.4      Dog
-#> 2    3    Fish 100.3     Bird
-#> 3    5 Pelican   -99 Kangaroo
+#> # A tibble: 3 x 4
+#>    This      Is     A   Column
+#>   <chr>   <chr> <chr>    <chr>
+#> 1     1     Cat   3.4      Dog
+#> 2     3    Fish 100.3     Bird
+#> 3     5 Pelican   -99 Kangaroo
 
 docx_extract_tbl(doc)
-#>   This      Is     A   Column
-#> 1    1     Cat   3.4      Dog
-#> 2    3    Fish 100.3     Bird
-#> 3    5 Pelican   -99 Kangaroo
+#> # A tibble: 3 x 4
+#>    This      Is     A   Column
+#>   <chr>   <chr> <chr>    <chr>
+#> 1     1     Cat   3.4      Dog
+#> 2     3    Fish 100.3     Bird
+#> 3     5 Pelican   -99 Kangaroo
 
 docx_extract_tbl(doc, header=FALSE)
 #> NOTE: header=FALSE but table has a marked header row in the Word document
-#>     V1      V2    V3       V4
-#> 1 This      Is     A   Column
-#> 2    1     Cat   3.4      Dog
-#> 3    3    Fish 100.3     Bird
-#> 4    5 Pelican   -99 Kangaroo
+#> # A tibble: 4 x 4
+#>      V1      V2    V3       V4
+#>   <chr>   <chr> <chr>    <chr>
+#> 1  This      Is     A   Column
+#> 2     1     Cat   3.4      Dog
+#> 3     3    Fish 100.3     Bird
+#> 4     5 Pelican   -99 Kangaroo
 
 # url 
 
@@ -116,22 +123,23 @@ docx_describe_tbls(budget)
 #>   has header : unlikely
 
 docx_extract_tbl(budget, 1)
-#>                                      Short-term Portfolio Long-term Portfolio Total Portfolio Values
-#> 1 Portfolio Balance (Market Value) *       $  123,651,911       $ 294,704,136          $ 418,356,047
-#> 2                    Effective Yield               0.16 %              1.42 %                 1.05 %
-#> 3             Avg. Weighted Maturity              11 Days           2.4 Years              1.7 Years
-#> 4                       Net Earnings        $      18,470      $      350,554         $      369,024
-#> 5                        Benchmark**               0.02 %              0.41 %                 0.27 %
+#> # A tibble: 5 x 4
+#>                                   `` `Short-term Portfolio` `Long-term Portfolio` `Total Portfolio Values`
+#>                                <chr>                  <chr>                 <chr>                    <chr>
+#> 1 Portfolio Balance (Market Value) *         $  123,651,911         $ 294,704,136            $ 418,356,047
+#> 2                    Effective Yield                 0.16 %                1.42 %                   1.05 %
+#> 3             Avg. Weighted Maturity                11 Days             2.4 Years                1.7 Years
+#> 4                       Net Earnings          $      18,470        $      350,554           $      369,024
+#> 5                        Benchmark**                 0.02 %                0.41 %                   0.27 %
 
 docx_extract_tbl(budget, 2) 
-#>                        Amount of Funds (Market Value)  Maturity Effective Yield Interpolated Yield
-#> 1 Short-Term Portfolio                  $ 123,651,911   11 days          0.16 %             0.01 %
-#> 2  Long-Term Portfolio                  $ 294,704,136 2.4 years          1.42 %             0.41 %
-#> 3      Total Portfolio                  $ 418,356,047 1.7 years          1.05 %             0.27 %
-#>   Total Return  Monthly Total Return    Annual
-#> 1                 0.013                  0.160
-#> 2                 0.437                  0.250
-#> 3                 0.298                  0.222
+#> # A tibble: 3 x 7
+#>                     `` `Amount of Funds (Market Value)`  Maturity `Effective Yield` `Interpolated Yield`
+#>                  <chr>                            <chr>     <chr>             <chr>                <chr>
+#> 1 Short-Term Portfolio                    $ 123,651,911   11 days            0.16 %               0.01 %
+#> 2  Long-Term Portfolio                    $ 294,704,136 2.4 years            1.42 %               0.41 %
+#> 3      Total Portfolio                    $ 418,356,047 1.7 years            1.05 %               0.27 %
+#> # ... with 2 more variables: `Total Return Monthly` <chr>, `Total Return Annual` <chr>
 
 # three tables
 doc3 <- read_docx(system.file("examples/data3.docx", package="docxtractr"))
@@ -140,7 +148,7 @@ docx_tbl_count(doc3)
 #> [1] 3
 
 docx_describe_tbls(doc3)
-#> Word document [/Library/Frameworks/R.framework/Versions/3.3/Resources/library/docxtractr/examples/data3.docx]
+#> Word document [/Library/Frameworks/R.framework/Versions/3.4/Resources/library/docxtractr/examples/data3.docx]
 #> 
 #> Table 1
 #>   total cells: 16
@@ -161,13 +169,15 @@ docx_describe_tbls(doc3)
 #>   has header : likely! => possibly [Foo, Bar]
 
 docx_extract_tbl(doc3, 3)
-#>   Foo Bar
-#> 1  Aa  Bb
-#> 2  Dd  Ee
-#> 3  Gg  Hh
-#> 4   1   2
-#> 5  Zz  Jj
-#> 6  Tt  ii
+#> # A tibble: 6 x 2
+#>     Foo   Bar
+#>   <chr> <chr>
+#> 1    Aa    Bb
+#> 2    Dd    Ee
+#> 3    Gg    Hh
+#> 4     1     2
+#> 5    Zz    Jj
+#> 6    Tt    ii
 
 # no tables
 none <- read_docx(system.file("examples/none.docx", package="docxtractr"))
@@ -188,7 +198,7 @@ docx_tbl_count(complx)
 #> [1] 5
 
 docx_describe_tbls(complx)
-#> Word document [/Library/Frameworks/R.framework/Versions/3.3/Resources/library/docxtractr/examples/complex.docx]
+#> Word document [/Library/Frameworks/R.framework/Versions/3.4/Resources/library/docxtractr/examples/complex.docx]
 #> 
 #> Table 1
 #>   total cells: 16
@@ -221,28 +231,34 @@ docx_describe_tbls(complx)
 #>   has header : unlikely
 
 docx_extract_tbl(complx, 3, header=TRUE)
-#>   Foo Bar
-#> 1  Aa  Bb
-#> 2  Dd  Ee
-#> 3  Gg  Hh
-#> 4   1   2
-#> 5  Zz  Jj
-#> 6  Tt  ii
+#> # A tibble: 6 x 2
+#>     Foo   Bar
+#>   <chr> <chr>
+#> 1    Aa    Bb
+#> 2    Dd    Ee
+#> 3    Gg    Hh
+#> 4     1     2
+#> 5    Zz    Jj
+#> 6    Tt    ii
 
 docx_extract_tbl(complx, 4, header=TRUE)
-#>   Foo  Bar  Baz
-#> 1  Aa BbCc <NA>
-#> 2  Dd   Ee   Ff
-#> 3  Gg   Hh   ii
+#> # A tibble: 3 x 3
+#>     Foo   Bar   Baz
+#>   <chr> <chr> <chr>
+#> 1    Aa  BbCc  <NA>
+#> 2    Dd    Ee    Ff
+#> 3    Gg    Hh    ii
 
 docx_extract_tbl(complx, 5, header=TRUE)
-#>    Foo Bar Baz
-#> 1   Aa  Bb  Cc
-#> 2   Dd  Ee  Ff
-#> 3   Gg  Hh  Ii
-#> 4 Jj88  Kk  Ll
-#> 5       Uu  Ii
-#> 6   Hh  Ii   h
+#> # A tibble: 6 x 3
+#>     Foo   Bar   Baz
+#>   <chr> <chr> <chr>
+#> 1    Aa    Bb    Cc
+#> 2    Dd    Ee    Ff
+#> 3    Gg    Hh    Ii
+#> 4  Jj88    Kk    Ll
+#> 5          Uu    Ii
+#> 6    Hh    Ii     h
 
 # a "real" Word doc
 real_world <- read_docx(system.file("examples/realworld.docx", package="docxtractr"))
@@ -256,7 +272,9 @@ tbls <- docx_extract_all(real_world)
 
 # see table 1
 tbls[[1]]
+#> # A tibble: 9 x 9
 #>                  V1        V2         V3                     V4                     V5
+#>               <chr>     <chr>      <chr>                  <chr>                  <chr>
 #> 1 Lesson 1:  Step 1      <NA>       <NA>                   <NA>                   <NA>
 #> 2           Country Birthrate Death Rate Population Growth 2005 Population Growth 2050
 #> 3               USA      2.06      0.51%                  0.92%                 -0.06%
@@ -266,20 +284,27 @@ tbls[[1]]
 #> 7             Italy      1.28      0.72%                  0.35%                 -1.33%
 #> 8            Mexico      2.43      0.25%                  1.41%                  0.96%
 #> 9           Nigeria      4.78      0.26%                  2.46%                  3.58%
-#>                                    V6                      V7                   V8                              V9
-#> 1                                <NA>                    <NA>                 <NA>                            <NA>
-#> 2        Relative place in Transition        Social Factors 1     Social Factors 2                Social Factors 3
-#> 3                    Post- Industrial     Female Independence    Stable Birth Rate                 Good technology
-#> 4                    Post- Industrial Government intervention           Technology                    Urbanization
-#> 5                   Mature Industrial  Not yet industrialized More children needed Slightly higher life expectancy
-#> 6                     Post Industrial         Economic growth              Poverty    Becoming more industrialized
-#> 7                Late Post industrial       Stable birth rate   People marry later              Better health care
-#> 8                   Mature Industrial      Better health care           Emigration                 Economic growth
-#> 9 End of Mechanization of Agriculture                 Disease   People marry early       People have many children
-
-#' # make table 1 better
+#> # ... with 4 more variables: V6 <chr>, V7 <chr>, V8 <chr>, V9 <chr>
+
+# make table 1 better
 assign_colnames(tbls[[1]], 2)
-#>   Country Birthrate Death Rate Population Growth 2005 Population Growth 2050        Relative place in Transition
+#> # A tibble: 7 x 9
+#>   Country Birthrate `Death Rate` `Population Growth 2005` `Population Growth 2050`      `Relative place in Transition`
+#>     <chr>     <chr>        <chr>                    <chr>                    <chr>                               <chr>
+#> 1     USA      2.06        0.51%                    0.92%                   -0.06%                    Post- Industrial
+#> 2   China      1.62         0.3%                     0.6%                   -0.58%                    Post- Industrial
+#> 3   Egypt      2.83        0.41%                     2.0%                    1.32%                   Mature Industrial
+#> 4   India      2.35        0.34%                    1.56%                    0.76%                     Post Industrial
+#> 5   Italy      1.28        0.72%                    0.35%                   -1.33%                Late Post industrial
+#> 6  Mexico      2.43        0.25%                    1.41%                    0.96%                   Mature Industrial
+#> 7 Nigeria      4.78        0.26%                    2.46%                    3.58% End of Mechanization of Agriculture
+#> # ... with 3 more variables: `Social Factors 1` <chr>, `Social Factors 2` <chr>, `Social Factors 3` <chr>
+
+# make table 1's column names great again 
+mcga(assign_colnames(tbls[[1]], 2))
+#> # A tibble: 7 x 9
+#>   country birthrate death_rate population_growth_2005 population_growth_2050        relative_place_in_transition
+#>     <chr>     <chr>      <chr>                  <chr>                  <chr>                               <chr>
 #> 1     USA      2.06      0.51%                  0.92%                 -0.06%                    Post- Industrial
 #> 2   China      1.62       0.3%                   0.6%                 -0.58%                    Post- Industrial
 #> 3   Egypt      2.83      0.41%                   2.0%                  1.32%                   Mature Industrial
@@ -287,18 +312,13 @@ assign_colnames(tbls[[1]], 2)
 #> 5   Italy      1.28      0.72%                  0.35%                 -1.33%                Late Post industrial
 #> 6  Mexico      2.43      0.25%                  1.41%                  0.96%                   Mature Industrial
 #> 7 Nigeria      4.78      0.26%                  2.46%                  3.58% End of Mechanization of Agriculture
-#>          Social Factors 1     Social Factors 2                Social Factors 3
-#> 1     Female Independence    Stable Birth Rate                 Good technology
-#> 2 Government intervention           Technology                    Urbanization
-#> 3  Not yet industrialized More children needed Slightly higher life expectancy
-#> 4         Economic growth              Poverty    Becoming more industrialized
-#> 5       Stable birth rate   People marry later              Better health care
-#> 6      Better health care           Emigration                 Economic growth
-#> 7                 Disease   People marry early       People have many children
+#> # ... with 3 more variables: social_factors_1 <chr>, social_factors_2 <chr>, social_factors_3 <chr>
 
 # see table 5
 tbls[[5]]
+#> # A tibble: 5 x 6
 #>                  V1      V2            V3        V4        V5       V6
+#>               <chr>   <chr>         <chr>     <chr>     <chr>    <chr>
 #> 1 Lesson 2:  Step 1    <NA>          <NA>      <NA>      <NA>     <NA>
 #> 2           Nigeria Default    Prediction + 5 years +15 years -5 years
 #> 3        Birth rate    4.78     Goes Down      4.76      4.72     4.79
@@ -307,23 +327,25 @@ tbls[[5]]
 
 # make table 5 better
 assign_colnames(tbls[[5]], 2)
-#>             Nigeria Default    Prediction + 5 years +15 years -5 years
-#> 1        Birth rate    4.78     Goes Down      4.76      4.72     4.79
-#> 2        Death rate   0.36% Stay the Same     0.42%     0.52%     0.3%
-#> 3 Population growth   3.58%     Goes Down     3.02%     2.32%    4.38%
+#> # A tibble: 3 x 6
+#>             Nigeria Default    Prediction `+ 5 years` `+15 years` `-5 years`
+#>               <chr>   <chr>         <chr>       <chr>       <chr>      <chr>
+#> 1        Birth rate    4.78     Goes Down        4.76        4.72       4.79
+#> 2        Death rate   0.36% Stay the Same       0.42%       0.52%       0.3%
+#> 3 Population growth   3.58%     Goes Down       3.02%       2.32%      4.38%
 
 # comments
 cmnts <- read_docx(system.file("examples/comments.docx", package="docxtractr"))
 
 print(cmnts)
 #> No tables in document
-#> Word document [/Library/Frameworks/R.framework/Versions/3.3/Resources/library/docxtractr/examples/comments.docx]
+#> Word document [/Library/Frameworks/R.framework/Versions/3.4/Resources/library/docxtractr/examples/comments.docx]
 #> 
 #> Found 3 comments.
 #> # A tibble: 1 x 2
-#>      author # Comments
-#>       <chr>      <int>
-#> 1 boB Rudis          3
+#>      author `# Comments`
+#>       <chr>        <int>
+#> 1 boB Rudis            3
 
 glimpse(docx_extract_all_cmnts(cmnts))
 #> Observations: 3
@@ -347,7 +369,7 @@ library(testthat)
 #>     matches
 
 date()
-#> [1] "Tue Jul 19 22:56:37 2016"
+#> [1] "Mon Jun 19 05:52:59 2017"
 
 test_dir("tests/")
 #> testthat results ========================================================================================================
diff --git a/docxtractr.Rproj b/docxtractr.Rproj
index 773de7e..9f58d70 100644
--- a/docxtractr.Rproj
+++ b/docxtractr.Rproj
@@ -5,21 +5,19 @@ SaveWorkspace: No
 AlwaysSaveHistory: Default
 
 EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
 Encoding: UTF-8
 
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
 AutoAppendNewline: Yes
 StripTrailingWhitespace: Yes
 
 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace
-
-UseSpacesForTab: Yes
-NumSpacesForTab: 2
-
-RnwWeave: Sweave
-LaTeX: pdfLaTeX
-
 PackageBuildArgs: --resave-data
 PackageCheckArgs: --as-cran
+PackageRoxygenize: rd,collate,namespace
diff --git a/man/assign_colnames.Rd b/man/assign_colnames.Rd
index 3f24f98..43531a1 100644
--- a/man/assign_colnames.Rd
+++ b/man/assign_colnames.Rd
@@ -37,7 +37,7 @@ real_world <- read_docx(system.file("examples/realworld.docx", package="docxtrac
 docx_tbl_count(real_world)
 
 # get all the tables
-tbls <- docx_extract_all(real_world)
+tbls <- docx_extract_all_tbls(real_world)
 
 # make table 1 better
 assign_colnames(tbls[[1]], 2)
@@ -48,4 +48,3 @@ assign_colnames(tbls[[5]], 2)
 \seealso{
 \code{\link{docx_extract_all}}, \code{\link{docx_extract_tbl}}
 }
-
diff --git a/man/docx_cmnt_count.Rd b/man/docx_cmnt_count.Rd
index e316727..0ca7ccc 100644
--- a/man/docx_cmnt_count.Rd
+++ b/man/docx_cmnt_count.Rd
@@ -19,4 +19,3 @@ Get number of comments in a Word document
 cmnts <- read_docx(system.file("examples/comments.docx", package="docxtractr"))
 docx_cmnt_count(cmnts)
 }
-
diff --git a/man/docx_describe_cmnts.Rd b/man/docx_describe_cmnts.Rd
index 3b6e897..0ad06d2 100644
--- a/man/docx_describe_cmnts.Rd
+++ b/man/docx_describe_cmnts.Rd
@@ -17,4 +17,3 @@ cmnts <- read_docx(system.file("examples/comments.docx", package="docxtractr"))
 docx_cmnt_count(cmnts)
 docx_describe_cmnts(cmnts)
 }
-
diff --git a/man/docx_describe_tbls.Rd b/man/docx_describe_tbls.Rd
index 5324c3f..8a9aca6 100644
--- a/man/docx_describe_tbls.Rd
+++ b/man/docx_describe_tbls.Rd
@@ -18,4 +18,3 @@ complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
 docx_tbl_count(complx)
 docx_describe_tbls(complx)
 }
-
diff --git a/man/docx_extract_all.Rd b/man/docx_extract_all.Rd
index d21169e..e4c8c31 100644
--- a/man/docx_extract_all.Rd
+++ b/man/docx_extract_all.Rd
@@ -33,4 +33,3 @@ tbls <- docx_extract_all_tbls(real_world)
 \seealso{
 \code{\link{assign_colnames}}, \code{\link{docx_extract_tbl}}
 }
-
diff --git a/man/docx_extract_all_cmnts.Rd b/man/docx_extract_all_cmnts.Rd
index 3116149..612c181 100644
--- a/man/docx_extract_all_cmnts.Rd
+++ b/man/docx_extract_all_cmnts.Rd
@@ -21,4 +21,3 @@ docx_cmnt_count(cmnts)
 docx_describe_cmnts(cmnts)
 docx_extract_all_cmnts(cmnts)
 }
-
diff --git a/man/docx_extract_all_tbls.Rd b/man/docx_extract_all_tbls.Rd
index 1014e49..6c51aac 100644
--- a/man/docx_extract_all_tbls.Rd
+++ b/man/docx_extract_all_tbls.Rd
@@ -33,4 +33,3 @@ tbls <- docx_extract_all_tbls(real_world)
 \seealso{
 \code{\link{assign_colnames}}, \code{\link{docx_extract_tbl}}
 }
-
diff --git a/man/docx_extract_tbl.Rd b/man/docx_extract_tbl.Rd
index 89beb22..04e76e3 100644
--- a/man/docx_extract_tbl.Rd
+++ b/man/docx_extract_tbl.Rd
@@ -31,4 +31,3 @@ docx_extract_tbl(doc3, 3)
 \code{\link{docx_extract_all}}, \code{\link{docx_extract_tbl}},
          \code{\link{assign_colnames}}
 }
-
diff --git a/man/docx_tbl_count.Rd b/man/docx_tbl_count.Rd
index b7e91f2..eeb2280 100644
--- a/man/docx_tbl_count.Rd
+++ b/man/docx_tbl_count.Rd
@@ -19,4 +19,3 @@ Get number of tables in a Word document
 complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
 docx_tbl_count(complx)
 }
-
diff --git a/man/docxtractr.Rd b/man/docxtractr.Rd
index 820a44d..dc130a5 100644
--- a/man/docxtractr.Rd
+++ b/man/docxtractr.Rd
@@ -15,4 +15,3 @@ comment count and extract comments from Word docx documents.
 \author{
 Bob Rudis (@hrbrmstr)
 }
-
diff --git a/man/mcga.Rd b/man/mcga.Rd
new file mode 100644
index 0000000..9150451
--- /dev/null
+++ b/man/mcga.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mcga.r
+\name{mcga}
+\alias{mcga}
+\title{Make Column Names Great Again}
+\usage{
+mcga(tbl)
+}
+\arguments{
+\item{tbl}{a \code{data.frame}-like object}
+}
+\value{
+whatever class \code{tbl} was but with truly great, really great column names. They're amazing.
+Trust me. They'll be incredible column names once we're done.
+}
+\description{
+Remove punctuation and spaces and turn them to underscores plus convert to lower case.
+}
+\examples{
+real_world <- read_docx(system.file("examples/realworld.docx", package="docxtractr"))
+tbls <- docx_extract_all_tbls(real_world)
+mcga(assign_colnames(tbls[[1]], 2))
+}
diff --git a/man/print.docx.Rd b/man/print.docx.Rd
index a908af0..1771e52 100644
--- a/man/print.docx.Rd
+++ b/man/print.docx.Rd
@@ -14,4 +14,3 @@
 \description{
 Display information about the document
 }
-
diff --git a/man/read_docx.Rd b/man/read_docx.Rd
index 6a358d3..63a6eca 100644
--- a/man/read_docx.Rd
+++ b/man/read_docx.Rd
@@ -21,4 +21,3 @@ budget <- read_docx(
 "http://rud.is/dl/1.DOCX")
 }
 }
-
diff --git a/tests/testthat/test-docxtractr.R b/tests/testthat/test-docxtractr.R
index 69d8ab6..f02caad 100644
--- a/tests/testthat/test-docxtractr.R
+++ b/tests/testthat/test-docxtractr.R
@@ -1,11 +1,11 @@
-context("basic functionality")
+context("docx extraction works")
 test_that("we can do something", {
 
   doc <- read_docx(system.file("examples/data.docx", package="docxtractr"))
 
   expect_that(doc, is_a("docx"))
   expect_that(docx_tbl_count(doc), equals(1))
-  expect_that(docx_extract_tbl(doc, 1), is_a("data.frame"))
+  expect_that(docx_extract_tbl(doc, 1), is_a("tbl"))
 
   complx <- read_docx(system.file("examples/complex.docx", package="docxtractr"))
   expect_that(docx_tbl_count(complx), equals(5))
@@ -14,9 +14,9 @@ test_that("we can do something", {
   tmp_4 <- docx_extract_tbl(complx, 4)
   tmp_5 <- docx_extract_tbl(complx, 5)
 
-  expect_that(tmp_3, is_a("data.frame"))
-  expect_that(tmp_4, is_a("data.frame"))
-  expect_that(tmp_5, is_a("data.frame"))
+  expect_that(tmp_3, is_a("tbl"))
+  expect_that(tmp_4, is_a("tbl"))
+  expect_that(tmp_5, is_a("tbl"))
 
   expect_that(nrow(tmp_3), equals(6))
   expect_that(ncol(tmp_4), equals(3))