You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

57 lines
1.9 KiB

#' Make a specific row the column names for the specified data.frame
#'
#' Many tables in Word documents are in twisted formats where there may be
#' labels or other oddities mixed in that make it difficult to work with the
#' underlying data. This function makes it easy to identify a particular row
#' in a scraped \code{data.frame} as the one containing column names and
#' have it become the column names, removing it and (optionally) all of the
#' rows before it (since that's usually what needs to be done).
#'
#' @param dat can be any \code{data.frame} but is intended for use with
#' ones returned by this package
#' @param row numeric value indicating the row number that is to become
#' the column names
#' @param remove remove row specified by \code{row} after making it
#' the column names? (Default: \code{TRUE})
#' @param remove_previous remove any rows preceding \code{row}? (Default:
#' \code{TRUE} but will be assigned whatever is given for
#' \code{remove}).
#' @return \code{data.frame}
#' @seealso \code{\link{docx_extract_all}}, \code{\link{docx_extract_tbl}}
#' @export
#' @examples
#' # a "real" Word doc
#' real_world <- read_docx(system.file("examples/realworld.docx", package="docxtractr"))
#' docx_tbl_count(real_world)
#'
#' # get all the tables
#' tbls <- docx_extract_all_tbls(real_world)
#'
#' # make table 1 better
#' assign_colnames(tbls[[1]], 2)
#'
#' # make table 5 better
#' assign_colnames(tbls[[5]], 2)
# Promote row `row` of `dat` to be its column names.
#
# dat             data.frame (or something coercible via data.frame()).
# row             index of the row holding the header cells; when it falls
#                 outside 1..nrow(dat) the input is returned untouched.
# remove          drop the header row itself once it has been promoted?
# remove_previous drop every row that precedes the header row? Defaults to
#                 whatever `remove` is.
#
# Returns the modified object with its row names reset and the class vector
# of the original input re-applied.
assign_colnames <- function(dat, row, remove=TRUE, remove_previous=remove) {

  # scalar test, so use the short-circuiting `||` rather than elementwise `|`
  if (row > nrow(dat) || row < 1) return(dat)

  d_class <- class(dat)

  # just in case someone shoots us a data.table or other stranger things
  dat <- data.frame(dat, stringsAsFactors=FALSE)

  # convert the header cells one column at a time: as.character() on each
  # cell gives factor labels (not their integer codes), and drop=FALSE keeps
  # a one-column table as a data.frame so vapply() still walks its columns
  colnames(dat) <- vapply(dat[row, , drop=FALSE], as.character, character(1),
                          USE.NAMES=FALSE)

  # rows to discard: the ones before `row` if requested, plus `row` itself
  # if requested (each `if` contributes NULL when its flag is FALSE)
  drop_rows <- c(if (remove_previous) seq_len(row - 1), if (remove) row)

  # guard the empty case: negative indexing with a zero-length vector would
  # select *no* rows instead of keeping all of them
  if (length(drop_rows) > 0) dat <- dat[-drop_rows, , drop=FALSE]

  rownames(dat) <- NULL

  # give them back what they passed in
  class(dat) <- d_class

  dat

}