From 176870b1b7bc04ad1c8705fdc6c8e04039cfda16 Mon Sep 17 00:00:00 2001 From: Tarcisio Date: Tue, 28 Nov 2017 15:03:31 +0100 Subject: [PATCH 1/6] Reimplementation of the bytes formatter. Adds flexibility and fixes the problem with NAs in the input. --- R/formatters.r | 82 ++++++++++++++++++++++++++-------------------------------- 1 file changed, 36 insertions(+), 46 deletions(-) diff --git a/R/formatters.r b/R/formatters.r index d35edc0..adf488d 100644 --- a/R/formatters.r +++ b/R/formatters.r @@ -1,9 +1,9 @@ #' Bytes formatter: convert to byte measurement and display symbol. #' #' @return a function with three parameters, \code{x}, a numeric vector that -#' returns a character vector, \code{symbol} the byte symbol (e.g. "\code{Kb}") -#' desired and the measurement \code{units} (traditional \code{binary} or -#' \code{si} for ISI metric units). +#' returns a character vector, \code{symbol} a single or a vector of byte +#' symbol(s) (e.g. "\code{Kb}") desired and the measurement \code{units} +#' (traditional \code{binary} or \code{si} for ISI metric units). #' @param x a numeric vector to format #' @param symbol byte symbol to use. If "\code{auto}" the symbol used will be #' determined by the maximum value of \code{x}. Valid symbols are @@ -12,6 +12,8 @@ #' equivalents and "\code{iB}" equivalents. #' @param units which unit base to use, "\code{binary}" (1024 base) or #' "\code{si}" (1000 base) for ISI units. +#' @param only_highest Whether to use the unit of the highest number or +#' each number uses its own base. #' @references Units of Information (Wikipedia) : #' \url{http://en.wikipedia.org/wiki/Units_of_information} #' @export @@ -21,8 +23,8 @@ #' Kb(sample(3000000000, 10)) #' Mb(sample(3000000000, 10)) #' Gb(sample(3000000000, 10)) -byte_format <- function(symbol="auto", units="binary") { - function(x) bytes(x, symbol, units) +byte_format <- function (symbol = "auto", units = "binary", only_highest = TRUE) { + function(x) bytes(x, symbol, units, only_highest) } #' @export @@ -39,47 +41,35 @@ Gb <- byte_format("Gb", "binary") #' @export #' @rdname byte_format -bytes <- function(x, symbol="auto", units=c("binary", "si")) { - - symbol <- match.arg(symbol, c("auto", - "b", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb", "Yb", - "B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", - "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")) - - units <- match.arg(units, c("binary", "si")) - - base <- switch(units, `binary`=1024, `si`=1000) - - if (symbol == "auto") { - symbol <- - if (max(x) >= (base^5)) { "Pb" } - else if (max(x) >= (base^4)) { "Tb" } - else if (max(x) >= (base^3)) { "Gb" } - else if (max(x) >= (base^2)) { "Kb" } - else if (max(x) >= (base^1)) { "Mb" } - else { "b" } +bytes <- function (x, symbol = 'auto', units = c('binary', 'si'), + only_highest = FALSE) { + bin_names <- c("bytes", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb", "Yb") + si_names <- c("bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB") + symbol <- match.arg(symbol, unique(c("auto", bin_names, toupper(bin_names), + si_names)), several.ok = TRUE) + units <- match.arg(units, c("binary", "si")) + base <- switch(units, binary = 1024, si = 1000) + out_names <- switch(units, binary = bin_names, si = si_names) + sym_len <- length(symbol) + inp_len <- length(x) + if (sym_len == 1) { + symbol <- rep(symbol, inp_len) + sym_len <- inp_len } + if (sym_len != inp_len) { + stop('Symbols argument must be either long 1 or of the same length as the input vector.') + } + symbol <- ifelse(symbol == "auto", + pmax(floor(log(x, base)), 0), + match(tolower(symbol), tolower(out_names)) - 1) + if (only_highest) { + symbol <- max(symbol, na.rm = TRUE) + } + res <- paste(scales::comma(round(x / base^symbol, 1L)), + out_names[symbol + 1]) + ifelse(!is.na(x), res, x) +} - switch(symbol, - "b" =, "B" = paste(x, "bytes"), - - "Kb" =, "KB" = paste(scales::comma(round(x/(base^1), 1L)), "Kb"), - "Mb" =, "MB" = paste(scales::comma(round(x/(base^2), 1L)), "Mb"), - "Gb" =, "GB" = paste(scales::comma(round(x/(base^3), 1L)), "Gb"), - "Tb" =, "TB" = paste(scales::comma(round(x/(base^4), 1L)), "Tb"), - "Pb" =, "PB" = paste(scales::comma(round(x/(base^5), 1L)), "Pb"), - "Eb" =, "EB" = paste(scales::comma(round(x/(base^6), 1L)), "Eb"), - "Zb" =, "ZB" = paste(scales::comma(round(x/(base^7), 1L)), "Zb"), - "Yb" =, "YB" = paste(scales::comma(round(x/(base^8), 1L)), "Yb"), - - "KiB" = paste(scales::comma(round(x/(base^1), 1L)), "KiB"), - "MiB" = paste(scales::comma(round(x/(base^2), 1L)), "MiB"), - "GiB" = paste(scales::comma(round(x/(base^3), 1L)), "GiB"), - "TiB" = paste(scales::comma(round(x/(base^4), 1L)), "TiB"), - "PiB" = paste(scales::comma(round(x/(base^5), 1L)), "PiB"), - "EiB" = paste(scales::comma(round(x/(base^6), 1L)), "EiB"), - "ZiB" = paste(scales::comma(round(x/(base^7), 1L)), "ZiB"), - "YiB" = paste(scales::comma(round(x/(base^8), 1L)), "YiB") - ) - +byte_format <- function (symbol = "auto", units = "binary", only_highest = TRUE) { + function(x) bytes(x, symbol, units, only_highest) } From 1f8d1a3a094e630d6c5c66c658caa3bdac382ddd Mon Sep 17 00:00:00 2001 From: Tarcisio Date: Tue, 28 Nov 2017 15:03:31 +0100 Subject: [PATCH 2/6] Reimplementation of the bytes formatter. Adds flexibility and fixes the problem with NAs in the input. --- R/formatters.r | 81 ++++++++++++++++++++++++---------------------------------- 1 file changed, 34 insertions(+), 47 deletions(-) diff --git a/R/formatters.r b/R/formatters.r index d35edc0..f738d09 100644 --- a/R/formatters.r +++ b/R/formatters.r @@ -1,9 +1,9 @@ #' Bytes formatter: convert to byte measurement and display symbol. #' #' @return a function with three parameters, \code{x}, a numeric vector that -#' returns a character vector, \code{symbol} the byte symbol (e.g. "\code{Kb}") -#' desired and the measurement \code{units} (traditional \code{binary} or -#' \code{si} for ISI metric units). +#' returns a character vector, \code{symbol} a single or a vector of byte +#' symbol(s) (e.g. "\code{Kb}") desired and the measurement \code{units} +#' (traditional \code{binary} or \code{si} for ISI metric units). #' @param x a numeric vector to format #' @param symbol byte symbol to use. If "\code{auto}" the symbol used will be #' determined by the maximum value of \code{x}. Valid symbols are @@ -12,6 +12,8 @@ #' equivalents and "\code{iB}" equivalents. #' @param units which unit base to use, "\code{binary}" (1024 base) or #' "\code{si}" (1000 base) for ISI units. +#' @param only_highest Whether to use the unit of the highest number or +#' each number uses its own base. #' @references Units of Information (Wikipedia) : #' \url{http://en.wikipedia.org/wiki/Units_of_information} #' @export @@ -21,8 +23,8 @@ #' Kb(sample(3000000000, 10)) #' Mb(sample(3000000000, 10)) #' Gb(sample(3000000000, 10)) -byte_format <- function(symbol="auto", units="binary") { - function(x) bytes(x, symbol, units) +byte_format <- function (symbol = "auto", units = "binary", only_highest = TRUE) { + function(x) bytes(x, symbol, units, only_highest) } #' @export @@ -39,47 +41,32 @@ Gb <- byte_format("Gb", "binary") #' @export #' @rdname byte_format -bytes <- function(x, symbol="auto", units=c("binary", "si")) { - - symbol <- match.arg(symbol, c("auto", - "b", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb", "Yb", - "B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", - "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")) - - units <- match.arg(units, c("binary", "si")) - - base <- switch(units, `binary`=1024, `si`=1000) - - if (symbol == "auto") { - symbol <- - if (max(x) >= (base^5)) { "Pb" } - else if (max(x) >= (base^4)) { "Tb" } - else if (max(x) >= (base^3)) { "Gb" } - else if (max(x) >= (base^2)) { "Kb" } - else if (max(x) >= (base^1)) { "Mb" } - else { "b" } +bytes <- function (x, symbol = 'auto', units = c('binary', 'si'), + only_highest = FALSE) { + bin_names <- c("bytes", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb", "Yb") + si_names <- c("bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB") + symbol <- match.arg(symbol, unique(c("auto", bin_names, toupper(bin_names), + si_names)), several.ok = TRUE) + units <- match.arg(units, c("binary", "si")) + base <- switch(units, binary = 1024, si = 1000) + out_names <- switch(units, binary = bin_names, si = si_names) + sym_len <- length(symbol) + inp_len <- length(x) + if (sym_len == 1) { + symbol <- rep(symbol, inp_len) + sym_len <- inp_len } - - switch(symbol, - "b" =, "B" = paste(x, "bytes"), - - "Kb" =, "KB" = paste(scales::comma(round(x/(base^1), 1L)), "Kb"), - "Mb" =, "MB" = paste(scales::comma(round(x/(base^2), 1L)), "Mb"), - "Gb" =, "GB" = paste(scales::comma(round(x/(base^3), 1L)), "Gb"), - "Tb" =, "TB" = paste(scales::comma(round(x/(base^4), 1L)), "Tb"), - "Pb" =, "PB" = paste(scales::comma(round(x/(base^5), 1L)), "Pb"), - "Eb" =, "EB" = paste(scales::comma(round(x/(base^6), 1L)), "Eb"), - "Zb" =, "ZB" = paste(scales::comma(round(x/(base^7), 1L)), "Zb"), - "Yb" =, "YB" = paste(scales::comma(round(x/(base^8), 1L)), "Yb"), - - "KiB" = paste(scales::comma(round(x/(base^1), 1L)), "KiB"), - "MiB" = paste(scales::comma(round(x/(base^2), 1L)), "MiB"), - "GiB" = paste(scales::comma(round(x/(base^3), 1L)), "GiB"), - "TiB" = paste(scales::comma(round(x/(base^4), 1L)), "TiB"), - "PiB" = paste(scales::comma(round(x/(base^5), 1L)), "PiB"), - "EiB" = paste(scales::comma(round(x/(base^6), 1L)), "EiB"), - "ZiB" = paste(scales::comma(round(x/(base^7), 1L)), "ZiB"), - "YiB" = paste(scales::comma(round(x/(base^8), 1L)), "YiB") - ) - + if (sym_len != inp_len) { + stop('Symbols argument must be either long 1 or of the same length as the input vector.') + } + symbol <- ifelse(symbol == "auto", + pmax(floor(log(x, base)), 0), + match(tolower(symbol), tolower(out_names)) - 1) + if (only_highest) { + symbol <- max(symbol, na.rm = TRUE) + } + res <- paste(scales::comma(round(x / base^symbol, 1L)), + out_names[symbol + 1]) + ifelse(!is.na(x), res, x) } + From 8f35776575eb527c1d4905bf63f08937fff052e0 Mon Sep 17 00:00:00 2001 From: Tarcisio Date: Tue, 28 Nov 2017 15:53:20 +0100 Subject: [PATCH 3/6] Makes selection of symbol coherent with the chosen unit --- R/formatters.r | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/formatters.r b/R/formatters.r index f738d09..a20089b 100644 --- a/R/formatters.r +++ b/R/formatters.r @@ -45,9 +45,16 @@ bytes <- function (x, symbol = 'auto', units = c('binary', 'si'), only_highest = FALSE) { bin_names <- c("bytes", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb", "Yb") si_names <- c("bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB") - symbol <- match.arg(symbol, unique(c("auto", bin_names, toupper(bin_names), - si_names)), several.ok = TRUE) units <- match.arg(units, c("binary", "si")) + valid_names <- c('auto', if (units == 'binary') { + c(bin_names, toupper(bin_names)) + } else { + si_names + }) + symbol <- valid_names[pmatch(symbol, valid_names, duplicates.ok = TRUE)] + if (any(is.na(symbol))) { + stop(gettextf('Symbol must be one of %s', paste(dQuote(valid_names), collapse = ', '))) + } base <- switch(units, binary = 1024, si = 1000) out_names <- switch(units, binary = bin_names, si = si_names) sym_len <- length(symbol) @@ -70,3 +77,4 @@ bytes <- function (x, symbol = 'auto', units = c('binary', 'si'), ifelse(!is.na(x), res, x) } + From 01710b21885d721018b39efcecc9ad366d455442 Mon Sep 17 00:00:00 2001 From: Tarcisio Date: Tue, 28 Nov 2017 15:58:42 +0100 Subject: [PATCH 4/6] Make quote type uniform --- R/formatters.r | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/formatters.r b/R/formatters.r index a20089b..39b2026 100644 --- a/R/formatters.r +++ b/R/formatters.r @@ -41,19 +41,19 @@ Gb <- byte_format("Gb", "binary") #' @export #' @rdname byte_format -bytes <- function (x, symbol = 'auto', units = c('binary', 'si'), +bytes <- function (x, symbol = "auto", units = c("binary", "si"), only_highest = FALSE) { bin_names <- c("bytes", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb", "Yb") si_names <- c("bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB") units <- match.arg(units, c("binary", "si")) - valid_names <- c('auto', if (units == 'binary') { + valid_names <- c("auto", if (units == "binary") { c(bin_names, toupper(bin_names)) } else { si_names }) symbol <- valid_names[pmatch(symbol, valid_names, duplicates.ok = TRUE)] if (any(is.na(symbol))) { - stop(gettextf('Symbol must be one of %s', paste(dQuote(valid_names), collapse = ', '))) + stop(gettextf("Symbol must be one of %s", paste(dQuote(valid_names), collapse = ", "))) } base <- switch(units, binary = 1024, si = 1000) out_names <- switch(units, binary = bin_names, si = si_names) @@ -64,7 +64,7 @@ bytes <- function (x, symbol = 'auto', units = c('binary', 'si'), sym_len <- inp_len } if (sym_len != inp_len) { - stop('Symbols argument must be either long 1 or of the same length as the input vector.') + stop("Symbols argument must be either long 1 or of the same length as the input vector.") } symbol <- ifelse(symbol == "auto", pmax(floor(log(x, base)), 0), From fd2d973eb2470641a66598e1618eab744518d477 Mon Sep 17 00:00:00 2001 From: Tarcisio Date: Tue, 28 Nov 2017 15:59:01 +0100 Subject: [PATCH 5/6] Adds myself to contributors --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ef9ebbd..f0bea93 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,8 @@ Authors@R: c( person("ProPublica", role="dtc", comment="StateFace font"), person("Aditya", "Kothari", role=c("aut", "ctb"), comment="Core functionality of horizon plots"), person("Ather", role="dtc", comment="Core functionality of horizon plots"), - person("Jonathan","Sidi", role=c("aut","ctb"), comment="Annotation ticks") + person("Jonathan","Sidi", role=c("aut","ctb"), comment="Annotation ticks"), + person("Tarcisio","Fedrizzi", role="ctb", comment="Bytes formatter") ) Description: A compendium of new geometries, coordinate systems, statistical transformations, scales and fonts for 'ggplot2', including splines, 1d and 2d densities, From 419365647d8a6cb97f0f9bfa062e46862a9f5a25 Mon Sep 17 00:00:00 2001 From: Tarcisio Date: Tue, 28 Nov 2017 16:09:10 +0100 Subject: [PATCH 6/6] Fixes documentation typo --- R/formatters.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/formatters.r b/R/formatters.r index 39b2026..5adf8a4 100644 --- a/R/formatters.r +++ b/R/formatters.r @@ -13,7 +13,7 @@ #' @param units which unit base to use, "\code{binary}" (1024 base) or #' "\code{si}" (1000 base) for ISI units. #' @param only_highest Whether to use the unit of the highest number or -#' each number uses its own base. +#' each number uses its own unit. #' @references Units of Information (Wikipedia) : #' \url{http://en.wikipedia.org/wiki/Units_of_information} #' @export