From 7f93de696fa3415feb05ddb284a8c741af208f8c Mon Sep 17 00:00:00 2001 From: hrbrmstr Date: Wed, 1 Jul 2020 11:38:43 -0400 Subject: [PATCH] native --- DESCRIPTION | 8 ++-- NAMESPACE | 3 ++ R/RcppExports.R | 16 ++++++++ R/slugify-package.R | 2 + R/slugify-r.R | 22 +++++++++++ R/zzz.R | 92 ++++++++++++++++++++++++++++++++++++++++++-- README.Rmd | 19 ++++++++- README.md | 45 +++++++++++++++++----- inst/tinytest/test_slugify.R | 18 +++++++++ man/slugify_fast.Rd | 24 ++++++++++++ man/slugify_native.Rd | 18 +++++++++ 11 files changed, 250 insertions(+), 17 deletions(-) create mode 100644 R/RcppExports.R create mode 100644 R/slugify-r.R create mode 100644 man/slugify_fast.Rd create mode 100644 man/slugify_native.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 35f06e9..5ebb601 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: slugify Type: Package Title: Create slug strings for SEO -Version: 0.1.1 -Date: 2020-06-29 +Version: 0.2.0 +Date: 2020-07-01 Authors@R: c( person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5670-2640")) @@ -13,12 +13,14 @@ Description: Most blogging platforms turn title phrases into slugs URL: https://git.rud.is/hrbrmstr/slugify BugReports: https://git.rud.is/hrbrmstr/slugify/issues Encoding: UTF-8 +SystemRequirements: C++11 License: MIT + file LICENSE Suggests: covr, tinytest Depends: R (>= 3.6.0) Imports: - V8 + V8, + stringi Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.0 diff --git a/NAMESPACE b/NAMESPACE index 387c0c9..034547e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,7 @@ # Generated by roxygen2: do not edit by hand export(slugify) +export(slugify_fast) +export(slugify_native) import(V8) +import(stringi) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..c6d1a83 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,16 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 
10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' Much faster string slugifier +#' +#' @param x string to slugify +#' @param repl what to replace whitespace with; defaults to `"-"` +#' @param lower turn slugified string into lowercase? defaults to `TRUE` +#' @return string +#' @export +#' @examples +#' slugify_fast("R is great!") +slugify_fast <- function(x, repl = "-", lower = TRUE) { + .Call(`_slugify_slugify_fast`, x, repl, lower) +} + diff --git a/R/slugify-package.R b/R/slugify-package.R index 1310f24..f36a103 100644 --- a/R/slugify-package.R +++ b/R/slugify-package.R @@ -7,5 +7,7 @@ #' @name slugify #' @keywords internal #' @author Bob Rudis (bob@@rud.is) +#' @import stringi #' @import V8 "_PACKAGE" + diff --git a/R/slugify-r.R b/R/slugify-r.R new file mode 100644 index 0000000..13cae6b --- /dev/null +++ b/R/slugify-r.R @@ -0,0 +1,22 @@ +`%na%` <- function(a, b) if (is.na(a)) b else a + +#' Native R slugify (with the help of {stringi}) +#' +#' @param x string to slugify +#' @param repl what to replace runs of whitespace with; defaults to `"-"` +#' @param lower lowercase final output? defaults to `TRUE` 
+#' @export +slugify_native <- function(x, repl = "-", lower = TRUE) { + + x <- stri_replace_all_fixed(x, names(slugify_charmap), slugify_charmap, vectorize_all = FALSE) + x <- stri_replace_all_regex(x, "[^\\P{P}-]", "") + x <- stri_trim_both(x) + # escape "\" and "$" so `repl` is inserted literally instead of being parsed as a regex back-reference + x <- stri_replace_all_regex(x, "[[:space:]]+", stri_replace_all_regex(repl, "([\\\\$])", "\\\\$1")) + if (lower) x <- stri_trans_tolower(x) + + x + +} + + diff --git a/R/zzz.R b/R/zzz.R index 1df0e76..905c94b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,91 @@ +slugify_charmap <- c(`$` = "dollar", `%` = "percent", `&` = "and", `<` = "less", + `>` = "greater", `|` = "or", `¢` = "cent", `£` = "pound", `¤` = "currency", + `¥` = "yen", `©` = "(c)", ª = "a", `®` = "(r)", º = "o", + À = "A", Á = "A",  = "A", à = "A", Ä = "A", Å = "A", Æ = "AE", + Ç = "C", È = "E", É = "E", Ê = "E", Ë = "E", Ì = "I", Í = "I", + Î = "I", Ï = "I", Ð = "D", Ñ = "N", Ò = "O", Ó = "O", Ô = "O", + Õ = "O", Ö = "O", Ø = "O", Ù = "U", Ú = "U", Û = "U", Ü = "U", + Ý = "Y", Þ = "TH", ß = "ss", à = "a", á = "a", â = "a", + ã = "a", ä = "a", å = "a", æ = "ae", ç = "c", è = "e", + é = "e", ê = "e", ë = "e", ì = "i", í = "i", î = "i", ï = "i", + ð = "d", ñ = "n", ò = "o", ó = "o", ô = "o", õ = "o", ö = "o", + ø = "o", ù = "u", ú = "u", û = "u", ü = "u", ý = "y", þ = "th", + ÿ = "y", Ā = "A", ā = "a", Ă = "A", ă = "a", Ą = "A", ą = "a", + Ć = "C", ć = "c", Č = "C", č = "c", Ď = "D", ď = "d", Đ = "DJ", + đ = "dj", Ē = "E", ē = "e", Ė = "E", ė = "e", Ę = "E", + ę = "e", Ě = "E", ě = "e", Ğ = "G", ğ = "g", Ģ = "G", ģ = "g", + Ĩ = "I", ĩ = "i", Ī = "I", ī = "i", Į = "I", į = "i", İ = "I", + ı = "i", Ķ = "K", ķ = "k", Ļ = "L", ļ = "l", Ľ = "L", ľ = "l", + Ł = "L", ł = "l", Ń = "N", ń = "n", Ņ = "N", ņ = "n", Ň = "N", + ň = "n", Ō = "O", ō = "o", Ő = "O", ő = "o", Œ = "OE", + œ = "oe", Ŕ = "R", ŕ = "r", Ř = "R", ř = "r", Ś = "S", + ś = "s", Ş = "S", ş = "s", Š = "S", š = "s", Ţ = "T", ţ = "t", + Ť = "T", ť = "t", Ũ = "U", ũ = "u", Ū = "U", ū = "u", Ů = "U", + ů = "u", Ű = "U", ű = "u", Ų = "U", ų = "u", Ŵ = 
"W", ŵ = "w", + Ŷ = "Y", ŷ = "y", Ÿ = "Y", Ź = "Z", ź = "z", Ż = "Z", ż = "z", + Ž = "Z", ž = "z", ƒ = "f", Ơ = "O", ơ = "o", Ư = "U", ư = "u", + Lj = "LJ", lj = "lj", Nj = "NJ", nj = "nj", Ș = "S", ș = "s", + Ț = "T", ț = "t", `˚` = "o", Ά = "A", Έ = "E", Ή = "H", + Ί = "I", Ό = "O", Ύ = "Y", Ώ = "W", ΐ = "i", Α = "A", Β = "B", + Γ = "G", Δ = "D", Ε = "E", Ζ = "Z", Η = "H", Θ = "8", Ι = "I", + Κ = "K", Λ = "L", Μ = "M", Ν = "N", Ξ = "3", Ο = "O", Π = "P", + Ρ = "R", Σ = "S", Τ = "T", Υ = "Y", Φ = "F", Χ = "X", Ψ = "PS", + Ω = "W", Ϊ = "I", Ϋ = "Y", ά = "a", έ = "e", ή = "h", ί = "i", + ΰ = "y", α = "a", β = "b", γ = "g", δ = "d", ε = "e", ζ = "z", + η = "h", θ = "8", ι = "i", κ = "k", λ = "l", μ = "m", ν = "n", + ξ = "3", ο = "o", π = "p", ρ = "r", ς = "s", σ = "s", τ = "t", + υ = "y", φ = "f", χ = "x", ψ = "ps", ω = "w", ϊ = "i", + ϋ = "y", ό = "o", ύ = "y", ώ = "w", Ё = "Yo", Ђ = "DJ", + Є = "Ye", І = "I", Ї = "Yi", Ј = "J", Љ = "LJ", Њ = "NJ", + Ћ = "C", Џ = "DZ", А = "A", Б = "B", В = "V", Г = "G", + Д = "D", Е = "E", Ж = "Zh", З = "Z", И = "I", Й = "J", + К = "K", Л = "L", М = "M", Н = "N", О = "O", П = "P", Р = "R", + С = "S", Т = "T", У = "U", Ф = "F", Х = "H", Ц = "C", Ч = "Ch", + Ш = "Sh", Щ = "Sh", Ъ = "U", Ы = "Y", Ь = "", Э = "E", + Ю = "Yu", Я = "Ya", а = "a", б = "b", в = "v", г = "g", + д = "d", е = "e", ж = "zh", з = "z", и = "i", й = "j", + к = "k", л = "l", м = "m", н = "n", о = "o", п = "p", р = "r", + с = "s", т = "t", у = "u", ф = "f", х = "h", ц = "c", ч = "ch", + ш = "sh", щ = "sh", ъ = "u", ы = "y", ь = "", э = "e", + ю = "yu", я = "ya", ё = "yo", ђ = "dj", є = "ye", і = "i", + ї = "yi", ј = "j", љ = "lj", њ = "nj", ћ = "c", ѝ = "u", + џ = "dz", Ґ = "G", ґ = "g", Ғ = "GH", ғ = "gh", Қ = "KH", + қ = "kh", Ң = "NG", ң = "ng", Ү = "UE", ү = "ue", Ұ = "U", + ұ = "u", Һ = "H", һ = "h", Ә = "AE", ә = "ae", Ө = "OE", + ө = "oe", `฿` = "baht", ა = "a", ბ = "b", გ = "g", დ = "d", + ე = "e", ვ = "v", ზ = "z", თ = "t", ი = "i", კ = "k", + ლ = "l", მ = 
"m", ნ = "n", ო = "o", პ = "p", ჟ = "zh", + რ = "r", ს = "s", ტ = "t", უ = "u", ფ = "f", ქ = "k", + ღ = "gh", ყ = "q", შ = "sh", ჩ = "ch", ც = "ts", ძ = "dz", + წ = "ts", ჭ = "ch", ხ = "kh", ჯ = "j", ჰ = "h", Ẁ = "W", + ẁ = "w", Ẃ = "W", ẃ = "w", Ẅ = "W", ẅ = "w", `ẞ` = "SS", + Ạ = "A", ạ = "a", Ả = "A", ả = "a", Ấ = "A", ấ = "a", + Ầ = "A", ầ = "a", Ẩ = "A", ẩ = "a", Ẫ = "A", ẫ = "a", + Ậ = "A", ậ = "a", Ắ = "A", ắ = "a", Ằ = "A", ằ = "a", + Ẳ = "A", ẳ = "a", Ẵ = "A", ẵ = "a", Ặ = "A", ặ = "a", + Ẹ = "E", ẹ = "e", Ẻ = "E", ẻ = "e", Ẽ = "E", ẽ = "e", + Ế = "E", ế = "e", Ề = "E", ề = "e", Ể = "E", ể = "e", + Ễ = "E", ễ = "e", Ệ = "E", ệ = "e", Ỉ = "I", ỉ = "i", + Ị = "I", ị = "i", Ọ = "O", ọ = "o", Ỏ = "O", ỏ = "o", + Ố = "O", ố = "o", Ồ = "O", ồ = "o", Ổ = "O", ổ = "o", + Ỗ = "O", ỗ = "o", Ộ = "O", ộ = "o", Ớ = "O", ớ = "o", + Ờ = "O", ờ = "o", Ở = "O", ở = "o", Ỡ = "O", ỡ = "o", + Ợ = "O", ợ = "o", Ụ = "U", ụ = "u", Ủ = "U", ủ = "u", + Ứ = "U", ứ = "u", Ừ = "U", ừ = "u", Ử = "U", ử = "u", + Ữ = "U", ữ = "u", Ự = "U", ự = "u", Ỳ = "Y", ỳ = "y", + Ỵ = "Y", ỵ = "y", Ỷ = "Y", ỷ = "y", Ỹ = "Y", ỹ = "y", + `‘` = "'", `’` = "'", `“` = "\\\"", `”` = "\\\"", `†` = "+", + `•` = "*", `…` = "...", `₠` = "ecu", `₢` = "cruzeiro", + `₣` = "french franc", `₤` = "lira", `₥` = "mill", `₦` = "naira", + `₧` = "peseta", `₨` = "rupee", `₩` = "won", `₪` = "new shequel", + `₫` = "dong", `€` = "euro", `₭` = "kip", `₮` = "tugrik", + `₯` = "drachma", `₰` = "penny", `₱` = "peso", `₲` = "guarani", + `₳` = "austral", `₴` = "hryvnia", `₵` = "cedi", `₸` = "kazakhstani tenge", + `₹` = "indian rupee", `₺` = "turkish lira", `₽` = "russian ruble", + `₿` = "bitcoin", `℠` = "sm", `™` = "tm", `∂` = "d", `∆` = "delta", + `∑` = "sum", `∞` = "infinity", `♥` = "love", 元 = "yuan", + 円 = "yen", `﷼` = "rial") + .pkgenv <- new.env(parent=emptyenv()) .onLoad <- function(...) 
{ @@ -7,6 +95,4 @@ assign("ctx", ctx, envir=.pkgenv) - cache <- list() - -} \ No newline at end of file +} diff --git a/README.Rmd b/README.Rmd index b1a2ef2..7476f06 100644 --- a/README.Rmd +++ b/README.Rmd @@ -42,13 +42,28 @@ packageVersion("slugify") ``` ```{r ex-01} -slugify("R is great!") +slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET") -slugify("R is great!", replacement = "@@") +slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", replacement = "@") slugify("R is great!", remove = "/[Rr]/g") ``` +```{r ex-02} +slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET") + +slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", repl = "@") +``` + +```{r bench} +microbenchmark::microbenchmark( + slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"), + slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"), + control = list(warmup = 10) +) +``` + + ## slugify Metrics ```{r cloc, echo=FALSE} diff --git a/README.md b/README.md index 626e22b..bbcfaf6 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,10 @@ Tools are provided to turn phrases into slugs. The following functions are implemented: + - `slugify_fast`: Much faster string slugifier + + - `slugify_native`: Native R slugify (with the help of stringi) + - `slugify`: Slugify a string ## Installation @@ -53,26 +57,49 @@ library(slugify) # current version packageVersion("slugify") -## [1] '0.1.1' +## [1] '0.2.0' ``` ``` r -slugify("R is great!") -## [1] "r-is-great" +slugify("New Package — {cdccovidview} — To Work with the U.S. 
CDC’s New COVID-19 Trackers: COVIDView and COVID-NET") +## [1] "new-package-cdccovidview-to-work-with-the-us-cdcs-new-covid-19-trackers-covidview-and-covid-net" -slugify("R is great!", replacement = "@@") -## [1] "r@@is@@great" +slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", replacement = "@") +## [1] "new@package@cdccovidview@to@work@with@the@us@cdcs@new@covid19@trackers@covidview@and@covidnet" slugify("R is great!", remove = "/[Rr]/g") ## [1] "is-geat" ``` +``` r +slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET") +## [1] "new-package-cdccovidview-to-work-with-the-us-cdcs-new-covid-19-trackers-covidview-and-covid-net" + +slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", repl = "@") +## [1] "new@package@cdccovidview@to@work@with@the@us@cdcs@new@covid-19@trackers@covidview@and@covid-net" +``` + +``` r +microbenchmark::microbenchmark( + slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"), + slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"), + control = list(warmup = 10) +) +## Unit: microseconds +## expr +## slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET") +## slugify("New Package — {cdccovidview} — To Work with the U.S. 
CDC’s New COVID-19 Trackers: COVIDView and COVID-NET") +## min lq mean median uq max neval +## 402.524 446.394 519.0379 476.3795 561.544 880.638 100 +## 499.746 539.201 690.6867 571.3005 760.809 2282.490 100 +``` + ## slugify Metrics -| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | -| :--- | -------: | --: | --: | ---: | ----------: | ---: | -------: | ---: | -| R | 4 | 0.8 | 33 | 0.75 | 15 | 0.44 | 25 | 0.45 | -| Rmd | 1 | 0.2 | 11 | 0.25 | 19 | 0.56 | 31 | 0.55 | +| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | +| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: | +| R | 6 | 0.86 | 131 | 0.88 | 25 | 0.52 | 43 | 0.55 | +| Rmd | 1 | 0.14 | 18 | 0.12 | 23 | 0.48 | 35 | 0.45 | clock Package Metrics for slugify diff --git a/inst/tinytest/test_slugify.R b/inst/tinytest/test_slugify.R index d1d1cbb..d8750d5 100644 --- a/inst/tinytest/test_slugify.R +++ b/inst/tinytest/test_slugify.R @@ -7,3 +7,21 @@ expect_true(slugify("R is great!", lower = FALSE) == "R-is-great") expect_true(slugify("R is great!", replacement = "@") == "r@is@great") expect_true(slugify("R is great!", remove = "/[Rr]/g") == "is-geat") + +expect_true(slugify_native("R is great!") == "r-is-great") + +expect_true(slugify_native("R is great!", lower = FALSE) == "R-is-great") + +expect_true(slugify_native("R is great!", repl = "@") == "r@is@great") + +expect_true( + slugify("Mining R 4.0.0 Changelog for Nuggets of Gold: #1 stopifnot()") == + "mining-r-400-changelog-for-nuggets-of-gold-1-stopifnot" +) + +expect_true( + slugify_native("Mining R 4.0.0 Changelog for Nuggets of Gold: #1 stopifnot()") == + "mining-r-400-changelog-for-nuggets-of-gold-1-stopifnot" +) + + diff --git a/man/slugify_fast.Rd b/man/slugify_fast.Rd new file mode 100644 index 0000000..1ec1b3d --- /dev/null +++ b/man/slugify_fast.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{slugify_fast} 
+\alias{slugify_fast} +\title{Much faster string slugifier} +\usage{ +slugify_fast(x, repl = "-", lower = TRUE) +} +\arguments{ +\item{x}{string to slugify} + +\item{repl}{what to replace whitespace with; defaults to \code{"-"}} + +\item{lower}{turn slugified string into lowercase? defaults to \code{TRUE}} +} +\value{ +string +} +\description{ +Much faster string slugifier +} +\examples{ +slugify_fast("R is great!") +} diff --git a/man/slugify_native.Rd b/man/slugify_native.Rd new file mode 100644 index 0000000..21d5919 --- /dev/null +++ b/man/slugify_native.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/slugify-r.R +\name{slugify_native} +\alias{slugify_native} +\title{Native R slugify (with the help of {stringi})} +\usage{ +slugify_native(x, repl = "-", lower = TRUE) +} +\arguments{ +\item{x}{string to slugify} + +\item{repl}{what to replace runs of whitespace with; defaults to \code{"-"}} + +\item{lower}{lowercase final output? defaults to \code{TRUE}} +} +\description{ +Native R slugify (with the help of {stringi}) +}