Browse Source

native

master
boB Rudis 2 years ago
parent
commit
7f93de696f
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 8
      DESCRIPTION
  2. 3
      NAMESPACE
  3. 16
      R/RcppExports.R
  4. 2
      R/slugify-package.R
  5. 22
      R/slugify-r.R
  6. 92
      R/zzz.R
  7. 19
      README.Rmd
  8. 45
      README.md
  9. 18
      inst/tinytest/test_slugify.R
  10. 24
      man/slugify_fast.Rd
  11. 18
      man/slugify_native.Rd

8
DESCRIPTION

@ -1,8 +1,8 @@
Package: slugify
Type: Package
Title: Create slug strings for SEO
Version: 0.1.1
Date: 2020-06-29
Version: 0.2.0
Date: 2020-07-01
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640"))
@ -13,12 +13,14 @@ Description: Most blogging platforms turn title phrases into slugs
URL: https://git.rud.is/hrbrmstr/slugify
BugReports: https://git.rud.is/hrbrmstr/slugify/issues
Encoding: UTF-8
SystemRequirements: C++11
License: MIT + file LICENSE
Suggests:
covr, tinytest
Depends:
R (>= 3.6.0)
Imports:
V8
V8,
stringi
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.0

3
NAMESPACE

@ -1,4 +1,7 @@
# Generated by roxygen2: do not edit by hand
export(slugify)
export(slugify_fast)
export(slugify_native)
import(V8)
import(stringi)

16
R/RcppExports.R

@ -0,0 +1,16 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' Much faster string slugifier
#'
#' Thin wrapper that forwards its arguments to the compiled routine
#' `_slugify_slugify_fast`.
#'
#' @param x string to slugify
#' @param repl what to replace whitespace with; defaults to `"-"`
#' @param lower turn slugified string into lowercase? defaults to `TRUE`
#' @return string
#' @export
#' @examples
#' slugify_fast("R is great!")
slugify_fast <- function(x, repl = "-", lower = TRUE) {
.Call(`_slugify_slugify_fast`, x, repl, lower)
}

2
R/slugify-package.R

@ -7,5 +7,7 @@
#' @name slugify
#' @keywords internal
#' @author Bob Rudis (bob@@rud.is)
#' @import stringi
#' @import V8
"_PACKAGE"

22
R/slugify-r.R

@ -0,0 +1,22 @@
# Fallback operator: evaluates to `a` unless `a` is NA, in which case `b`.
`%na%` <- function(a, b) {
  if (!is.na(a)) {
    return(a)
  }
  b
}
#' Native R slugify (with the help of {stringi})
#'
#' Applies the `slugify_charmap` replacements, drops every punctuation
#' character except the hyphen, trims both ends, and then turns each run of
#' whitespace into `repl`.
#'
#' @param x string to slugify
#' @param repl what to replace spaces with; inserted literally, so a dollar
#'   sign or backslash in it carries no special meaning
#' @param lower lowercase final output?
#' @return the slugified string
#' @export
slugify_native <- function(x, repl = "-", lower = TRUE) {
  x <- stri_replace_all_fixed(
    x, names(slugify_charmap), slugify_charmap, vectorize_all = FALSE
  )
  # Double-negated class: matches punctuation (\p{P}) other than "-".
  x <- stri_replace_all_regex(x, "[^\\P{P}-]", "")
  x <- stri_trim_both(x)
  # Regex replacement strings give `$` and `\` special meaning (capture-group
  # references and escapes); escape both so `repl` is inserted verbatim.
  repl <- stri_replace_all_fixed(
    repl, c("\\", "$"), c("\\\\", "\\$"), vectorize_all = FALSE
  )
  x <- stri_replace_all_regex(x, "[[:space:]]+", repl)
  if (lower) {
    x <- stri_trans_tolower(x)
  }
  x
}

92
R/zzz.R

@ -1,3 +1,91 @@
# Named character vector used as a replacement table: each name is a
# character to look for and each value is the text substituted for it.
# slugify_native() passes names(slugify_charmap) / slugify_charmap to
# stri_replace_all_fixed().
# NOTE(review): a number of entries below have no visible name before the
# `=` (e.g. `= "a"`); presumably the original characters were lost to an
# encoding/extraction problem -- confirm against the upstream charmap.
slugify_charmap <- c(`$` = "dollar", `%` = "percent", `&` = "and", `<` = "less",
`>` = "greater", `|` = "or", `¢` = "cent", `£` = "pound", `¤` = "currency",
`¥` = "yen", `©` = "(c)", ª = "a", `®` = "(r)", º = "o",
À = "A", Á = "A", Â = "A", Ã = "A", Ä = "A", Å = "A", Æ = "AE",
Ç = "C", È = "E", É = "E", Ê = "E", Ë = "E", Ì = "I", Í = "I",
Î = "I", Ï = "I", Ð = "D", Ñ = "N", Ò = "O", Ó = "O", Ô = "O",
Õ = "O", Ö = "O", Ø = "O", Ù = "U", Ú = "U", Û = "U", Ü = "U",
Ý = "Y", Þ = "TH", ß = "ss", à = "a", á = "a", â = "a",
ã = "a", ä = "a", å = "a", æ = "ae", ç = "c", è = "e",
é = "e", ê = "e", ë = "e", ì = "i", í = "i", î = "i", ï = "i",
ð = "d", ñ = "n", ò = "o", ó = "o", ô = "o", õ = "o", ö = "o",
ø = "o", ù = "u", ú = "u", û = "u", ü = "u", ý = "y", þ = "th",
ÿ = "y", Ā = "A", ā = "a", Ă = "A", ă = "a", Ą = "A", ą = "a",
Ć = "C", ć = "c", Č = "C", č = "c", Ď = "D", ď = "d", Đ = "DJ",
đ = "dj", Ē = "E", ē = "e", Ė = "E", ė = "e", Ę = "e",
ę = "e", Ě = "E", ě = "e", Ğ = "G", ğ = "g", Ģ = "G", ģ = "g",
Ĩ = "I", ĩ = "i", Ī = "i", ī = "i", Į = "I", į = "i", İ = "I",
ı = "i", Ķ = "k", ķ = "k", Ļ = "L", ļ = "l", Ľ = "L", ľ = "l",
Ł = "L", ł = "l", Ń = "N", ń = "n", Ņ = "N", ņ = "n", Ň = "N",
ň = "n", Ō = "O", ō = "o", Ő = "O", ő = "o", Œ = "OE",
œ = "oe", Ŕ = "R", ŕ = "r", Ř = "R", ř = "r", Ś = "S",
ś = "s", Ş = "S", ş = "s", Š = "S", š = "s", Ţ = "T", ţ = "t",
Ť = "T", ť = "t", Ũ = "U", ũ = "u", Ū = "u", ū = "u", Ů = "U",
ů = "u", Ű = "U", ű = "u", Ų = "U", ų = "u", Ŵ = "W", ŵ = "w",
Ŷ = "Y", ŷ = "y", Ÿ = "Y", Ź = "Z", ź = "z", Ż = "Z", ż = "z",
Ž = "Z", ž = "z", ƒ = "f", Ơ = "O", ơ = "o", Ư = "U", ư = "u",
Lj = "LJ", lj = "lj", Nj = "NJ", nj = "nj", Ș = "S", ș = "s",
Ț = "T", ț = "t", `˚` = "o", Ά = "A", Έ = "E", Ή = "H",
Ί = "I", Ό = "O", Ύ = "Y", Ώ = "W", ΐ = "i", Α = "A", Β = "B",
Γ = "G", Δ = "D", Ε = "E", Ζ = "Z", Η = "H", Θ = "8", Ι = "I",
Κ = "K", Λ = "L", Μ = "M", Ν = "N", Ξ = "3", Ο = "O", Π = "P",
Ρ = "R", Σ = "S", Τ = "T", Υ = "Y", Φ = "F", Χ = "X", Ψ = "PS",
Ω = "W", Ϊ = "I", Ϋ = "Y", ά = "a", έ = "e", ή = "h", ί = "i",
ΰ = "y", α = "a", β = "b", γ = "g", δ = "d", ε = "e", ζ = "z",
η = "h", θ = "8", ι = "i", κ = "k", λ = "l", μ = "m", ν = "n",
ξ = "3", ο = "o", π = "p", ρ = "r", ς = "s", σ = "s", τ = "t",
υ = "y", φ = "f", χ = "x", ψ = "ps", ω = "w", ϊ = "i",
ϋ = "y", ό = "o", ύ = "y", ώ = "w", Ё = "Yo", Ђ = "DJ",
Є = "Ye", І = "I", Ї = "Yi", Ј = "J", Љ = "LJ", Њ = "NJ",
Ћ = "C", Џ = "DZ", А = "A", Б = "B", В = "V", Г = "G",
Д = "D", Е = "E", Ж = "Zh", З = "Z", И = "I", Й = "J",
К = "K", Л = "L", М = "M", Н = "N", О = "O", П = "P", Р = "R",
С = "S", Т = "T", У = "U", Ф = "F", Х = "H", Ц = "C", Ч = "Ch",
Ш = "Sh", Щ = "Sh", Ъ = "U", Ы = "Y", Ь = "", Э = "E",
Ю = "Yu", Я = "Ya", а = "a", б = "b", в = "v", г = "g",
д = "d", е = "e", ж = "zh", з = "z", и = "i", й = "j",
к = "k", л = "l", м = "m", н = "n", о = "o", п = "p", р = "r",
с = "s", т = "t", у = "u", ф = "f", х = "h", ц = "c", ч = "ch",
ш = "sh", щ = "sh", ъ = "u", ы = "y", ь = "", э = "e",
ю = "yu", я = "ya", ё = "yo", ђ = "dj", є = "ye", і = "i",
ї = "yi", ј = "j", љ = "lj", њ = "nj", ћ = "c", ѝ = "u",
џ = "dz", Ґ = "G", ґ = "g", Ғ = "GH", ғ = "gh", Қ = "KH",
қ = "kh", Ң = "NG", ң = "ng", Ү = "UE", ү = "ue", Ұ = "U",
ұ = "u", Һ = "H", һ = "h", Ә = "AE", ә = "ae", Ө = "OE",
ө = "oe", `฿` = "baht",= "a",= "b",= "g",= "d",
= "e",= "v",= "z",= "t",= "i",= "k",
= "l",= "m",= "n",= "o",= "p",= "zh",
= "r",= "s",= "t",= "u",= "f",= "k",
= "gh",= "q",= "sh",= "ch",= "ts",= "dz",
= "ts",= "ch",= "kh",= "j",= "h",= "W",
= "w",= "W",= "w",= "W",= "w", `ẞ` = "SS",
= "A",= "a",= "A",= "a",= "A",= "a",
= "A",= "a",= "A",= "a",= "A",= "a",
= "A",= "a",= "A",= "a",= "A",= "a",
= "A",= "a",= "A",= "a",= "A",= "a",
= "E",= "e",= "E",= "e",= "E",= "e",
= "E", ế = "e",= "E",= "e",= "E",= "e",
= "E",= "e",= "E",= "e",= "I",= "i",
= "I",= "i",= "O",= "o",= "O",= "o",
= "O",= "o",= "O",= "o",= "O",= "o",
= "O",= "o",= "O",= "o",= "O",= "o",
= "O",= "o",= "O",= "o",= "O",= "o",
= "O",= "o",= "U",= "u",= "U",= "u",
= "U",= "u",= "U",= "u",= "U",= "u",
= "U",= "u",= "U",= "u",= "Y",= "y",
= "Y",= "y",= "Y",= "y",= "Y",= "y",
`‘` = "'", `’` = "'", `“` = "\\\"", `”` = "\\\"", `†` = "+",
`•` = "*", `…` = "...", `₠` = "ecu", `₢` = "cruzeiro",
`₣` = "french franc", `₤` = "lira", `₥` = "mill", `₦` = "naira",
`₧` = "peseta", `₨` = "rupee", `₩` = "won", `₪` = "new shequel",
`₫` = "dong", `€` = "euro", `₭` = "kip", `₮` = "tugrik",
`₯` = "drachma", `₰` = "penny", `₱` = "peso", `₲` = "guarani",
`₳` = "austral", `₴` = "hryvnia", `₵` = "cedi", `₸` = "kazakhstani tenge",
`₹` = "indian rupee", `₺` = "turkish lira", `₽` = "russian ruble",
`₿` = "bitcoin", `℠` = "sm", `™` = "tm", `∂` = "d", `∆` = "delta",
`∑` = "sum", `∞` = "infinity", `♥` = "love",= "yuan",
= "yen", `﷼` = "rial")
# Environment parented on the empty environment; `.onLoad` assigns `ctx`
# into it.
.pkgenv <- new.env(parent = emptyenv())
.onLoad <- function(...) {
@ -7,6 +95,4 @@
assign("ctx", ctx, envir=.pkgenv)
cache <- list()
}
}

19
README.Rmd

@ -42,13 +42,28 @@ packageVersion("slugify")
```
```{r ex-01}
slugify("R is great!")
slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET")
slugify("R is great!", replacement = "@@")
slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", replacement = "@")
slugify("R is great!", remove = "/[Rr]/g")
```
```{r ex-02}
slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET")
slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", repl = "@")
```
```{r bench}
microbenchmark::microbenchmark(
slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"),
slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"),
control = list(warmup = 10)
)
```
## slugify Metrics
```{r cloc, echo=FALSE}

45
README.md

@ -25,6 +25,10 @@ Tools are provided to turn phrases into slugs.
The following functions are implemented:
- `slugify_fast`: Much faster string slugifier
- `slugify_native`: Native R slugify (with the help of stringi)
- `slugify`: Slugify a string
## Installation
@ -53,26 +57,49 @@ library(slugify)
# current version
packageVersion("slugify")
## [1] '0.1.1'
## [1] '0.2.0'
```
``` r
slugify("R is great!")
## [1] "r-is-great"
slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET")
## [1] "new-package-cdccovidview-to-work-with-the-us-cdcs-new-covid-19-trackers-covidview-and-covid-net"
slugify("R is great!", replacement = "@@")
## [1] "r@@is@@great"
slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", replacement = "@")
## [1] "new@package@cdccovidview@to@work@with@the@us@cdcs@new@covid19@trackers@covidview@and@covidnet"
slugify("R is great!", remove = "/[Rr]/g")
## [1] "is-geat"
```
``` r
slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET")
## [1] "new-package-cdccovidview-to-work-with-the-us-cdcs-new-covid-19-trackers-covidview-and-covid-net"
slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET", repl = "@")
## [1] "new@package@cdccovidview@to@work@with@the@us@cdcs@new@covid-19@trackers@covidview@and@covid-net"
```
``` r
microbenchmark::microbenchmark(
slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"),
slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET"),
control = list(warmup = 10)
)
## Unit: microseconds
## expr
## slugify_native("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET")
## slugify("New Package — {cdccovidview} — To Work with the U.S. CDC’s New COVID-19 Trackers: COVIDView and COVID-NET")
## min lq mean median uq max neval
## 402.524 446.394 519.0379 476.3795 561.544 880.638 100
## 499.746 539.201 690.6867 571.3005 760.809 2282.490 100
```
## slugify Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :--- | -------: | --: | --: | ---: | ----------: | ---: | -------: | ---: |
| R | 4 | 0.8 | 33 | 0.75 | 15 | 0.44 | 25 | 0.45 |
| Rmd | 1 | 0.2 | 11 | 0.25 | 19 | 0.56 | 31 | 0.55 |
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
| R | 6 | 0.86 | 131 | 0.88 | 25 | 0.52 | 43 | 0.55 |
| Rmd | 1 | 0.14 | 18 | 0.12 | 23 | 0.48 | 35 | 0.45 |
cloc Package Metrics for slugify

18
inst/tinytest/test_slugify.R

@ -7,3 +7,21 @@ expect_true(slugify("R is great!", lower = FALSE) == "R-is-great")
# expect_equal() is used instead of expect_true(a == b) so that a failing
# expectation reports the actual and expected slugs.

# V8-backed slugify(): custom replacement and removal pattern.
expect_equal(slugify("R is great!", replacement = "@"), "r@is@great")
expect_equal(slugify("R is great!", remove = "/[Rr]/g"), "is-geat")

# stringi-backed slugify_native(): defaults, case preservation, custom repl.
expect_equal(slugify_native("R is great!"), "r-is-great")
expect_equal(slugify_native("R is great!", lower = FALSE), "R-is-great")
expect_equal(slugify_native("R is great!", repl = "@"), "r@is@great")

# Both implementations must produce the same slug for a punctuation-heavy
# title.
expect_equal(
  slugify("Mining R 4.0.0 Changelog for Nuggets of Gold: #1 stopifnot()"),
  "mining-r-400-changelog-for-nuggets-of-gold-1-stopifnot"
)
expect_equal(
  slugify_native("Mining R 4.0.0 Changelog for Nuggets of Gold: #1 stopifnot()"),
  "mining-r-400-changelog-for-nuggets-of-gold-1-stopifnot"
)

24
man/slugify_fast.Rd

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{slugify_fast}
\alias{slugify_fast}
\title{Much faster string slugifier}
\usage{
slugify_fast(x, repl = "-", lower = TRUE)
}
\arguments{
\item{x}{string to slugify}
\item{repl}{what to replace whitespace with; defaults to \code{"-"}}
\item{lower}{turn slugified string into lowercase? defaults to \code{TRUE}}
}
\value{
string
}
\description{
Much faster string slugifier
}
\examples{
slugify_fast("R is great!")
}

18
man/slugify_native.Rd

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/slugify-r.R
\name{slugify_native}
\alias{slugify_native}
\title{Native R slugify (with the help of {stringi})}
\usage{
slugify_native(x, repl = "-", lower = TRUE)
}
\arguments{
\item{x}{string to slugify}
\item{repl}{what to replace spaces with}
\item{lower}{lowercase final output?}
}
\description{
Native R slugify (with the help of {stringi})
}
Loading…
Cancel
Save