diff --git a/README.md b/README.md index 93039c4..9ec7384 100644 --- a/README.md +++ b/README.md @@ -83,10 +83,14 @@ The following functions are implemented: ``` r install.packages("ulid", repos = "https://cinc.rud.is") # or +devtools::install_git("https://git.rud.is/hrbrmstr/ulid.git") +# or devtools::install_git("https://git.sr.ht/~hrbrmstr/ulid") # or devtools::install_gitlab("hrbrmstr/ulid") # or +devtools::install_bitbucket("hrbrmstr/ulid") +# or devtools::install_github("hrbrmstr/ulid") ``` @@ -107,7 +111,7 @@ packageVersion("ulid") ulid::ULIDgenerate() ``` - ## [1] "0001EHX06QWTHZTAT6TE2GVAVT" + ## [1] "0001EHX0NG5HMT3JM7FPPX3VE4" ### Many @@ -115,11 +119,11 @@ ulid::ULIDgenerate() (u <- ulid::ULIDgenerate(20)) ``` - ## [1] "0001EHX06Q1C0H9NQPZQW7YN65" "0001EHX06QN1D7PYNDZ33B7QF1" "0001EHX06QZHV13RGNDWAJ7VX9" "0001EHX06Q2XC40QWY7AFR8DCZ" - ## [5] "0001EHX06QXWZRT9EJ1YM75214" "0001EHX06QYDSTG3KRWSKG01EE" "0001EHX06QNC8YWTX0H2M7HHYQ" "0001EHX06QFPBJWQ1PAQ1KXTJY" - ## [9] "0001EHX06Q6HW1WE5GP8Q9J6D6" "0001EHX06Q3QEY4KF2DX0FKTFB" "0001EHX06QS3FDSPSHW2W7AB7X" "0001EHX06QS2VENPTAADWYKAQW" - ## [13] "0001EHX06QQJ7XEK9ZDT5542SD" "0001EHX06QHWVRKRF2SC86KB3Z" "0001EHX06QKG0V1129FBA0A5XY" "0001EHX06QY66KTA09CB9KKQP8" - ## [17] "0001EHX06QZ9VMBDT8YZMXK1B5" "0001EHX06QJ60ZFVA45DFAGB5R" "0001EHX06QC0CXK917QFWXA71M" "0001EHX06QBW2TTWPDAFG23J7E" + ## [1] "0001EHX0NGZF5WY8FPK8ZAKSZW" "0001EHX0NG8BFAJMM5WY9TANC3" "0001EHX0NGNW3339V2AC502DZT" "0001EHX0NGJDXCHBH9H6FRT2Z2" + ## [5] "0001EHX0NGVEBX1HWE0YJDTHQF" "0001EHX0NG6XQBHZ6W8WEDB327" "0001EHX0NGPKVDNQRD11MEZB0A" "0001EHX0NGKYPGCBZ4254AW24R" + ## [9] "0001EHX0NGRCVC3AD2HMCDS0SK" "0001EHX0NGDWNZV19PFS0NKFS0" "0001EHX0NGKYV3BSBJF55DDSZ1" "0001EHX0NG7C16T3C373PD72K6" + ## [13] "0001EHX0NG915080RV94QRNMJG" "0001EHX0NGFTD707BDMVJR7HQE" "0001EHX0NGYVAG9SVHVDGCZMWC" "0001EHX0NGYCDAPTJSCGAF0DQ8" + ## [17] "0001EHX0NGG8MTT1RVSCP20VCG" "0001EHX0NGQBM57RVGW82WN2JA" "0001EHX0NGBKRM0DNC66RAMWT9" "0001EHX0NGA30VQ2FKTC2VBZFH" ### Unmarshal 
@@ -128,26 +132,26 @@ unmarshal(u) ``` ## ts rnd - ## 1 2019-07-04 18:42:31 1C0H9NQPZQW7YN65 - ## 2 2019-07-04 18:42:31 N1D7PYNDZ33B7QF1 - ## 3 2019-07-04 18:42:31 ZHV13RGNDWAJ7VX9 - ## 4 2019-07-04 18:42:31 2XC40QWY7AFR8DCZ - ## 5 2019-07-04 18:42:31 XWZRT9EJ1YM75214 - ## 6 2019-07-04 18:42:31 YDSTG3KRWSKG01EE - ## 7 2019-07-04 18:42:31 NC8YWTX0H2M7HHYQ - ## 8 2019-07-04 18:42:31 FPBJWQ1PAQ1KXTJY - ## 9 2019-07-04 18:42:31 6HW1WE5GP8Q9J6D6 - ## 10 2019-07-04 18:42:31 3QEY4KF2DX0FKTFB - ## 11 2019-07-04 18:42:31 S3FDSPSHW2W7AB7X - ## 12 2019-07-04 18:42:31 S2VENPTAADWYKAQW - ## 13 2019-07-04 18:42:31 QJ7XEK9ZDT5542SD - ## 14 2019-07-04 18:42:31 HWVRKRF2SC86KB3Z - ## 15 2019-07-04 18:42:31 KG0V1129FBA0A5XY - ## 16 2019-07-04 18:42:31 Y66KTA09CB9KKQP8 - ## 17 2019-07-04 18:42:31 Z9VMBDT8YZMXK1B5 - ## 18 2019-07-04 18:42:31 J60ZFVA45DFAGB5R - ## 19 2019-07-04 18:42:31 C0CXK917QFWXA71M - ## 20 2019-07-04 18:42:31 BW2TTWPDAFG23J7E + ## 1 2019-07-04 18:50:24 ZF5WY8FPK8ZAKSZW + ## 2 2019-07-04 18:50:24 8BFAJMM5WY9TANC3 + ## 3 2019-07-04 18:50:24 NW3339V2AC502DZT + ## 4 2019-07-04 18:50:24 JDXCHBH9H6FRT2Z2 + ## 5 2019-07-04 18:50:24 VEBX1HWE0YJDTHQF + ## 6 2019-07-04 18:50:24 6XQBHZ6W8WEDB327 + ## 7 2019-07-04 18:50:24 PKVDNQRD11MEZB0A + ## 8 2019-07-04 18:50:24 KYPGCBZ4254AW24R + ## 9 2019-07-04 18:50:24 RCVC3AD2HMCDS0SK + ## 10 2019-07-04 18:50:24 DWNZV19PFS0NKFS0 + ## 11 2019-07-04 18:50:24 KYV3BSBJF55DDSZ1 + ## 12 2019-07-04 18:50:24 7C16T3C373PD72K6 + ## 13 2019-07-04 18:50:24 915080RV94QRNMJG + ## 14 2019-07-04 18:50:24 FTD707BDMVJR7HQE + ## 15 2019-07-04 18:50:24 YVAG9SVHVDGCZMWC + ## 16 2019-07-04 18:50:24 YCDAPTJSCGAF0DQ8 + ## 17 2019-07-04 18:50:24 G8MTT1RVSCP20VCG + ## 18 2019-07-04 18:50:24 QBM57RVGW82WN2JA + ## 19 2019-07-04 18:50:24 BKRM0DNC66RAMWT9 + ## 20 2019-07-04 18:50:24 A30VQ2FKTC2VBZFH ### Use defined timestamps @@ -155,14 +159,14 @@ unmarshal(u) (ut <- ts_generate(as.POSIXct("2017-11-01 15:00:00", origin="1970-01-01"))) ``` - ## [1] 
"0001CZM6DGVRVF68B8AP8VATS0" + ## [1] "0001CZM6DG02A5QRDFBXV6W9CN" ``` r unmarshal(ut) ``` ## ts rnd - ## 1 2017-11-01 15:00:00 VRVF68B8AP8VATS0 + ## 1 2017-11-01 15:00:00 02A5QRDFBXV6W9CN ## Package Code Metrics @@ -170,9 +174,9 @@ unmarshal(ut) cloc::cloc_pkg_md() ``` -| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | -| :----------- | -------: | --: | --: | ---: | ----------: | ---: | -------: | ---: | -| C/C++ Header | 3 | 0.3 | 763 | 0.87 | 238 | 0.78 | 302 | 0.60 | -| C++ | 2 | 0.2 | 87 | 0.10 | 22 | 0.07 | 37 | 0.07 | -| R | 4 | 0.4 | 15 | 0.02 | 7 | 0.02 | 94 | 0.19 | -| Rmd | 1 | 0.1 | 10 | 0.01 | 38 | 0.12 | 73 | 0.14 | +| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | +| :----------- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: | +| C/C++ Header | 3 | 0.27 | 763 | 0.86 | 238 | 0.72 | 302 | 0.53 | +| C++ | 2 | 0.18 | 87 | 0.10 | 22 | 0.07 | 37 | 0.07 | +| Rmd | 2 | 0.18 | 20 | 0.02 | 65 | 0.20 | 136 | 0.24 | +| R | 4 | 0.36 | 15 | 0.02 | 7 | 0.02 | 94 | 0.17 | diff --git a/vignettes/intro-to-ulid.Rmd b/vignettes/intro-to-ulid.Rmd index bcc7e87..121b782 100644 --- a/vignettes/intro-to-ulid.Rmd +++ b/vignettes/intro-to-ulid.Rmd @@ -14,6 +14,61 @@ knitr::opts_chunk$set( ) ``` +## ULID: Universally Unique Lexicographically Sortable Identifier + +UUID can be suboptimal for many use cases because: + +- It isn't the most character efficient way of encoding 128 bits of randomness +- UUID v1/v2 is impractical in many environments, as it requires access to a unique, stable MAC address +- UUID v3/v5 requires a unique seed and produces randomly distributed IDs, which can cause fragmentation in many data structures +- UUID v4 provides no other information than randomness which can cause fragmentation in many data structures + +Instead, herein is proposed ULID: + +```javascript +ulid() // 01ARZ3NDEKTSV4RRFFQ69G5FAV +``` + +- 128-bit compatibility with UUID +- 1.21e+24 unique ULIDs per 
millisecond +- Lexicographically sortable! +- Canonically encoded as a 26 character string, as opposed to the 36 character UUID +- Uses Crockford's base32 for better efficiency and readability (5 bits per character) +- Case insensitive +- No special characters (URL safe) +- Monotonic sort order (correctly detects and handles the same millisecond) + +``` + 01AN4Z07BY 79KA1307SR9X4MV3 + +|----------| |----------------| + Timestamp Randomness + 48bits 80bits +``` + +### Components + +**Timestamp** +- 48 bit integer +- UNIX-time in milliseconds +- Won't run out of space till the year 10889 AD. + +**Randomness** +- 80 bits +- Cryptographically secure source of randomness, if possible + +### Sorting + +The left-most character must be sorted first, and the right-most character sorted last (lexical order). The default ASCII character set must be used. Within the same millisecond, sort order is not guaranteed. + +## What's Inside The Tin + +The following functions are implemented: + +- `ULIDgenerate` / `generate` / `ulid_generate`: Generate a time-based ULID +- `ts_generate`: Generate ULID from timestamps +- `unmarshal`: Unmarshal a ULID into a data frame with timestamp and random bitstring columns + ```{r setup} library(ulid) ```