From 49db042160495165de30554ec5c57671958374c3 Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Fri, 28 Dec 2018 11:58:41 -0500 Subject: [PATCH] initial commit --- .Rbuildignore | 1 + DESCRIPTION | 29 ++- LICENSE | 2 + LICENSE.md | 21 ++ NAMESPACE | 5 +- R/RcppExports.R | 17 ++ R/ulid-package.R | 34 ++- README.Rmd | 40 +++ README.md | 78 ++++++ man/ULIDgenerate.Rd | 18 ++ man/ulid.Rd | 23 +- src/.gitignore | 3 + src/Makevars | 2 + src/RcppExports.cpp | 28 +++ src/ulid-main.cpp | 24 ++ src/ulid.h | 12 + src/ulid_struct.h | 699 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/ulid_uint128.h | 539 ++++++++++++++++++++++++++++++++++++++++ 18 files changed, 1561 insertions(+), 14 deletions(-) create mode 100644 LICENSE create mode 100644 LICENSE.md create mode 100644 R/RcppExports.R create mode 100644 man/ULIDgenerate.Rd create mode 100644 src/.gitignore create mode 100644 src/Makevars create mode 100644 src/RcppExports.cpp create mode 100644 src/ulid-main.cpp create mode 100644 src/ulid.h create mode 100644 src/ulid_struct.h create mode 100644 src/ulid_uint128.h diff --git a/.Rbuildignore b/.Rbuildignore index 451a4dd..c027b43 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,4 @@ +^LICENSE\.md$ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.travis\.yml$ diff --git a/DESCRIPTION b/DESCRIPTION index 145b56d..ae37773 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,24 +1,41 @@ Package: ulid Type: Package -Title: ulid title goes here otherwise CRAN checks fail +Title: Generate Universally Unique Lexicographically Sortable Identifier Version: 0.1.0 Date: 2018-12-28 Authors@R: c( person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), - comment = c(ORCID = "0000-0001-5670-2640")) + comment = c(ORCID = "0000-0001-5670-2640")), + person("suyash", role = c("aut"), + comment = "ULID C++ Port ") ) Maintainer: Bob Rudis -Description: A good description goes here otherwise CRAN checks fail. +Description: Universally unique identifiers ('UUIDs') can be suboptimal for many + uses-cases because they aren't the most character efficient way of encoding + 128 bits of randomness; v1/v2 versions are impractical in many environments, + as they require access to a unique, stable MAC address; v3/v5 versions require + a unique seed and produce randomly distributed IDs, which can cause fragmentation + in many data structures; v4 provides no other information than randomness which + can cause fragmentation in many data structures. 'ULIDs' () + have 128-bit compatibility with 'UUID', 1.21e+24 unique ULIDs per millisecond, + are lexicographically sortable, canonically encoded as a 26 character string, + as opposed to the 36 character 'UUID', use Crockford's 'base32' for better + efficiency and readability (5 bits per character), are case insensitive, + have no special characters (i.e. are 'URL' safe) and have a onotonic sort order + (correctly detects and handles the same millisecond). URL: https://gitlab.com/hrbrmstr/ulid BugReports: https://gitlab.com/hrbrmstr/ulid/issues +SystemRequirements: C++11 +NeedsCompilation: yes Encoding: UTF-8 -License: AGPL +License: MIT + file LICENSE Suggests: testthat, covr Depends: R (>= 3.2.0) Imports: - httr, - jsonlite + Rcpp RoxygenNote: 6.1.1 +LinkingTo: + Rcpp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e7949b3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2018 +COPYRIGHT HOLDER: Bob Rudis diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..e7946f8 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2018 Bob Rudis + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE index 5b4b9ae..99b1727 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,5 @@ # Generated by roxygen2: do not edit by hand -import(httr) -importFrom(jsonlite,fromJSON) +export(ULIDgenerate) +importFrom(Rcpp,sourceCpp) +useDynLib(ulid, .registration = TRUE) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..468cb0a --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,17 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' Generate ULID +#' +#' [ULIDgenerate()] generates a new []Universally Unique Lexicographically +#' Sortable Identifier](https://github.com/ulid/spec). +#' +#' @md +#' @param n number of id's to generate (default = `1`) +#' @export +#' @examples +#' ULIDgenerate() +ULIDgenerate <- function(n = 1L) { + .Call(`_ulid_ULIDgenerate`, n) +} + diff --git a/R/ulid-package.R b/R/ulid-package.R index 1b13386..ec2dc96 100644 --- a/R/ulid-package.R +++ b/R/ulid-package.R @@ -1,12 +1,36 @@ -#' ... -#' +#' Generate Universally Unique Lexicographically Sortable Identifier +#' +#' (grifted from ) +#' +#' UUID can be suboptimal for many uses-cases because: +#' +#' - It isn't the most character efficient way of encoding 128 bits of randomness +#' - UUID v1/v2 is impractical in many environments, as it requires access to a unique, stable MAC address +#' - UUID v3/v5 requires a unique seed and produces randomly distributed IDs, which can cause fragmentation in many data structures +#' - UUID v4 provides no other information than randomness which can cause fragmentation in many data structures +#' +#' Instead, herein is proposed ULID: +#' +#' ```javascript +#' ulid() // 01ARZ3NDEKTSV4RRFFQ69G5FAV +#' ``` +#' +#' - 128-bit compatibility with UUID +#' - 1.21e+24 unique ULIDs per millisecond +#' - Lexicographically sortable! +#' - Canonically encoded as a 26 character string, as opposed to the 36 character UUID +#' - Uses Crockford's base32 for better efficiency and readability (5 bits per character) +#' - Case insensitive +#' - No special characters (URL safe) +#' - Monotonic sort order (correctly detects and handles the same millisecond) +#' #' - URL: #' - BugReports: -#' +#' #' @md #' @name ulid #' @docType package #' @author Bob Rudis (bob@@rud.is) -#' @import httr -#' @importFrom jsonlite fromJSON +#' @useDynLib ulid, .registration = TRUE +#' @importFrom Rcpp sourceCpp NULL diff --git a/README.Rmd b/README.Rmd index da955ee..cf34af4 100644 --- a/README.Rmd +++ b/README.Rmd @@ -4,12 +4,40 @@ output: rmarkdown::github_document # ulid +Universally Unique Lexicographically Sortable Identifier + ## Description +(grifted from ) + +UUID can be suboptimal for many uses-cases because: + +- It isn't the most character efficient way of encoding 128 bits of randomness +- UUID v1/v2 is impractical in many environments, as it requires access to a unique, stable MAC address +- UUID v3/v5 requires a unique seed and produces randomly distributed IDs, which can cause fragmentation in many data structures +- UUID v4 provides no other information than randomness which can cause fragmentation in many data structures + +Instead, herein is proposed ULID: + +```javascript +ulid() // 01ARZ3NDEKTSV4RRFFQ69G5FAV +``` + +- 128-bit compatibility with UUID +- 1.21e+24 unique ULIDs per millisecond +- Lexicographically sortable! +- Canonically encoded as a 26 character string, as opposed to the 36 character UUID +- Uses Crockford's base32 for better efficiency and readability (5 bits per character) +- Case insensitive +- No special characters (URL safe) +- Monotonic sort order (correctly detects and handles the same millisecond) + ## What's Inside The Tin The following functions are implemented: +- `ULIDgenerate`: Generate a time-based ULID + ## Installation ```{r eval=FALSE} @@ -30,3 +58,15 @@ packageVersion("ulid") ``` +### One + +```{r} +ulid::ULIDgenerate() +``` + +### Many + +```{r} +ulid::ULIDgenerate(20) + +``` \ No newline at end of file diff --git a/README.md b/README.md index 4c653dd..c735fe5 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,80 @@ + # ulid +Universally Unique Lexicographically Sortable Identifier + +## Description + +(grifted from ) + +UUID can be suboptimal for many uses-cases because: + + - It isn’t the most character efficient way of encoding 128 bits of + randomness + - UUID v1/v2 is impractical in many environments, as it requires + access to a unique, stable MAC address + - UUID v3/v5 requires a unique seed and produces randomly distributed + IDs, which can cause fragmentation in many data structures + - UUID v4 provides no other information than randomness which can + cause fragmentation in many data structures + +Instead, herein is proposed ULID: + +``` javascript +ulid() // 01ARZ3NDEKTSV4RRFFQ69G5FAV +``` + + - 128-bit compatibility with UUID + - 1.21e+24 unique ULIDs per millisecond + - Lexicographically sortable\! + - Canonically encoded as a 26 character string, as opposed to the 36 + character UUID + - Uses Crockford’s base32 for better efficiency and readability (5 + bits per character) + - Case insensitive + - No special characters (URL safe) + - Monotonic sort order (correctly detects and handles the same + millisecond) + +## What’s Inside The Tin + +The following functions are implemented: + + - `ULIDgenerate`: Generate a time-based ULID + +## Installation + +``` r +devtools::install_github("hrbrmstr/ulid") +``` + +## Usage + +``` r +library(ulid) + +# current verison +packageVersion("ulid") +``` + + ## [1] '0.1.0' + +### One + +``` r +ulid::ULIDgenerate() +``` + + ## [1] "0001E2CNHEB71VBCP6Y1M3RAGR" + +### Many + +``` r +ulid::ULIDgenerate(20) +``` + + ## [1] "0001E2CNHEVGN75257V2TZBSBD" "0001E2CNHE7WRWS5HB52XHCK0X" "0001E2CNHEATMJ0TNJK0D7R9NV" "0001E2CNHE3GS9WDYF9V45YRVG" + ## [5] "0001E2CNHEJ4NZPP9TB4K53Z8B" "0001E2CNHE383ZMKDRPNR8N9B0" "0001E2CNHEYPRFKC6BBXXGZ8TK" "0001E2CNHEQK1M8FZKDGPG32TS" + ## [9] "0001E2CNHECVSY4VD69NMT8JNK" "0001E2CNHEEVPNZTXZ9QA66DRK" "0001E2CNHESEDDC41ETPJJWN8C" "0001E2CNHEZ4D0D9XGAV2AK5TA" + ## [13] "0001E2CNHEZF13W414M0AKYKX8" "0001E2CNHEVKZ4WQZTFH4FBX4D" "0001E2CNHE69545C9DN9A29EP8" "0001E2CNHEJMDHYHFW3FBTZG5X" + ## [17] "0001E2CNHEN9EZ4HHZCS3NQ9C0" "0001E2CNHEWR1SWBEPG69PDHXN" "0001E2CNHEVEPE5P2K5DNHB6CT" "0001E2CNHE8WX898BTPG3EJMQN" diff --git a/man/ULIDgenerate.Rd b/man/ULIDgenerate.Rd new file mode 100644 index 0000000..5b89246 --- /dev/null +++ b/man/ULIDgenerate.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{ULIDgenerate} +\alias{ULIDgenerate} +\title{Generate ULID} +\usage{ +ULIDgenerate(n = 1L) +} +\arguments{ +\item{n}{number of id's to generate (default = \code{1})} +} +\description{ +\code{\link[=ULIDgenerate]{ULIDgenerate()}} generates a new []Universally Unique Lexicographically +Sortable Identifier](https://github.com/ulid/spec). +} +\examples{ +ULIDgenerate() +} diff --git a/man/ulid.Rd b/man/ulid.Rd index c8ec41c..a5f156f 100644 --- a/man/ulid.Rd +++ b/man/ulid.Rd @@ -4,9 +4,30 @@ \name{ulid} \alias{ulid} \alias{ulid-package} -\title{...} +\title{Generate Universally Unique Lexicographically Sortable Identifier} \description{ +(grifted from \url{https://github.com/ulid/spec}) +} +\details{ +UUID can be suboptimal for many uses-cases because: +\itemize{ +\item It isn't the most character efficient way of encoding 128 bits of randomness +\item UUID v1/v2 is impractical in many environments, as it requires access to a unique, stable MAC address +\item UUID v3/v5 requires a unique seed and produces randomly distributed IDs, which can cause fragmentation in many data structures +\item UUID v4 provides no other information than randomness which can cause fragmentation in many data structures +} + +Instead, herein is proposed ULID:\preformatted{ulid() // 01ARZ3NDEKTSV4RRFFQ69G5FAV +} \itemize{ +\item 128-bit compatibility with UUID +\item 1.21e+24 unique ULIDs per millisecond +\item Lexicographically sortable! +\item Canonically encoded as a 26 character string, as opposed to the 36 character UUID +\item Uses Crockford's base32 for better efficiency and readability (5 bits per character) +\item Case insensitive +\item No special characters (URL safe) +\item Monotonic sort order (correctly detects and handles the same millisecond) \item URL: \url{https://gitlab.com/hrbrmstr/ulid} \item BugReports: \url{https://gitlab.com/hrbrmstr/ulid/issues} } diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..22034c4 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..e4e2cca --- /dev/null +++ b/src/Makevars @@ -0,0 +1,2 @@ +CXX_STD = CXX11 +PKG_LIBS = -L. -lz -lpthread -pthread -std=c++11 diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..5ce21d8 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,28 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// ULIDgenerate +CharacterVector ULIDgenerate(long n); +RcppExport SEXP _ulid_ULIDgenerate(SEXP nSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< long >::type n(nSEXP); + rcpp_result_gen = Rcpp::wrap(ULIDgenerate(n)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_ulid_ULIDgenerate", (DL_FUNC) &_ulid_ULIDgenerate, 1}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_ulid(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/ulid-main.cpp b/src/ulid-main.cpp new file mode 100644 index 0000000..7cd84e3 --- /dev/null +++ b/src/ulid-main.cpp @@ -0,0 +1,24 @@ +#include + +#include "ulid.h" + +using namespace Rcpp; + +//' Generate ULID +//' +//' [ULIDgenerate()] generates a new []Universally Unique Lexicographically +//' Sortable Identifier](https://github.com/ulid/spec). +//' +//' @md +//' @param n number of id's to generate (default = `1`) +//' @export +//' @examples +//' ULIDgenerate() +// [[Rcpp::export]] +CharacterVector ULIDgenerate(long n=1) { + CharacterVector c(n); + for (long i=0; i<=(n-1); i++) { + c[i] = ulid::Marshal(ulid::CreateNowRand()); + } + return(c); +} diff --git a/src/ulid.h b/src/ulid.h new file mode 100644 index 0000000..d07be99 --- /dev/null +++ b/src/ulid.h @@ -0,0 +1,12 @@ +#pragma once + +// http://stackoverflow.com/a/23981011 +#ifdef __SIZEOF_INT128__ +#define ULIDUINT128 +#endif + +#ifdef ULIDUINT128 +#include "ulid_uint128.h" +#else +#include "ulid_struct.h" +#endif // ULIDUINT128 diff --git a/src/ulid_struct.h b/src/ulid_struct.h new file mode 100644 index 0000000..2f53a2d --- /dev/null +++ b/src/ulid_struct.h @@ -0,0 +1,699 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace ulid { + +/** + * ULID is a 16 byte Universally Unique Lexicographically Sortable Identifier + * */ +struct ULID { + uint8_t data[16]; + + ULID() { + // for (int i = 0 ; i < 16 ; i++) { + // data[i] = 0; + // } + + // unrolled loop + data[0] = 0; + data[1] = 0; + data[2] = 0; + data[3] = 0; + data[4] = 0; + data[5] = 0; + data[6] = 0; + data[7] = 0; + data[8] = 0; + data[9] = 0; + data[10] = 0; + data[11] = 0; + data[12] = 0; + data[13] = 0; + data[14] = 0; + data[15] = 0; + } + + ULID(uint64_t val) { + // for (int i = 0 ; i < 16 ; i++) { + // data[15 - i] = static_cast(val); + // val >>= 8; + // } + + // unrolled loop + data[15] = static_cast(val); + + val >>= 8; + data[14] = static_cast(val); + + val >>= 8; + data[13] = static_cast(val); + + val >>= 8; + data[12] = static_cast(val); + + val >>= 8; + data[11] = static_cast(val); + + val >>= 8; + data[10] = static_cast(val); + + val >>= 8; + data[9] = static_cast(val); + + val >>= 8; + data[8] = static_cast(val); + + data[7] = 0; + data[6] = 0; + data[5] = 0; + data[4] = 0; + data[3] = 0; + data[2] = 0; + data[1] = 0; + data[0] = 0; + } + + ULID(const ULID& other) { + // for (int i = 0 ; i < 16 ; i++) { + // data[i] = other.data[i]; + // } + + // unrolled loop + data[0] = other.data[0]; + data[1] = other.data[1]; + data[2] = other.data[2]; + data[3] = other.data[3]; + data[4] = other.data[4]; + data[5] = other.data[5]; + data[6] = other.data[6]; + data[7] = other.data[7]; + data[8] = other.data[8]; + data[9] = other.data[9]; + data[10] = other.data[10]; + data[11] = other.data[11]; + data[12] = other.data[12]; + data[13] = other.data[13]; + data[14] = other.data[14]; + data[15] = other.data[15]; + } + + ULID& operator=(const ULID& other) { + // for (int i = 0 ; i < 16 ; i++) { + // data[i] = other.data[i]; + // } + + // unrolled loop + data[0] = other.data[0]; + data[1] = other.data[1]; + data[2] = other.data[2]; + data[3] = other.data[3]; + data[4] = other.data[4]; + data[5] = other.data[5]; + data[6] = other.data[6]; + data[7] = other.data[7]; + data[8] = other.data[8]; + data[9] = other.data[9]; + data[10] = other.data[10]; + data[11] = other.data[11]; + data[12] = other.data[12]; + data[13] = other.data[13]; + data[14] = other.data[14]; + data[15] = other.data[15]; + + return *this; + } + + ULID(ULID&& other) { + // for (int i = 0 ; i < 16 ; i++) { + // data[i] = other.data[i]; + // other.data[i] = 0; + // } + + // unrolled loop + data[0] = other.data[0]; + other.data[0] = 0; + + data[1] = other.data[1]; + other.data[1] = 0; + + data[2] = other.data[2]; + other.data[2] = 0; + + data[3] = other.data[3]; + other.data[3] = 0; + + data[4] = other.data[4]; + other.data[4] = 0; + + data[5] = other.data[5]; + other.data[5] = 0; + + data[6] = other.data[6]; + other.data[6] = 0; + + data[7] = other.data[7]; + other.data[7] = 0; + + data[8] = other.data[8]; + other.data[8] = 0; + + data[9] = other.data[9]; + other.data[9] = 0; + + data[10] = other.data[10]; + other.data[10] = 0; + + data[11] = other.data[11]; + other.data[11] = 0; + + data[12] = other.data[12]; + other.data[12] = 0; + + data[13] = other.data[13]; + other.data[13] = 0; + + data[14] = other.data[14]; + other.data[14] = 0; + + data[15] = other.data[15]; + other.data[15] = 0; + } + + ULID& operator=(ULID&& other) { + // for (int i = 0 ; i < 16 ; i++) { + // data[i] = other.data[i]; + // other.data[i] = 0; + // } + + // unrolled loop + data[0] = other.data[0]; + other.data[0] = 0; + + data[1] = other.data[1]; + other.data[1] = 0; + + data[2] = other.data[2]; + other.data[2] = 0; + + data[3] = other.data[3]; + other.data[3] = 0; + + data[4] = other.data[4]; + other.data[4] = 0; + + data[5] = other.data[5]; + other.data[5] = 0; + + data[6] = other.data[6]; + other.data[6] = 0; + + data[7] = other.data[7]; + other.data[7] = 0; + + data[8] = other.data[8]; + other.data[8] = 0; + + data[9] = other.data[9]; + other.data[9] = 0; + + data[10] = other.data[10]; + other.data[10] = 0; + + data[11] = other.data[11]; + other.data[11] = 0; + + data[12] = other.data[12]; + other.data[12] = 0; + + data[13] = other.data[13]; + other.data[13] = 0; + + data[14] = other.data[14]; + other.data[14] = 0; + + data[15] = other.data[15]; + other.data[15] = 0; + + return *this; + } +}; + +/** + * EncodeTime will encode the first 6 bytes of a uint8_t array to the passed + * timestamp + * */ +void EncodeTime(time_t timestamp, ULID& ulid) { + ulid.data[0] = static_cast(timestamp >> 40); + ulid.data[1] = static_cast(timestamp >> 32); + ulid.data[2] = static_cast(timestamp >> 24); + ulid.data[3] = static_cast(timestamp >> 16); + ulid.data[4] = static_cast(timestamp >> 8); + ulid.data[5] = static_cast(timestamp); +} + +/** + * EncodeTimeNow will encode a ULID using the time obtained using std::time(nullptr) + * */ +void EncodeTimeNow(ULID& ulid) { + EncodeTime(std::time(nullptr), ulid); +} + +/** + * EncodeTimeSystemClockNow will encode a ULID using the time obtained using + * std::chrono::system_clock::now() by taking the timestamp in milliseconds. + * */ +void EncodeTimeSystemClockNow(ULID& ulid) { + auto now = std::chrono::system_clock::now(); + auto ms = std::chrono::duration_cast(now.time_since_epoch()); + EncodeTime(ms.count(), ulid); +} + +/** + * EncodeEntropy will encode the last 10 bytes of the passed uint8_t array with + * the values generated using the passed random number generator. + * */ +void EncodeEntropy(const std::function& rng, ULID& ulid) { + ulid.data[6] = rng(); + ulid.data[7] = rng(); + ulid.data[8] = rng(); + ulid.data[9] = rng(); + ulid.data[10] = rng(); + ulid.data[11] = rng(); + ulid.data[12] = rng(); + ulid.data[13] = rng(); + ulid.data[14] = rng(); + ulid.data[15] = rng(); +} + +/** + * EncodeEntropyRand will encode a ulid using std::rand + * + * std::rand returns values in [0, RAND_MAX] + * */ +void EncodeEntropyRand(ULID& ulid) { + ulid.data[6] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[7] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[8] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[9] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[10] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[11] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[12] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[13] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[14] = (std::rand() * 255ull) / RAND_MAX; + ulid.data[15] = (std::rand() * 255ull) / RAND_MAX; +} + +std::uniform_int_distribution Distribution_0_255(0, 255); + +/** + * EncodeEntropyMt19937 will encode a ulid using std::mt19937 + * + * It also creates a std::uniform_int_distribution to generate values in [0, 255] + * */ +void EncodeEntropyMt19937(std::mt19937& generator, ULID& ulid) { + ulid.data[6] = Distribution_0_255(generator); + ulid.data[7] = Distribution_0_255(generator); + ulid.data[8] = Distribution_0_255(generator); + ulid.data[9] = Distribution_0_255(generator); + ulid.data[10] = Distribution_0_255(generator); + ulid.data[11] = Distribution_0_255(generator); + ulid.data[12] = Distribution_0_255(generator); + ulid.data[13] = Distribution_0_255(generator); + ulid.data[14] = Distribution_0_255(generator); + ulid.data[15] = Distribution_0_255(generator); +} + +/** + * Encode will create an encoded ULID with a timestamp and a generator. + * */ +void Encode(time_t timestamp, const std::function& rng, ULID& ulid) { + EncodeTime(timestamp, ulid); + EncodeEntropy(rng, ulid); +} + +/** + * EncodeNowRand = EncodeTimeNow + EncodeEntropyRand. + * */ +void EncodeNowRand(ULID& ulid) { + EncodeTimeNow(ulid); + EncodeEntropyRand(ulid); +} + +/** + * Create will create a ULID with a timestamp and a generator. + * */ +ULID Create(time_t timestamp, const std::function& rng) { + ULID ulid; + Encode(timestamp, rng, ulid); + return ulid; +} + +/** + * CreateNowRand:EncodeNowRand = Create:Encode. + * */ +ULID CreateNowRand() { + ULID ulid; + EncodeNowRand(ulid); + return ulid; +} + +/** + * Crockford's Base32 + * */ +const char Encoding[33] = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"; + +/** + * MarshalTo will marshal a ULID to the passed character array. + * + * Implementation taken directly from oklog/ulid + * (https://sourcegraph.com/github.com/oklog/ulid@0774f81f6e44af5ce5e91c8d7d76cf710e889ebb/-/blob/ulid.go#L162-190) + * + * timestamp:
+ * dst[0]: first 3 bits of data[0]
+ * dst[1]: last 5 bits of data[0]
+ * dst[2]: first 5 bits of data[1]
+ * dst[3]: last 3 bits of data[1] + first 2 bits of data[2]
+ * dst[4]: bits 3-7 of data[2]
+ * dst[5]: last bit of data[2] + first 4 bits of data[3]
+ * dst[6]: last 4 bits of data[3] + first bit of data[4]
+ * dst[7]: bits 2-6 of data[4]
+ * dst[8]: last 2 bits of data[4] + first 3 bits of data[5]
+ * dst[9]: last 5 bits of data[5]
+ * + * entropy: + * follows similarly, except now all components are set to 5 bits. + * */ +void MarshalTo(const ULID& ulid, char dst[26]) { + // 10 byte timestamp + dst[0] = Encoding[(ulid.data[0] & 224) >> 5]; + dst[1] = Encoding[ulid.data[0] & 31]; + dst[2] = Encoding[(ulid.data[1] & 248) >> 3]; + dst[3] = Encoding[((ulid.data[1] & 7) << 2) | ((ulid.data[2] & 192) >> 6)]; + dst[4] = Encoding[(ulid.data[2] & 62) >> 1]; + dst[5] = Encoding[((ulid.data[2] & 1) << 4) | ((ulid.data[3] & 240) >> 4)]; + dst[6] = Encoding[((ulid.data[3] & 15) << 1) | ((ulid.data[4] & 128) >> 7)]; + dst[7] = Encoding[(ulid.data[4] & 124) >> 2]; + dst[8] = Encoding[((ulid.data[4] & 3) << 3) | ((ulid.data[5] & 224) >> 5)]; + dst[9] = Encoding[ulid.data[5] & 31]; + + // 16 bytes of entropy + dst[10] = Encoding[(ulid.data[6] & 248) >> 3]; + dst[11] = Encoding[((ulid.data[6] & 7) << 2) | ((ulid.data[7] & 192) >> 6)]; + dst[12] = Encoding[(ulid.data[7] & 62) >> 1]; + dst[13] = Encoding[((ulid.data[7] & 1) << 4) | ((ulid.data[8] & 240) >> 4)]; + dst[14] = Encoding[((ulid.data[8] & 15) << 1) | ((ulid.data[9] & 128) >> 7)]; + dst[15] = Encoding[(ulid.data[9] & 124) >> 2]; + dst[16] = Encoding[((ulid.data[9] & 3) << 3) | ((ulid.data[10] & 224) >> 5)]; + dst[17] = Encoding[ulid.data[10] & 31]; + dst[18] = Encoding[(ulid.data[11] & 248) >> 3]; + dst[19] = Encoding[((ulid.data[11] & 7) << 2) | ((ulid.data[12] & 192) >> 6)]; + dst[20] = Encoding[(ulid.data[12] & 62) >> 1]; + dst[21] = Encoding[((ulid.data[12] & 1) << 4) | ((ulid.data[13] & 240) >> 4)]; + dst[22] = Encoding[((ulid.data[13] & 15) << 1) | ((ulid.data[14] & 128) >> 7)]; + dst[23] = Encoding[(ulid.data[14] & 124) >> 2]; + dst[24] = Encoding[((ulid.data[14] & 3) << 3) | ((ulid.data[15] & 224) >> 5)]; + dst[25] = Encoding[ulid.data[15] & 31]; +} + +/** + * Marshal will marshal a ULID to a std::string. + * */ +std::string Marshal(const ULID& ulid) { + char data[27]; + data[26] = '\0'; + MarshalTo(ulid, data); + return std::string(data); +} + +/** + * MarshalBinaryTo will Marshal a ULID to the passed byte array + * */ +void MarshalBinaryTo(const ULID& ulid, uint8_t dst[16]) { + // timestamp + dst[0] = ulid.data[0]; + dst[1] = ulid.data[1]; + dst[2] = ulid.data[2]; + dst[3] = ulid.data[3]; + dst[4] = ulid.data[4]; + dst[5] = ulid.data[5]; + + // entropy + dst[6] = ulid.data[6]; + dst[7] = ulid.data[7]; + dst[8] = ulid.data[8]; + dst[9] = ulid.data[9]; + dst[10] = ulid.data[10]; + dst[11] = ulid.data[11]; + dst[12] = ulid.data[12]; + dst[13] = ulid.data[13]; + dst[14] = ulid.data[14]; + dst[15] = ulid.data[15]; +} + +/** + * MarshalBinary will Marshal a ULID to a byte vector. + * */ +std::vector MarshalBinary(const ULID& ulid) { + std::vector dst(16); + MarshalBinaryTo(ulid, dst.data()); + return dst; +} + +/** + * dec storesdecimal encodings for characters. + * 0xFF indicates invalid character. + * 48-57 are digits. + * 65-90 are capital alphabets. + * */ +const uint8_t dec[256] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + /* 0 1 2 3 4 5 6 7 */ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + /* 8 9 */ + 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + /* 10(A) 11(B) 12(C) 13(D) 14(E) 15(F) 16(G) */ + 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + /*17(H) 18(J) 19(K) 20(M) 21(N) */ + 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, 0x15, 0xFF, + /*22(P)23(Q)24(R) 25(S) 26(T) 27(V) 28(W) */ + 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, + /*29(X)30(Y)31(Z) */ + 0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/** + * UnmarshalFrom will unmarshal a ULID from the passed character array. + * */ +void UnmarshalFrom(const char str[26], ULID& ulid) { + // timestamp + ulid.data[0] = (dec[int(str[0])] << 5) | dec[int(str[1])]; + ulid.data[1] = (dec[int(str[2])] << 3) | (dec[int(str[3])] >> 2); + ulid.data[2] = (dec[int(str[3])] << 6) | (dec[int(str[4])] << 1) | (dec[int(str[5])] >> 4); + ulid.data[3] = (dec[int(str[5])] << 4) | (dec[int(str[6])] >> 1); + ulid.data[4] = (dec[int(str[6])] << 7) | (dec[int(str[7])] << 2) | (dec[int(str[8])] >> 3); + ulid.data[5] = (dec[int(str[8])] << 5) | dec[int(str[9])]; + + // entropy + ulid.data[6] = (dec[int(str[10])] << 3) | (dec[int(str[11])] >> 2); + ulid.data[7] = (dec[int(str[11])] << 6) | (dec[int(str[12])] << 1) | (dec[int(str[13])] >> 4); + ulid.data[8] = (dec[int(str[13])] << 4) | (dec[int(str[14])] >> 1); + ulid.data[9] = (dec[int(str[14])] << 7) | (dec[int(str[15])] << 2) | (dec[int(str[16])] >> 3); + ulid.data[10] = (dec[int(str[16])] << 5) | dec[int(str[17])]; + ulid.data[11] = (dec[int(str[18])] << 3) | (dec[int(str[19])] >> 2); + ulid.data[12] = (dec[int(str[19])] << 6) | (dec[int(str[20])] << 1) | (dec[int(str[21])] >> 4); + ulid.data[13] = (dec[int(str[21])] << 4) | (dec[int(str[22])] >> 1); + ulid.data[14] = (dec[int(str[22])] << 7) | (dec[int(str[23])] << 2) | (dec[int(str[24])] >> 3); + ulid.data[15] = (dec[int(str[24])] << 5) | dec[int(str[25])]; +} + +/** + * Unmarshal will create a new ULID by unmarshaling the passed string. + * */ +ULID Unmarshal(const std::string& str) { + ULID ulid; + UnmarshalFrom(str.c_str(), ulid); + return ulid; +} + +/** + * UnmarshalBinaryFrom will unmarshal a ULID from the passed byte array. + * */ +void UnmarshalBinaryFrom(const uint8_t b[16], ULID& ulid) { + // timestamp + ulid.data[0] = b[0]; + ulid.data[1] = b[1]; + ulid.data[2] = b[2]; + ulid.data[3] = b[3]; + ulid.data[4] = b[4]; + ulid.data[5] = b[5]; + + // entropy + ulid.data[6] = b[6]; + ulid.data[7] = b[7]; + ulid.data[8] = b[8]; + ulid.data[9] = b[9]; + ulid.data[10] = b[10]; + ulid.data[11] = b[11]; + ulid.data[12] = b[12]; + ulid.data[13] = b[13]; + ulid.data[14] = b[14]; + ulid.data[15] = b[15]; +} + +/** + * Unmarshal will create a new ULID by unmarshaling the passed byte vector. + * */ +ULID UnmarshalBinary(const std::vector& b) { + ULID ulid; + UnmarshalBinaryFrom(b.data(), ulid); + return ulid; +} + +/** + * CompareULIDs will compare two ULIDs. + * returns: + * -1 if ulid1 is Lexicographically before ulid2 + * 1 if ulid1 is Lexicographically after ulid2 + * 0 if ulid1 is same as ulid2 + * */ +int CompareULIDs(const ULID& ulid1, const ULID& ulid2) { + // for (int i = 0 ; i < 16 ; i++) { + // if (ulid1.data[i] != ulid2.data[i]) { + // return (ulid1.data[i] < ulid2.data[i]) * -2 + 1; + // } + // } + + // unrolled loop + + if (ulid1.data[0] != ulid2.data[0]) { + return (ulid1.data[0] < ulid2.data[0]) * -2 + 1; + } + + if (ulid1.data[1] != ulid2.data[1]) { + return (ulid1.data[1] < ulid2.data[1]) * -2 + 1; + } + + if (ulid1.data[2] != ulid2.data[2]) { + return (ulid1.data[2] < ulid2.data[2]) * -2 + 1; + } + + if (ulid1.data[3] != ulid2.data[3]) { + return (ulid1.data[3] < ulid2.data[3]) * -2 + 1; + } + + if (ulid1.data[4] != ulid2.data[4]) { + return (ulid1.data[4] < ulid2.data[4]) * -2 + 1; + } + + if (ulid1.data[5] != ulid2.data[5]) { + return (ulid1.data[5] < ulid2.data[5]) * -2 + 1; + } + + if (ulid1.data[6] != ulid2.data[6]) { + return (ulid1.data[6] < ulid2.data[6]) * -2 + 1; + } + + if (ulid1.data[7] != ulid2.data[7]) { + return (ulid1.data[7] < ulid2.data[7]) * -2 + 1; + } + + if (ulid1.data[8] != ulid2.data[8]) { + return (ulid1.data[8] < ulid2.data[8]) * -2 + 1; + } + + if (ulid1.data[9] != ulid2.data[9]) { + return (ulid1.data[9] < ulid2.data[9]) * -2 + 1; + } + + if (ulid1.data[10] != ulid2.data[10]) { + return (ulid1.data[10] < ulid2.data[10]) * -2 + 1; + } + + if (ulid1.data[11] != ulid2.data[11]) { + return (ulid1.data[11] < ulid2.data[11]) * -2 + 1; + } + + if (ulid1.data[12] != ulid2.data[12]) { + return (ulid1.data[12] < ulid2.data[12]) * -2 + 1; + } + + if (ulid1.data[13] != ulid2.data[13]) { + return (ulid1.data[13] < ulid2.data[13]) * -2 + 1; + } + + if (ulid1.data[14] != ulid2.data[14]) { + return (ulid1.data[14] < ulid2.data[14]) * -2 + 1; + } + + if (ulid1.data[15] != ulid2.data[15]) { + return (ulid1.data[15] < ulid2.data[15]) * -2 + 1; + } + + return 0; +} + +/** + * Time will extract the timestamp used to generate a ULID + * */ +time_t Time(const ULID& ulid) { + time_t ans = 0; + + ans |= ulid.data[0]; + + ans <<= 8; + ans |= ulid.data[1]; + + ans <<= 8; + ans |= ulid.data[2]; + + ans <<= 8; + ans |= ulid.data[3]; + + ans <<= 8; + ans |= ulid.data[4]; + + ans <<= 8; + ans |= ulid.data[5]; + + return ans; +} + +}; // namespace ulid diff --git a/src/ulid_uint128.h b/src/ulid_uint128.h new file mode 100644 index 0000000..d912994 --- /dev/null +++ b/src/ulid_uint128.h @@ -0,0 +1,539 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace ulid { + +/** + * ULID is a 16 byte Universally Unique Lexicographically Sortable Identifier + * */ +typedef __uint128_t ULID; + +/** + * EncodeTime will encode the first 6 bytes of a uint8_t array to the passed + * timestamp + * */ +void EncodeTime(time_t timestamp, ULID& ulid) { + ULID t = static_cast(timestamp >> 40); + + t <<= 8; + t |= static_cast(timestamp >> 32); + + t <<= 8; + t |= static_cast(timestamp >> 24); + + t <<= 8; + t |= static_cast(timestamp >> 16); + + t <<= 8; + t |= static_cast(timestamp >> 8); + + t <<= 8; + t |= static_cast(timestamp); + + t <<= 80; + + ULID mask = 1; + mask <<= 80; + mask--; + + ulid = t | (ulid & mask); +} + +/** + * EncodeTimeNow will encode a ULID using the time obtained using std::time(nullptr) + * */ +void EncodeTimeNow(ULID& ulid) { + EncodeTime(std::time(nullptr), ulid); +} + +/** + * EncodeTimeSystemClockNow will encode a ULID using the time obtained using + * std::chrono::system_clock::now() by taking the timestamp in milliseconds. + * */ +void EncodeTimeSystemClockNow(ULID& ulid) { + auto now = std::chrono::system_clock::now(); + auto ms = std::chrono::duration_cast(now.time_since_epoch()); + EncodeTime(ms.count(), ulid); +} + +/** + * EncodeEntropy will encode the last 10 bytes of the passed uint8_t array with + * the values generated using the passed random number generator. + * */ +void EncodeEntropy(const std::function& rng, ULID& ulid) { + ulid = (ulid >> 80) << 80; + + ULID e = rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + e <<= 8; + e |= rng(); + + ulid |= e; +} + +/** + * EncodeEntropyRand will encode a ulid using std::rand + * + * std::rand returns values in [0, RAND_MAX] + * */ +void EncodeEntropyRand(ULID& ulid) { + ulid = (ulid >> 80) << 80; + + ULID e = (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + e <<= 8; + e |= (std::rand() * 255ull) / RAND_MAX; + + ulid |= e; +} + +std::uniform_int_distribution Distribution_0_255(0, 255); + +/** + * EncodeEntropyMt19937 will encode a ulid using std::mt19937 + * + * It also creates a std::uniform_int_distribution to generate values in [0, 255] + * */ +void EncodeEntropyMt19937(std::mt19937& generator, ULID& ulid) { + ulid = (ulid >> 80) << 80; + + ULID e = Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + e <<= 8; + e |= Distribution_0_255(generator); + + ulid |= e; +} + +/** + * Encode will create an encoded ULID with a timestamp and a generator. + * */ +void Encode(time_t timestamp, const std::function& rng, ULID& ulid) { + EncodeTime(timestamp, ulid); + EncodeEntropy(rng, ulid); +} + +/** + * EncodeNowRand = EncodeTimeNow + EncodeEntropyRand. + * */ +void EncodeNowRand(ULID& ulid) { + EncodeTimeNow(ulid); + EncodeEntropyRand(ulid); +} + +/** + * Create will create a ULID with a timestamp and a generator. + * */ +ULID Create(time_t timestamp, const std::function& rng) { + ULID ulid = 0; + Encode(timestamp, rng, ulid); + return ulid; +} + +/** + * CreateNowRand:EncodeNowRand = Create:Encode. + * */ +ULID CreateNowRand() { + ULID ulid = 0; + EncodeNowRand(ulid); + return ulid; +} + +/** + * Crockford's Base32 + * */ +const char Encoding[33] = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"; + +/** + * MarshalTo will marshal a ULID to the passed character array. + * + * Implementation taken directly from oklog/ulid + * (https://sourcegraph.com/github.com/oklog/ulid@0774f81f6e44af5ce5e91c8d7d76cf710e889ebb/-/blob/ulid.go#L162-190) + * + * timestamp: + * dst[0]: first 3 bits of data[0] + * dst[1]: last 5 bits of data[0] + * dst[2]: first 5 bits of data[1] + * dst[3]: last 3 bits of data[1] + first 2 bits of data[2] + * dst[4]: bits 3-7 of data[2] + * dst[5]: last bit of data[2] + first 4 bits of data[3] + * dst[6]: last 4 bits of data[3] + first bit of data[4] + * dst[7]: bits 2-6 of data[4] + * dst[8]: last 2 bits of data[4] + first 3 bits of data[5] + * dst[9]: last 5 bits of data[5] + * + * entropy: + * follows similarly, except now all components are set to 5 bits. + * */ +void MarshalTo(const ULID& ulid, char dst[26]) { + // 10 byte timestamp + dst[0] = Encoding[(static_cast(ulid >> 120) & 224) >> 5]; + dst[1] = Encoding[static_cast(ulid >> 120) & 31]; + dst[2] = Encoding[(static_cast(ulid >> 112) & 248) >> 3]; + dst[3] = Encoding[((static_cast(ulid >> 112) & 7) << 2) | ((static_cast(ulid >> 104) & 192) >> 6)]; + dst[4] = Encoding[(static_cast(ulid >> 104) & 62) >> 1]; + dst[5] = Encoding[((static_cast(ulid >> 104) & 1) << 4) | ((static_cast(ulid >> 96) & 240) >> 4)]; + dst[6] = Encoding[((static_cast(ulid >> 96) & 15) << 1) | ((static_cast(ulid >> 88) & 128) >> 7)]; + dst[7] = Encoding[(static_cast(ulid >> 88) & 124) >> 2]; + dst[8] = Encoding[((static_cast(ulid >> 88) & 3) << 3) | ((static_cast(ulid >> 80) & 224) >> 5)]; + dst[9] = Encoding[static_cast(ulid >> 80) & 31]; + + // 16 bytes of entropy + dst[10] = Encoding[(static_cast(ulid >> 72) & 248) >> 3]; + dst[11] = Encoding[((static_cast(ulid >> 72) & 7) << 2) | ((static_cast(ulid >> 64) & 192) >> 6)]; + dst[12] = Encoding[(static_cast(ulid >> 64) & 62) >> 1]; + dst[13] = Encoding[((static_cast(ulid >> 64) & 1) << 4) | ((static_cast(ulid >> 56) & 240) >> 4)]; + dst[14] = Encoding[((static_cast(ulid >> 56) & 15) << 1) | ((static_cast(ulid >> 48) & 128) >> 7)]; + dst[15] = Encoding[(static_cast(ulid >> 48) & 124) >> 2]; + dst[16] = Encoding[((static_cast(ulid >> 48) & 3) << 3) | ((static_cast(ulid >> 40) & 224) >> 5)]; + dst[17] = Encoding[static_cast(ulid >> 40) & 31]; + dst[18] = Encoding[(static_cast(ulid >> 32) & 248) >> 3]; + dst[19] = Encoding[((static_cast(ulid >> 32) & 7) << 2) | ((static_cast(ulid >> 24) & 192) >> 6)]; + dst[20] = Encoding[(static_cast(ulid >> 24) & 62) >> 1]; + dst[21] = Encoding[((static_cast(ulid >> 24) & 1) << 4) | ((static_cast(ulid >> 16) & 240) >> 4)]; + dst[22] = Encoding[((static_cast(ulid >> 16) & 15) << 1) | ((static_cast(ulid >> 8) & 128) >> 7)]; + dst[23] = Encoding[(static_cast(ulid >> 8) & 124) >> 2]; + dst[24] = Encoding[((static_cast(ulid >> 8) & 3) << 3) | (((static_cast(ulid)) & 224) >> 5)]; + dst[25] = Encoding[(static_cast(ulid)) & 31]; +} + +/** + * Marshal will marshal a ULID to a std::string. + * */ +std::string Marshal(const ULID& ulid) { + char data[27]; + data[26] = '\0'; + MarshalTo(ulid, data); + return std::string(data); +} + +/** + * MarshalBinaryTo will Marshal a ULID to the passed byte array + * */ +void MarshalBinaryTo(const ULID& ulid, uint8_t dst[16]) { + // timestamp + dst[0] = static_cast(ulid >> 120); + dst[1] = static_cast(ulid >> 112); + dst[2] = static_cast(ulid >> 104); + dst[3] = static_cast(ulid >> 96); + dst[4] = static_cast(ulid >> 88); + dst[5] = static_cast(ulid >> 80); + + // entropy + dst[6] = static_cast(ulid >> 72); + dst[7] = static_cast(ulid >> 64); + dst[8] = static_cast(ulid >> 56); + dst[9] = static_cast(ulid >> 48); + dst[10] = static_cast(ulid >> 40); + dst[11] = static_cast(ulid >> 32); + dst[12] = static_cast(ulid >> 24); + dst[13] = static_cast(ulid >> 16); + dst[14] = static_cast(ulid >> 8); + dst[15] = static_cast(ulid); +} + +/** + * MarshalBinary will Marshal a ULID to a byte vector. + * */ +std::vector MarshalBinary(const ULID& ulid) { + std::vector dst(16); + MarshalBinaryTo(ulid, dst.data()); + return dst; +} + +/** + * dec storesdecimal encodings for characters. + * 0xFF indicates invalid character. + * 48-57 are digits. + * 65-90 are capital alphabets. + * */ +const uint8_t dec[256] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + /* 0 1 2 3 4 5 6 7 */ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + /* 8 9 */ + 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + /* 10(A) 11(B) 12(C) 13(D) 14(E) 15(F) 16(G) */ + 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + /*17(H) 18(J) 19(K) 20(M) 21(N) */ + 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, 0x15, 0xFF, + /*22(P)23(Q)24(R) 25(S) 26(T) 27(V) 28(W) */ + 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, + /*29(X)30(Y)31(Z) */ + 0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/** + * UnmarshalFrom will unmarshal a ULID from the passed character array. + * */ +void UnmarshalFrom(const char str[26], ULID& ulid) { + // timestamp + ulid = (dec[int(str[0])] << 5) | dec[int(str[1])]; + + ulid <<= 8; + ulid |= (dec[int(str[2])] << 3) | (dec[int(str[3])] >> 2); + + ulid <<= 8; + ulid |= (dec[int(str[3])] << 6) | (dec[int(str[4])] << 1) | (dec[int(str[5])] >> 4); + + ulid <<= 8; + ulid |= (dec[int(str[5])] << 4) | (dec[int(str[6])] >> 1); + + ulid <<= 8; + ulid |= (dec[int(str[6])] << 7) | (dec[int(str[7])] << 2) | (dec[int(str[8])] >> 3); + + ulid <<= 8; + ulid |= (dec[int(str[8])] << 5) | dec[int(str[9])]; + + // entropy + ulid <<= 8; + ulid |= (dec[int(str[10])] << 3) | (dec[int(str[11])] >> 2); + + ulid <<= 8; + ulid |= (dec[int(str[11])] << 6) | (dec[int(str[12])] << 1) | (dec[int(str[13])] >> 4); + + ulid <<= 8; + ulid |= (dec[int(str[13])] << 4) | (dec[int(str[14])] >> 1); + + ulid <<= 8; + ulid |= (dec[int(str[14])] << 7) | (dec[int(str[15])] << 2) | (dec[int(str[16])] >> 3); + + ulid <<= 8; + ulid |= (dec[int(str[16])] << 5) | dec[int(str[17])]; + + ulid <<= 8; + ulid |= (dec[int(str[18])] << 3) | (dec[int(str[19])] >> 2); + + ulid <<= 8; + ulid |= (dec[int(str[19])] << 6) | (dec[int(str[20])] << 1) | (dec[int(str[21])] >> 4); + + ulid <<= 8; + ulid |= (dec[int(str[21])] << 4) | (dec[int(str[22])] >> 1); + + ulid <<= 8; + ulid |= (dec[int(str[22])] << 7) | (dec[int(str[23])] << 2) | (dec[int(str[24])] >> 3); + + ulid <<= 8; + ulid |= (dec[int(str[24])] << 5) | dec[int(str[25])]; +} + +/** + * Unmarshal will create a new ULID by unmarshaling the passed string. + * */ +ULID Unmarshal(const std::string& str) { + ULID ulid; + UnmarshalFrom(str.c_str(), ulid); + return ulid; +} + +/** + * UnmarshalBinaryFrom will unmarshal a ULID from the passed byte array. + * */ +void UnmarshalBinaryFrom(const uint8_t b[16], ULID& ulid) { + // timestamp + ulid = b[0]; + + ulid <<= 8; + ulid |= b[1]; + + ulid <<= 8; + ulid |= b[2]; + + ulid <<= 8; + ulid |= b[3]; + + ulid <<= 8; + ulid |= b[4]; + + ulid <<= 8; + ulid |= b[5]; + + // entropy + ulid <<= 8; + ulid |= b[6]; + + ulid <<= 8; + ulid |= b[7]; + + ulid <<= 8; + ulid |= b[8]; + + ulid <<= 8; + ulid |= b[9]; + + ulid <<= 8; + ulid |= b[10]; + + ulid <<= 8; + ulid |= b[11]; + + ulid <<= 8; + ulid |= b[12]; + + ulid <<= 8; + ulid |= b[13]; + + ulid <<= 8; + ulid |= b[14]; + + ulid <<= 8; + ulid |= b[15]; +} + +/** + * Unmarshal will create a new ULID by unmarshaling the passed byte vector. + * */ +ULID UnmarshalBinary(const std::vector& b) { + ULID ulid; + UnmarshalBinaryFrom(b.data(), ulid); + return ulid; +} + +/** + * CompareULIDs will compare two ULIDs. + * returns: + * -1 if ulid1 is Lexicographically before ulid2 + * 1 if ulid1 is Lexicographically after ulid2 + * 0 if ulid1 is same as ulid2 + * */ +int CompareULIDs(const ULID& ulid1, const ULID& ulid2) { + return -2 * (ulid1 < ulid2) - 1 * (ulid1 == ulid2) + 1; +} + +/** + * Time will extract the timestamp used to generate a ULID + * */ +time_t Time(const ULID& ulid) { + time_t ans = 0; + + ans |= static_cast(ulid >> 120); + + ans <<= 8; + ans |= static_cast(ulid >> 112); + + ans <<= 8; + ans |= static_cast(ulid >> 104); + + ans <<= 8; + ans |= static_cast(ulid >> 96); + + ans <<= 8; + ans |= static_cast(ulid >> 88); + + ans <<= 8; + ans |= static_cast(ulid >> 80); + + return ans; +} + +}; // namespace ulid