initial commit

5 years ago · 7cd144cfb2
14 changed files with 779 additions and 15 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@ -1,3 +1,4 @@
+^LICENSE\.md$
 ^.vscode$
 ^.*\.Rproj$
 ^\.Rproj\.user$
--- a/24
+++ b/24
@ -1,25 +1,37 @@
 Package: tdigest
 Type: Package
-Title: tdigest title goes here otherwise CRAN checks fail
+Title: Accurate Quantiles Using 't-Digests'
 Version: 0.1.0
 Date: 2019-02-15
 Authors@R: c(
    person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), 
-           comment = c(ORCID = "0000-0001-5670-2640"))
+           comment = c(ORCID = "0000-0001-5670-2640")),
+    person("Gabriel", "Pichot", role = "aut", comment = "Original C++ coode")
  )
 Maintainer: Bob Rudis <bob@rud.is>
-Description: A good description goes here otherwise CRAN checks fail.
+Description: The t-digest construction algorithm uses a variant of 1-dimensional 
+    k-means clustering to produce a very compact data structure that allows 
+    accurate estimation of quantiles. This t-digest data structure can be used 
+    to estimate quantiles, compute other rank statistics or even to estimate 
+    related measures like trimmed means. The advantage of the t-digest over 
+    previous digests for this purpose is that the t-digest handles data with 
+    full floating point resolution. With small changes, the t-digest can handle 
+    values from any ordered set for which we can compute something akin to a mean. 
+    The accuracy of quantile estimates produced by t-digests can be orders of 
+    magnitude more accurate than those produced by previous digest algorithms.
 URL: https://gitlab.com/hrbrmstr/tdigest
 BugReports: https://gitlab.com/hrbrmstr/tdigest/issues
+SystemRequirements: C++11
 Encoding: UTF-8
-License: AGPL
+License: MIT + file LICENSE
 Suggests:
    testthat,
    covr
 Depends:
    R (>= 3.2.0)
 Imports:
-    httr,
-    jsonlite
+    Rcpp
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.1
+LinkingTo: 
+    Rcpp
--- a/2
+++ b/2
@ -0,0 +1,2 @@
+YEAR: 2019
+COPYRIGHT HOLDER: Bob Rudis
--- a/LICENSE.md
+++ b/LICENSE.md
@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2019 Bob Rudis
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/4
+++ b/4
@ -1,4 +1,4 @@
 # Generated by roxygen2: do not edit by hand

-import(httr)
-importFrom(jsonlite,fromJSON)
+importFrom(Rcpp,sourceCpp)
+useDynLib(tdigest, .registration = TRUE)
--- a/R/tdigest-package.R
+++ b/R/tdigest-package.R
@ -1,12 +1,23 @@
-#' ...
-#' 
+#' Accurate Quantiles Using 't-Digests'
+#'
+#' The t-digest construction algorithm uses a variant of 1-dimensional
+#' k-means clustering to produce a very compact data structure that allows
+#' accurate estimation of quantiles. This t-digest data structure can be used
+#' to estimate quantiles, compute other rank statistics or even to estimate
+#' related measures like trimmed means. The advantage of the t-digest over
+#' previous digests for this purpose is that the t-digest handles data with
+#' full floating point resolution. With small changes, the t-digest can handle
+#' values from any ordered set for which we can compute something akin to a mean.
+#' The accuracy of quantile estimates produced by t-digests can be orders of
+#' magnitude more accurate than those produced by previous digest algorithms.
+#'
 #' - URL: <https://gitlab.com/hrbrmstr/tdigest>
 #' - BugReports: <https://gitlab.com/hrbrmstr/tdigest/issues>
-#' 
+#'
 #' @md
 #' @name tdigest
 #' @docType package
 #' @author Bob Rudis (bob@@rud.is)
-#' @import httr
-#' @importFrom jsonlite fromJSON
-NULL
+#' @useDynLib tdigest, .registration = TRUE
+#' @importFrom Rcpp sourceCpp
+NULL
--- a/README.Rmd
+++ b/README.Rmd
@ -14,8 +14,12 @@ options(width=120)

 # tdigest

+Accurate Quantiles Using 't-Digests'
+
 ## Description

+The t-digest construction algorithm uses a variant of 1-dimensional k-means clustering to produce a very compact data structure that allows accurate estimation of quantiles. This t-digest data structure can be used to estimate quantiles, compute other rank statistics or even to estimate related measures like trimmed means. The advantage of the t-digest over previous digests for this purpose is that the t-digest handles data with full floating point resolution. With small changes, the t-digest can handle values from any ordered set for which we can compute something akin to a mean. The accuracy of quantile estimates produced by t-digests can be orders of magnitude more accurate than those produced by previous digest algorithms.
+
 ## What's Inside The Tin

 The following functions are implemented:
--- a/man/tdigest.Rd
+++ b/man/tdigest.Rd
@ -4,8 +4,20 @@
 \name{tdigest}
 \alias{tdigest}
 \alias{tdigest-package}
-\title{...}
+\title{Accurate Quantiles Using 't-Digests'}
 \description{
+The t-digest construction algorithm uses a variant of 1-dimensional
+k-means clustering to produce a very compact data structure that allows
+accurate estimation of quantiles. This t-digest data structure can be used
+to estimate quantiles, compute other rank statistics or even to estimate
+related measures like trimmed means. The advantage of the t-digest over
+previous digests for this purpose is that the t-digest handles data with
+full floating point resolution. With small changes, the t-digest can handle
+values from any ordered set for which we can compute something akin to a mean.
+The accuracy of quantile estimates produced by t-digests can be orders of
+magnitude more accurate than those produced by previous digest algorithms.
+}
+\details{
 \itemize{
 \item URL: \url{https://gitlab.com/hrbrmstr/tdigest}
 \item BugReports: \url{https://gitlab.com/hrbrmstr/tdigest/issues}
--- a/src/.gitignore
+++ b/src/.gitignore
@ -0,0 +1,3 @@
+*.o
+*.so
+*.dll
--- a/src/Makevars
+++ b/src/Makevars
@ -0,0 +1,3 @@
+CXX_STD = CXX11
+PKG_CXXFLAGS =
+PKG_LIBS =
--- a/src/avltree.cpp
+++ b/src/avltree.cpp
@ -0,0 +1,256 @@
+#include "avltree.h"
+
+
+AvlTree::AvlTree(): _root(NIL) {
+    _depth[NIL]     = 0;
+    _parent[NIL]    = 0;
+    _left[NIL]      = 0;
+    _right[NIL]     = 0;
+}
+
+int AvlTree::first(int node) const {
+    if(node == NIL) {
+        return NIL;
+    }
+
+    while(true) {
+        const int left = leftNode(node);
+        if(left == NIL) {
+            break;
+        }
+        node = left;
+    }
+    return node;
+}
+
+int AvlTree::last(int node) const {
+    while(true) {
+        const int right = rightNode(node);
+        if(right == NIL) {
+            break;
+        }
+        node = right;
+    }
+    return node;
+}
+
+int AvlTree::nextNode(int node) const {
+    const int right = rightNode(node);
+    if(right != NIL) {
+        return first(right);
+    } else {
+        int parent = parentNode(node);
+        while(parent != NIL && node == rightNode(parent)) {
+            node = parent;
+            parent = parentNode(parent);
+        }
+        return parent;
+    }
+}
+
+int AvlTree::prevNode(int node) const {
+    const int left = leftNode(node);
+    if(left != NIL) {
+        return last(left);
+    } else {
+        int parent = parentNode(node);
+        while(parent != NIL && node == leftNode(parent)) {
+            node = parent;
+            parent = parentNode(parent);
+        }
+        return parent;
+    }
+}
+
+bool AvlTree::add(double x, int w) {
+    if(_root == NIL) {
+        _root = ++_n;
+        _values[_root] = x;
+        _count[_root] = w;
+        _left[_root] = NIL;
+        _right[_root] = NIL;
+        _parent[_root] = NIL;
+        // Update depth and aggregates
+        updateAggregates(_root);
+    } else {
+        int node = _root;
+        int parent = NIL;
+        int cmp;
+        do {
+            cmp = compare(node, x);
+            if(cmp < 0) {
+                parent = node;
+                node = leftNode(node);
+            } else if (cmp > 0) {
+                parent = node;
+                node = rightNode(node);
+            } else {
+                // we merge the node
+                merge(node, x, w);
+                return false;
+            }
+        } while(node != NIL);
+
+        node = ++_n;
+        _values[node] = x;
+        _count[node] = w;
+        _left[node] = NIL;
+        _right[node] = NIL;
+        _parent[node] = parent;
+        if(cmp < 0) {
+            _left[parent] = node;
+        } else {
+            assert(cmp > 0);
+            _right[parent] = node;
+        }
+
+        rebalance(node);
+
+        return true;
+    }
+}
+
+int AvlTree::find(double x) const {
+    for(int node = _root; node != NIL;) {
+        const int cmp = compare(node, x);
+        if(cmp < 0) {
+            node = leftNode(node);
+        } else if(cmp > 0) {
+            node = rightNode(node);
+        } else {
+            return node;
+        }
+    }
+    return NIL;
+}
+
+
+int AvlTree::floor(double x) const {
+    int f = NIL;
+    for(int node = _root; node != NIL; ) {
+        const int cmp = compare(node, x);
+        if(cmp <= 0) {
+            node = leftNode(node);
+        } else {
+            f = node;
+            node = rightNode(node);
+        }
+    }
+    return f;
+}
+
+int AvlTree::floorSum(long sum) const {
+    int f = NIL;
+    for(int node = _root; node != NIL; ) {
+        const int left = leftNode(node);
+        const long leftCount = aggregatedCount(left);
+        if(leftCount <= sum) {
+            f = node;
+            sum -= leftCount + count(node);
+            node = rightNode(node);
+        } else {
+            node = leftNode(node);
+        }
+    }
+    return f;
+}
+
+long AvlTree::ceilSum(int node) const {
+    const int left = leftNode(node);
+    long sum = aggregatedCount(left);
+    int n = node;
+    for(int p = parentNode(node); p != NIL; p = parentNode(n)) {
+        if(n == rightNode(p)) {
+            const int leftP = leftNode(p);
+            sum += count(p) + aggregatedCount(leftP);
+        }
+        n = p;
+    }
+    return sum;
+}
+
+void AvlTree::rebalance(int node) {
+    for(int n = node; n != NIL; ) {
+        const int p = parentNode(n);
+
+        updateAggregates(n);
+
+        switch(balanceFactor(n)) {
+            case -2: {
+                         const int right = rightNode(n);
+                         if(balanceFactor(right) == 1) {
+                             rotateRight(right);
+                         }
+                         rotateLeft(n);
+                         break;
+                     }
+            case 2: {
+                        const int left = leftNode(n);
+                        if(balanceFactor(left) == -1) {
+                            rotateLeft(left);
+                        }
+                        rotateRight(n);
+                        break;
+                    }
+            case -1:
+            case  1:
+            case  0:
+                    break;
+            default:
+                    // We should throw an error
+                    assert(true == false);
+        }
+
+        n = p;
+    }
+}
+
+void AvlTree::rotateLeft(int node) {
+    const int r  = rightNode(node);
+    const int lr = leftNode(r);
+
+    _right[node] = lr;
+    if(lr != NIL) {
+        _parent[lr] = node;
+    }
+
+    const int p = parentNode(node);
+    _parent[r] = p;
+    if(p == NIL) {
+        _root = r;
+    } else if(leftNode(p) == node) {
+        _left[p] = r;
+    } else {
+        assert(rightNode(p) == node);
+        _right[p] = r;
+    }
+    _left[r] = node;
+    _parent[node] = r;
+    updateAggregates(node);
+    updateAggregates(parentNode(node));
+}
+
+void AvlTree::rotateRight(int node) {
+    const int l = leftNode(node);
+    const int rl = rightNode(l);
+
+    _left[node] = rl;
+    if(rl != NIL) {
+        _parent[rl] = node;
+    }
+
+    const int p = parentNode(node);
+    _parent[l] = p;
+    if(p == NIL) {
+        _root = l;
+    } else if(rightNode(p) == node) {
+        _right[p] = l;
+    } else {
+        assert(leftNode(p) == node);
+        _left[p] = l;
+    }
+    _right[l] = node;
+    _parent[node] = l;
+    updateAggregates(node);
+    updateAggregates(parentNode(node));
+}
--- a/src/avltree.h
+++ b/src/avltree.h
@ -0,0 +1,257 @@
+#ifndef HEADER_AVLTREE
+#define HEADER_AVLTREE
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+
+using namespace std;
+
+
+class AvlTree {
+
+    private:
+        int       _root;
+        int       _n = 0;
+        // TODO We should reallocate tables (ie allow dynamic arrays)
+        int       _parent[1000];
+        int       _left[1000];
+        int       _right[1000];
+        char      _depth[1000];
+        int       _count[1000];
+        double    _values[1000];
+        int       _aggregatedCount[1000];
+
+    public:
+        static const int NIL = 0;
+
+        AvlTree();
+
+        //
+        // Node comparison
+        //
+
+        // O(1)
+        inline int compare(int node, double x) const {
+            if(value(node) < x) {
+                return 1;
+            } else if(value(node) == x) {
+                return 0;
+            } else {
+                return -1;
+            }
+        }
+    
+        // O(1)
+        inline int compare(int nodeA, int nodeB) const {
+            return compare(nodeA, value(nodeB));
+        }
+
+        //
+        // Tree accessors
+        //
+
+        // O(1)
+        inline int root() const {
+            return _root;
+        }
+        // O(1)
+        inline int size() const {
+            return _n;
+        }
+
+        //
+        // Node accessors
+        //
+
+        // O(1)
+        inline int parentNode(int node) const {
+            return _parent[node];
+        }
+        // O(1)
+        inline int leftNode(int node) const {
+            return _left[node];
+        }
+        // O(1)
+        inline int rightNode(int node) const {
+            return _right[node];
+        }
+        // O(1)
+        inline int depth(int node) const {
+            return _depth[node];
+        }
+        // O(1)
+        inline int count(int node) const {
+            return _count[node];
+        }
+        // O(1)
+        inline int aggregatedCount(int node) const {
+            return _aggregatedCount[node];
+        }
+        // O(1)
+        inline double value(int node) const {
+            return _values[node];
+        }
+
+        //
+        // Tree accessors
+        //
+
+        // O(log(n))
+        int first(int node) const;
+
+        // O(log(n))
+        inline int first() const {
+            return first(_root);
+        }
+
+        // O(log(n)) 
+        int last(int node) const;
+
+        // O(log(n))
+        int nextNode(int node) const;
+
+        // O(log(n))
+        int prevNode(int node) const;
+
+        //
+        // Mutators
+        //
+
+        // O(1)
+        inline void updateAggregates(int node) {
+            // Updating depth
+            _depth[node] = 1 + max(depth(leftNode(node)), depth(rightNode(node)));
+            _aggregatedCount[node] = count(node) + aggregatedCount(leftNode(node)) + aggregatedCount(rightNode(node));
+        }
+
+        // O(log(n))
+        void update(int node, double x, int w) {
+            _values[node] += w * (x - value(node)) / count(node);
+            _count[node] += w;
+            
+            for(int n = node; n != NIL; n = parentNode(n)) {
+               updateAggregates(n);
+            }
+        }
+
+        // O(log(n))
+        void merge(int node, double x, int w) {
+            assert(value(node) == x);
+            _count[node] += w;
+            
+            for(int n = node; n != NIL; n = parentNode(n)) {
+               updateAggregates(n);
+            }
+        }
+
+        // O(log(n)) 
+        bool add(double x, int w);
+
+        // O(log(n))
+        int find(double x) const;
+        
+        // O(log(n))
+        int floor(double x) const;
+
+        // O(log(n))
+        int floorSum(long sum) const;
+
+        // O(log(n))
+        long ceilSum(int node) const;
+
+    private:
+        // O(1)
+        inline int balanceFactor(int node) const {
+            return depth(leftNode(node)) - depth(rightNode(node));
+        }
+
+        // (O(log(n)^2)
+        void rebalance(int node);
+
+        // O(log(n))
+        void rotateLeft(int node);
+
+        // O(log(n))
+        // TODO to factor with rotateLeft
+        void rotateRight(int node);
+
+    public:
+        // 
+        // For test or debugging purposes
+        //
+
+        // Check balance integrity
+        bool checkBalance(int node) const {
+            if(node == NIL) {
+                return depth(node) == 0;
+            } else {
+                return depth(node) == 1 + max(depth(leftNode(node)), depth(rightNode(node)))
+                    && abs(depth(leftNode(node)) - depth(rightNode(node))) <= 1
+                    && checkBalance(leftNode(node))
+                    && checkBalance(rightNode(node))
+                ;
+            }
+        }
+        inline bool checkBalance() const {
+            return checkBalance(_root);
+        }
+
+        // Check aggregates integrity
+        bool checkAggregates(int node) const {
+            if(node == NIL) {
+                return count(node) == 0;
+            } else {
+                return _aggregatedCount[node] == _count[node] + _aggregatedCount[leftNode(node)] + _aggregatedCount[rightNode(node)]
+                    && checkAggregates(leftNode(node))
+                    && checkAggregates(rightNode(node))
+                ;
+            }
+        }
+        inline bool checkAggregates() const {
+            return checkAggregates(_root);
+        }
+
+        // Check integrity (order of nodes)
+        bool checkIntegrity(int node) const {
+            if(node == NIL) {
+                return true;
+            } else {
+                bool ok = true;
+                if(leftNode(node) != NIL) {
+                    ok &= _values[node] >= _values[leftNode(node)];
+                    ok &= checkIntegrity(leftNode(node));
+                }
+                if(rightNode(node) != NIL) {
+                    ok &= _values[node] <= _values[rightNode(node)];
+                    ok &= checkIntegrity(rightNode(node));
+                }
+                return ok;
+            }
+        }
+        inline bool checkIntegrity() const {
+            return checkIntegrity(_root);
+        }
+
+        // Print as rows
+        void print(int node) const {
+            if(node == NIL)
+                return;
+            cout << "Node " << node << "=> ";
+            cout << "Value:" << _values[node] << " ";
+            cout << "(" << _values[leftNode(node)] << ";";
+            cout << "" << _values[rightNode(node)] << ") ";
+            cout << "Depth: " << depth(node) << " ";
+            cout << "Count: " <<_count[node] << " ";
+            cout << "Aggregate: " << _aggregatedCount[node] << endl;
+            print(leftNode(node));
+            print(rightNode(node));
+        }
+        void print() const {
+            print(_root);
+        }
+
+};
+
+#endif
--- a/src/tdigest.cpp
+++ b/src/tdigest.cpp
@ -0,0 +1,62 @@
+#include "tdigest.h"
+
+
+void TDigest::compress() {
+    // TODO: implement this one
+}
+
+
+double TDigest::quantile(double q) {
+    if(q < 0 || q > 1) {
+        return 0; // TODO
+    }
+
+    if(_centroids->size() == 0) {
+        return 0; // TODO
+    } else if(_centroids->size() == 1) {
+        return _centroids->value(_centroids->first());
+    }
+
+    const double index = q * (_count - 1);
+
+    double previousMean = NAN;
+    double previousIndex = 0;
+    int next = _centroids->floorSum(index);
+    assert(next != AvlTree::NIL);
+    long total = _centroids->ceilSum(next);
+    const int prev = _centroids->prevNode(next);
+    if(prev != AvlTree::NIL) {
+        previousMean = _centroids->value(prev);
+        previousIndex = total - (_centroids->count(prev) + 1.0) / 2;
+    }
+
+    while(true) {
+        const double nextIndex = total + (_centroids->count(next) - 1.) / 2;
+        if(nextIndex >= index) {
+            if(previousMean == NAN) {
+                // Index is before first centroid
+                assert(total == 0);
+                if(nextIndex == previousIndex) {
+                    return _centroids->value(next);
+                }
+                // We assume a linear increase
+                int next2 = _centroids->value(next);
+                const double nextIndex2 = total + _centroids->count(next) + (_centroids->count(next2) - 1.) / 2;
+                previousMean = (nextIndex2 * _centroids->value(next) - nextIndex * _centroids->value(next2)) / (nextIndex2 - nextIndex);
+            }
+            return quantile(previousIndex, index, nextIndex, previousMean, _centroids->value(next));
+
+        } else if(_centroids->value(next) == AvlTree::NIL) {
+            // Beyond last centroid
+            const double nextIndex2 = _count - 1;
+            const double nextMean2 = (_centroids->value(next) * (nextIndex2 - previousIndex ) - previousMean * (nextIndex2 - nextIndex)) / (nextIndex - previousIndex);
+            return quantile(nextIndex, index, nextIndex2, _centroids->value(next), nextMean2);
+        }
+        total += _centroids->count(next);
+        previousMean = _centroids->value(next);
+        previousIndex = nextIndex;
+        next = _centroids->nextNode(next);
+    }
+
+}
+
--- a/src/tdigest.h
+++ b/src/tdigest.h
@ -0,0 +1,120 @@
+#ifndef HEADER_TDIGEST
+#define HEADER_TDIGEST
+
+#include <cfloat>
+
+#include "avltree.h"
+
+using namespace std;
+
+class TDigest {
+
+    private:
+        double    _compression     = 100;
+        double    _count           = 0;
+        AvlTree*  _centroids       = new AvlTree();
+
+    public:
+        TDigest (double compression): _compression(compression) {}
+
+        inline long size() const {
+            return _count;
+        }
+
+        inline void add(double x) {
+            add(x, 1);
+        }
+
+        inline void add(double x, int w) {
+
+            int start = _centroids->floor(x);
+            if(start == AvlTree::NIL) {
+                start = _centroids->first();
+            }
+
+            if(start == AvlTree::NIL) {
+                assert(_centroids->size() == 0);
+                _centroids->add(x, w);
+                _count += w;
+            } else {
+                double minDistance = DBL_MAX;
+                int lastNeighbor = AvlTree::NIL;
+                for(int neighbor = start; start != AvlTree::NIL; neighbor = _centroids->nextNode(neighbor)) {
+                    double z = abs(_centroids->value(neighbor) - x);
+                    if(z < minDistance) {
+                        start = neighbor;
+                        minDistance = z;
+                    } else {
+                        lastNeighbor = neighbor;
+                        break;
+                    }
+
+                }
+
+                int closest = AvlTree::NIL;
+                long sum = _centroids->ceilSum(start);
+                double n = 0;
+                for(int neighbor = start; neighbor != lastNeighbor; neighbor = _centroids->nextNode(neighbor)) {
+                    assert(minDistance == abs(_centroids->value(neighbor) - x));
+                    double q = _count == 1
+                        ? 0.5
+                        : (sum + (_centroids->count(neighbor) - 1 / 2. )) / (_count - 10)
+                    ;
+                    double k = 4 * _count * q * (1 - q) / _compression;
+
+                    if(_centroids->count(neighbor) + w <= k) {
+                        n++;
+                        if((float)rand() / RAND_MAX < 1 / n) {
+                            closest = neighbor;
+                        }
+                    }
+                    sum += _centroids->count(neighbor);
+
+                }
+
+                if(closest == AvlTree::NIL) {
+                    _centroids->add(x, w);
+                } else {
+                    _centroids->update(closest, x, w);
+                }
+                _count += w;
+
+                if(_centroids->size() > 20 * _compression) {
+                    cout << "Compress:" << _centroids->size() << endl;
+                    compress();
+                }
+            }
+
+        }
+
+        inline static double interpolate(double x, double a, double b) {
+            return (x - a) / (b - a);
+        }
+
+        inline static double quantile(
+                double previousIndex, double index, double nextIndex,
+                double previousMean, double nextMean
+        ) {
+            const double delta = nextIndex - previousIndex;
+            const double previousWeight = (nextIndex - index) / delta;
+            const double nextWeight = (index - previousIndex) / delta;
+            return previousMean * previousWeight + nextMean * nextWeight;
+        }
+
+        inline AvlTree* centroids() const {
+            return _centroids;
+        }
+
+        inline void merge(TDigest* digest) {
+            AvlTree* centroids = digest->centroids();
+            for(int n = centroids->first(); n != AvlTree::NIL; n = centroids->nextNode(n)) {
+                add(centroids->value(n), centroids->count(n));
+            }
+        }
+
+        void compress();
+        double quantile(double q);
+
+};
+
+#endif