diff --git a/CMakeLists.txt b/CMakeLists.txt
index c89d2d979e30142e96da464c9d9d46ec9aec9f2e..29a2c9066886554fc561f0a645f15459fbea99d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -130,7 +130,7 @@ install(TARGETS arbor-config-defs EXPORT arbor-targets)
 # for the arbor library.
 
 add_library(arbor-private-deps INTERFACE)
-target_link_libraries(arbor-private-deps INTERFACE arbor-config-defs)
+target_link_libraries(arbor-private-deps INTERFACE arbor-config-defs ext-random123)
 install(TARGETS arbor-private-deps EXPORT arbor-targets)
 
 # Interface library `arborenv-private-deps` collects dependencies, options etc.
diff --git a/arbor/include/arbor/morph/embed_pwlin.hpp b/arbor/include/arbor/morph/embed_pwlin.hpp
index 82d83a570bc6f95d163acd8d838869f1a47a9591..d6947ac87ed594ea52918aaf5e59296120b7c9fc 100644
--- a/arbor/include/arbor/morph/embed_pwlin.hpp
+++ b/arbor/include/arbor/morph/embed_pwlin.hpp
@@ -28,6 +28,10 @@ struct embed_pwlin {
 
     // Interpolated radius at location.
     double radius(mlocation) const;
+    mcable_list radius_cmp(msize_t bid, double rad_lim, comp_op op) const;
+
+    double directed_projection(mlocation) const;
+    mcable_list projection_cmp(msize_t bid, double proj_lim, comp_op op) const;
 
     // Computed length of mcable.
     double integrate_length(mcable c) const;
diff --git a/arbor/include/arbor/morph/locset.hpp b/arbor/include/arbor/morph/locset.hpp
index d4785d89a8e9bd53260ff89cadaed3f56083d505..3429717526f9a75a4a1c10de62bb2342bf8b1f43 100644
--- a/arbor/include/arbor/morph/locset.hpp
+++ b/arbor/include/arbor/morph/locset.hpp
@@ -118,8 +118,9 @@ private:
     };
 };
 
-namespace ls {
+class region;
 
+namespace ls {
 // Explicit location on morphology.
 locset location(msize_t branch, double pos);
 
@@ -138,6 +139,16 @@ locset named(std::string);
 // The null (empty) set.
 locset nil();
 
+// Most distal points of a region
+locset most_distal(region reg);
+
+// Most proximal point of a region
+locset most_proximal(region reg);
+
+// A range `left` to `right` of randomly selected locations with a
+// uniform distribution from region `reg` generated using `seed`
+locset uniform(region reg, unsigned left, unsigned right, uint64_t seed);
+
 // Proportional location on every branch.
 locset on_branches(double pos);
 
diff --git a/arbor/include/arbor/morph/primitives.hpp b/arbor/include/arbor/morph/primitives.hpp
index 4b9e223b993db2659e94c00d78796c9b5f53f76c..fa6cb3eab733136757acfc7a715a0b090a746b0a 100644
--- a/arbor/include/arbor/morph/primitives.hpp
+++ b/arbor/include/arbor/morph/primitives.hpp
@@ -28,6 +28,14 @@ mpoint lerp(const mpoint& a, const mpoint& b, double u);
 bool is_collocated(const mpoint& a, const mpoint& b);
 double distance(const mpoint& a, const mpoint& b);
 
+// Indicate allowed comparison operations for classifying regions
+enum class comp_op {
+    lt,
+    le,
+    gt,
+    ge
+};
+
 // A morphology sample consists of a location and an integer tag.
 // When loaded from an SWC file, the tag will correspond to the SWC label,
 // which are standardised as follows:
diff --git a/arbor/include/arbor/morph/region.hpp b/arbor/include/arbor/morph/region.hpp
index bd049fd68225f314ca2d92e1cf1e0bce33a774d1..629c7345b05e040f14baccecd2a08e65ca8190e6 100644
--- a/arbor/include/arbor/morph/region.hpp
+++ b/arbor/include/arbor/morph/region.hpp
@@ -116,6 +116,8 @@ private:
     };
 };
 
+class locset;
+
 namespace reg {
 
 // An empty region.
@@ -130,6 +132,28 @@ region branch(msize_t);
 // Region with all segments with segment tag id.
 region tagged(int id);
 
+// Region with all segments distal from another region
+region distal_interval(locset start, double distance);
+
+// Region with all segments proximal from another region
+region proximal_interval(locset end, double distance);
+
+// Region with all segments with radius less than/less than or equal to r
+region radius_lt(region reg, double r);
+region radius_le(region reg, double r);
+
+// Region with all segments with radius greater than/greater than or equal to r
+region radius_gt(region reg, double r);
+region radius_ge(region reg, double r);
+
+// Region with all segments with projection less than/less than or equal to r
+region z_dist_from_soma_lt(double r);
+region z_dist_from_soma_le(double r);
+
+// Region with all segments with projection greater than/greater than or equal to r
+region z_dist_from_soma_gt(double r);
+region z_dist_from_soma_ge(double r);
+
 // Region with all segments in a cell.
 region all();
 
diff --git a/arbor/morph/embed_pwlin.cpp b/arbor/morph/embed_pwlin.cpp
index e04436b4598224ab3a975ac60f5c527e40fd11b2..d81e9e4c6145c40c4223ed7149b7200abc3c33fc 100644
--- a/arbor/morph/embed_pwlin.cpp
+++ b/arbor/morph/embed_pwlin.cpp
@@ -53,14 +53,48 @@ double integrate(const branch_pw_ratpoly<p, q>& f, unsigned bid, const pw_consta
     return accum;
 }
 
+template <typename operation>
+mcable_list data_cmp(const branch_pw_ratpoly<1, 0>& f, unsigned bid, double val, operation op) {
+    mcable_list L;   // cables of branch bid on which op(value of f, val) holds
+    const auto& pw = f.at(bid);
+    for (const auto& piece: pw) {
+        auto extents = piece.first;
+        auto left_val = piece.second(0);
+        auto right_val = piece.second(1);
+        // Classify the piece from the values of f at its two ends.
+        if (!op(left_val, val) && !op(right_val, val)) {
+            continue;   // op fails at both ends: nothing to add
+        }
+        if (op(left_val, val) && op(right_val, val)) {
+            L.push_back({bid, extents.first, extents.second});
+            continue;   // op holds at both ends: keep the whole piece
+        }
+        // op holds at exactly one end, so left_val != right_val; find where f crosses val.
+        auto cable_loc = (val - left_val)/(right_val - left_val);
+        auto edge = math::lerp(extents.first, extents.second, cable_loc);
+        // Keep the part of the piece on the side of the crossing where op holds.
+        if (op(left_val, val)) {
+            L.push_back({bid, extents.first, edge});
+            continue;
+        }
+        if (!op(left_val, val)) {
+            L.push_back({bid, edge, extents.second});
+            continue;
+        }
+    }
+    return L;
+}
+
 struct embed_pwlin_data {
     branch_pw_ratpoly<1, 0> length;
+    branch_pw_ratpoly<1, 0> directed_projection;
     branch_pw_ratpoly<1, 0> radius;
     branch_pw_ratpoly<2, 0> area;
     branch_pw_ratpoly<1, 1> ixa;
 
     explicit embed_pwlin_data(msize_t n_branch):
         length(n_branch),
+        directed_projection(n_branch),
         radius(n_branch),
         area(n_branch),
         ixa(n_branch)
@@ -71,6 +105,10 @@ double embed_pwlin::radius(mlocation loc) const {
     return interpolate(data_->radius, loc.branch, loc.pos);
 }
 
+double embed_pwlin::directed_projection(arb::mlocation loc) const {
+    return interpolate(data_->directed_projection, loc.branch, loc.pos);
+}
+
 double embed_pwlin::integrate_length(msize_t bid, const pw_constant_fn& g) const {
     return integrate(data_->length, bid, g);
 }
@@ -97,6 +135,26 @@ double embed_pwlin::integrate_ixa(mcable c) const {
     return integrate_ixa(c.branch, pw_constant_fn{{c.prox_pos, c.dist_pos}, {1.}});
 }
 
+mcable_list embed_pwlin::radius_cmp(msize_t bid, double val, comp_op op) const {
+    switch (op) {
+        case comp_op::lt: return data_cmp(data_->radius, bid, val, [](auto l, auto r){return l <  r;});
+        case comp_op::le: return data_cmp(data_->radius, bid, val, [](auto l, auto r){return l <= r;});
+        case comp_op::gt: return data_cmp(data_->radius, bid, val, [](auto l, auto r){return l >  r;});
+        case comp_op::ge: return data_cmp(data_->radius, bid, val, [](auto l, auto r){return l >= r;});
+        default: return {};
+    }
+}
+
+mcable_list embed_pwlin::projection_cmp(msize_t bid, double val, comp_op op) const {
+    switch (op) {
+        case comp_op::lt: return data_cmp(data_->directed_projection, bid, val, [](auto l, auto r){return l <  r;});
+        case comp_op::le: return data_cmp(data_->directed_projection, bid, val, [](auto l, auto r){return l <= r;});
+        case comp_op::gt: return data_cmp(data_->directed_projection, bid, val, [](auto l, auto r){return l >  r;});
+        case comp_op::ge: return data_cmp(data_->directed_projection, bid, val, [](auto l, auto r){return l >= r;});
+        default: return {};
+    }
+}
+
 // Initialization, creation of geometric data.
 
 embed_pwlin::embed_pwlin(const arb::morphology& m) {
@@ -109,16 +167,18 @@ embed_pwlin::embed_pwlin(const arb::morphology& m) {
     const auto& samples = m.samples();
     sample_locations_.resize(m.num_samples());
 
+    double proj_shift = samples.front().loc.z;
+
     for (msize_t bid = 0; bid<n_branch; ++bid) {
         unsigned parent = m.branch_parent(bid);
         auto sample_indices = util::make_range(m.branch_indexes(bid));
-
         if (bid==0 && m.spherical_root()) {
             arb_assert(sample_indices.size()==1);
 
             // Treat spherical root as area-equivalent cylinder.
             double r = samples[0].loc.radius;
 
+            data_->directed_projection[bid].push_back(0., 1., rat_element<1, 0>(-r, r));
             data_->length[bid].push_back(0., 1., rat_element<1, 0>(0, r*2));
             data_->radius[bid].push_back(0., 1., rat_element<1, 0>(r, r));
 
@@ -155,13 +215,15 @@ embed_pwlin::embed_pwlin(const arb::morphology& m) {
             double length_0 = parent==mnpos? 0: data_->length[parent].back().second[1];
             data_->length[bid].push_back(0., 1, rat_element<1, 0>(length_0, length_0+branch_length));
 
-            double area_0 = parent=mnpos? 0: data_->area[parent].back().second[1];
-            double ixa_0 = parent=mnpos? 0: data_->ixa[parent].back().second[1];
+            double area_0 = parent==mnpos? 0: data_->area[parent].back().second[1];
+            double ixa_0 = parent==mnpos? 0: data_->ixa[parent].back().second[1];
 
             if (length_scale==0) {
                 // Zero-length branch? Weird, but make best show of it.
                 double r = samples[sample_indices[0]].loc.radius;
+                double z = samples[sample_indices[0]].loc.z;
                 data_->radius[bid].push_back(0., 1., rat_element<1, 0>(r, r));
+                data_->directed_projection[bid].push_back(0., 1., rat_element<1, 0>(z-proj_shift, z-proj_shift));
                 data_->area[bid].push_back(0., 1., rat_element<2, 0>(area_0, area_0, area_0));
                 data_->ixa[bid].push_back(0., 1., rat_element<1, 1>(ixa_0, ixa_0, ixa_0));
             }
@@ -173,6 +235,10 @@ embed_pwlin::embed_pwlin(const arb::morphology& m) {
                     double p1 = sample_locations_[sample_indices[i]].pos;
                     if (p0==p1) continue;
 
+                    double z0 = samples[sample_indices[i-1]].loc.z - proj_shift;
+                    double z1 = samples[sample_indices[i]].loc.z - proj_shift;
+                    data_->directed_projection[bid].push_back(p0, p1, rat_element<1, 0>(z0, z1));
+
                     double r0 = samples[sample_indices[i-1]].loc.radius;
                     double r1 = samples[sample_indices[i]].loc.radius;
                     data_->radius[bid].push_back(p0, p1, rat_element<1, 0>(r0, r1));
diff --git a/arbor/morph/locset.cpp b/arbor/morph/locset.cpp
index 1f7eafaa123195bda775cca63d6a4c638029b1ff..19b50d84e20745ee7ff2acedba3d7ddf2c7bb54a 100644
--- a/arbor/morph/locset.cpp
+++ b/arbor/morph/locset.cpp
@@ -2,13 +2,19 @@
 #include <iostream>
 #include <numeric>
 
+#include <arbor/math.hpp>
 #include <arbor/morph/locset.hpp>
 #include <arbor/morph/morphexcept.hpp>
 #include <arbor/morph/morphology.hpp>
 #include <arbor/morph/mprovider.hpp>
 #include <arbor/morph/primitives.hpp>
+#include <arbor/morph/region.hpp>
 
+#include "util/cbrng.hpp"
+#include "util/partition.hpp"
 #include "util/rangeutil.hpp"
+#include "util/transform.hpp"
+#include "util/span.hpp"
 #include "util/strprintf.hpp"
 
 namespace arb {
@@ -160,6 +166,125 @@ std::ostream& operator<<(std::ostream& o, const named_& x) {
     return o << "(locset \"" << x.name << "\")";
 }
 
+// Most distal points of a region
+
+struct most_distal_: locset_tag {
+    explicit most_distal_(region reg): reg(std::move(reg)) {}
+    region reg;
+};
+
+locset most_distal(region reg) {
+    return locset(most_distal_{std::move(reg)});
+}
+
+mlocation_list thingify_(const most_distal_& n, const mprovider& p) {
+    mlocation_list L;
+    // Sort lexicographically by (branch, dist_pos): a strict weak ordering, as sorting requires.
+    auto cables = thingify(n.reg, p);
+    util::sort(cables, [](const auto& l, const auto& r){return l.branch < r.branch || (l.branch == r.branch && l.dist_pos < r.dist_pos);});
+    // Walk in reverse: highest branch id first and, within a branch, largest dist_pos first.
+    std::unordered_set<msize_t> branches_visited;
+    for (auto it = cables.rbegin(); it != cables.rend(); ++it) {
+        auto bid = it->branch;
+        auto pos = it->dist_pos;
+
+        // Skip the cable if its branch was already marked, either directly or as an ancestor of an earlier hit.
+        if (branches_visited.count(bid)) continue;
+        L.push_back({bid, pos});
+        while (bid != mnpos) {   // mark this branch and every ancestor up to the root
+            branches_visited.insert(bid);
+            bid = p.morphology().branch_parent(bid);
+        }
+    }
+
+    util::sort(L);
+    return L;
+}
+
+std::ostream& operator<<(std::ostream& o, const most_distal_& x) {
+    return o << "(most_distal: " << x.reg << ")";   // label the expression; it is not a named locset
+}
+
+// Most proximal point of a region
+
+struct most_proximal_: locset_tag {
+    explicit most_proximal_(region reg): reg(std::move(reg)) {}
+    region reg;
+};
+
+locset most_proximal(region reg) {
+    return locset(most_proximal_{std::move(reg)});
+}
+
+mlocation_list thingify_(const most_proximal_& n, const mprovider& p) {
+    auto cables = thingify(n.reg, p);
+    arb_assert(test_invariants(cables));
+    if (cables.empty()) return {};   // empty region has no proximal point; front() would be undefined
+    auto most_prox = cables.front();   // takes the first cable of the thingified region as the most proximal
+    return {{most_prox.branch, most_prox.prox_pos}};
+}
+
+std::ostream& operator<<(std::ostream& o, const most_proximal_& x) {
+    return o << "(most_proximal: " << x.reg << ")";   // label the expression; it is not a named locset
+}
+
+
+// Uniform locset.
+
+struct uniform_ {
+    region reg;
+    unsigned left;
+    unsigned right;
+    uint64_t seed;
+};
+
+locset uniform(arb::region reg, unsigned left, unsigned right, uint64_t seed) {
+    return locset(uniform_{reg, left, right, seed});
+}
+
+mlocation_list thingify_(const uniform_& u, const mprovider& p) {
+    mlocation_list L;
+    // Only the embedding is needed (for cable lengths); bind it by reference rather than copying.
+    const auto& embed = p.embedding();
+
+    // Thingify the region; an empty region offers nothing to sample, so return no locations.
+    auto reg_cables = thingify(u.reg, p);
+    if (reg_cables.empty()) return L;
+    std::vector<double> lengths_bounds;
+    auto lengths_part = util::make_partition(lengths_bounds,
+                                       util::transform_view(reg_cables, [&embed](const auto& c) {
+                                           return embed.integrate_length(c);
+                                       }));
+
+    auto region_length = lengths_part.bounds().second;
+
+    // Generate uniform random positions along the extent of the full region
+    auto random_pos = util::uniform(u.seed, u.left, u.right);
+    std::transform(random_pos.begin(), random_pos.end(), random_pos.begin(),
+            [&region_length](auto& c){return c*region_length;});
+    util::sort(random_pos);
+
+    // Match each sorted position to the cable whose length interval contains it
+    unsigned cable_idx = 0;
+    auto range = lengths_part[cable_idx];
+
+    for (auto e: random_pos) {
+        while (e > range.second) {
+            range = lengths_part[++cable_idx];
+        }
+        auto cable = reg_cables[cable_idx];
+        auto pos_on_cable = range.second > range.first? (e - range.first)/(range.second - range.first): 0.;   // zero-length cable: avoid 0/0
+        auto pos_on_branch = math::lerp(cable.prox_pos, cable.dist_pos, pos_on_cable);
+        L.push_back({cable.branch, pos_on_branch});
+    }
+
+    return L;
+}
+
+std::ostream& operator<<(std::ostream& o, const uniform_& u) {
+    return o << "(uniform from region: \"" << u.reg << "\"; using seed: " << u.seed
+             << "; range: {" << u.left << ", " << u.right << "})";
+}
 
 // Intersection of two point sets.
 
diff --git a/arbor/morph/region.cpp b/arbor/morph/region.cpp
index 3db12f50fb317606054a817f66befb99ca12446f..9aafa123c7beacdcf28f3b542a272093427e658b 100644
--- a/arbor/morph/region.cpp
+++ b/arbor/morph/region.cpp
@@ -1,6 +1,7 @@
 #include <set>
 #include <string>
 #include <vector>
+#include <stack>
 
 #include <arbor/morph/locset.hpp>
 #include <arbor/morph/primitives.hpp>
@@ -163,6 +164,7 @@ mcable_list remove_covered_points(mcable_list cables, const morphology& m) {
             }
         }
     }
+
     util::sort(erase_indices);
     for (auto it = erase_indices.rbegin(); it != erase_indices.rend(); it++) {
         cables.erase(cables.begin() + *it);
@@ -283,7 +285,6 @@ std::ostream& operator<<(std::ostream& o, const tagged_& t) {
     return o << "(tag " << t.tag << ")";
 }
 
-
 // Region comprising whole morphology.
 
 struct all_: region_tag {};
@@ -306,9 +307,323 @@ std::ostream& operator<<(std::ostream& o, const all_& t) {
     return o << "(all)";
 }
 
+// Region with all segments distal from another region
 
-// Named region.
+struct distal_interval_ {
+    locset start;
+    double distance; //um
+};
+
+region distal_interval(locset start, double distance) {
+    return region(distal_interval_{start, distance});
+}
+
+mcable_list thingify_(const distal_interval_& reg, const mprovider& p) {
+    const auto& m = p.morphology();
+    const auto& e = p.embedding();
+    // Builds, for every start location, the cables reached by moving up to reg.distance distally.
+    std::vector<mcable> L;
+
+    auto start = thingify(reg.start, p);
+    auto distance = reg.distance;
+    // A branch still to be covered, with the distance left over when it is entered.
+    struct branch_interval {
+        msize_t bid;
+        double distance;
+    };
+
+    for (auto c: start) {
+        std::stack<branch_interval> branches_reached;
+        bool first_branch = true;
+
+        // Start on the location's own branch unless it sits at the branch end (pos == 1);
+        if (c.pos < 1) {
+            branches_reached.push({c.branch, distance});
+        } else {   // at the branch end: record that point and continue from the children
+            first_branch = false;
+            L.push_back({c.branch,1,1});
+            for (auto child: m.branch_children(c.branch)) {
+                branches_reached.push({child, distance});
+            }
+        }
+        // Depth-first walk over the branches reached so far.
+        while (!branches_reached.empty()) {
+            auto bi = branches_reached.top();
+            branches_reached.pop();
+
+            auto branch = bi.bid;
+            auto rem_dist = bi.distance;
+            // Only the first branch starts part-way along (at c.pos); later branches start at 0.
+            auto branch_length = e.branch_length(branch);
+            auto prox_pos = first_branch*c.pos;
+            auto dist_pos = rem_dist / branch_length + prox_pos;
+            // Either the interval ends on this branch, or it covers the rest and spills into the children.
+            if (dist_pos <= 1) {
+                L.push_back({branch, prox_pos, dist_pos});
+            } else {
+                L.push_back({branch, prox_pos, 1});
+                rem_dist = rem_dist - (1 - prox_pos)*branch_length;
+                for (auto child: m.branch_children(branch)) {
+                    branches_reached.push({child, rem_dist});
+                }
+            }
+            first_branch = false;
+        }
+    }
+    return remove_covered_points(remove_cover(L, m), m);
+}
+
+std::ostream& operator<<(std::ostream& o, const distal_interval_& d) {
+    return o << "(distal_interval: " << d.start << ", " << d.distance << ")";
+}
+
+// Region with all segments proximal from another region
+
+struct proximal_interval_ {
+    locset end;
+    double distance; //um
+};
+
+region proximal_interval(locset end, double distance) {
+    return region(proximal_interval_{end, distance});
+}
+
+mcable_list thingify_(const proximal_interval_& reg, const mprovider& p) {
+    const auto& m = p.morphology();
+    const auto& e = p.embedding();
+    // Builds, for every end location, the cables reached by moving up to reg.distance towards the root.
+    std::vector<mcable> L;
+
+    auto start = thingify(reg.end, p);
+    auto distance = reg.distance;
+
+    for (auto c: start) {
+        auto branch = c.branch;
+        auto branch_length = e.branch_length(branch);
+        auto rem_dist = distance;
+
+        auto dist_pos = c.pos;
+        auto prox_pos = dist_pos - distance / branch_length;
+        // prox_pos < 0: the interval runs past the start of this branch and continues on its parent.
+        while (prox_pos < 0) {
+            L.push_back({branch, 0, dist_pos});
+
+            rem_dist = rem_dist - dist_pos*branch_length;
+
+            branch = m.branch_parent(branch);
+            if (branch == mnpos) {
+                break;
+            }
+            branch_length = e.branch_length(branch);   // refresh: the next subtraction must use this branch's length
+            dist_pos = 1;
+            prox_pos = dist_pos - rem_dist / branch_length;
+        }
+        if (branch != mnpos) {   // interval ended inside a branch rather than running off the root
+            L.push_back({branch, prox_pos, dist_pos});
+        }
+    }
+    return remove_cover(L, m);
+}
+
+std::ostream& operator<<(std::ostream& o, const proximal_interval_& d) {
+    return o << "(proximal_interval: " << d.end << ", " << d.distance << ")";   // was mislabelled as distal_interval
+}
+
+mcable_list radius_cmp(const mprovider& p, region r, double v, comp_op op) {
+    const auto& e = p.embedding();
+    // Collects the parts of region r that overlap the cables returned by the embedding's radius comparison.
+    std::vector<mcable> L;
+    auto reg = thingify(r, p);
+    auto val = v;
+    for (auto c: reg) {
+        for (auto r: e.radius_cmp(c.branch, val, op)) {   // note: this r shadows the region parameter r
+            if (is_disjoint(c, r)) continue;
+            L.push_back(make_intersection(c, r));   // keep only the overlap with the region's cable c
+        }
+    }
+    return remove_cover(L, p.morphology());
+}
+
+// Region with all segments with radius less than r
+struct radius_lt_ {
+    region reg;
+    double val; //um
+};
+
+region radius_lt(region reg, double val) {
+    return region(radius_lt_{reg, val});
+}
+
+mcable_list thingify_(const radius_lt_& r, const mprovider& p) {
+    return radius_cmp(p, r.reg, r.val, comp_op::lt);
+}
+
+std::ostream& operator<<(std::ostream& o, const radius_lt_& r) {
+    return o << "(radius_lt: " << r.reg << ", " << r.val << ")";
+}
+
+// Region with all segments with radius less than or equal to r
+struct radius_le_ {
+    region reg;
+    double val; //um
+};
+
+region radius_le(region reg, double val) {
+    return region(radius_le_{reg, val});
+}
 
+mcable_list thingify_(const radius_le_& r, const mprovider& p) {
+    return radius_cmp(p, r.reg, r.val, comp_op::le);
+}
+
+std::ostream& operator<<(std::ostream& o, const radius_le_& r) {
+    return o << "(radius_le: " << r.reg << ", " << r.val << ")";
+}
+
+// Region with all segments with radius greater than r
+struct radius_gt_ {
+    region reg;
+    double val; //um
+};
+
+region radius_gt(region reg, double val) {
+    return region(radius_gt_{reg, val});
+}
+
+mcable_list thingify_(const radius_gt_& r, const mprovider& p) {
+    return radius_cmp(p, r.reg, r.val, comp_op::gt);
+}
+
+std::ostream& operator<<(std::ostream& o, const radius_gt_& r) {
+    return o << "(radius_gt: " << r.reg << ", " << r.val << ")";
+}
+
+// Region with all segments with radius greater than or equal to r
+struct radius_ge_ {
+    region reg;
+    double val; //um
+};
+
+region radius_ge(region reg, double val) {
+    return region(radius_ge_{reg, val});   // must wrap radius_ge_ (>=); radius_gt_ here silently excluded equality
+}
+
+mcable_list thingify_(const radius_ge_& r, const mprovider& p) {
+    return radius_cmp(p, r.reg, r.val, comp_op::ge);
+}
+
+std::ostream& operator<<(std::ostream& o, const radius_ge_& r) {
+    return o << "(radius_ge: " << r.reg << ", " << r.val << ")";
+}
+
+mcable_list projection_cmp(const mprovider& p, double v, comp_op op) {
+    const auto& m = p.morphology();
+    const auto& e = p.embedding();
+    // Gathers, over every branch of the morphology, the cables returned by the embedding's projection comparison.
+    std::vector<mcable> L;
+    auto val = v;
+    for (auto i: util::make_span(m.num_branches())) {
+        util::append(L, e.projection_cmp(i, val, op));
+    }
+    return remove_cover(L, p.morphology());
+}
+
+// Region with all segments with projection less than val
+struct projection_lt_{
+    double val; //um
+};
+
+region projection_lt(double val) {
+    return region(projection_lt_{val});
+}
+
+mcable_list thingify_(const projection_lt_& r, const mprovider& p) {
+    return projection_cmp(p, r.val, comp_op::lt);
+}
+
+std::ostream& operator<<(std::ostream& o, const projection_lt_& r) {
+    return o << "(projection_lt: " << r.val << ")";
+}
+
+// Region with all segments with projection less than or equal to val
+struct projection_le_{
+    double val; //um
+};
+
+region projection_le(double val) {
+    return region(projection_le_{val});
+}
+
+mcable_list thingify_(const projection_le_& r, const mprovider& p) {
+    return projection_cmp(p, r.val, comp_op::le);
+}
+
+std::ostream& operator<<(std::ostream& o, const projection_le_& r) {
+    return o << "(projection_le: " << r.val << ")";
+}
+
+// Region with all segments with projection greater than val
+struct projection_gt_ {
+    double val; //um
+};
+
+region projection_gt(double val) {
+    return region(projection_gt_{val});
+}
+
+mcable_list thingify_(const projection_gt_& r, const mprovider& p) {
+    return projection_cmp(p, r.val, comp_op::gt);
+}
+
+std::ostream& operator<<(std::ostream& o, const projection_gt_& r) {
+    return o << "(projection_gt: " << r.val << ")";
+}
+
+// Region with all segments with projection greater than or equal to val
+struct projection_ge_ {
+    double val; //um
+};
+
+region projection_ge(double val) {
+    return region(projection_ge_{val});
+}
+
+mcable_list thingify_(const projection_ge_& r, const mprovider& p) {
+    return projection_cmp(p, r.val, comp_op::ge);
+}
+
+std::ostream& operator<<(std::ostream& o, const projection_ge_& r) {
+    return o << "(projection_ge: " << r.val << ")";
+}
+
+region z_dist_from_soma_lt(double r0) {
+    if (r0 == 0) {
+        return {};
+    }
+    region lt = reg::projection_lt(r0);
+    region gt = reg::projection_gt(-r0);
+    return intersect(std::move(lt), std::move(gt));
+}
+
+region z_dist_from_soma_le(double r0) {
+    region le = reg::projection_le(r0);
+    region ge = reg::projection_ge(-r0);
+    return intersect(std::move(le), std::move(ge));
+}
+
+region z_dist_from_soma_gt(double r0) {
+    region lt = reg::projection_lt(-r0);
+    region gt = reg::projection_gt(r0);
+    return region{join(std::move(lt), std::move(gt))};
+}
+
+region z_dist_from_soma_ge(double r0) {
+    region lt = reg::projection_le(-r0);
+    region gt = reg::projection_ge(r0);
+    return region{join(std::move(lt), std::move(gt))};
+}
+
+// Named region.
 struct named_: region_tag {
     explicit named_(std::string name): name(std::move(name)) {}
     std::string name;
diff --git a/arbor/util/cbrng.hpp b/arbor/util/cbrng.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f78e44942c6f8ba882265f2b9d669c6541a3191
--- /dev/null
+++ b/arbor/util/cbrng.hpp
@@ -0,0 +1,41 @@
+#pragma once
+#include <vector>
+
+#include <Random123/threefry.h>
+#include <Random123/uniform.hpp>
+
+namespace arb {
+namespace util {
+
+inline std::vector<double> uniform(uint64_t seed, unsigned left, unsigned right) {
+    typedef r123::Threefry2x64 cbrng;
+    std::vector<double> r;   // values number left..right (inclusive) of the stream keyed by seed
+    // inline: this definition lives in a header and may be included from several translation units.
+    cbrng::key_type key = {{seed}};
+    cbrng::ctr_type ctr = {{0,0}};
+    cbrng g;
+    // Value number k is word k%2 of the generator output for counter k/2.
+    unsigned i = left;
+    if (i%2 && i<=right) {   // odd start: take only the second word of its counter block
+        ctr[0] = i/2;
+        cbrng::ctr_type rand = g(ctr, key);
+        r.push_back(r123::u01<double>(rand[1]));
+        ++i;
+    }
+    while (i < 2*((right+1)/2)) {   // whole counter blocks: both words are used
+        ctr[0] = i/2;
+        cbrng::ctr_type rand = g(ctr, key);
+        r.push_back(r123::u01<double>(rand[0]));
+        r.push_back(r123::u01<double>(rand[1]));
+        i += 2;
+    }
+    if (i<=right) {   // even `right`: take only the first word of the last block
+        ctr[0] = i/2;
+        cbrng::ctr_type rand = g(ctr, key);
+        r.push_back(r123::u01<double>(rand[0]));
+    }
+    return r;
+}
+
+}
+}
\ No newline at end of file
diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt
index 49aa903e132f58a154f43f7f64c3b22320ef7171..634a28898410c4bbd6d3217dc9d7a04655dd82f1 100644
--- a/ext/CMakeLists.txt
+++ b/ext/CMakeLists.txt
@@ -8,6 +8,12 @@ target_include_directories(ext-json INTERFACE json/single_include)
 add_library(ext-tclap INTERFACE)
 target_include_directories(ext-tclap INTERFACE tclap/include)
 
+# Random123 (DE Shaw Research) counter-based random number generators (header-only)
+
+add_library(ext-random123 INTERFACE)
+target_include_directories(ext-random123 INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/random123/include>)
+install(TARGETS ext-random123 EXPORT arbor-targets)
+
 # Google benchmark for microbenchmarks:
 
 check_git_submodule(gbench google-benchmark)
diff --git a/ext/random123/LICENSE b/ext/random123/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..c6094acaf20c26df516f01549101dabc7fde5c93
--- /dev/null
+++ b/ext/random123/LICENSE
@@ -0,0 +1,31 @@
+/** @page LICENSE
+Copyright 2010-2012, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
diff --git a/ext/random123/include/Random123/MicroURNG.hpp b/ext/random123/include/Random123/MicroURNG.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ea77325514c07ab3e4393467dba018e42c4f412
--- /dev/null
+++ b/ext/random123/include/Random123/MicroURNG.hpp
@@ -0,0 +1,146 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __MicroURNG_dot_hpp__
+#define __MicroURNG_dot_hpp__
+
+#include "features/compilerfeatures.h"
+#include <stdexcept>
+#include <limits>
+namespace r123{
+/**
+    Given a CBRNG whose ctr_type has an unsigned integral value_type,
+    MicroURNG<CBRNG>(c, k) is a type that satisfies the
+    requirements of a C++11 Uniform Random Number Generator.
+
+    The intended purpose is for a MicroURNG to be passed
+    as an argument to a C++11 Distribution, e.g.,
+    std::normal_distribution.  See examples/MicroURNG.cpp.
+
+    The MicroURNG functor has a period of "only"
+
+       ctr_type.size()*2^32,
+
+    after which it will silently repeat.
+
+    The high 32 bits of the highest word in the counter c, passed to
+    the constructor must be zero.  MicroURNG uses these bits to
+    "count".
+
+    Older versions of the library permitted a second template
+    parameter by which the caller could control the number of
+    bits devoted to the URNG's internal counter.  This flexibility
+    has been disabled because URNGs created with different
+    numbers of counter bits could, conceivably "collide".
+
+\code
+       typedef ?someCBRNG? RNG;
+       RNG::ctr_type c = ...; // under application control
+       RNG::key_type k = ...; // 
+       std::normal_distribution<float> nd;
+       MicroURNG<RNG> urng(c, k);
+       for(???){
+         ...
+         nd(urng);  // may be called repeatedly until the ctr_type.size()*2^32 period is used up
+         ...
+       }
+\endcode
+*/
+
+template<typename CBRNG>
+class MicroURNG{
+    // Adapts a counter-based RNG (CBRNG) to the C++11 URNG interface.
+    // A URNG needs only result_type, operator()(), min() and max();
+    // the other members (ctr_type, key_type, counter(), reset(), ...)
+    // are extras for callers that know they hold a MicroURNG.
+    // The top BITS bits of the counter's last word are reserved for n.
+public:
+    typedef CBRNG cbrng_type;
+    static const int BITS = 32;
+    typedef typename cbrng_type::ctr_type ctr_type;
+    typedef typename cbrng_type::key_type key_type;
+    typedef typename cbrng_type::ukey_type ukey_type;
+    typedef typename ctr_type::value_type result_type;
+
+    R123_STATIC_ASSERT( std::numeric_limits<result_type>::digits >= BITS, "The result_type must have at least 32 bits" );
+
+    result_type operator()(){
+        if(last_elem == 0){
+            // Buffer used up: OR n into the top BITS bits of the last counter word, then run the CBRNG.
+            const size_t W = std::numeric_limits<result_type>::digits;
+            ctr_type c = c0;
+            c[c0.size()-1] |= n<<(W-BITS);
+            rdata = b(c,k);
+            n++;
+            last_elem = rdata.size();
+        }
+        return rdata[--last_elem];   // buffered words are handed out from last to first
+    }
+    MicroURNG(cbrng_type _b, ctr_type _c0, ukey_type _uk) : b(_b), c0(_c0), k(_uk), n(0), last_elem(0) {
+        chkhighbits(c0);   // throws std::runtime_error if the reserved bits are set
+    }
+    MicroURNG(ctr_type _c0, ukey_type _uk) : b(), c0(_c0), k(_uk), n(0), last_elem(0) {
+        chkhighbits(c0);   // throws std::runtime_error if the reserved bits are set
+    }
+
+    // _Min and _Max work around a bug in the library shipped with MacOS Xcode 4.5.2.
+    // See the comment in conventional/Engine.hpp.
+    const static result_type _Min = 0;
+    const static result_type _Max = ~((result_type)0);
+
+    static R123_CONSTEXPR result_type min R123_NO_MACRO_SUBST () { return _Min; }
+    static R123_CONSTEXPR result_type max R123_NO_MACRO_SUBST () { return _Max; }
+    // extra methods:
+    const ctr_type& counter() const{ return c0; }
+    void reset(ctr_type _c0, ukey_type _uk){
+        chkhighbits(_c0);   // validate before mutating: if this throws, *this keeps its old state
+        c0 = _c0;
+        k = _uk;
+        n = 0;
+        last_elem = 0;
+    }
+
+private:
+    cbrng_type b;
+    ctr_type c0;
+    key_type k;
+    R123_ULONG_LONG n;
+    size_t last_elem;
+    ctr_type rdata;
+    static void chkhighbits(ctr_type c){   // takes a copy so callers can check a counter before storing it
+        result_type r = c[c.size()-1];
+        result_type mask = ((uint64_t)std::numeric_limits<result_type>::max R123_NO_MACRO_SUBST ())>>BITS;
+        if((r&mask) != r)
+            throw std::runtime_error("MicroURNG: c0, does not have high bits clear");
+    }
+};
+} // namespace r123
+#endif
diff --git a/ext/random123/include/Random123/ReinterpretCtr.hpp b/ext/random123/include/Random123/ReinterpretCtr.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..164a38b0a56bc09232d9a7caa8601c29ebc2b0a9
--- /dev/null
+++ b/ext/random123/include/Random123/ReinterpretCtr.hpp
@@ -0,0 +1,88 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __ReinterpretCtr_dot_hpp__
+#define __ReinterpretCtr_dot_hpp__
+
+#include "features/compilerfeatures.h"
+#include <cstring>
+
+namespace r123{
+/*!
+  ReinterpretCtr uses memcpy to map back and forth
+  between a CBRNG's ctr_type and the specified ToType.  For example,
+  after:
+
+    typedef ReinterpretCtr<r123array4x32, Philox2x64> G;
+
+  G is a bona fide CBRNG with ctr_type r123array4x32.
+
+  WARNING:  ReinterpretCtr is endian dependent.  The
+  values returned by G, declared as above,
+  will depend on the endianness of the machine on which it runs.
+ */
+
+template <typename ToType, typename CBRNG>
+struct ReinterpretCtr{
+    typedef typename CBRNG::ctr_type bctype;    // counter type of the wrapped generator
+    typedef typename CBRNG::key_type key_type;
+    typedef typename CBRNG::ukey_type ukey_type;
+    typedef ToType ctr_type;                    // counter type presented to callers
+    R123_STATIC_ASSERT(sizeof(ToType) == sizeof(bctype) && sizeof(typename bctype::value_type) != 16, 
+                       "ReinterpretCtr:  sizeof(ToType) is not the same as sizeof(CBRNG::ctr_type) or CBRNG::ctr_type::value_type looks like it might be __m128i");
+    // Why 16-byte value_types are rejected (Sep 2011):
+    // converting to and from __m128i safely is surprisingly hard.
+    // With a CBRNG whose ctr_type is r123array1xm128i, gcc4.6
+    // optimizes away the memcpys in operator() below, inlines the
+    // call, and emits an aesenclast whose destination operand is an
+    // unaligned memory address -> segfault.  See
+    // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444
+    // MSVC also generates crashing code; a similar mechanism is
+    // suspected but has not been confirmed by debugging.  Making the
+    // base counter a mutable data member instead of a local of
+    // operator() 'fixed' gcc4.6 but not MSVC.
+    //
+    // Conclusion: stay away from __m128i for now.  The static
+    // assertion above is the simplest (if highly imprecise) way to
+    // do that.
+    // operator(): memcpy c into the wrapped counter type, run a
+    // default-constructed CBRNG on it, and memcpy the result back.
+    ctr_type  operator()(ctr_type c, key_type k){
+        bctype native;
+        std::memcpy(&native, &c, sizeof(c));
+        CBRNG rng;
+        native = rng(native, k);
+        std::memcpy(&c, &native, sizeof(native));
+        return c;
+    }
+};
+} // namespace r123
+#endif
diff --git a/ext/random123/include/Random123/aes.h b/ext/random123/include/Random123/aes.h
new file mode 100644
index 0000000000000000000000000000000000000000..3095fac37de5d051b602fb53eb887ebf1ac59863
--- /dev/null
+++ b/ext/random123/include/Random123/aes.h
@@ -0,0 +1,398 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __Random123_aes_dot_hpp__
+#define __Random123_aes_dot_hpp__
+
+#include "features/compilerfeatures.h"
+#include "array.h"
+
+/* Implement a bona fide AES block cipher.  It's minimally
+// checked against the test vector in FIPS-197 in ut_aes.cpp. */
+#if R123_USE_AES_NI
+
+/** @ingroup AESNI */
+typedef struct r123array1xm128i aesni1xm128i_ctr_t;   /* counter block; its __m128i is accessed as v[0].m */
+/** @ingroup AESNI */
+typedef struct r123array1xm128i aesni1xm128i_ukey_t;  /* user key held as a single __m128i (v[0].m) */
+/** @ingroup AESNI */
+typedef struct r123array4x32 aesni4x32_ukey_t;        /* user key held as four words v[0..3] */
+/** @ingroup AESNI */
+enum r123_enum_aesni1xm128i { aesni1xm128i_rounds = 10 };   /* rounds applied by aesni1xm128i(): nine aesenc plus one aesenclast */
+
+/** \cond HIDDEN_FROM_DOXYGEN */
+R123_STATIC_INLINE __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2) { /* One key-expansion step: temp1 is the previous round key, temp2 the matching _mm_aeskeygenassist_si128 result; returns the next round key. */
+    __m128i temp3; 
+    temp2 = _mm_shuffle_epi32 (temp2 ,0xff); /* broadcast the highest 32-bit lane of temp2 to all four lanes */
+    temp3 = _mm_slli_si128 (temp1, 0x4);     /* temp1 shifted left by 4 bytes (one lane) */
+    temp1 = _mm_xor_si128 (temp1, temp3);
+    temp3 = _mm_slli_si128 (temp3, 0x4);     /* shifted by 8 bytes in total */
+    temp1 = _mm_xor_si128 (temp1, temp3);
+    temp3 = _mm_slli_si128 (temp3, 0x4);     /* shifted by 12 bytes in total */
+    temp1 = _mm_xor_si128 (temp1, temp3);    /* lane i now holds the XOR of the original lanes 0..i */
+    temp1 = _mm_xor_si128 (temp1, temp2); 
+    return temp1; 
+}
+
+R123_STATIC_INLINE void aesni1xm128iexpand(aesni1xm128i_ukey_t uk, __m128i ret[11])
+{   /* Expands the 128-bit user key uk into the eleven round keys ret[0..10]. */
+    __m128i rkey = uk.v[0].m;
+    __m128i tmp2;
+    /* Each step derives the next key from the previous one; the second argument of _mm_aeskeygenassist_si128 is the round constant (0x01, 0x02, ..., 0x80, 0x1b, 0x36). */
+    ret[0] = rkey;   /* round key 0 is the user key itself */
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[1] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x2);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[2] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x4);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[3] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x8);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[4] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x10);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[5] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x20);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[6] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x40);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[7] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x80);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[8] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1b);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[9] = rkey;
+
+    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x36);
+    rkey = AES_128_ASSIST(rkey, tmp2);
+    ret[10] = rkey;   /* last of the eleven round keys */
+}
+/** \endcond */
+    
+#ifdef __cplusplus
+/** @ingroup AESNI */
+struct aesni1xm128i_key_t{
+    __m128i k[11];   // expanded schedule filled by aesni1xm128iexpand()
+    // Default key: the schedule expanded from an all-zero user key.
+    aesni1xm128i_key_t(){
+        aesni1xm128i_ukey_t zero;
+        zero.v[0].m = _mm_setzero_si128();
+        aesni1xm128iexpand(zero, k);
+    }
+    aesni1xm128i_key_t(const aesni1xm128i_ukey_t& uk){ aesni1xm128iexpand(uk, k); }
+    aesni1xm128i_key_t(const aesni4x32_ukey_t& uk){
+        aesni1xm128i_ukey_t packed;
+        packed.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);   // uk.v[0] goes to the lowest lane
+        aesni1xm128iexpand(packed, k);
+    }
+    aesni1xm128i_key_t& operator=(const aesni1xm128i_ukey_t& uk){
+        aesni1xm128iexpand(uk, k);
+        return *this;
+    }
+    aesni1xm128i_key_t& operator=(const aesni4x32_ukey_t& uk){
+        aesni1xm128i_ukey_t packed;
+        packed.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
+        aesni1xm128iexpand(packed, k);
+        return *this;
+    }
+    // Equal iff all eleven schedule entries compare equal.
+    bool operator==(const aesni1xm128i_key_t& rhs) const{
+        int i = 0;
+        for(; i<11; ++i){
+            // r123m128i has no constructor taking __m128i, so assign .m instead.
+            r123m128i mine;   mine.m = k[i];
+            r123m128i theirs; theirs.m = rhs.k[i];
+            if( mine != theirs ) break;
+        }
+        return i == 11;
+    }
+    bool operator!=(const aesni1xm128i_key_t& rhs) const{ return !(*this == rhs); }
+    // Writes the eleven entries separated by single spaces, with no trailing space.
+    friend std::ostream& operator<<(std::ostream& os, const aesni1xm128i_key_t& v){
+        r123m128i word;
+        for(int i=0; i<11; ++i){
+            if(i != 0) os << " ";
+            word.m = v.k[i];
+            os << word;
+        }
+        return os;
+    }
+    // Reads eleven r123m128i values into k[0..10].
+    friend std::istream& operator>>(std::istream& is, aesni1xm128i_key_t& v){
+        r123m128i word;
+        for(int i=0; i<11; ++i){
+            is >> word;
+            v.k[i] = word;
+        }
+        return is;
+    }
+};
+#else
+typedef struct { 
+    __m128i k[11]; 
+}aesni1xm128i_key_t;
+
+/** @ingroup AESNI */
+R123_STATIC_INLINE aesni1xm128i_key_t aesni1xm128ikeyinit(aesni1xm128i_ukey_t uk){
+    aesni1xm128i_key_t schedule;           /* C-only: returns the expanded key by value */
+    aesni1xm128iexpand(uk, schedule.k);    /* fills schedule.k[0..10] from uk */
+    return schedule;
+}
+#endif
+
+/** @ingroup AESNI */
+R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i(aesni1xm128i_ctr_t in, aesni1xm128i_key_t k) {
+    /* Encrypts the counter block `in` with the expanded key schedule k:
+       an initial key XOR, nine _mm_aesenc_si128 rounds and a final
+       _mm_aesenclast_si128 round. */
+    aesni1xm128i_ctr_t out;
+    __m128i state = _mm_xor_si128(k.k[0], in.v[0].m);
+    state = _mm_aesenc_si128(state, k.k[1]);
+    state = _mm_aesenc_si128(state, k.k[2]);
+    state = _mm_aesenc_si128(state, k.k[3]);
+    state = _mm_aesenc_si128(state, k.k[4]);
+    state = _mm_aesenc_si128(state, k.k[5]);
+    state = _mm_aesenc_si128(state, k.k[6]);
+    state = _mm_aesenc_si128(state, k.k[7]);
+    state = _mm_aesenc_si128(state, k.k[8]);
+    state = _mm_aesenc_si128(state, k.k[9]);
+    out.v[0].m = _mm_aesenclast_si128(state, k.k[10]);
+    return out;
+}
+
+/** @ingroup AESNI */
+R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i_R(unsigned R, aesni1xm128i_ctr_t in, aesni1xm128i_key_t k){ /* Returns aesni1xm128i(in, k). */
+    R123_ASSERT(R==10);   /* R is only checked here; the call below always runs the fixed 10-round cipher */
+    return aesni1xm128i(in, k);
+}
+
+
+/** @ingroup AESNI */
+typedef struct r123array4x32 aesni4x32_ctr_t;
+/** @ingroup AESNI */
+typedef aesni1xm128i_key_t aesni4x32_key_t;
+/** @ingroup AESNI */
+enum r123_enum_aesni4x32 { aesni4x32_rounds = 10 };
+/** @ingroup AESNI */
+R123_STATIC_INLINE aesni4x32_key_t aesni4x32keyinit(aesni4x32_ukey_t uk){
+    aesni4x32_key_t schedule;
+    aesni1xm128i_ukey_t packed;
+    packed.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);   /* uk.v[0] goes to the lowest lane */
+    aesni1xm128iexpand(packed, schedule.k);                              /* expand into the returned schedule */
+    return schedule;
+}
+
+/** @ingroup AESNI */
+/** The aesni4x32_R function provides a C API to the @ref AESNI "AESNI" CBRNG, allowing the number of rounds to be specified explicitly **/
+R123_STATIC_INLINE aesni4x32_ctr_t aesni4x32_R(unsigned int Nrounds, aesni4x32_ctr_t c, aesni4x32_key_t k){
+    aesni1xm128i_ctr_t block;
+    block.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);   /* pack the four words, c.v[0] lowest */
+    block = aesni1xm128i_R(Nrounds, block, k);
+    _mm_storeu_si128((__m128i*)&c.v[0], block.v[0].m);              /* unaligned store back over c */
+    return c;
+}
+
+#define aesni4x32_rounds aesni1xm128i_rounds
+
+/** The aesni4x32 macro provides a C API to the @ref AESNI "AESNI" CBRNG, uses the default number of rounds i.e. \c aesni4x32_rounds **/
+/** @ingroup AESNI */
+#define aesni4x32(c,k) aesni4x32_R(aesni4x32_rounds, c, k)
+
+#ifdef __cplusplus
+namespace r123{
+/** 
+@defgroup AESNI ARS and AESNI Classes and Typedefs
+
+The ARS4x32, ARS1xm128i, AESNI4x32 and AESNI1xm128i classes export the member functions, typedefs and
+operator overloads required by a @ref CBRNG "CBRNG" class.
+
+ARS1xm128i and AESNI1xm128i are based on the AES block cipher and rely on the AES-NI hardware instructions
+available on some new (2011) CPUs.
+
+The ARS1xm128i CBRNG and the use of AES for random number generation are described in 
+<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers:  As Easy as 1, 2, 3</i> </a>.
+Although it uses some cryptographic primitives, ARS1xm128i uses a cryptographically weak key schedule and is \b not suitable for cryptographic use.
+
+@class AESNI1xm128i
+@ingroup AESNI
+AESNI exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
+
+AESNI1xm128i uses the cryptographic AES round function, including the cryptographic key schedule.
+
+In contrast to the other CBRNGs in the Random123 library, the AESNI1xm128i_R::key_type is opaque
+and is \b not identical to the AESNI1xm128i_R::ukey_type.  Creating a key_type, using either the constructor
+or assignment operator, is significantly more time-consuming than running the bijection (hundreds
+of clock cycles vs. tens of clock cycles).
+
+AESNI1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which
+should occur only when the compiler is configured to generate AES-NI instructions (or
+when defaults are overridden by compile-time, compiler-command-line options).
+
+As of September 2011, the authors know of no statistical flaws with AESNI1xm128i.  It
+would be an event of major cryptographic note if any such flaws were ever found.
+*/
+struct AESNI1xm128i{   // functor wrapper around the aesni1xm128i() function
+    typedef aesni1xm128i_ctr_t ctr_type;
+    typedef aesni1xm128i_ukey_t ukey_type;
+    typedef aesni1xm128i_key_t key_type;    // expanded schedule; a different type from ukey_type
+    static const unsigned int rounds=10;
+    ctr_type operator()(ctr_type ctr, key_type key) const{   // returns ctr encrypted under key
+        return aesni1xm128i(ctr, key);
+    }
+};
+
+/* @class AESNI4x32 */
+struct AESNI4x32{   // functor wrapper around the aesni4x32() macro
+    typedef aesni4x32_ctr_t ctr_type;
+    typedef aesni4x32_ukey_t ukey_type;
+    typedef aesni4x32_key_t key_type;       // same type as aesni1xm128i_key_t
+    static const unsigned int rounds=10;
+    ctr_type operator()(ctr_type ctr, key_type key) const{   // expands to aesni4x32_R(aesni4x32_rounds, ctr, key)
+        return aesni4x32(ctr, key);
+    }
+};
+
+/** @ingroup AESNI
+    @class AESNI1xm128i_R
+
+AESNI1xm128i_R is provided for completeness, but is only instantiable with ROUNDS=10, in
+which case it is identical to AESNI1xm128i */
+template <unsigned ROUNDS=10>   // any value other than 10 fails the static assertion below
+struct AESNI1xm128i_R : public AESNI1xm128i{   // adds nothing to AESNI1xm128i beyond the ROUNDS check
+    R123_STATIC_ASSERT(ROUNDS==10, "AESNI1xm128i_R<R> is only valid with R=10");
+};
+
+/** @class AESNI4x32_R **/
+template <unsigned ROUNDS=10>   // any value other than 10 fails the static assertion below
+struct AESNI4x32_R : public AESNI4x32{   // adds nothing to AESNI4x32 beyond the ROUNDS check
+    R123_STATIC_ASSERT(ROUNDS==10, "AESNI4x32_R<R> is only valid with R=10");
+};
+} // namespace r123
+#endif /* __cplusplus */
+
+#endif /* R123_USE_AES_NI */
+
+#if R123_USE_AES_OPENSSL
+#include "string.h"
+#include <openssl/aes.h>
+typedef struct r123array16x8 aesopenssl16x8_ctr_t;
+typedef struct r123array16x8 aesopenssl16x8_ukey_t;
+#ifdef __cplusplus
+struct aesopenssl16x8_key_t{
+    AES_KEY k;   // OpenSSL AES key schedule (k.rounds and k.rd_key)
+    // Default key: schedule for an all-zero 128-bit user key.
+    aesopenssl16x8_key_t(){
+        aesopenssl16x8_ukey_t ukey={{}};
+        AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
+    }
+    aesopenssl16x8_key_t(const aesopenssl16x8_ukey_t& ukey){
+        AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
+    }
+    aesopenssl16x8_key_t& operator=(const aesopenssl16x8_ukey_t& ukey){
+        AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
+        return *this;
+    }
+    bool operator==(const aesopenssl16x8_key_t& rhs) const{
+        return (k.rounds == rhs.k.rounds) && 0==::memcmp(&k.rd_key[0], &rhs.k.rd_key[0], (k.rounds+1) * 4 * sizeof(uint32_t));
+    }
+    bool operator!=(const aesopenssl16x8_key_t& rhs) const{ return !(*this == rhs); }
+    // Writes rounds followed by the 4*(rounds+1) schedule words, space separated.
+    friend std::ostream& operator<<(std::ostream& os, const aesopenssl16x8_key_t& v){
+        os << v.k.rounds;
+        const unsigned int *p = &v.k.rd_key[0];
+        for(int i=0; i<(v.k.rounds+1); ++i, p += 4)
+            os << " " << p[0] << " " << p[1] << " " << p[2] << " " << p[3];
+        return os;
+    }
+    // Reads the format written by operator<<.  A round count outside [0, AES_MAXNR]
+    // would index past rd_key, so it sets failbit and leaves v unmodified.
+    friend std::istream& operator>>(std::istream& is, aesopenssl16x8_key_t& v){
+        int nr = 0;
+        if(!(is >> nr) || nr < 0 || nr > AES_MAXNR){ is.setstate(std::ios_base::failbit); return is; }
+        v.k.rounds = nr;
+        unsigned int *p = &v.k.rd_key[0];
+        for(int i=0; i<=nr; ++i, p += 4)
+            is >> p[0] >> p[1] >> p[2] >> p[3];
+        return is;
+    }
+};
+#else
+typedef struct aesopenssl16x8_key_t{
+    AES_KEY k;
+}aesopenssl16x8_key_t;
+R123_STATIC_INLINE struct aesopenssl16x8_key_t aesopenssl16x8keyinit(aesopenssl16x8_ukey_t uk){
+    aesopenssl16x8_key_t schedule;   /* C-only: returns the OpenSSL schedule by value */
+    AES_set_encrypt_key((const unsigned char *)&uk.v[0], 128, &schedule.k);   /* 128-bit key read from uk.v */
+    return schedule;
+}
+#endif
+
+/* Encrypts the block in ctr with the OpenSSL schedule in key; despite the _R suffix no round count is taken. */
+R123_STATIC_INLINE R123_FORCE_INLINE(aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key));
+R123_STATIC_INLINE aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key){
+    aesopenssl16x8_ctr_t cipher;
+    AES_encrypt((const unsigned char*)&ctr.v[0], (unsigned char *)&cipher.v[0], &key.k);
+    return cipher;
+}
+
+#define aesopenssl16x8_rounds 10 /* AES-128 round count, spelled out so it does not depend on the R123_USE_AES_NI section */
+#define aesopenssl16x8(c,k) aesopenssl16x8_R(c, k) /* aesopenssl16x8_R takes (ctr, key) and no round count */
+
+#ifdef __cplusplus
+namespace r123{
+struct AESOpenSSL16x8{   // CBRNG functor: block encryption through OpenSSL's AES_encrypt
+    typedef aesopenssl16x8_ctr_t ctr_type;
+    typedef aesopenssl16x8_ukey_t ukey_type;
+    typedef aesopenssl16x8_key_t key_type;
+    static const unsigned int rounds=10;
+    ctr_type operator()(const ctr_type& in, const key_type& k){
+        ctr_type encrypted;
+        AES_encrypt((const unsigned char *)&in[0], (unsigned char *)&encrypted[0], &k.k);
+        return encrypted;
+    }
+};
+} // namespace r123
+#endif /* __cplusplus */
+#endif /* R123_USE_AES_OPENSSL */
+
+#endif
diff --git a/ext/random123/include/Random123/array.h b/ext/random123/include/Random123/array.h
new file mode 100644
index 0000000000000000000000000000000000000000..c560c3fee1a860f5c8f1785c1b181961be285226
--- /dev/null
+++ b/ext/random123/include/Random123/array.h
@@ -0,0 +1,348 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef _r123array_dot_h__
+#define _r123array_dot_h__
+#include "features/compilerfeatures.h"
+#include "features/sse.h"
+
+#if !defined(__cplusplus) || defined(__METAL_MACOS__)
+#define CXXMETHODS(_N, W, T)
+#define CXXOVERLOADS(_N, W, T)
+#define CXXMETHODS_REQUIRING_STL
+#else
+
+#include <stddef.h>
+#include <algorithm>
+#include <stdexcept>
+#include <iterator>
+#include <limits>
+#include <iostream>
+
+/** @defgroup arrayNxW The r123arrayNxW classes 
+
+    Each r123arrayNxW is a fixed-size array of N W-bit unsigned integers.
+    It is functionally equivalent to the C++11 std::array<uintW_t, N>,
+    but does not require C++11 features or libraries.
+
+    In addition to meeting most of the requirements of a Container,
+    it also has a member function, incr(), which increments the zero-th
+    element and carries overflows into higher indexed elements.  Thus,
+    by using incr(), sequences of up to 2^(N*W) distinct values
+    can be produced.
+
+    If SSE is supported by the compiler, then the class
+    r123array1xm128i is also defined, in which the data member is an
+    array of one r123m128i object.
+
+    When compiling with __CUDA_ARCH__ defined, the reverse iterator
+    methods (rbegin, rend, crbegin, crend) are not defined because
+    CUDA does not support std::reverse_iterator.
+
+*/
+
+/** @cond HIDDEN_FROM_DOXYGEN */
+
+template <typename value_type>
+inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
+    value_type acc=0; /* p32[0] supplies the least significant 32 bits */
+    for(size_t w=0, nwords=(3+sizeof(value_type))/4; w<nwords; ++w)
+        acc |= ((value_type)(p32[w])) << (32*w);
+    return acc;
+}
+
+/** @endcond */
+
+#ifdef __CUDA_ARCH__
+/* CUDA can't handle std::reverse_iterator.  We *could* implement it
+   ourselves, but let's not bother until somebody really feels a need to
+   reverse-iterate through an r123array; here the macro expands to nothing. */
+#define CXXMETHODS_REQUIRING_STL
+#else
+#define  CXXMETHODS_REQUIRING_STL \
+    public: \
+    typedef std::reverse_iterator<iterator> reverse_iterator;           \
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
+    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); }                         \
+    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
+    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); }        \
+    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
+    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
+    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } 
+#endif
+
+// Work-alike methods and typedefs modeled on std::array, plus the counter helpers incr() and seed():
+#define CXXMETHODS(_N, W, T)                                            \
+    typedef T value_type;                                               \
+    typedef T* iterator;                                                \
+    typedef const T* const_iterator;                                    \
+    typedef value_type& reference;                                      \
+    typedef const value_type& const_reference;                          \
+    typedef size_t size_type;                                           \
+    typedef ptrdiff_t difference_type;                                  \
+    typedef T* pointer;                                                 \
+    typedef const T* const_pointer;                                     \
+    /* Boost.array has static_size.  C++11 specializes tuple_size */    \
+    enum {static_size = _N};                                            \
+    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];}                     \
+    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];}        \
+    R123_CUDA_DEVICE reference at(size_type i){ if(i >=  _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
+    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >=  _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
+    R123_CUDA_DEVICE size_type size() const { return  _N; }                              \
+    R123_CUDA_DEVICE size_type max_size() const { return _N; }                           \
+    R123_CUDA_DEVICE bool empty() const { return _N==0; };                               \
+    R123_CUDA_DEVICE iterator begin() { return &v[0]; }                                  \
+    R123_CUDA_DEVICE iterator end() { return &v[_N]; }                                   \
+    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; }                      \
+    R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; }                       \
+    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; }                     \
+    R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; }                      \
+    R123_CUDA_DEVICE pointer data(){ return &v[0]; }                                     \
+    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; }                         \
+    R123_CUDA_DEVICE reference front(){ return v[0]; }                                   \
+    R123_CUDA_DEVICE const_reference front() const{ return v[0]; }                       \
+    R123_CUDA_DEVICE reference back(){ return v[_N-1]; }                                 \
+    R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; }                     \
+    R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
+	/* CUDA3 does not have std::equal */ \
+	for (size_t i = 0; i < _N; ++i) \
+	    if (v[i] != rhs.v[i]) return false; \
+	return true; \
+    } \
+    R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
+    /* CUDA3 does not have std::fill_n */ \
+    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
+    R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
+	/* CUDA3 does not have std::swap_ranges */ \
+	for (size_t i = 0; i < _N; ++i) { \
+	    T tmp = v[i]; \
+	    v[i] = rhs.v[i]; \
+	    rhs.v[i] = tmp; \
+	} \
+    } \
+    R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){                         \
+        /* This test is tricky because we're trying to avoid spurious   \
+           complaints about illegal shifts, yet still be compile-time   \
+           evaluated. */                                                \
+        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
+            return incr_carefully(n);                                   \
+        if(n==1){                                                       \
+            ++v[0];                                                     \
+            if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this;   \
+        }else{                                                          \
+            v[0] += n;                                                  \
+            if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this;  \
+        }                                                               \
+        /* We expect that the N==?? tests will be                       \
+           constant-folded/optimized away by the compiler, so only the  \
+           overflow tests (!!v[i]) remain to be done at runtime.  For  \
+           small values of N, it would be better to do this as an       \
+           unconditional sequence of adc.  An experiment/optimization   \
+           for another day...                                           \
+           N.B.  The weird subscripting: v[_N>3?3:0] is to silence      \
+           a spurious error from icpc                                   \
+           */                                                           \
+        ++v[_N>1?1:0];                                                  \
+        if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
+        ++v[_N>2?2:0];                                                  \
+        if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this;  \
+        ++v[_N>3?3:0];                                                  \
+        for(size_t i=4; i<_N; ++i){                                     \
+            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this;        \
+            ++v[i];                                                     \
+        }                                                               \
+        return *this;                                                   \
+    }                                                                   \
+    /* seed(SeedSeq) would be a constructor if having a constructor */  \
+    /* didn't cause headaches with defaults */                          \
+    template <typename SeedSeq>                                         \
+    R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){      \
+        r123array##_N##x##W ret;                                        \
+        const size_t Ngen = _N*((3+sizeof(value_type))/4);              \
+        uint32_t u32[Ngen];                                             \
+        uint32_t *p32 = &u32[0];                                        \
+        ss.generate(&u32[0], &u32[Ngen]);                               \
+        for(size_t i=0; i<_N; ++i){                                     \
+            ret.v[i] = assemble_from_u32<value_type>(p32);              \
+            p32 += (3+sizeof(value_type))/4;                            \
+        }                                                               \
+        return ret;                                                     \
+    }                                                                   \
+protected:                                                              \
+    R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
+        /* n may be greater than the maximum value of a single value_type */ \
+        value_type vtn;                                                 \
+        vtn = n;                                                        \
+        v[0] += n;                                                      \
+        const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
+        for(size_t i=1; i<_N; ++i){                                     \
+            if(rshift){                                                 \
+                n >>= rshift;                                           \
+            }else{                                                      \
+                n=0;                                                    \
+            }                                                           \
+            if( v[i-1] < vtn )                                          \
+                ++n;                                                    \
+            if( n==0 ) break;                                           \
+            vtn = n;                                                    \
+            v[i] += n;                                                  \
+        }                                                               \
+        return *this;                                                   \
+    }                                                                   \
+
+/** @cond HIDDEN_FROM_DOXYGEN */
+
+// There are several tricky considerations for the insertion and extraction
+// operators:
+// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
+//   not as 16 bytes.
+// - we would like to be able to print r123array1xm128i.
+// - we do not want an int conversion operator in r123m128i because it causes
+//   lots of ambiguity problems with automatic promotions.
+// Solution: r123arrayinsertable and r123arrayextractable
+
+template<typename T>
+struct r123arrayinsertable{ /* output adaptor: streams the referenced element unchanged */
+    const T& v;
+    r123arrayinsertable(const T& elem) : v(elem) {}
+    friend std::ostream& operator<<(std::ostream& ostr, const r123arrayinsertable<T>& wrapped){
+        return ostr << wrapped.v;
+    }
+};
+
+template<>
+struct r123arrayinsertable<uint8_t>{ /* bytes are printed as integers, not as characters */
+    const uint8_t& v;
+    r123arrayinsertable(const uint8_t& elem) : v(elem) {}
+    friend std::ostream& operator<<(std::ostream& ostr, const r123arrayinsertable<uint8_t>& wrapped){
+        return ostr << static_cast<int>(wrapped.v);
+    }
+};
+
+template<typename T>
+struct r123arrayextractable{ /* input adaptor: extracts straight into the referenced element */
+    T& v;
+    r123arrayextractable(T& elem) : v(elem) {}
+    friend std::istream& operator>>(std::istream& istr, r123arrayextractable<T>& target){
+        return istr >> target.v;
+    }
+};
+
+template<>
+struct r123arrayextractable<uint8_t>{ /* reads a byte as an integer, not as a character */
+    uint8_t& v;
+    r123arrayextractable(uint8_t& t_) : v(t_) {} 
+    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
+        int i = 0; /* defined even if extraction fails (pre-C++11 streams leave the target untouched) */
+        is >>  i;
+        t.v = i;
+        return is;
+    }
+};
+/** @endcond */
+
+#define CXXOVERLOADS(_N, W, T)                                          \
+/* Insertion: the _N elements, separated by single spaces. */           \
+inline std::ostream& operator<<(std::ostream& ostr, const r123array##_N##x##W& arr){ \
+    ostr << r123arrayinsertable<T>(arr.v[0]);                           \
+    for(size_t k=1; k<_N; ++k)                                          \
+        ostr << " " << r123arrayinsertable<T>(arr.v[k]);                \
+    return ostr;                                                        \
+}                                                                       \
+/* Extraction: reads the _N elements in index order. */                 \
+inline std::istream& operator>>(std::istream& istr, r123array##_N##x##W& arr){ \
+    for(size_t k=0; k<_N; ++k){                                         \
+        r123arrayextractable<T> slot(arr.v[k]);                         \
+        istr >> slot;                                                   \
+    }                                                                   \
+    return istr;                                                        \
+}                                                                       \
+/* Alias in namespace r123 for the global struct. */                    \
+namespace r123{                                                         \
+ typedef r123array##_N##x##W Array##_N##x##W;                           \
+}
+                                                                        
+#endif /* __cplusplus */
+
+/* _r123array_tpl expands to a declaration of struct r123arrayNxW.
+
+   In C, it's nothing more than a struct containing an array of N
+   objects of type T.
+
+   In C++ it's the same, but endowed with an assortment of member
+   functions, typedefs and friends.  In C++, r123arrayNxW looks a lot
+   like std::array<T,N>, has most of the capabilities of a container,
+   and satisfies the requirements outlined in conventional/Engine.hpp
+   for counter and key types.  ArrayNxW, in the r123 namespace is
+   a typedef equivalent to r123arrayNxW.
+*/
+
+#define _r123array_tpl(_N, W, T)                   \
+    /** @ingroup arrayNxW */                        \
+    /** @see arrayNxW */                            \
+struct r123array##_N##x##W{                         \
+ T v[_N];                                       \
+ CXXMETHODS(_N, W, T)                           \
+ CXXMETHODS_REQUIRING_STL                       \
+};                                              \
+                                                \
+CXXOVERLOADS(_N, W, T)
+
+
+_r123array_tpl(1, 32, uint32_t)  /* r123array1x32 */
+_r123array_tpl(2, 32, uint32_t)  /* r123array2x32 */
+_r123array_tpl(4, 32, uint32_t)  /* r123array4x32 */
+_r123array_tpl(8, 32, uint32_t)  /* r123array8x32 */
+
+#if R123_USE_64BIT
+_r123array_tpl(1, 64, uint64_t)  /* r123array1x64 */
+_r123array_tpl(2, 64, uint64_t)  /* r123array2x64 */
+_r123array_tpl(4, 64, uint64_t)  /* r123array4x64 */
+#endif
+
+_r123array_tpl(16, 8, uint8_t)  /* r123array16x8 for ARSsw, AESsw */
+
+#if R123_USE_SSE
+_r123array_tpl(1, m128i, r123m128i) /* r123array1xm128i for ARSni, AESni */
+#endif
+
+/* C++ code can simply use sizeof(a::value_type).  C has no such member
+   typedef, so the element width in bits of an r123arrayNxW type `a` is
+   taken from sizeof applied (unevaluated) to the first element of v,
+   reached through a null pointer of type a*: */
+#define R123_W(a)   (8*sizeof(*((a *)0)->v))
+
+/** @namespace r123
+  Most of the Random123 C++ API is contained in the r123 namespace. 
+*/
+
+#endif
+
diff --git a/ext/random123/include/Random123/ars.h b/ext/random123/include/Random123/ars.h
new file mode 100644
index 0000000000000000000000000000000000000000..a027b6fe043e55da528569cf4b22aa10296fd6a8
--- /dev/null
+++ b/ext/random123/include/Random123/ars.h
@@ -0,0 +1,204 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __Random123_ars_dot_hpp__
+#define __Random123_ars_dot_hpp__
+
+#include "features/compilerfeatures.h"
+#include "array.h"
+
+#if R123_USE_AES_NI
+
+#ifndef ARS1xm128i_DEFAULT_ROUNDS
+#define ARS1xm128i_DEFAULT_ROUNDS 7 /* used only when the macro is not already defined */
+#endif
+
+/** @ingroup AESNI  Default round count used by the ars1xm128i() macro. */
+enum r123_enum_ars1xm128i {ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS};
+
+/* ARS1xm128i with Weyl keys.  Fast, and Crush-resistant, but NOT CRYPTO. */
+/** @ingroup AESNI */
+typedef struct r123array1xm128i ars1xm128i_ctr_t;
+/** @ingroup AESNI */
+typedef struct r123array1xm128i ars1xm128i_key_t;
+/** @ingroup AESNI */
+typedef struct r123array1xm128i ars1xm128i_ukey_t;
+/** @ingroup AESNI  The user key is returned unchanged as the key. */
+R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
+/** @ingroup AESNI  Xors \c in with \c k, applies one aesenc per unit of Nrounds above 1 (at most 9), then one aesenclast; every round key is the previous key plus kweyl (64-bit lane-wise add). */
+R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
+    __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B), /* sqrt(3) - 1.0 */
+                                   R123_64BIT(0x9E3779B97F4A7C15)); /* golden ratio */
+    /* N.B.  the aesenc instructions do the xor *after*,
+       so if we want to follow the AES pattern, we
+       have to do the initial xor explicitly */
+    __m128i kk = k.v[0].m;
+    __m128i v = _mm_xor_si128(in.v[0].m, kk);
+    ars1xm128i_ctr_t ret;
+    R123_ASSERT(Nrounds<=10);
+    if( Nrounds>1 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>2 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>3 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>4 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>5 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>6 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>7 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>8 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    if( Nrounds>9 ){
+        kk = _mm_add_epi64(kk, kweyl);
+        v = _mm_aesenc_si128(v, kk);
+    }
+    kk = _mm_add_epi64(kk, kweyl);
+    v = _mm_aesenclast_si128(v, kk);
+    ret.v[0].m = v;
+    return ret;
+}
+
+/** @def ars1xm128i
+@ingroup AESNI
+The ars1xm128i macro provides a C interface to the @ref AESNI "ARS" CBRNG with the default number of rounds, i.e. \c ars1xm128i_rounds **/
+#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k)
+
+/** @ingroup AESNI  Counter type of the ars4x32 interface. */
+typedef struct r123array4x32 ars4x32_ctr_t;
+/** @ingroup AESNI  Key type of the ars4x32 interface. */
+typedef struct r123array4x32 ars4x32_key_t;
+/** @ingroup AESNI  User-key type of the ars4x32 interface. */
+typedef struct r123array4x32 ars4x32_ukey_t;
+/** @ingroup AESNI  Default round count used by the ars4x32() macro. */
+enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
+/** @ingroup AESNI  The user key is returned unchanged as the key. */
+R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
+/** @ingroup AESNI */
+R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
+    ars1xm128i_key_t wide_key;
+    ars1xm128i_ctr_t wide_ctr, mixed;
+    /* pack the four 32-bit words so that element 0 lands in the lowest lane */
+    wide_key.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
+    wide_ctr.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
+    mixed = ars1xm128i_R(Nrounds, wide_ctr, wide_key);
+    _mm_storeu_si128((__m128i*)&c.v[0], mixed.v[0].m); return c;
+}
+
+/** @def ars4x32
+@ingroup AESNI
+The ars4x32 macro provides a C interface to the @ref AESNI "ARS" CBRNG with the default number of rounds, i.e. \c ars4x32_rounds **/
+#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k)
+
+#ifdef __cplusplus
+namespace r123{
+/** 
+@ingroup AESNI
+
+ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
+
+ARS1xm128i uses the cryptographic AES round function, but a @b non-cryptographic key schedule
+to save time and space.
+
+ARS1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which
+should occur only when the compiler is configured to generate AES-NI instructions (or
+when defaults are overridden by compile-time, compiler-command-line options).
+
+The template argument, ROUNDS, is the number of times the ARS round
+functions will be applied.
+
+As of September 2011, the authors know of no statistical flaws with
+ROUNDS=5 or more.
+
+@class ARS1xm128i_R
+
+*/
+template<unsigned int ROUNDS>
+struct ARS1xm128i_R{
+    static const unsigned int rounds=ROUNDS;
+    typedef ars1xm128i_key_t ukey_type;
+    typedef ars1xm128i_key_t key_type;
+    typedef ars1xm128i_ctr_t ctr_type;
+    R123_FORCE_INLINE(ctr_type operator()(ctr_type c, key_type k) const){
+        return ars1xm128i_R(ROUNDS, c, k); /* forwards to the C entry point with the compile-time round count */
+    }
+};
+
+/** @class ARS4x32_R
+    @ingroup AESNI
+*/
+
+template<unsigned int ROUNDS>
+struct ARS4x32_R{
+    static const unsigned int rounds=ROUNDS;
+    typedef ars4x32_key_t ukey_type;
+    typedef ars4x32_key_t key_type;
+    typedef ars4x32_ctr_t ctr_type;
+    R123_FORCE_INLINE(ctr_type operator()(ctr_type c, key_type k) const){
+        return ars4x32_R(ROUNDS, c, k); /* forwards to the C entry point with the compile-time round count */
+    }
+};
+/**
+@ingroup AESNI
+
+@typedef ARS1xm128i
+  ARS1xm128i is ARS1xm128i_R<ars1xm128i_rounds>, i.e. ARS1xm128i_R<7> by default.
+  With 7 rounds, the ARS1xm128i CBRNG has a considerable safety margin over
+  the minimum number of rounds with no known statistical flaws, but still has
+  excellent performance. */
+typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
+typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
+} // namespace r123
+
+#endif /* __cplusplus */
+
+#endif /* R123_USE_AES_NI */
+
+#endif
diff --git a/ext/random123/include/Random123/boxmuller.hpp b/ext/random123/include/Random123/boxmuller.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9c91cf879109133a80844c7c69f26d8e448578fa
--- /dev/null
+++ b/ext/random123/include/Random123/boxmuller.hpp
@@ -0,0 +1,139 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// This file implements the Box-Muller method for generating gaussian
+// random variables (GRVs).  Box-Muller has the advantage of
+// deterministically requiring exactly two uniform random variables as
+// input and producing exactly two GRVs as output, which makes it
+// especially well-suited to the counter-based generators in
+// Random123.  Other methods (e.g., Ziggurat, polar) require an
+// indeterminate number of inputs for each output and so require a
+// 'MicroURNG' to be used with Random123.  The down side of Box-Muller
+// is that it calls sincos, log and sqrt, which may be slow.  However,
+// on GPUs, these functions are remarkably fast, which makes
+// Box-Muller the fastest GRV generator we know of on GPUs.
+//
+// This file exports two structs and one overloaded function,
+// all in the r123 namespace:
+//   struct r123::float2{ float x,y; }
+//   struct r123::double2{ double x,y; }
+//
+//   r123::float2  r123::boxmuller(uint32_t u0, uint32_t u1);
+//   r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1);
+//  
+// float2 and double2 are identical to their synonymous global-
+// namespace structures in CUDA.
+//
+// This file may not be as portable, and has not been tested as
+// rigorously as other files in the library, e.g., the generators.
+// Nevertheless, we hope it is useful and we encourage developers to
+// copy it and modify it for their own use.  We invite comments and
+// improvements.
+
+#ifndef _r123_BOXMULLER_HPP__
+#define _r123_BOXMULLER_HPP__
+
+#include <Random123/features/compilerfeatures.h>
+#include <Random123/uniform.hpp>
+#include <math.h>
+
+namespace r123{
+
+#if !defined(__CUDACC__)
+typedef struct { float x, y; } float2;   /* no __CUDACC__: define our own pair types */
+typedef struct { double x, y; } double2;
+#else
+typedef ::float2 float2;   /* __CUDACC__ defined: alias the global-namespace types */
+typedef ::double2 double2;
+#endif
+
+#if !defined(R123_NO_SINCOS) && defined(__APPLE__)
+/* MacOS X 10.10.5 (2015) doesn't have sincosf; a user-supplied R123_NO_SINCOS takes precedence */
+#define R123_NO_SINCOS 1
+#endif
+
+#if R123_NO_SINCOS /* enable this if sincos and sincosf are not in the math library */
+R123_CUDA_DEVICE R123_STATIC_INLINE void sincosf(float x, float *s, float *c) {
+    const float sine = sinf(x), cosine = cosf(x); /* fallback: two separate libm calls */
+    *s = sine; *c = cosine;
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE void sincos(double x, double *s, double *c) {
+    const double sine = sin(x), cosine = cos(x); /* fallback: two separate libm calls */
+    *s = sine; *c = cosine;
+}
+#endif /* sincos is not in the math library */
+
+#if !defined(CUDART_VERSION) || CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */
+
+R123_CUDA_DEVICE R123_STATIC_INLINE void sincospif(float x, float *s, float *c){
+    const float angle = 3.1415926535897932f*x; /* scale by pi, then defer to sincosf */
+    sincosf(angle, s, c);
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE void sincospi(double x, double *s, double *c) {
+    const double angle = 3.1415926535897932*x; /* scale by pi, then defer to sincos */
+    sincos(angle, s, c);
+}
+#endif /* sincospi is not in math lib */
+
+/*
+ * take two 32bit unsigned random values and return a float2 with
+ * two random floats in a normal distribution via a Box-Muller transform
+ */
+R123_CUDA_DEVICE R123_STATIC_INLINE float2 boxmuller(uint32_t u0, uint32_t u1) {
+    float2 out;
+    const float unit = u01<float>(u1); // u01 is guaranteed to avoid 0.
+    const float radius = sqrtf(-2.f * logf(unit));
+    sincospif(uneg11<float>(u0), &out.x, &out.y); // x receives the sine, y the cosine
+    out.x *= radius;
+    out.y *= radius;
+    return out;
+}
+
+/*
+ * take two 64bit unsigned random values and return a double2 with
+ * two random doubles in a normal distribution via a Box-Muller transform
+ */
+R123_CUDA_DEVICE R123_STATIC_INLINE double2 boxmuller(uint64_t u0, uint64_t u1) {
+    double2 out;
+    const double unit = u01<double>(u1); // u01 is guaranteed to avoid 0.
+    const double radius = sqrt(-2. * log(unit));
+
+    sincospi(uneg11<double>(u0), &out.x, &out.y); // x receives the sine, y the cosine
+    out.x *= radius;
+    out.y *= radius;
+    return out;
+}
+} // namespace r123
+
+#endif /* _r123_BOXMULLER_HPP__ */
diff --git a/ext/random123/include/Random123/conventional/Engine.hpp b/ext/random123/include/Random123/conventional/Engine.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bd2da2e1911b789adbba32c51e9835128ed96842
--- /dev/null
+++ b/ext/random123/include/Random123/conventional/Engine.hpp
@@ -0,0 +1,276 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __Engine_dot_hpp_
+#define __Engine_dot_hpp_
+
+#include "../features/compilerfeatures.h"
+#include "../array.h"
+#include <limits>
+#include <stdexcept>
+#include <sstream>
+#include <algorithm>
+#include <vector>
+#if R123_USE_CXX11_TYPE_TRAITS
+#include <type_traits>
+#endif
+
+namespace r123{
+/**
+  If G satisfies the requirements of a CBRNG, and has a ctr_type whose
+  value_type is an unsigned integral type, then Engine<G> satisfies
+  the requirements of a C++11 "Uniform Random Number Engine" and can
+  be used in any context where such an object is expected.
+
+  Note that wrapping a counter based RNG with a traditional API in
+  this way obscures much of the power of counter based PRNGs.
+  Nevertheless, it may be of value in applications that are already
+  coded to work with the C++11 random number engines.
+
+  The MicroURNG template in MicroURNG.hpp
+  provides the more limited functionality of a C++11 "Uniform
+  Random Number Generator", but leaves the application in control
+  of counters and keys and hence may be preferable to the Engine template.
+  For example, a MicroURNG allows one to use C++11 "Random Number
+  Distributions"  without giving up control over the counters
+  and keys.
+*/ 
+
+template<typename CBRNG>
+struct Engine {
+    typedef CBRNG cbrng_type;
+    typedef typename CBRNG::ctr_type ctr_type;
+    typedef typename CBRNG::key_type key_type;
+    typedef typename CBRNG::ukey_type ukey_type;
+    typedef typename ctr_type::value_type result_type;
+
+protected:
+    cbrng_type b;   // the wrapped counter-based generator, invoked as b(counter, key)
+    key_type key;   // key handed to b on every call
+    ctr_type c;     // counter; advanced with c.incr() before each fresh block
+    ctr_type v;     // block of outputs; v.back() doubles as the count of unread values
+
+    void fix_invariant(){   // if the unread count is nonzero, v must hold b(c, key)
+        if( v.back() != 0 ) {
+            result_type vv = v.back();  // save the unread count ...
+            v = b(c, key);              // ... regenerate the block for the current c and key ...
+            v.back() = vv;              // ... and put the count back into the last slot
+	}
+    }        
+public:
+    explicit Engine() : b(), c() {      // default state: key from an empty initializer, no unread values
+	ukey_type x = {{}};
+        v.back() = 0;
+        key = x;
+    }
+    explicit Engine(result_type r) : b(), c() {   // seed value r becomes the first word of the user key
+        ukey_type x = {{typename ukey_type::value_type(r)}};
+        v.back() = 0;
+        key = x;
+    }
+    // 26.5.3 says that the SeedSeq templates shouldn't participate in
+    // overload resolution unless the type qualifies as a SeedSeq.
+    // How that is determined is unspecified, except that "as a
+    // minimum a type shall not qualify as a SeedSeq if it is
+    // implicitly convertible to a result_type."  
+    //
+    // First, we make sure that even the non-const copy constructor
+    // works as expected.  In addition, if we've got C++11
+    // type_traits, we use enable_if and is_convertible to implement
+    // the convertible-to-result_type restriction.  Otherwise, the
+    // template is unconditional and will match in some surprising
+    // and undesirable situations.
+    Engine(Engine& e) : b(e.b), key(e.key), c(e.c){
+        v.back() = e.v.back();          // copy only the unread count ...
+        fix_invariant();                // ... and regenerate the block if the count is nonzero
+    }
+    Engine(const Engine& e) : b(e.b), key(e.key), c(e.c){
+        v.back() = e.v.back();          // copy only the unread count ...
+        fix_invariant();                // ... and regenerate the block if the count is nonzero
+    }
+
+    template <typename SeedSeq>
+    explicit Engine(SeedSeq &s
+#if R123_USE_CXX11_TYPE_TRAITS
+                    , typename std::enable_if<!std::is_convertible<SeedSeq, result_type>::value>::type* =0
+#endif
+                    )
+        : b(), c() {
+        ukey_type ukey = ukey_type::seed(s);    // key material is drawn from the seed sequence
+        key = ukey;
+        v.back() = 0;
+    }
+    void seed(result_type r){
+        *this = Engine(r);
+    }
+    template <typename SeedSeq>
+    void seed(SeedSeq &s
+#if R123_USE_CXX11_TYPE_TRAITS
+                    , typename std::enable_if<!std::is_convertible<SeedSeq, result_type>::value>::type* =0
+#endif
+              ){ 
+        *this = Engine(s);
+    }
+    void seed(){
+        *this = Engine();
+    }
+    // Equality compares counter, unread count and key; the buffered values themselves are not compared.
+    friend bool operator==(const Engine& lhs, const Engine& rhs){
+        return lhs.c==rhs.c && lhs.v.back() == rhs.v.back() && lhs.key == rhs.key;
+    }
+    friend bool operator!=(const Engine& lhs, const Engine& rhs){
+        return lhs.c!=rhs.c || lhs.v.back()!=rhs.v.back() || lhs.key!=rhs.key;
+    }
+
+    friend std::ostream& operator<<(std::ostream& os, const Engine& be){   // writes: counter, key, unread count
+        return os << be.c << " " << be.key << " " << be.v.back();
+    }
+
+    friend std::istream& operator>>(std::istream& is, Engine& be){         // reads the same three fields back
+        is >> be.c >> be.key >> be.v.back();
+        be.fix_invariant();
+        return is;
+    }
+
+    // The <random> shipped with MacOS Xcode 4.5.2 imposes a
+    // non-standard requirement that URNGs also have static data
+    // members: _Min and _Max.  Later versions of libc++ impose the
+    // requirement only when constexpr isn't supported.  Although the
+    // Xcode 4.5.2 requirement is clearly non-standard, it is unlikely
+    // to be fixed and it is very easy to work around.  We certainly
+    // don't want to go to great lengths to accommodate every buggy
+    // library we come across, but in this particular case, the effort
+    // is low and the benefit is high, so it's worth doing.  Thanks to
+    // Yan Zhou for pointing this out to us.  See similar code in
+    // ../MicroURNG.hpp
+    const static result_type _Min = 0;
+    const static result_type _Max = ~((result_type)0);
+
+    static R123_CONSTEXPR result_type min R123_NO_MACRO_SUBST () { return _Min; }
+    static R123_CONSTEXPR result_type max R123_NO_MACRO_SUBST () { return _Max; }
+
+    result_type operator()(){
+        if( c.size() == 1 )     // short-circuit the scalar case.  Compilers aren't mind-readers.
+            return b(c.incr(), key)[0];
+        result_type& elem = v.back();   // unread count, stored in the last slot of v
+        if( elem == 0 ){                // nothing left: generate the next block
+            v = b(c.incr(), key);
+            result_type ret = v.back(); // return the last word now; its slot is about to hold the count
+            elem = c.size()-1;
+            return ret;
+        }
+        return v[--elem];
+    }
+
+    void discard(R123_ULONG_LONG skip){
+        // don't forget:  elem (v.back()) counts down; it is the number of unread values left in v
+        size_t nelem = c.size();
+	size_t sub = skip % nelem;          // values to drop within a block
+        result_type& elem  = v.back();
+        skip /= nelem;                      // whole blocks to drop
+	if (elem < sub) {                   // fewer unread values than needed: borrow one more block
+	    elem += nelem;
+	    skip++;
+	}
+	elem -= sub;
+        c.incr(skip);
+        fix_invariant();
+    }
+         
+    //--------------------------
+    // Some bonus methods, not required for a Random Number
+    // Engine
+
+    // Constructors and seed() method for ukey_type seem useful
+    // We need const and non-const to supersede the SeedSeq template.
+    explicit Engine(const ukey_type &uk) : key(uk), c(){ v.back() = 0; }
+    explicit Engine(ukey_type &uk) : key(uk), c(){  v.back() = 0; }
+    void seed(const ukey_type& uk){
+        *this = Engine(uk);
+    }        
+    void seed(ukey_type& uk){
+        *this = Engine(uk);
+    }        
+
+#if R123_USE_CXX11_TYPE_TRAITS
+    template <typename DUMMY=void>      // only enabled when key_type is a different type from ukey_type
+    explicit Engine(const key_type& k,
+                    typename std::enable_if<!std::is_same<ukey_type, key_type>::value, DUMMY>::type* = 0)
+        : key(k), c(){ v.back() = 0; }
+
+    template <typename DUMMY=void>      // only enabled when key_type is a different type from ukey_type
+    void seed(const key_type& k,
+              typename std::enable_if<!std::is_same<ukey_type, key_type>::value, DUMMY>::type* = 0){
+        *this = Engine(k);
+    }
+#endif
+
+    // Forward the e(counter) to the CBRNG we are templated
+    // on, using the current value of the key.
+    ctr_type operator()(const ctr_type& c) const{
+        return b(c, key);
+    }
+
+    key_type getkey() const{
+        return key;
+    }
+
+    // N.B.  setkey(k) is different from seed(k) because seed(k) zeros
+    // the counter (per the C++11 requirements for an Engine), whereas
+    // setkey does not.
+    void setkey(const key_type& k){
+        key = k;
+        fix_invariant();
+    }
+
+    // Maybe the caller wants to know the details of
+    // the internal state, e.g., so it can call a different
+    // bijection with the same counter.
+    std::pair<ctr_type, result_type> getcounter() const {
+        return std::make_pair(c, v.back());
+    }
+
+    // And the inverse.  Throws std::range_error if _elem >= c.size().
+    void setcounter(const ctr_type& _c, result_type _elem){
+        static const size_t nelem = c.size();
+        if( _elem >= nelem )
+            throw std::range_error("Engine::setcounter called  with elem out of range");
+        c = _c;
+        v.back() = _elem;
+        fix_invariant();
+    }
+
+    void setcounter(const std::pair<ctr_type, result_type>& ce){
+        setcounter(ce.first, ce.second);
+    }
+};
+} // namespace r123
+
+#endif
diff --git a/ext/random123/include/Random123/conventional/gsl_cbrng.h b/ext/random123/include/Random123/conventional/gsl_cbrng.h
new file mode 100644
index 0000000000000000000000000000000000000000..44457d002b0d40ec5319236f67bcd3ec2a3ce629
--- /dev/null
+++ b/ext/random123/include/Random123/conventional/gsl_cbrng.h
@@ -0,0 +1,128 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __r123_compat_gslrng_dot_h__
+#define __r123_compat_gslrng_dot_h__
+
+#include <gsl/gsl_rng.h>
+#include <string.h>
+
+/**
+   The macro:  GSL_CBRNG(NAME, CBRNGNAME)
+   declares the necessary structs and  constants that define a
+   gsl_rng_NAME type based on the counter-based RNG CBRNGNAME.  For example:
+
+   Usage:
+
+   @code
+   #include <Random123/threefry.h>
+   #include <Random123/conventional/gsl_cbrng.h>  // this file
+   GSL_CBRNG(cbrng, threefry4x32); // creates gsl_rng_cbrng
+
+   int main(int argc, char **argv){
+       gsl_rng *r = gsl_rng_alloc(gsl_rng_cbrng);
+       ... use r as you would use any other gsl_rng ...   
+    }
+    @endcode
+
+    It requires that CBRNGNAME be the name of a CBRNG that follows the
+    naming and stylistic conventions of the Random123 library.
+
+    Note that wrapping a \ref CBRNG "counter-based PRNG" with a traditional API in
+    this way obscures much of the power of the CBRNG API.
+    Nevertheless, it may be of value to applications that are already
+    coded to work with GSL random number generators, and that wish
+    to use the RNGs in the Random123 library.
+
+ */ 
+
+#define GSL_CBRNG(NAME, CBRNGNAME)                                      \
+const gsl_rng_type *gsl_rng_##NAME;  /* set to &NAME##_type at the end */ \
+                                                                        \
+typedef struct{                                                         \
+    CBRNGNAME##_ctr_t ctr;   /* current counter */                           \
+    CBRNGNAME##_ctr_t r;     /* most recent block of generator output */     \
+    CBRNGNAME##_key_t key;   /* key; word 0 holds the seed */                \
+    int elem;                /* number of unread words left in r */     \
+} NAME##_state;                                                         \
+                                                                        \
+static unsigned long int NAME##_get(void *vstate){  /* next 32-bit value */ \
+    NAME##_state *st = (NAME##_state *)vstate;                          \
+    const int N=sizeof(st->ctr.v)/sizeof(st->ctr.v[0]); /* words per block */ \
+    if( st->elem == 0 ){            /* block used up: advance and refill */ \
+        ++st->ctr.v[0];                                                 \
+        if( N>1 && st->ctr.v[0] == 0 ) ++st->ctr.v[1];  /* carry out of word 0 */ \
+        if( N>2 && st->ctr.v[1] == 0 ) ++st->ctr.v[2];  /* NB: true whenever v[1]==0, */ \
+        if( N>3 && st->ctr.v[2] == 0 ) ++st->ctr.v[3];  /* not only when it just wrapped */ \
+        st->r = CBRNGNAME(st->ctr, st->key);                                 \
+        st->elem = N;                                                   \
+    }                                                                   \
+    return 0xffffffffUL & st->r.v[--st->elem];  /* keep the low 32 bits */ \
+}                                                                       \
+                                                                        \
+static double                                                           \
+NAME##_get_double (void * vstate)                                       \
+{                                                                       \
+    return NAME##_get (vstate)/4294967296.0;  /* divide by 2^32: [0,1) */ \
+}                                                                       \
+                                                                        \
+static void NAME##_set(void *vstate, unsigned long int s){              \
+    NAME##_state *st = (NAME##_state *)vstate;                          \
+    st->elem = 0;                   /* force a refill on the next get */ \
+    /* Assume that key and ctr have an array member, v,                 \
+       as if they are r123arrayNxW.  If not, this will fail             \
+       to compile.  In particular, this macro fails to compile          \
+       when the underlying CBRNG requires use of keyinit */             \
+    memset(&st->ctr.v[0], 0, sizeof(st->ctr.v));                        \
+    memset(&st->key.v[0], 0, sizeof(st->key.v));                        \
+    /* GSL 1.15 documentation says this about gsl_rng_set:              \
+         Note that the most generators only accept 32-bit seeds, with higher \
+         values being reduced modulo 2^32.  For generators with smaller \
+         ranges the maximum seed value will typically be lower.         \
+     so we won't jump through any hoops here to deal with               \
+     high bits if sizeof(unsigned long) > sizeof(uint32_t). */          \
+    st->key.v[0] = s;                                                   \
+}                                                                       \
+                                                                        \
+static const gsl_rng_type NAME##_type = {                               \
+    #NAME,                          /* generator name as a string */    \
+    0xffffffffUL,                                                       \
+    0,                                                                  \
+    sizeof(NAME##_state),                                               \
+    &NAME##_set,                                                        \
+    &NAME##_get,                                                        \
+    &NAME##_get_double                                                  \
+};                                                                      \
+                                                                        \
+const gsl_rng_type *gsl_rng_##NAME = &NAME##_type
+
+#endif
+
diff --git a/ext/random123/include/Random123/features/clangfeatures.h b/ext/random123/include/Random123/features/clangfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..1e3c8cfdb9ccaf5e5c0491af373922e9855f1620
--- /dev/null
+++ b/ext/random123/include/Random123/features/clangfeatures.h
@@ -0,0 +1,93 @@
+/*
+Copyright 2010-2016, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __clangfeatures_dot_hpp
+#define __clangfeatures_dot_hpp
+
+#ifndef R123_USE_X86INTRIN_H
+#if (defined(__x86_64__)||defined(__i386__))  // on only for x86 targets
+#define R123_USE_X86INTRIN_H 1
+#else
+#define R123_USE_X86INTRIN_H 0
+#endif // x86 target check
+#endif // R123_USE_X86INTRIN_H
+
+#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS
+#define R123_USE_CXX11_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions)
+#endif
+
+#ifndef R123_USE_CXX11_STATIC_ASSERT
+#define R123_USE_CXX11_STATIC_ASSERT __has_feature(cxx_static_assert)
+#endif
+
+// Pre-C++11 fallback: a typedef whose array bound is 1 if expr holds, else -1.
+// With clang-3.6, -Wall warns about unused-local-typedefs.  Ignoring just
+// -Wunused-local-typedefs doesn't work because earlier versions of clang
+// blow up on an 'unknown warning group'.  So we briefly ignore -Wall...
+// It's tempting to just give up on static assertions in pre-c++11 code.
+#if !R123_USE_CXX11_STATIC_ASSERT && !defined(R123_STATIC_ASSERT)
+#define R123_STATIC_ASSERT(expr, msg) \
+_Pragma("clang diagnostic push")                      \
+_Pragma("clang diagnostic ignored \"-Wall\"")     \
+typedef char static_assertion[(!!(expr))*2-1] \
+_Pragma("clang diagnostic pop")
+#endif
+
+#ifndef R123_USE_CXX11_CONSTEXPR
+#define R123_USE_CXX11_CONSTEXPR __has_feature(cxx_constexpr)
+#endif
+
+#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS
+#define R123_USE_CXX11_EXPLICIT_CONVERSIONS __has_feature(cxx_explicit_conversions)
+#endif
+
+// With clang-3.0, the apparently simpler:
+//  #define R123_USE_CXX11_RANDOM __has_include(<random>)
+// dumps core.
+#ifndef R123_USE_CXX11_RANDOM
+#if __cplusplus>=201103L && __has_include(<random>)  // C++11 or later and the header exists
+#define R123_USE_CXX11_RANDOM 1
+#else
+#define R123_USE_CXX11_RANDOM 0
+#endif // <random> check
+#endif // R123_USE_CXX11_RANDOM
+
+#ifndef R123_USE_CXX11_TYPE_TRAITS
+#if __cplusplus>=201103L && __has_include(<type_traits>)  // C++11 or later and the header exists
+#define R123_USE_CXX11_TYPE_TRAITS 1
+#else
+#define R123_USE_CXX11_TYPE_TRAITS 0
+#endif // <type_traits> check
+#endif // R123_USE_CXX11_TYPE_TRAITS
+
+#include "gccfeatures.h"  // the remaining settings come from the gcc header
+
+#endif
diff --git a/ext/random123/include/Random123/features/compilerfeatures.h b/ext/random123/include/Random123/features/compilerfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..2341a7a01ef53fd6add4a381588fa6a4e84029b5
--- /dev/null
+++ b/ext/random123/include/Random123/features/compilerfeatures.h
@@ -0,0 +1,343 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/**
+
+@page porting Preprocessor symbols for porting Random123 to different platforms.
+
+The Random123 library is portable across C, C++, CUDA, OpenCL environments,
+and multiple operating systems (Linux, Windows 7, Mac OS X, FreeBSD, Solaris).
+This level of portability requires the abstraction of some features
+and idioms that are either not standardized (e.g., asm statements), or for which 
+different vendors have their own standards (e.g., SSE intrinsics) or for
+which vendors simply refuse to conform to well-established standards (e.g., <inttypes.h>).
+
+Random123/features/compilerfeatures.h
+conditionally includes a compiler-or-OS-specific Random123/features/XXXfeatures.h file which
+defines appropriate values for the preprocessor symbols which can be used with
+a specific compiler or OS.  Those symbols will then
+be used by other header files and source files in the Random123
+library (and may be used by applications) to control what actually
+gets presented to the compiler.
+
+Most of the symbols are boolean valued.  In general, they will
+\b always be defined with value either 1 or 0, so do
+\b NOT use \#ifdef.  Use \#if R123_USE_SOMETHING instead.
+
+Library users can override any value by defining the pp-symbol with a compiler option,
+e.g.,
+
+    cc -DR123_USE_MULHILO64_C99 
+
+will use a strictly c99 version of the full-width 64x64->128-bit multiplication
+function, even if it would be disabled by default.
+
+All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.h start with the prefix R123_USE_
+@verbatim
+         AES_NI
+         AES_OPENSSL
+         SSE4_2
+         SSE4_1
+         SSE
+
+         STD_RANDOM
+
+         GNU_UINT128
+         ASM_GNU
+         ASM_MSASM
+
+         CPUID_MSVC
+
+         CXX11_RANDOM
+         CXX11_TYPE_TRAITS
+         CXX11_STATIC_ASSERT
+         CXX11_CONSTEXPR
+         CXX11_UNRESTRICTED_UNIONS
+         CXX11_EXPLICIT_CONVERSIONS
+         CXX11_LONG_LONG
+         CXX11_STD_ARRAY
+         CXX11 
+   
+         X86INTRIN_H
+         IA32INTRIN_H
+         XMMINTRIN_H
+         EMMINTRIN_H
+         SMMINTRIN_H
+         WMMINTRIN_H
+         INTRIN_H
+
+         MULHILO32_ASM
+         MULHILO64_ASM
+         MULHILO64_MSVC_INTRIN
+         MULHILO64_CUDA_INTRIN
+         MULHILO64_OPENCL_INTRIN
+         MULHILO64_C99
+
+         U01_DOUBLE
+	 
+@endverbatim
+Most have obvious meanings.  Some non-obvious ones:
+
+AES_NI and AES_OPENSSL are not mutually exclusive.  You can have one,
+both or neither.
+
+GNU_UINT128 says that it's safe to use __uint128_t, but it
+does not require its use.  In particular, it should be
+used in mulhilo<uint64_t> only if MULHILO64_ASM is unset.
+
+If the XXXINTRIN_H macros are true, then one should
+@code
+#include <xxxintrin.h>
+@endcode
+to gain access to compiler intrinsics.
+
+The CXX11_SOME_FEATURE macros allow the code to use specific
+features of the C++11 language and library.
+In the absence of a specific CXX11_SOME_FEATURE, the feature
+is controlled by the catch-all R123_USE_CXX11 macro.
+
+U01_DOUBLE defaults on, and can be turned off (set to 0)
+if one does not want the utility functions that convert to double
+(i.e. u01_*_53()), e.g. on OpenCL without the cl_khr_fp64 extension.
+
+There are a number of invariants that are always true.  Application code may
+choose to rely on these:
+
+<ul>
+<li>ASM_GNU and ASM_MSASM are mutually exclusive
+<li>The "higher" SSE values imply the lower ones.
+</ul>
+
+There are also non-boolean valued symbols:
+
+<ul>
+<li>R123_STATIC_INLINE -
+  According to both C99 and GNU99, the 'static inline' declaration allows
+  the compiler to not emit code if the function is not used.  
+  Note that the semantics of 'inline', 'static' and 'extern' in
+  gcc have changed over time and are subject to modification by
+  command line options, e.g., -std=gnu89, -fgnu-inline.
+  Nevertheless, it appears that the meaning of 'static inline' 
+  has not changed over time and (with a little luck) the use of 'static inline'
+  here will be portable between versions of gcc and to other C99
+  compilers.
+  See: http://gcc.gnu.org/onlinedocs/gcc/Inline.html
+       http://www.greenend.org.uk/rjk/2003/03/inline.html
+
+<li>R123_FORCE_INLINE(decl) -
+  which expands to 'decl', adorned with the compiler-specific
+  embellishments to strongly encourage that the declared function be
+  inlined.  If there is no such compiler-specific magic, it should
+  expand to decl, unadorned.
+   
+<li>R123_CUDA_DEVICE - which expands to __device__ (or something else with
+  sufficiently similar semantics) when CUDA is in use, and expands
+  to nothing in other cases.
+
+<li>R123_METAL_THREAD_ADDRESS_SPACE - which expands to 'thread' (or
+  something else with sufficiently similar semantics) when compiling a
+  Metal kernel, and expands to nothing in other cases.
+
+<li>R123_ASSERT(x) - which expands to assert(x), or maybe to nothing at
+  all if we're in an environment so feature-poor that you can't even
+  call assert (I'm looking at you, CUDA and OpenCL), or even include
+  assert.h safely (OpenCL).
+
+<li>R123_STATIC_ASSERT(expr,msg) - which expands to
+  static_assert(expr,msg), or to an expression that
+  will raise a compile-time exception if expr is not true.
+
+<li>R123_ULONG_LONG - which expands to a declaration of the longest available
+  unsigned integer.
+
+<li>R123_64BIT(x) - expands to something equivalent to
+  UINT64_C(x) from <stdint.h>, even in environments where <stdint.h>
+  is not available, e.g., MSVC and OpenCL.
+
+<li>R123_BUILTIN_EXPECT(expr,likely_value) - expands to something with
+  the semantics of gcc's __builtin_expect(expr,likely_value).  If
+  the environment has nothing like __builtin_expect, it should expand
+  to just expr.
+</ul>
+
+
+\cond HIDDEN_FROM_DOXYGEN
+*/
+
+/* 
+N.B.  When something is added to the list of features, it should be
+added to each of the *features.h files, AND to examples/ut_features.cpp.
+*/
+
+/* Include one per-compiler features header.  N.B.  most other compilers (icc, nvcc, open64, llvm) will also define __GNUC__, so order matters: their tests come before the __GNUC__ one. */
+#if defined(__METAL_MACOS__)
+#include "metalfeatures.h"
+#elif defined(__OPENCL_VERSION__) && __OPENCL_VERSION__ > 0
+#include "openclfeatures.h"
+#elif defined(__CUDACC__)
+#include "nvccfeatures.h"
+#elif defined(__ICC)
+#include "iccfeatures.h"
+#elif defined(__xlC__)
+#include "xlcfeatures.h"
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+#include "sunprofeatures.h"
+#elif defined(__OPEN64__)
+#include "open64features.h"
+#elif defined(__clang__)
+#include "clangfeatures.h"
+#elif defined(__GNUC__)
+#include "gccfeatures.h"
+#elif defined(__PGI)
+#include "pgccfeatures.h"
+#elif defined(_MSC_FULL_VER)
+#include "msvcfeatures.h"
+#else
+#error "Can't identify compiler.  You'll need to add a new xxfeatures.hpp"
+{ /* maybe an unbalanced brace will terminate the compilation */
+#endif /* compiler selection */
+
+#ifndef R123_USE_CXX11 /* catch-all: true when compiling as C++11 or later */
+#define R123_USE_CXX11 (__cplusplus >= 201103L)
+#endif
+
+#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS /* each CXX11_* switch below defaults to the catch-all */
+#define R123_USE_CXX11_UNRESTRICTED_UNIONS R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_CXX11_STATIC_ASSERT
+#define R123_USE_CXX11_STATIC_ASSERT R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_CXX11_CONSTEXPR
+#define R123_USE_CXX11_CONSTEXPR R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS
+#define R123_USE_CXX11_EXPLICIT_CONVERSIONS R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_CXX11_RANDOM
+#define R123_USE_CXX11_RANDOM R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_CXX11_TYPE_TRAITS
+#define R123_USE_CXX11_TYPE_TRAITS R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_CXX11_LONG_LONG
+#define R123_USE_CXX11_LONG_LONG R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_CXX11_STD_ARRAY
+#define R123_USE_CXX11_STD_ARRAY R123_USE_CXX11
+#endif
+
+#ifndef R123_USE_MULHILO64_C99 /* the MULHILO switches below default to off unless already defined */
+#define R123_USE_MULHILO64_C99 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MULHI_INTRIN
+#define R123_USE_MULHILO64_MULHI_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO32_MULHI_INTRIN
+#define R123_USE_MULHILO32_MULHI_INTRIN 0
+#endif
+
+#ifndef R123_STATIC_ASSERT
+#if R123_USE_CXX11_STATIC_ASSERT
+#define R123_STATIC_ASSERT(expr, msg) static_assert(expr, msg)
+#else
+    /* Fallback: the array bound is 1 if expr holds and -1 otherwise.  If msg always_looked_like_this, we could paste it into the name.  Worth it? */
+#define R123_STATIC_ASSERT(expr, msg) typedef char static_assertion[(!!(expr))*2-1]
+#endif /* R123_USE_CXX11_STATIC_ASSERT */
+#endif /* R123_STATIC_ASSERT */
+
+#ifndef R123_CONSTEXPR
+#if R123_USE_CXX11_CONSTEXPR
+#define R123_CONSTEXPR constexpr
+#else
+#define R123_CONSTEXPR
+#endif /* R123_USE_CXX11_CONSTEXPR */
+#endif /* R123_CONSTEXPR */
+
+#ifndef R123_USE_64BIT
+#define R123_USE_64BIT 1
+#endif    
+
+#ifndef R123_USE_PHILOX_64BIT /* on when 64-bit use is enabled and at least one of the listed switches is set */
+#define R123_USE_PHILOX_64BIT (R123_USE_64BIT && (R123_USE_MULHILO64_ASM || R123_USE_MULHILO64_MSVC_INTRIN || R123_USE_MULHILO64_CUDA_INTRIN || R123_USE_GNU_UINT128 || R123_USE_MULHILO64_C99 || R123_USE_MULHILO64_OPENCL_INTRIN || R123_USE_MULHILO64_MULHI_INTRIN))
+#endif
+
+#ifndef R123_ULONG_LONG
+#if defined(__cplusplus) && !R123_USE_CXX11_LONG_LONG
+/* C++98 doesn't have long long.  It doesn't have uint64_t either, but
+   we will have typedef'ed uint64_t to something in the xxxfeatures.h.
+   With luck, it won't elicit complaints from -pedantic.  Cross your
+   fingers... */
+#define R123_ULONG_LONG uint64_t
+#else
+#define R123_ULONG_LONG unsigned long long
+#endif /* C++ without long long */
+#endif /* R123_ULONG_LONG */
+
+/* UINT64_C should have been #defined by XXXfeatures.h, either by
+   #include <stdint.h> or through compiler-dependent hacks */
+#ifndef R123_64BIT
+#define R123_64BIT(x) UINT64_C(x)
+#endif
+
+#ifndef R123_THROW /* default: a plain C++ throw, unless already defined */
+#define R123_THROW(x)    throw (x)
+#endif
+
+#ifndef R123_METAL_THREAD_ADDRESS_SPACE /* expands to nothing unless already defined */
+#define R123_METAL_THREAD_ADDRESS_SPACE
+#endif
+
+#ifndef R123_METAL_CONSTANT_ADDRESS_SPACE /* expands to nothing unless already defined */
+#define R123_METAL_CONSTANT_ADDRESS_SPACE
+#endif
+    
+/*
+ * Windows.h (and perhaps other "well-meaning" code) defines min and
+ * max, so there's a high chance that our definition of min, max
+ * methods or use of std::numeric_limits min and max will cause
+ * complaints in any program that happened to include Windows.h or
+ * suchlike first.  We place the null macro below between the name and
+ * the parenthesis wherever our own header files define or use min, max,
+ * to defensively preclude this problem.  It may not be enough; one might
+ * need to #define NOMINMAX before including Windows.h or compile with
+ * -DNOMINMAX.
+ */
+#define R123_NO_MACRO_SUBST
+
+/** \endcond */
diff --git a/ext/random123/include/Random123/features/gccfeatures.h b/ext/random123/include/Random123/features/gccfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..701f3c667801421d7638d6cf7474f5bc461f8411
--- /dev/null
+++ b/ext/random123/include/Random123/features/gccfeatures.h
@@ -0,0 +1,263 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __gccfeatures_dot_hpp
+#define __gccfeatures_dot_hpp
+
+#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
+
+#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__) && !defined(__arm__) && !defined(__aarch64__)
+#  error "This code has only been tested on x86, powerpc and a few arm platforms."
+#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
+{ /* maybe an unbalanced brace will terminate the compilation */
+ /* Feel free to try the Random123 library on other architectures by changing
+ the conditions that reach this error, but you should consider it a
+ porting exercise and expect to encounter bugs and deficiencies.
+ Please let the authors know of any successes (or failures). */
+#endif
+
+#ifdef __powerpc__
+#include <ppu_intrinsics.h>
+#endif
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE static __inline__
+#endif
+
+#ifndef R123_FORCE_INLINE
+#if R123_GNUC_VERSION >= 40000
+#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
+#else
+#define R123_FORCE_INLINE(decl) decl
+#endif
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_ASSERT
+#include <assert.h>
+#define R123_ASSERT(x) assert(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
+#endif
+
+/* According to the C++0x standard, we should be able to test the numeric
+   value of __cplusplus == 199711L for C++98, __cplusplus == 201103L for C++11
+   But gcc has had an open bug  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=1773
+   since early 2001, which was finally fixed in 4.7 (early 2012).  For
+   earlier versions, the only way  to detect whether --std=c++0x was requested
+   on the command line is to look at the __GCC_EXPERIMENTAL_CXX0X__ pp-symbol.
+*/
+#if defined(__GCC_EXPERIMENTAL_CXX0X__)
+#define GNU_CXX11 (__cplusplus>=201103L || (R123_GNUC_VERSION<40700 && 1/* defined(__GCC_EXPERIMENTAL_CXX0X__) */))
+#else
+#define GNU_CXX11 (__cplusplus>=201103L || (R123_GNUC_VERSION<40700 && 0/* defined(__GCC_EXPERIMENTAL_CXX0X__) */))
+#endif
+
+#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS
+#define R123_USE_CXX11_UNRESTRICTED_UNIONS ((R123_GNUC_VERSION >= 40600) && GNU_CXX11)
+#endif
+
+#ifndef R123_USE_CXX11_STATIC_ASSERT
+#define R123_USE_CXX11_STATIC_ASSERT ((R123_GNUC_VERSION >= 40300) && GNU_CXX11)
+#endif
+
+#ifndef R123_USE_CXX11_CONSTEXPR
+#define R123_USE_CXX11_CONSTEXPR ((R123_GNUC_VERSION >= 40600) && GNU_CXX11)
+#endif
+
+#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS
+#define R123_USE_CXX11_EXPLICIT_CONVERSIONS ((R123_GNUC_VERSION >= 40500) && GNU_CXX11)
+#endif
+
+#ifndef R123_USE_CXX11_RANDOM
+#define R123_USE_CXX11_RANDOM ((R123_GNUC_VERSION>=40500) && GNU_CXX11)
+#endif
+
+#ifndef R123_USE_CXX11_TYPE_TRAITS
+#define R123_USE_CXX11_TYPE_TRAITS ((R123_GNUC_VERSION>=40400) && GNU_CXX11)
+#endif
+
+#ifndef R123_USE_AES_NI
+#ifdef __AES__
+#define R123_USE_AES_NI 1
+#else
+#define R123_USE_AES_NI 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE4_2
+#ifdef __SSE4_2__
+#define R123_USE_SSE4_2 1
+#else
+#define R123_USE_SSE4_2 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE4_1
+#ifdef __SSE4_1__
+#define R123_USE_SSE4_1 1
+#else
+#define R123_USE_SSE4_1 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE
+/* There's no point in trying to compile SSE code in Random123
+   unless SSE2 is available. */
+#ifdef __SSE2__
+#define R123_USE_SSE 1
+#else
+#define R123_USE_SSE 0
+#endif
+#endif
+
+#ifndef R123_USE_AES_OPENSSL
+/* There isn't really a good way to tell at compile time whether
+   openssl is available.  Without a pre-compilation configure-like
+   tool, it's less error-prone to guess that it isn't available.  Add
+   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
+   play with openssl */
+#define R123_USE_AES_OPENSSL 0
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#if defined(__x86_64__) || defined(__aarch64__)
+#define R123_USE_GNU_UINT128 1
+#else
+#define R123_USE_GNU_UINT128 0
+#endif
+#endif
+
+#ifndef R123_USE_ASM_GNU
+#if (defined(__x86_64__)||defined(__i386__))
+#define R123_USE_ASM_GNU 1
+#else
+#define R123_USE_ASM_GNU 1 /* NOTE(review): same value as the x86 branch, so the #if is currently a no-op -- confirm 1 is intended for non-x86 */
+#endif    
+#endif
+
+#ifndef R123_USE_CPUID_MSVC
+#define R123_USE_CPUID_MSVC 0
+#endif
+
+#ifndef R123_USE_X86INTRIN_H
+#if (defined(__x86_64__)||defined(__i386__))
+#define R123_USE_X86INTRIN_H (1/* (defined(__x86_64__)||defined(__i386__)) */  && R123_GNUC_VERSION >= 40402)
+#else
+#define R123_USE_X86INTRIN_H (0/* (defined(__x86_64__)||defined(__i386__)) */  && R123_GNUC_VERSION >= 40402)
+#endif
+#endif
+
+#ifndef R123_USE_IA32INTRIN_H
+#define R123_USE_IA32INTRIN_H 0
+#endif
+
+#ifndef R123_USE_XMMINTRIN_H
+#define R123_USE_XMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_EMMINTRIN_H
+/* gcc -m64 on Solaris 10 defines __SSE2__ but doesn't have 
+   emmintrin.h in the include search path.  This is
+   so broken that I refuse to try to work around it.  If this
+   affects you, figure out where your emmintrin.h lives and
+   add an appropriate -I to your CPPFLAGS.  Or add -DR123_USE_SSE=0. */
+#define R123_USE_EMMINTRIN_H (R123_USE_SSE && (R123_GNUC_VERSION < 40402))
+#endif
+
+#ifndef R123_USE_SMMINTRIN_H
+#define R123_USE_SMMINTRIN_H ((R123_USE_SSE4_1 || R123_USE_SSE4_2) && (R123_GNUC_VERSION < 40402))
+#endif
+
+#ifndef R123_USE_WMMINTRIN_H
+#define R123_USE_WMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_INTRIN_H
+#define R123_USE_INTRIN_H 0
+#endif
+
+#ifndef R123_USE_MULHILO32_ASM
+#define R123_USE_MULHILO32_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MULHI_INTRIN
+#if (defined(__powerpc64__))
+#define R123_USE_MULHILO64_MULHI_INTRIN 1
+#else
+#define R123_USE_MULHILO64_MULHI_INTRIN 0
+#endif
+#endif
+
+#ifndef R123_MULHILO64_MULHI_INTRIN
+#define R123_MULHILO64_MULHI_INTRIN __mulhdu
+#endif
+
+#ifndef R123_USE_MULHILO32_MULHI_INTRIN
+#define R123_USE_MULHILO32_MULHI_INTRIN 0
+#endif
+
+#ifndef R123_MULHILO32_MULHI_INTRIN
+#define R123_MULHILO32_MULHI_INTRIN __mulhwu
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+#include <stdint.h>
+#ifndef UINT64_C
+#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
+#endif
+
+/* If you add something, it must go in all the other XXfeatures.hpp
+   and in ../ut_features.cpp */
+#endif
diff --git a/ext/random123/include/Random123/features/iccfeatures.h b/ext/random123/include/Random123/features/iccfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..7e72dec1d31027eac66e00fe92b4cd926b235d23
--- /dev/null
+++ b/ext/random123/include/Random123/features/iccfeatures.h
@@ -0,0 +1,212 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __icpcfeatures_dot_hpp
+#define __icpcfeatures_dot_hpp
+
+// icc relies on gcc libraries and other toolchain components.
+#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
+
+#if !defined(__x86_64__) && !defined(__i386__)
+#  error "This code has only been tested on x86 platforms."
+{ // maybe an unbalanced brace will terminate the compilation
+// You are invited to try Random123 on other architectures, by changing
+// the conditions that reach this error, but you should consider it a
+// porting exercise and expect to encounter bugs and deficiencies.
+// Please let the authors know of any successes (or failures).
+#endif
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE static inline
+#endif
+
+#ifndef R123_FORCE_INLINE
+#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_ASSERT
+#include <assert.h>
+#define R123_ASSERT(x) assert(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
+#endif
+
+// The basic idiom is:
+// #ifndef R123_SOMETHING
+// #if some condition
+// #define R123_SOMETHING 1
+// #else
+// #define R123_SOMETHING 0
+// #endif
+// #endif
+// This idiom allows an external user to override any decision
+// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHING=0
+
+// An alternative idiom is:
+// #ifndef R123_SOMETHING
+// #define R123_SOMETHING (some boolean expression)
+// #endif
+// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE
+// pp-symbols.
+
+#ifndef R123_USE_SSE4_2
+#ifdef __SSE4_2__
+#define R123_USE_SSE4_2 1
+#else
+#define R123_USE_SSE4_2 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE4_1
+#ifdef __SSE4_1__
+#define R123_USE_SSE4_1 1
+#else
+#define R123_USE_SSE4_1 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE
+#ifdef __SSE2__
+#define R123_USE_SSE 1
+#else
+#define R123_USE_SSE 0
+#endif
+#endif
+
+#ifndef R123_USE_AES_NI
+// Unlike gcc, icc (version 12) does not pre-define an __AES__
+// pp-symbol when -maes or -xHost is on the command line.  This feels
+// like a defect in icc (it defines __SSE4_2__ in analogous
+// circumstances), but until Intel fixes it, we're better off erring
+// on the side of caution and not generating instructions that are
+// going to raise SIGILL when executed.  To get the AES-NI
+// instructions with icc, the caller must put something like
+// -DR123_USE_AES_NI=1 or -D__AES__ on the command line.  FWIW, the
+// AES-NI Whitepaper by Gueron says that icc has supported AES-NI from
+// 11.1 onwards.
+//
+#if defined(__AES__)
+#define R123_USE_AES_NI ((__ICC>=1101) && 1/*defined(__AES__)*/)
+#else
+#define R123_USE_AES_NI ((__ICC>=1101) && 0/*defined(__AES__)*/)
+#endif
+#endif
+
+#ifndef R123_USE_AES_OPENSSL
+/* There isn't really a good way to tell at compile time whether
+   openssl is available.  Without a pre-compilation configure-like
+   tool, it's less error-prone to guess that it isn't available.  Add
+   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
+   play with openssl */
+#define R123_USE_AES_OPENSSL 0
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_ASM_GNU
+#define R123_USE_ASM_GNU 1
+#endif
+
+#ifndef R123_USE_CPUID_MSVC
+#define R123_USE_CPUID_MSVC 0
+#endif
+
+#ifndef R123_USE_X86INTRIN_H
+#define R123_USE_X86INTRIN_H 0
+#endif
+
+#ifndef R123_USE_IA32INTRIN_H
+#define R123_USE_IA32INTRIN_H 1
+#endif
+
+#ifndef R123_USE_XMMINTRIN_H
+#define R123_USE_XMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_EMMINTRIN_H
+#define R123_USE_EMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_SMMINTRIN_H
+#define R123_USE_SMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_WMMINTRIN_H
+#define R123_USE_WMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_INTRIN_H
+#define R123_USE_INTRIN_H 0
+#endif
+
+#ifndef R123_USE_MULHILO16_ASM
+#define R123_USE_MULHILO16_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO32_ASM
+#define R123_USE_MULHILO32_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 1
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 0
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+#include <stdint.h>
+#ifndef UINT64_C
+#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
+#endif
+
+// If you add something, it must go in all the other XXfeatures.hpp
+// and in ../ut_features.cpp
+#endif
diff --git a/ext/random123/include/Random123/features/metalfeatures.h b/ext/random123/include/Random123/features/metalfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..bafe51a6840cb3ae5ca7fc1145cf5442db0657e5
--- /dev/null
+++ b/ext/random123/include/Random123/features/metalfeatures.h
@@ -0,0 +1,111 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Written by Tom Schoonjans <Tom.Schoonjans@me.com>
+ */
+
+#ifndef __metalfeatures_dot_hpp
+#define __metalfeatures_dot_hpp
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE inline
+#endif
+
+#ifndef R123_FORCE_INLINE
+#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_METAL_THREAD_ADDRESS_SPACE
+#define R123_METAL_THREAD_ADDRESS_SPACE thread
+#endif
+
+#ifndef R123_METAL_CONSTANT_ADDRESS_SPACE
+#define R123_METAL_CONSTANT_ADDRESS_SPACE constant
+#endif
+
+#ifndef R123_ASSERT
+#define R123_ASSERT(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) expr
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO32_MULHI_INTRIN
+#define R123_USE_MULHILO32_MULHI_INTRIN 1
+#endif
+
+#if R123_USE_MULHILO32_MULHI_INTRIN
+#include <metal_integer>
+#define R123_MULHILO32_MULHI_INTRIN metal::mulhi
+#endif
+
+#ifndef R123_USE_AES_NI
+#define R123_USE_AES_NI 0
+#endif
+
+#ifndef R123_USE_64BIT
+#define R123_USE_64BIT 0 /* Metal currently (Feb 2019, Specification-2) does not support 64-bit variable types */
+#endif
+
+#ifndef R123_ULONG_LONG
+/* the longest integer type in Metal (Feb 2019, Specification-2) is a
+ * 32-bit unsigned int.  Let's hope for the best... */
+#define R123_ULONG_LONG unsigned int 
+#endif
+
+#endif
diff --git a/ext/random123/include/Random123/features/msvcfeatures.h b/ext/random123/include/Random123/features/msvcfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..9eb9520912daf66869a6cf9fd027c37f06a8a3d4
--- /dev/null
+++ b/ext/random123/include/Random123/features/msvcfeatures.h
@@ -0,0 +1,200 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __msvcfeatures_dot_hpp
+#define __msvcfeatures_dot_hpp
+
+//#if _MSVC_FULL_VER <= 15
+//#error "We've only tested MSVC_FULL_VER==15."
+//#endif
+
+#if !defined(_M_IX86) && !defined(_M_X64)
+#  error "This code has only been tested on x86 platforms."
+{ // maybe an unbalanced brace will terminate the compilation
+// You are invited to try Random123 on other architectures, by changing
+// the conditions that reach this error, but you should consider it a
+// porting exercise and expect to encounter bugs and deficiencies.
+// Please let the authors know of any successes (or failures).
+#endif
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE static __inline
+#endif
+
+#ifndef R123_FORCE_INLINE /* MSVC force-inline specifier, placed in front of the wrapped declaration */
+#define R123_FORCE_INLINE(decl) __forceinline decl /* not _forceinline: that legacy alias is rejected under /Za */
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_ASSERT
+#include <assert.h>
+#define R123_ASSERT(x) assert(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) expr
+#endif
+
+// The basic idiom is:
+// #ifndef R123_SOMETHING
+// #if some condition
+// #define R123_SOMETHING 1
+// #else
+// #define R123_SOMETHING 0
+// #endif
+// #endif
+// This idiom allows an external user to override any decision
+// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHING=0
+
+// An alternative idiom is:
+// #ifndef R123_SOMETHING
+// #define R123_SOMETHING (some boolean expression)
+// #endif
+// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE
+// pp-symbols.
+
+#ifndef R123_USE_AES_NI
+#if defined(_M_X64)
+#define R123_USE_AES_NI 1
+#else
+#define R123_USE_AES_NI 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE4_2
+#if defined(_M_X64)
+#define R123_USE_SSE4_2 1
+#else
+#define R123_USE_SSE4_2 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE4_1
+#if defined(_M_X64)
+#define R123_USE_SSE4_1 1
+#else
+#define R123_USE_SSE4_1 0
+#endif
+#endif
+
+#ifndef R123_USE_SSE
+#define R123_USE_SSE 1
+#endif
+
+#ifndef R123_USE_AES_OPENSSL
+#define R123_USE_AES_OPENSSL 0
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_ASM_GNU
+#define R123_USE_ASM_GNU 0
+#endif
+
+#ifndef R123_USE_CPUID_MSVC
+#define R123_USE_CPUID_MSVC 1
+#endif
+
+#ifndef R123_USE_X86INTRIN_H
+#define R123_USE_X86INTRIN_H 0
+#endif
+
+#ifndef R123_USE_IA32INTRIN_H
+#define R123_USE_IA32INTRIN_H 0
+#endif
+
+#ifndef R123_USE_XMMINTRIN_H
+#define R123_USE_XMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_EMMINTRIN_H
+#define R123_USE_EMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_SMMINTRIN_H
+#define R123_USE_SMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_WMMINTRIN_H
+#define R123_USE_WMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_INTRIN_H
+#define R123_USE_INTRIN_H 1
+#endif
+
+#ifndef R123_USE_MULHILO16_ASM
+#define R123_USE_MULHILO16_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO32_ASM
+#define R123_USE_MULHILO32_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#if defined(_M_X64)
+#define R123_USE_MULHILO64_MSVC_INTRIN 1
+#else
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 0
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+#include <stdint.h>
+#ifndef UINT64_C
+#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
+#endif
+
+#pragma warning(disable:4244)
+#pragma warning(disable:4996)
+
+// If you add something, it must go in all the other XXfeatures.hpp
+// and in ../ut_features.cpp
+#endif
diff --git a/ext/random123/include/Random123/features/nvccfeatures.h b/ext/random123/include/Random123/features/nvccfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..d1ff8bf521a05d45232e922c9a875439bc84f837
--- /dev/null
+++ b/ext/random123/include/Random123/features/nvccfeatures.h
@@ -0,0 +1,125 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __r123_nvcc_features_dot_h__
+#define __r123_nvcc_features_dot_h__
+
+#if !defined(CUDART_VERSION)
+#error "why are we in nvccfeatures.h if CUDART_VERSION is not defined"
+#endif
+
+#if CUDART_VERSION < 4010
+#error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces.  Random123 isunsupported.  See comments in nvccfeatures.h"
+// This test was added in Random123-1.08 (August, 2013) because we
+// discovered that Ftype(maxTvalue<T>()) with Ftype=double and
+// T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and
+// earlier.  We can't be sure this bug doesn't also affect invocations
+// of other templated functions, e.g., essentially all of Random123.
+// Thus, we no longer trust CUDA versions earlier than 4.1 even though
+// we had previously tested and timed Random123 with CUDA 3.x and 4.0.
+// If you feel lucky or desperate, you can change #error to #warning, but
+// please take extra care to be sure that you are getting correct
+// results.
+#endif
+
+// nvcc falls through to gcc or msvc.  So first define
+// a couple of things and then include either gccfeatures.h
+// or msvcfeatures.h
+
+//#ifdef  __CUDA_ARCH__ allows Philox32 and Philox64 to be compiled
+//for both device and host functions in CUDA by setting compiler flags
+//for the device function
+#ifdef  __CUDA_ARCH__
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE __device__
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 1
+#endif
+
+#ifndef R123_THROW
+// No exceptions in CUDA, at least up to 4.0: x is discarded; R123_ASSERT (guarded #define just below) is expanded at the point of use.
+#define R123_THROW(x)    R123_ASSERT(0)
+#endif
+
+#ifndef R123_ASSERT
+#define R123_ASSERT(x) if((x)) ; else asm("trap;")
+#endif
+
+#else // ! __CUDA_ARCH__
+// If we're using nvcc not compiling for the CUDA architecture,
+// then we must be compiling for the host.  In that case,
+// tell the philox code to use the mulhilo64 asm because
+// nvcc doesn't grok uint128_t.
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 1
+#endif
+
+#endif // __CUDA_ARCH__
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) expr
+#endif
+
+#ifndef R123_USE_AES_NI
+#define R123_USE_AES_NI 0
+#endif
+
+#ifndef R123_USE_SSE4_2
+#define R123_USE_SSE4_2 0
+#endif
+
+#ifndef R123_USE_SSE4_1
+#define R123_USE_SSE4_1 0
+#endif
+
+#ifndef R123_USE_SSE
+#define R123_USE_SSE 0
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_ULONG_LONG
+// uint64_t, which is what we'd get without this, is
+// not the same as unsigned long long
+#define R123_ULONG_LONG unsigned long long
+#endif
+
+#if defined(__GNUC__)
+#include "gccfeatures.h"
+#elif defined(_MSC_FULL_VER)
+#include "msvcfeatures.h"
+#endif
+
+#endif
diff --git a/ext/random123/include/Random123/features/open64features.h b/ext/random123/include/Random123/features/open64features.h
new file mode 100644
index 0000000000000000000000000000000000000000..8da9f5f51efab021c644b632b4499f12fa0220d9
--- /dev/null
+++ b/ext/random123/include/Random123/features/open64features.h
@@ -0,0 +1,50 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __open64features_dot_hpp
+#define __open64features_dot_hpp
+
+/* The gcc features are mostly right.  We just override a few and then include gccfeatures.h */
+
+/* Open64 4.2.3 and 4.2.4 accept the __uint128_t code without complaint
+   but produce incorrect code for 64-bit philox.  The MULHILO64_ASM
+   seems to work fine */
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 1
+#endif
+
+#include "gccfeatures.h"
+
+#endif
diff --git a/ext/random123/include/Random123/features/openclfeatures.h b/ext/random123/include/Random123/features/openclfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..af03d3092318c6c27f1a65ce8104c1609b1e66e1
--- /dev/null
+++ b/ext/random123/include/Random123/features/openclfeatures.h
@@ -0,0 +1,89 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __openclfeatures_dot_hpp
+#define __openclfeatures_dot_hpp
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE inline
+#endif
+
+#ifndef R123_FORCE_INLINE
+#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_ASSERT
+#define R123_ASSERT(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) expr
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 1
+#endif
+
+#ifndef R123_USE_AES_NI
+#define R123_USE_AES_NI 0
+#endif
+
+// XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of
+// ulong to mul_hi.  And gets lots of complaints from stdint.h
+// on some machines.
+// But these typedefs mean we cannot include stdint.h with
+// these headers?  Do we need R123_64T, R123_32T, R123_8T?
+typedef ulong uint64_t;
+typedef uint  uint32_t;
+typedef uchar uint8_t;
+#define UINT64_C(x) ((ulong)(x##UL))
+
+#endif
diff --git a/ext/random123/include/Random123/features/pgccfeatures.h b/ext/random123/include/Random123/features/pgccfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..18ace1353b4e0e6201c823e17b5325c2a9b05afe
--- /dev/null
+++ b/ext/random123/include/Random123/features/pgccfeatures.h
@@ -0,0 +1,194 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Copyright (c) 2013, Los Alamos National Security, LLC
+All rights reserved.
+
+Copyright 2013. Los Alamos National Security, LLC. This software was produced
+under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
+the U.S. Department of Energy. The U.S. Government has rights to use,
+reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+to produce derivative works, such modified software should be clearly marked,
+so as not to confuse it with the version available from LANL.
+*/
+#ifndef __pgccfeatures_dot_hpp
+#define __pgccfeatures_dot_hpp
+
+#if !defined(__x86_64__) && !defined(__i386__)
+#  error "This code has only been tested on x86 platforms."
+#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
+{ /* maybe an unbalanced brace will terminate the compilation */
+ /* Feel free to try the Random123 library on other architectures by changing
+ the conditions that reach this error, but you should consider it a
+ porting exercise and expect to encounter bugs and deficiencies.
+ Please let the authors know of any successes (or failures). */
+#endif
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE static inline
+#endif
+
+/* Found this example in PGI's emmintrin.h. */
+#ifndef R123_FORCE_INLINE
+#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__))
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_ASSERT
+#include <assert.h>
+#define R123_ASSERT(x) assert(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) (expr)
+#endif
+
+/* PGI through 13.2 doesn't appear to support AES-NI. */
+#ifndef R123_USE_AES_NI
+#define R123_USE_AES_NI 0
+#endif
+
+/* PGI through 13.2 appears to support MMX, SSE, SSE2, SSE3, SSSE3, SSE4a, and
+   ABM, but not SSE4.1 or SSE4.2. */
+#ifndef R123_USE_SSE4_2
+#define R123_USE_SSE4_2 0
+#endif
+
+#ifndef R123_USE_SSE4_1
+#define R123_USE_SSE4_1 0
+#endif
+
+#ifndef R123_USE_SSE
+/* There's no point in trying to compile SSE code in Random123
+   unless SSE2 is available. */
+#ifdef __SSE2__
+#define R123_USE_SSE 1
+#else
+#define R123_USE_SSE 0
+#endif
+#endif
+
+#ifndef R123_USE_AES_OPENSSL
+/* There isn't really a good way to tell at compile time whether
+   openssl is available.  Without a pre-compilation configure-like
+   tool, it's less error-prone to guess that it isn't available.  Add
+   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
+   play with openssl */
+#define R123_USE_AES_OPENSSL 0
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_ASM_GNU
+#define R123_USE_ASM_GNU 1
+#endif
+
+#ifndef R123_USE_CPUID_MSVC
+#define R123_USE_CPUID_MSVC 0
+#endif
+
+#ifndef R123_USE_X86INTRIN_H
+#define R123_USE_X86INTRIN_H 0
+#endif
+
+#ifndef R123_USE_IA32INTRIN_H
+#define R123_USE_IA32INTRIN_H 0
+#endif
+
+/* emmintrin.h from PGI #includes xmmintrin.h but then complains at link time
+   about undefined references to _mm_castsi128_ps(__m128i).  Why? */
+#ifndef R123_USE_XMMINTRIN_H
+#define R123_USE_XMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_EMMINTRIN_H
+#define R123_USE_EMMINTRIN_H 1
+#endif
+
+#ifndef R123_USE_SMMINTRIN_H
+#define R123_USE_SMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_WMMINTRIN_H
+#define R123_USE_WMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_INTRIN_H
+#ifdef __ABM__
+#define R123_USE_INTRIN_H 1
+#else
+#define R123_USE_INTRIN_H 0
+#endif
+#endif
+
+#ifndef R123_USE_MULHILO32_ASM
+#define R123_USE_MULHILO32_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MULHI_INTRIN
+#define R123_USE_MULHILO64_MULHI_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 1
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 0
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+#include <stdint.h>
+#ifndef UINT64_C
+#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
+#endif
+
+/* If you add something, it must go in all the other XXfeatures.hpp
+   and in ../ut_features.cpp */
+#endif
diff --git a/ext/random123/include/Random123/features/sse.h b/ext/random123/include/Random123/features/sse.h
new file mode 100644
index 0000000000000000000000000000000000000000..3a49ebd8652e0d8deb50ca0daac50f56818ca6e5
--- /dev/null
+++ b/ext/random123/include/Random123/features/sse.h
@@ -0,0 +1,280 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef _Random123_sse_dot_h__
+#define _Random123_sse_dot_h__
+
+#if R123_USE_SSE
+
+#if R123_USE_X86INTRIN_H
+#include <x86intrin.h>
+#endif
+#if R123_USE_IA32INTRIN_H
+#include <ia32intrin.h>
+#endif
+#if R123_USE_XMMINTRIN_H
+#include <xmmintrin.h>
+#endif
+#if R123_USE_EMMINTRIN_H
+#include <emmintrin.h>
+#endif
+#if R123_USE_SMMINTRIN_H
+#include <smmintrin.h>
+#endif
+#if R123_USE_WMMINTRIN_H
+#include <wmmintrin.h>
+#endif
+#if R123_USE_INTRIN_H
+#include <intrin.h>
+#endif
+#ifdef __cplusplus
+#include <iostream>
+#include <limits>
+#include <stdexcept>
+#endif
+
+#if R123_USE_ASM_GNU
+
+/* Runs CPUID with EAX=1; bit 25 of ECX tells us whether AES is enabled (returns 1 or 0). */
+R123_STATIC_INLINE int haveAESNI(){
+    unsigned int eax, ebx, ecx, edx; /* all four outputs are bound; only ecx is read below */
+    __asm__ __volatile__ ("cpuid": "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) :
+                      "a" (1));
+    return (ecx>>25) & 1;
+}
+#elif R123_USE_CPUID_MSVC
+R123_STATIC_INLINE int haveAESNI(){ /* variant selected by R123_USE_CPUID_MSVC */
+    int CPUInfo[4];
+    __cpuid(CPUInfo, 1); /* fill CPUInfo for function id 1 */
+    return (CPUInfo[2]>>25)&1; /* bit 25 of the third output word, as 0 or 1 */
+}
+#else /* R123_USE_CPUID_??? */
+#warning "No R123_USE_CPUID_XXX method chosen.  haveAESNI will always return false"
+R123_STATIC_INLINE int haveAESNI(){ /* fallback: no CPUID method was configured */
+    return 0; /* AES-NI is always reported as unavailable */
+}
+#endif /* R123_USE_ASM_GNU || R123_USE_CPUID_MSVC */
+
+// There is a lot of annoying and inexplicable variation in the
+// SSE intrinsics available in different compilation environments.
+// The details seem to depend on the compiler, the version and
+// the target architecture.  Rather than insisting on
+// R123_USE_feature tests for each of these in each of the
+// compilerfeatures.h files we just keep the complexity localized
+// to here...
+#if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64))
+/* Is there an intrinsic to assemble an __m128i from two 64-bit words?
+   If not, use the 4x32-bit intrinsic instead.  N.B.  It looks like Intel
+   added _mm_set_epi64x to icc version 12.1 in Jan 2012.
+*/
+R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){
+    union{ /* views one 64-bit word as two 32-bit halves */
+        uint64_t u64;
+        uint32_t u32[2];
+    } u1, u0;
+    u1.u64 = v1;
+    u0.u64 = v0;
+    return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]); /* v1's halves first, then v0's */
+}
+#endif
+/* _mm_extract_lo64 abstracts the task of extracting the low 64-bit
+   word from an __m128i.  The _mm_cvtsi128_si64 intrinsic does the job
+   on 64-bit platforms.  Unfortunately, both MSVC and Open64 fail
+   assertions in ut_M128.cpp and ut_carray.cpp when we use the
+   _mm_cvtsi128_si64 intrinsic.  (See
+   https://bugs.open64.net/show_bug.cgi?id=873 for the Open64 bug).
+   On 32-bit platforms, there's no MOVQ, so there's no intrinsic.
+   Finally, even if the intrinsic exists, it may be spelled with or
+   without the 'x'.
+*/
+#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)
+R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ /* variant that avoids the _mm_cvtsi128_si64 intrinsic */
+    union{ /* overlays the vector with two 64-bit words */
+        uint64_t u64[2];
+        __m128i m;
+    }u;
+    _mm_store_si128(&u.m, si); /* write the vector into the union */
+    return u.u64[0]; /* first 64-bit word of the stored value */
+}
+#elif defined(__llvm__) || defined(__ICC)
+R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ /* llvm/ICC: intrinsic spelled without the 'x' */
+    return (uint64_t)_mm_cvtsi128_si64(si); /* intrinsic result cast to unsigned */
+}
+#else /* GNUC, others */
+/* FWIW, gcc's emmintrin.h has had the 'x' spelling
+   since at least gcc-3.4.4.  The no-'x' spelling showed up
+   around 4.2. */
+R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ /* default: intrinsic spelled with the 'x' */
+    return (uint64_t)_mm_cvtsi128_si64x(si); /* intrinsic result cast to unsigned */
+}
+#endif
+#if defined(__GNUC__) && __GNUC__ < 4
+/* the cast builtins showed up in gcc4. */
+R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){ /* stand-in used only when __GNUC__ < 4 */
+    return (__m128)si; /* direct vector-type cast */
+}
+#endif
+
+#ifdef __cplusplus
+
+struct r123m128i{ // C++ wrapper holding a single __m128i
+    __m128i m;
+#if R123_USE_CXX11_UNRESTRICTED_UNIONS
+    // C++98 forbids a union member from having *any* constructors.
+    // C++11 relaxes this, and allows union members to have constructors
+    // as long as there is a "trivial" default constructor.  So in C++11
+    // we can provide a r123m128i constructor with an __m128i argument, and still
+    // have the default (and hence trivial) default constructor.
+    r123m128i() = default;
+    r123m128i(__m128i _m): m(_m){}
+#endif
+    r123m128i& operator=(const __m128i& rhs){ m=rhs; return *this;}
+    r123m128i& operator=(R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n); return *this;} // n becomes the low word; the high word is zero
+#if R123_USE_CXX11_EXPLICIT_CONVERSIONS
+    // With C++11 we can attach explicit to the bool conversion operator
+    // to disambiguate undesired promotions.  For g++, this works
+    // only in 4.5 and above.
+    explicit operator bool() const {return _bool();}
+#else
+    // Pre-C++11, we have to do something else.  Google for the "safe bool"
+    // idiom for other ideas...
+    operator const void*() const{return _bool()?this:0;} // non-null pointer stands for true, null for false
+#endif
+    operator __m128i() const {return m;}
+
+private:
+#if R123_USE_SSE4_1
+    bool _bool() const{ return !_mm_testz_si128(m,m); } // true when m has any bit set
+#else
+    bool _bool() const{ return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); } // true unless all four 32-bit lanes equal zero
+#endif
+};
+
+R123_STATIC_INLINE r123m128i& operator++(r123m128i& v){ // pre-increment: adds 1 to v, handling the carry by hand
+    __m128i& c = v.m; // c aliases v.m, so the updates below modify v in place
+    __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
+    c = _mm_add_epi64(c, zeroone);
+    // Cannot stop here: the carry out of the low 64 bits is applied below.
+#if R123_USE_SSE4_1
+    __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0))); // mask selecting the low 64 bits
+    if( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){ // low 64 bits of c are zero (hinted as unlikely)
+        __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
+        c = _mm_add_epi64(c, onezero); // propagate the carry into the high word
+    }
+#else
+    unsigned mask  = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
+    // The low two bits of mask are 11 iff the low 64 bits of
+    // c are zero.
+    if( R123_BUILTIN_EXPECT((mask&0x3) == 0x3, 0) ){
+        __m128i onezero = _mm_set_epi64x(1,0);
+        c = _mm_add_epi64(c, onezero); // propagate the carry into the high word
+    }
+#endif
+    return v;
+}
+
+R123_STATIC_INLINE r123m128i& operator+=(r123m128i& lhs, R123_ULONG_LONG n){ // adds n to lhs, handling the carry by hand
+    __m128i c = lhs.m; // work on a copy; written back to lhs.m at the end
+    __m128i incr128 = _mm_set_epi64x(0, n);
+    c = _mm_add_epi64(c, incr128);
+    // return c;     // NO CARRY!  (so it is applied explicitly below)
+
+    int64_t lo64 = _mm_extract_lo64(c);
+    if((uint64_t)lo64 < n) // the low word ended up smaller than the addend, i.e. it wrapped
+        c = _mm_add_epi64(c, _mm_set_epi64x(1,0)); // propagate the carry into the high word
+    lhs.m = c;
+    return lhs; 
+}
+
+// Always throws.  We need this overload because it's present, but never used, in r123array1xm128i::incr
+R123_STATIC_INLINE bool operator<=(R123_ULONG_LONG, const r123m128i &){
+    throw std::runtime_error("operator<=(unsigned long long, r123m128i) is unimplemented.");}
+
+// The ordering comparisons aren't implemented (each one throws), but if we
+// left them out and somebody wrote, e.g., M1 < M2, the compiler would do an
+// implicit conversion through void*.  Sigh...
+R123_STATIC_INLINE bool operator<(const r123m128i&, const r123m128i&){
+    throw std::runtime_error("operator<(r123m128i, r123m128i) is unimplemented.");}
+R123_STATIC_INLINE bool operator<=(const r123m128i&, const r123m128i&){
+    throw std::runtime_error("operator<=(r123m128i, r123m128i) is unimplemented.");}
+R123_STATIC_INLINE bool operator>(const r123m128i&, const r123m128i&){
+    throw std::runtime_error("operator>(r123m128i, r123m128i) is unimplemented.");}
+R123_STATIC_INLINE bool operator>=(const r123m128i&, const r123m128i&){
+    throw std::runtime_error("operator>=(r123m128i, r123m128i) is unimplemented.");}
+
+R123_STATIC_INLINE bool operator==(const r123m128i &lhs, const r123m128i &rhs){ // equal iff all four 32-bit lanes compare equal
+    return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
+R123_STATIC_INLINE bool operator!=(const r123m128i &lhs, const r123m128i &rhs){ // negation of operator==
+    return !(lhs==rhs);}
+R123_STATIC_INLINE bool operator==(R123_ULONG_LONG lhs, const r123m128i &rhs){ // compares against lhs placed in the low word with a zero high word
+    r123m128i LHS; LHS.m=_mm_set_epi64x(0, lhs); return LHS == rhs; }
+R123_STATIC_INLINE bool operator!=(R123_ULONG_LONG lhs, const r123m128i &rhs){ // negation of the overload above
+    return !(lhs==rhs);}
+R123_STATIC_INLINE std::ostream& operator<<(std::ostream& os, const r123m128i& m){ // writes the value as two 64-bit integers
+    union{ // overlays the vector with two 64-bit words
+        uint64_t u64[2];
+        __m128i m;
+    }u;
+    _mm_storeu_si128(&u.m, m.m); // copy the vector into the union
+    return os << u.u64[0] << " " << u.u64[1]; // u64[0] first, then a single space, then u64[1]
+}
+
+R123_STATIC_INLINE std::istream& operator>>(std::istream& is, r123m128i& m){ // reads two 64-bit integers: low word first, then high word
+    uint64_t u64[2] = {0, 0}; // zero-init: a failed extraction may leave an element unwritten, and it is read below
+    is >> u64[0] >> u64[1]; // on failure the stream's fail state is left set for the caller to inspect
+    m.m = _mm_set_epi64x(u64[1], u64[0]); // u64[1] is the high word, u64[0] the low word
+    return is;
+}
+
+template<typename T> inline T assemble_from_u32(uint32_t *p32); // forward declaration; only the specialization below is defined here
+
+template <>
+inline r123m128i assemble_from_u32<r123m128i>(uint32_t *p32){ // builds an r123m128i from the four words p32[0..3]
+    r123m128i ret;
+    ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]); // p32[3] is passed first, p32[0] last
+    return ret;
+}
+
+#else
+
+typedef struct { /* non-C++ build: just the raw vector, no operators */
+    __m128i m;
+} r123m128i;
+
+#endif /* __cplusplus */
+
+#else /* !R123_USE_SSE */
+R123_STATIC_INLINE int haveAESNI(){ /* stub used when R123_USE_SSE is off */
+    return 0; /* AES-NI is always reported as unavailable */
+}
+#endif /* R123_USE_SSE */
+
+#endif /* _Random123_sse_dot_h__ */
diff --git a/ext/random123/include/Random123/features/sunprofeatures.h b/ext/random123/include/Random123/features/sunprofeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..c9cdc00f5e8f970898ae577b14fa910ceb135a91
--- /dev/null
+++ b/ext/random123/include/Random123/features/sunprofeatures.h
@@ -0,0 +1,172 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __sunprofeatures_dot_hpp
+#define __sunprofeatures_dot_hpp
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE static inline
+#endif
+
+#ifndef R123_FORCE_INLINE
+#define R123_FORCE_INLINE(decl) decl
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_ASSERT
+#include <assert.h>
+#define R123_ASSERT(x) assert(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) expr
+#endif
+
+// The basic idiom is:
+// #ifndef R123_SOMETHING
+// #if some condition
+// #define R123_SOMETHING 1
+// #else
+// #define R123_SOMETHING 0
+// #endif
+// #endif
+// This idiom allows an external user to override any decision
+// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHING=0
+
+// An alternative idiom is:
+// #ifndef R123_SOMETHING
+// #define R123_SOMETHING (some boolean expression)
+// #endif
+// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE
+// pp-symbols.
+
+#ifndef R123_USE_AES_NI
+#define R123_USE_AES_NI 0
+#endif
+
+#ifndef R123_USE_SSE4_2
+#define R123_USE_SSE4_2 0
+#endif
+
+#ifndef R123_USE_SSE4_1
+#define R123_USE_SSE4_1 0
+#endif
+
+#ifndef R123_USE_SSE
+#define R123_USE_SSE 0
+#endif
+
+#ifndef R123_USE_AES_OPENSSL
+#define R123_USE_AES_OPENSSL 0
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_ASM_GNU
+#define R123_USE_ASM_GNU 0
+#endif
+
+#ifndef R123_USE_CPUID_MSVC
+#define R123_USE_CPUID_MSVC 0
+#endif
+
+#ifndef R123_USE_X86INTRIN_H
+#define R123_USE_X86INTRIN_H 0
+#endif
+
+#ifndef R123_USE_IA32INTRIN_H
+#define R123_USE_IA32INTRIN_H 0
+#endif
+
+#ifndef R123_USE_XMMINTRIN_H
+#define R123_USE_XMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_EMMINTRIN_H
+#define R123_USE_EMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_SMMINTRIN_H
+#define R123_USE_SMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_WMMINTRIN_H
+#define R123_USE_WMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_INTRIN_H
+#define R123_USE_INTRIN_H 0
+#endif
+
+#ifndef R123_USE_MULHILO16_ASM
+#define R123_USE_MULHILO16_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO32_ASM
+#define R123_USE_MULHILO32_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#define R123_USE_MULHILO64_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 0
+#endif
+
+#ifndef R123_USE_PHILOX_64BIT
+#define R123_USE_PHILOX_64BIT 0
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+#include <stdint.h>
+#ifndef UINT64_C
+#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
+#endif
+
+// If you add something, it must go in all the other XXfeatures.hpp
+// and in ../ut_features.cpp
+#endif
diff --git a/ext/random123/include/Random123/features/xlcfeatures.h b/ext/random123/include/Random123/features/xlcfeatures.h
new file mode 100644
index 0000000000000000000000000000000000000000..ccb98ee5531c57253410eb4d1fe3692ec289bbca
--- /dev/null
+++ b/ext/random123/include/Random123/features/xlcfeatures.h
@@ -0,0 +1,210 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Copyright (c) 2013, Los Alamos National Security, LLC
+All rights reserved.
+
+Copyright 2013. Los Alamos National Security, LLC. This software was produced
+under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
+the U.S. Department of Energy. The U.S. Government has rights to use,
+reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
+ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
+to produce derivative works, such modified software should be clearly marked,
+so as not to confuse it with the version available from LANL.
+*/
+#ifndef __xlcfeatures_dot_hpp
+#define __xlcfeatures_dot_hpp
+
+#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__)
+#  error "This code has only been tested on x86 and PowerPC platforms."
+#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
+{ /* maybe an unbalanced brace will terminate the compilation */
+ /* Feel free to try the Random123 library on other architectures by changing
+ the conditions that reach this error, but you should consider it a
+ porting exercise and expect to encounter bugs and deficiencies.
+ Please let the authors know of any successes (or failures). */
+#endif
+
+#ifdef __cplusplus
+/* builtins are automatically available to xlc.  To use them with xlc++,
+   one must include builtins.h.   c.f
+   http://publib.boulder.ibm.com/infocenter/cellcomp/v101v121/index.jsp?topic=/com.ibm.xlcpp101.cell.doc/compiler_ref/compiler_builtins.html
+*/
+#include <builtins.h>
+#endif
+
+#ifndef R123_STATIC_INLINE
+#define R123_STATIC_INLINE static inline
+#endif
+
+#ifndef R123_FORCE_INLINE
+#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__))
+#endif
+
+#ifndef R123_CUDA_DEVICE
+#define R123_CUDA_DEVICE
+#endif
+
+#ifndef R123_ASSERT
+#include <assert.h>
+#define R123_ASSERT(x) assert(x)
+#endif
+
+#ifndef R123_BUILTIN_EXPECT
+#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
+#endif
+
+#ifndef R123_USE_AES_NI
+#define R123_USE_AES_NI 0
+#endif
+
+#ifndef R123_USE_SSE4_2
+#define R123_USE_SSE4_2 0
+#endif
+
+#ifndef R123_USE_SSE4_1
+#define R123_USE_SSE4_1 0
+#endif
+
+#ifndef R123_USE_SSE
+#define R123_USE_SSE 0
+#endif
+
+#ifndef R123_USE_AES_OPENSSL
+/* There isn't really a good way to tell at compile time whether
+   openssl is available.  Without a pre-compilation configure-like
+   tool, it's less error-prone to guess that it isn't available.  Add
+   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
+   play with openssl */
+#define R123_USE_AES_OPENSSL 0
+#endif
+
+#ifndef R123_USE_GNU_UINT128
+#define R123_USE_GNU_UINT128 0
+#endif
+
+#ifndef R123_USE_ASM_GNU
+#define R123_USE_ASM_GNU 1
+#endif
+
+#ifndef R123_USE_CPUID_MSVC
+#define R123_USE_CPUID_MSVC 0
+#endif
+
+#ifndef R123_USE_X86INTRIN_H
+#define R123_USE_X86INTRIN_H 0
+#endif
+
+#ifndef R123_USE_IA32INTRIN_H
+#define R123_USE_IA32INTRIN_H 0
+#endif
+
+#ifndef R123_USE_XMMINTRIN_H
+#define R123_USE_XMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_EMMINTRIN_H
+#define R123_USE_EMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_SMMINTRIN_H
+#define R123_USE_SMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_WMMINTRIN_H
+#define R123_USE_WMMINTRIN_H 0
+#endif
+
+#ifndef R123_USE_INTRIN_H
+#ifdef __ABM__
+#define R123_USE_INTRIN_H 1
+#else
+#define R123_USE_INTRIN_H 0
+#endif
+#endif
+
+#ifndef R123_USE_MULHILO32_ASM
+#define R123_USE_MULHILO32_ASM 0
+#endif
+
+#ifndef R123_USE_MULHILO64_MULHI_INTRIN
+#if (defined(__powerpc64__))
+#define R123_USE_MULHILO64_MULHI_INTRIN 1
+#else
+#define R123_USE_MULHILO64_MULHI_INTRIN 0
+#endif
+#endif
+
+#ifndef R123_MULHILO64_MULHI_INTRIN
+#define R123_MULHILO64_MULHI_INTRIN __mulhdu
+#endif
+
+#ifndef R123_USE_MULHILO32_MULHI_INTRIN
+#define R123_USE_MULHILO32_MULHI_INTRIN 0
+#endif
+
+#ifndef R123_MULHILO32_MULHI_INTRIN
+#define R123_MULHILO32_MULHI_INTRIN __mulhwu
+#endif
+
+#ifndef R123_USE_MULHILO64_ASM
+#if defined(__powerpc64__)
+#define R123_USE_MULHILO64_ASM (1 /*defined(__powerpc64__)*/ && !(R123_USE_MULHILO64_MULHI_INTRIN))
+#else
+#define R123_USE_MULHILO64_ASM (0 /*defined(__powerpc64__)*/ && !(R123_USE_MULHILO64_MULHI_INTRIN))
+#endif
+#endif
+
+#ifndef R123_USE_MULHILO64_MSVC_INTRIN
+#define R123_USE_MULHILO64_MSVC_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
+#define R123_USE_MULHILO64_CUDA_INTRIN 0
+#endif
+
+#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
+#define R123_USE_MULHILO64_OPENCL_INTRIN 0
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+#include <stdint.h>
+#ifndef UINT64_C
+#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
+#endif
+
+/* If you add something, it must go in all the other XXfeatures.hpp
+   and in ../ut_features.cpp */
+#endif
diff --git a/ext/random123/include/Random123/gsl_microrng.h b/ext/random123/include/Random123/gsl_microrng.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f09412152687462506ce88650a5328f6787ae23
--- /dev/null
+++ b/ext/random123/include/Random123/gsl_microrng.h
@@ -0,0 +1,136 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef __r123_gslmicrorng_dot_h__
+#define __r123_gslmicrorng_dot_h__
+
+
+#include <gsl/gsl_rng.h>
+#include <string.h>
+
+/**   The macro: GSL_MICRORNG(NAME, CBRNGNAME) is the GSL
+   analog of the C++ r123::MicroURNG template.  It declares a gsl_rng
+   type named gsl_rng_NAME which uses the underlying CBRNGNAME
+   and can be invoked a limited number of times between calls to NAME_reset.
+
+   When the underlying CBRNG's \c ctr_t is an \ref arrayNxW "r123arrayNxW",
+   the gsl_rng_NAME may be called up to \c N*2^32 times
+   between calls to \c NAME_reset.
+
+   \c NAME_reset takes a gsl_rng_NAME type, a counter and a key as arguments.
+   It restarts the micro-rng with a new base counter and key.
+
+   Note that you must call NAME_reset before the first use
+   of a gsl_rng.  NAME_reset is not called automatically by
+   gsl_rng_alloc().
+
+   @code
+   #include <Random123/threefry.h>
+   #include <Random123/gsl_microrng.h> // this file
+   GSL_MICRORNG(microcbrng, threefry4x64);	// creates gsl_rng_microcbrng
+
+   int main(int argc, char** argv) {
+	gsl_rng *r = gsl_rng_alloc(gsl_rng_microcbrng);
+	threefry4x64_ctr_t c = {{}};
+	threefry4x64_key_t k = {{}};
+
+	for (...) {
+	    c.v[0] = ??; //  some application variable
+	    microcbrng_reset(r, c, k);
+	    for (...) {
+		// gaussian calls r several times.  It is safe for
+		// r to be used up to 2^20 times in this loop
+		something[i] = gsl_ran_gaussian(r, 1.5);
+	    }
+	}
+   }
+   @endcode
+   
+*/
+
+#define GSL_MICRORNG(NAME, CBRNGNAME)                                   \
+const gsl_rng_type *gsl_rng_##NAME;                                     \
+/* Per-generator state; NAME_reset reaches it through gsl_rng::state. */ \
+typedef struct{                                                         \
+    CBRNGNAME##_ctr_t ctr;  /* base counter installed by NAME_reset */  \
+    CBRNGNAME##_ctr_t r;    /* most recent block of CBRNG output */     \
+    CBRNGNAME##_key_t key;  /* key installed by NAME_reset */           \
+    R123_ULONG_LONG n;      /* blocks generated since NAME_reset */     \
+    int elem;               /* words of r not yet handed out */         \
+} NAME##_state;                                                         \
+/* Return the next 32-bit value, generating a new block when r is used up. */ \
+static unsigned long int NAME##_get(void *vstate){                      \
+    NAME##_state *st = (NAME##_state *)vstate;                          \
+    const int N=sizeof(st->ctr.v)/sizeof(st->ctr.v[0]);                 \
+    if( st->elem == 0 ){ /* buffer drained: generate the next block */  \
+        CBRNGNAME##_ctr_t c = st->ctr;                                  \
+        c.v[N-1] |= st->n<<(R123_W(CBRNGNAME##_ctr_t)-32);              \
+        st->n++;                                                        \
+        st->r = CBRNGNAME(c, st->key);                                  \
+        st->elem = N;                                                   \
+    }                                                                   \
+    return 0xffffffff & st->r.v[--st->elem]; /* low 32 bits only */     \
+}                                                                       \
+/* Same draw as NAME_get, scaled into [0,1). */                         \
+static double                                                           \
+NAME##_get_double (void * vstate)                                       \
+{                                                                       \
+    return NAME##_get (vstate)/4294967296.; /* divide by 2^32 */        \
+}                                                                       \
+/* gsl "set" hook: the seed is ignored; counter and key come from NAME_reset. */ \
+static void NAME##_set(void *vstate, unsigned long int s){              \
+    NAME##_state *st = (NAME##_state *)vstate;                          \
+    (void)s; /* ignored */                                              \
+    st->elem = 0;                                                       \
+    st->n = ~0; /* marker only: nothing in this macro checks it, so call NAME_reset before use */ \
+}                                                                       \
+/* gsl_rng_type fields, in order: name, max, min, size, set, get, get_double. */ \
+static const gsl_rng_type NAME##_type = {                               \
+    #NAME,                                                              \
+    0xffffffffUL,                                                       \
+    0,                                                                  \
+    sizeof(NAME##_state),                                               \
+    &NAME##_set,                                                        \
+    &NAME##_get,                                                        \
+    &NAME##_get_double                                                  \
+};                                                                      \
+/* Install a new base counter and key and restart the block count. */   \
+R123_STATIC_INLINE void NAME##_reset(const gsl_rng* gr, CBRNGNAME##_ctr_t c, CBRNGNAME##_key_t k) { \
+    NAME##_state* state = (NAME##_state *)gr->state;                    \
+    state->ctr = c;                                                     \
+    state->key = k;                                                     \
+    state->n = 0;                                                       \
+    state->elem = 0;  /* makes the next NAME_get generate a block */    \
+}                                                                       \
+                                                                        \
+const gsl_rng_type *gsl_rng_##NAME = &NAME##_type
+
+#endif
diff --git a/ext/random123/include/Random123/philox.h b/ext/random123/include/Random123/philox.h
new file mode 100644
index 0000000000000000000000000000000000000000..7bf4d195772358a87b8fbb33667783b5caba61a4
--- /dev/null
+++ b/ext/random123/include/Random123/philox.h
@@ -0,0 +1,493 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef _philox_dot_h_
+#define _philox_dot_h_
+
+/** \cond HIDDEN_FROM_DOXYGEN */
+
+#include "features/compilerfeatures.h"
+#include "array.h"
+
+
+/*
+// Macros _Foo_tpl are code generation 'templates'.  They define
+// inline functions with names obtained by mangling Foo and the
+// macro arguments.  E.g.,
+//   _mulhilo_dword_tpl(32, uint32_t, uint64_t)
+// expands to a definition of:
+//   uint32_t mulhilo32(uint32_t, uint32_t, uint32_t *)
+// We then 'instantiate the template' to define
+// several different functions, e.g.,
+//   mulhilo32
+//   mulhilo64
+// These functions will be visible to user code, and may
+// also be used later in subsequent templates and definitions.
+
+// A template for mulhilo using a temporary of twice the word-width.
+// Gcc figures out that this can be reduced to a single 'mul' instruction,
+// despite the apparent use of double-wide variables, shifts, etc.  It's
+// obviously not guaranteed that all compilers will be that smart, so
+// other implementations might be preferable, e.g., using an intrinsic
+// or an asm block.  On the other hand, for 32-bit multiplies,
+// this *is* perfectly standard C99 - any C99 compiler should 
+// understand it and produce correct code.  For 64-bit multiplies,
+// it's only usable if the compiler recognizes that it can do
+// arithmetic on a 128-bit type.  That happens to be true for gcc on
+// x86-64, and powerpc64 but not much else.
+*/
+#define _mulhilo_dword_tpl(W, Word, Dword)                              \
+R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \
+    const Dword wide = (Dword)a * (Dword)b; /* full 2W-bit product */   \
+    *hip = (Word)(wide >> W);               /* upper W bits */          \
+    return (Word)wide;                      /* lower W bits */          \
+}
+
+/*
+// A template for mulhilo using gnu-style asm syntax.  INSN is "mulhwu" or
+// "mulhdu" on powerpc (high word only), else "mulw", "mull" or "mulq".
+// FIXME - porting to other architectures, we'll need still-more conditional
+// branching here.  Note that intrinsics are usually preferable.
+*/
+#ifdef __powerpc__
+#define _mulhilo_asm_tpl(W, Word, INSN)                         \
+R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \
+    Word dx = 0;                                                \
+    __asm__("\n\t"                                              \
+        INSN " %0,%1,%2\n\t"                                    \
+        : "=r"(dx)                                              \
+        : "r"(b), "r"(ax)                                       \
+        );                                                      \
+    *hip = dx;       /* the asm output is stored as the high word */ \
+    return ax*b;     /* low word comes from a plain multiply */ \
+}
+#else /* !__powerpc__ */
+#define _mulhilo_asm_tpl(W, Word, INSN)                         \
+R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){      \
+    Word dx;                                                    \
+    __asm__("\n\t"                                              \
+        INSN " %2\n\t"                                          \
+        : "=a"(ax), "=d"(dx)                                    \
+        : "r"(b), "0"(ax)                                       \
+        );                                                      \
+    *hip = dx;       /* "d" output is stored as the high word */ \
+    return ax;       /* "a" output (overwrites the input) is the result */ \
+}
+#endif /* __powerpc__ */
+
+/*
+// A template for mulhilo using MSVC-style intrinsics: INTRIN(a, b, hip) supplies
+// the return value directly.  For example, _umul128 is an msvc intrinsic, c.f.
+// http://msdn.microsoft.com/en-us/library/3dayytw9.aspx
+*/
+#define _mulhilo_msvc_intrin_tpl(W, Word, INTRIN)               \
+R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){       \
+    return INTRIN(a, b, hip); /* hip is passed straight through */ \
+}
+
+/* N.B.  This really should be called _mulhilo_mulhi_intrin.  It just
+   happens that CUDA was the first time we used the idiom. */
+#define _mulhilo_cuda_intrin_tpl(W, Word, INTRIN)                       \
+R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, R123_METAL_THREAD_ADDRESS_SPACE Word* hip){ \
+    *hip = INTRIN(a, b);  /* INTRIN's result is stored as the high word */ \
+    return a*b;           /* low word from an ordinary multiply */      \
+}
+
+/*
+// A template for mulhilo using only word-size operations and
+// C99 operators (no adc, no mulhi).  It
+// requires four multiplies and a dozen or so shifts, adds
+// and tests.  It's *SLOW*.  It can be used to
+// implement philoxNx32 on platforms that completely lack
+// 64-bit types, e.g., Metal.  
+// On 32-bit platforms, it could be used to
+// implement philoxNx64, but on such platforms both the philoxNx32
+// and the threefryNx64 cbrngs are going to have much better
+// performance.  It is enabled below by R123_USE_MULHILO64_C99,
+// but that is currently (Feb 2019) only set by 
+// features/metalfeatures.h headers.  It can, of course, be
+// set with a compile-time -D option.
+*/
+#define _mulhilo_c99_tpl(W, Word) \
+R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, R123_METAL_THREAD_ADDRESS_SPACE Word *hip){ \
+    const unsigned WHALF = W/2;  /* bits in half a word */         \
+    const Word LOMASK = ((((Word)1)<<WHALF)-1); /* low-half mask */ \
+    Word lo = a*b;               /* full low multiply */           \
+    Word ahi = a>>WHALF;         /* split both operands in halves */ \
+    Word alo = a& LOMASK;                                          \
+    Word bhi = b>>WHALF;                                           \
+    Word blo = b& LOMASK;                                          \
+                                                                   \
+    Word ahbl = ahi*blo;         /* the two cross products */      \
+    Word albh = alo*bhi;                                           \
+    /* low halves of the cross products are summed apart so their carry is kept */ \
+    Word ahbl_albh = ((ahbl&LOMASK) + (albh&LOMASK));                   \
+    Word hi = ahi*bhi + (ahbl>>WHALF) +  (albh>>WHALF);                 \
+    hi += ahbl_albh >> WHALF; /* carry from the sum of lo(ahbl) + lo(albh) */ \
+    /* carry from the sum with alo*blo: detected as wraparound in lo's upper half */ \
+    hi += ((lo >> WHALF) < (ahbl_albh&LOMASK));                         \
+    *hip = hi;                   /* high word goes out through hip */   \
+    return lo;                   /* low word is the return value */     \
+}
+
+/*
+// A template for mulhilo on a platform that can't do it
+// We could put a C version here, but is it better to run *VERY*
+// slowly or to just stop and force the user to find another CBRNG?
+*/
+#define _mulhilo_fail_tpl(W, Word) /* stub whose only statement is a static assertion on 0 */ \
+R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word *hip){               \
+    R123_STATIC_ASSERT(0, "mulhilo" #W " is not implemented on this machine\n"); \
+}
+
+/*
+// N.B.  There's an MSVC intrinsic called _emul,
+// which *might* compile into better code than
+// _mulhilo_dword_tpl 
+*/
+#if R123_USE_MULHILO32_ASM
+#ifdef __powerpc__
+_mulhilo_asm_tpl(32, uint32_t, "mulhwu")
+#else
+_mulhilo_asm_tpl(32, uint32_t, "mull")
+#endif /* __powerpc__ */
+#else /* !R123_USE_MULHILO32_ASM */
+#if R123_USE_64BIT
+_mulhilo_dword_tpl(32, uint32_t, uint64_t)
+#elif R123_USE_MULHILO32_MULHI_INTRIN
+_mulhilo_cuda_intrin_tpl(32, uint32_t, R123_MULHILO32_MULHI_INTRIN)
+#else
+_mulhilo_c99_tpl(32, uint32_t) /* word-size-only fallback */
+#endif /* R123_USE_64BIT / R123_USE_MULHILO32_MULHI_INTRIN */
+#endif /* R123_USE_MULHILO32_ASM */
+
+#if R123_USE_PHILOX_64BIT
+#if R123_USE_MULHILO64_ASM
+#ifdef __powerpc64__
+_mulhilo_asm_tpl(64, uint64_t, "mulhdu")
+#else
+_mulhilo_asm_tpl(64, uint64_t, "mulq")
+#endif /* __powerpc64__ */
+#elif R123_USE_MULHILO64_MSVC_INTRIN
+_mulhilo_msvc_intrin_tpl(64, uint64_t, _umul128)
+#elif R123_USE_MULHILO64_CUDA_INTRIN
+_mulhilo_cuda_intrin_tpl(64, uint64_t, __umul64hi)
+#elif R123_USE_MULHILO64_OPENCL_INTRIN
+_mulhilo_cuda_intrin_tpl(64, uint64_t, mul_hi)
+#elif R123_USE_MULHILO64_MULHI_INTRIN
+_mulhilo_cuda_intrin_tpl(64, uint64_t, R123_MULHILO64_MULHI_INTRIN)
+#elif R123_USE_GNU_UINT128
+_mulhilo_dword_tpl(64, uint64_t, __uint128_t)
+#elif R123_USE_MULHILO64_C99
+_mulhilo_c99_tpl(64, uint64_t)
+#else
+_mulhilo_fail_tpl(64, uint64_t) /* no 64-bit implementation was selected */
+#endif /* choice of mulhilo64 implementation */
+#endif /* R123_USE_PHILOX_64BIT */
+
+/*
+// The multipliers and Weyl constants are "hard coded".
+// To change them, you can #define them with different
+// values before #include-ing this file. 
+// This isn't terribly elegant, but it works for C as
+// well as C++.  A nice C++-only solution would be to
+// use template parameters in the style of <random>
+*/
+#ifndef PHILOX_M2x64_0
+#define PHILOX_M2x64_0 R123_64BIT(0xD2B74407B1CE6E93) /* philox2x64 round multiplier */
+#endif
+
+#ifndef PHILOX_M4x64_0
+#define PHILOX_M4x64_0 R123_64BIT(0xD2E7470EE14C6C93) /* philox4x64: multiplies ctr.v[0] */
+#endif
+
+#ifndef PHILOX_M4x64_1
+#define PHILOX_M4x64_1 R123_64BIT(0xCA5A826395121157) /* philox4x64: multiplies ctr.v[2] */
+#endif
+
+#ifndef PHILOX_M2x32_0
+#define PHILOX_M2x32_0 ((uint32_t)0xd256d193) /* philox2x32 round multiplier */
+#endif
+
+#ifndef PHILOX_M4x32_0
+#define PHILOX_M4x32_0 ((uint32_t)0xD2511F53) /* philox4x32: multiplies ctr.v[0] */
+#endif
+#ifndef PHILOX_M4x32_1
+#define PHILOX_M4x32_1 ((uint32_t)0xCD9E8D57) /* philox4x32: multiplies ctr.v[2] */
+#endif
+
+#ifndef PHILOX_W64_0
+#define PHILOX_W64_0 R123_64BIT(0x9E3779B97F4A7C15)  /* golden ratio */
+#endif
+#ifndef PHILOX_W64_1
+#define PHILOX_W64_1 R123_64BIT(0xBB67AE8584CAA73B)  /* sqrt(3)-1 */
+#endif
+
+#ifndef PHILOX_W32_0
+#define PHILOX_W32_0 ((uint32_t)0x9E3779B9) /* same leading hex digits as the PHILOX_W64_0 default */
+#endif
+#ifndef PHILOX_W32_1
+#define PHILOX_W32_1 ((uint32_t)0xBB67AE85) /* same leading hex digits as the PHILOX_W64_1 default */
+#endif
+
+/** \endcond */
+#ifndef PHILOX2x32_DEFAULT_ROUNDS
+#define PHILOX2x32_DEFAULT_ROUNDS 10
+#endif
+
+#ifndef PHILOX2x64_DEFAULT_ROUNDS
+#define PHILOX2x64_DEFAULT_ROUNDS 10
+#endif
+
+#ifndef PHILOX4x32_DEFAULT_ROUNDS
+#define PHILOX4x32_DEFAULT_ROUNDS 10
+#endif
+
+#ifndef PHILOX4x64_DEFAULT_ROUNDS
+#define PHILOX4x64_DEFAULT_ROUNDS 10
+#endif
+/** \cond HIDDEN_FROM_DOXYGEN */
+
+/* One Philox-2xW round: multiply ctr.v[0] by the round multiplier and
+   return {hi ^ key.v[0] ^ ctr.v[1], lo}.  W = word bits, T = word type. */
+#define _philox2xWround_tpl(W, T)                                       \
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key)); \
+R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key){ \
+    T prod_hi;                                                          \
+    const T prod_lo = mulhilo##W(PHILOX_M2x##W##_0, ctr.v[0], &prod_hi); \
+    struct r123array2x##W mixed = {{prod_hi^key.v[0]^ctr.v[1], prod_lo}}; \
+    return mixed;                                                       \
+}
+#define _philox2xWbumpkey_tpl(W)                                        \
+R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array1x##W _philox2x##W##bumpkey( struct r123array1x##W key) { \
+    key.v[0] = key.v[0] + PHILOX_W##W##_0; /* advance the key by the Weyl constant */ \
+    return key;                            /* key was passed by value */ \
+}
+
+#define _philox4xWround_tpl(W, T)                                       \
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key)); \
+R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key){ \
+    T upper0;  /* high word of the ctr.v[0] product */                  \
+    T upper2;  /* high word of the ctr.v[2] product */                  \
+    const T lower0 = mulhilo##W(PHILOX_M4x##W##_0, ctr.v[0], &upper0);  \
+    const T lower2 = mulhilo##W(PHILOX_M4x##W##_1, ctr.v[2], &upper2);  \
+    struct r123array4x##W mixed = {{upper2^ctr.v[1]^key.v[0], lower2,   \
+                                    upper0^ctr.v[3]^key.v[1], lower0}}; \
+    return mixed;                                                       \
+}
+
+#define _philox4xWbumpkey_tpl(W)                                        \
+R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox4x##W##bumpkey( struct r123array2x##W key) { \
+    key.v[1] = key.v[1] + PHILOX_W##W##_1; /* each key word has its own Weyl constant */ \
+    key.v[0] = key.v[0] + PHILOX_W##W##_0;                              \
+    return key;                            /* key was passed by value */ \
+}
+
+/** \endcond */
+#define _philoxNxW_tpl(N, Nhalf, W, T) /* T is not used in the expansion */ \
+/** @ingroup PhiloxNxW */                                       \
+enum r123_enum_philox##N##x##W { philox##N##x##W##_rounds = PHILOX##N##x##W##_DEFAULT_ROUNDS }; \
+typedef struct r123array##N##x##W philox##N##x##W##_ctr_t;                  \
+typedef struct r123array##Nhalf##x##W philox##N##x##W##_key_t;              \
+typedef struct r123array##Nhalf##x##W philox##N##x##W##_ukey_t;              \
+R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_key_t philox##N##x##W##keyinit(philox##N##x##W##_ukey_t uk) { return uk; } \
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key)); \
+R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key) { \
+    R123_ASSERT(R<=16);  /* only 16 rounds are unrolled below */        \
+    if(R>0){ /* first round uses the key as passed in */ ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>1){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>2){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>3){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>4){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>5){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>6){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>7){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>8){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>9){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>10){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>11){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>12){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>13){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>14){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    if(R>15){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
+    return ctr;                                                         \
+}
+         
+_philox2xWbumpkey_tpl(32)         /* _philox2x32bumpkey */
+_philox4xWbumpkey_tpl(32)         /* _philox4x32bumpkey */
+_philox2xWround_tpl(32, uint32_t) /* _philox2x32round */
+_philox4xWround_tpl(32, uint32_t)            /* _philox4x32round */
+
+_philoxNxW_tpl(2, 1, 32, uint32_t)    /* philox2x32_R and its ctr/key types */
+_philoxNxW_tpl(4, 2, 32, uint32_t)    /* philox4x32_R and its ctr/key types */
+#if R123_USE_PHILOX_64BIT
+/** \cond HIDDEN_FROM_DOXYGEN */
+_philox2xWbumpkey_tpl(64)         /* _philox2x64bumpkey */
+_philox4xWbumpkey_tpl(64)         /* _philox4x64bumpkey */
+_philox2xWround_tpl(64, uint64_t) /* _philox2x64round */
+_philox4xWround_tpl(64, uint64_t) /* _philox4x64round */
+/** \endcond */
+_philoxNxW_tpl(2, 1, 64, uint64_t)    /* philox2x64_R and its ctr/key types */
+_philoxNxW_tpl(4, 2, 64, uint64_t)    /* philox4x64_R and its ctr/key types */
+#endif /* R123_USE_PHILOX_64BIT */
+
+#define philox2x32(c,k) philox2x32_R(philox2x32_rounds, c, k) /* default round count */
+#define philox4x32(c,k) philox4x32_R(philox4x32_rounds, c, k) /* default round count */
+#if R123_USE_PHILOX_64BIT
+#define philox2x64(c,k) philox2x64_R(philox2x64_rounds, c, k) /* default round count */
+#define philox4x64(c,k) philox4x64_R(philox4x64_rounds, c, k) /* default round count */
+#endif /* R123_USE_PHILOX_64BIT */
+
+#if defined(__cplusplus) 
+
+#define _PhiloxNxW_base_tpl(CType, KType, N, W)                         \
+namespace r123{                                                         \
+template<unsigned int ROUNDS>                                           \
+struct Philox##N##x##W##_R{  /* function-object wrapper over philoxNxW_R */ \
+    typedef CType ctr_type;                                             \
+    typedef KType ukey_type; /* user key and key share one type */      \
+    typedef KType key_type;                                             \
+    static const R123_METAL_CONSTANT_ADDRESS_SPACE unsigned int rounds = ROUNDS; \
+    inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ \
+        R123_STATIC_ASSERT(ROUNDS<=16, "philox is only unrolled up to 16 rounds\n"); \
+        return philox##N##x##W##_R(ROUNDS, ctr, key);                   \
+    }                                                                   \
+};                                                                      \
+typedef Philox##N##x##W##_R<philox##N##x##W##_rounds> Philox##N##x##W; /* default rounds */ \
+ } /* namespace r123 */
+
+_PhiloxNxW_base_tpl(r123array2x32, r123array1x32, 2, 32) // Philox2x32_R<R>
+_PhiloxNxW_base_tpl(r123array4x32, r123array2x32, 4, 32) // Philox4x32_R<R>
+#if R123_USE_PHILOX_64BIT
+_PhiloxNxW_base_tpl(r123array2x64, r123array1x64, 2, 64) // Philox2x64_R<R>
+_PhiloxNxW_base_tpl(r123array4x64, r123array2x64, 4, 64) // Philox4x64_R<R>
+#endif /* R123_USE_PHILOX_64BIT */
+
+/* The _tpl macros can't do token-pasting inside comments,
+   so we just write out the boilerplate documentation four times... */
+
+/** 
+@defgroup PhiloxNxW Philox Classes and Typedefs
+
+The PhiloxNxW classes export the member functions, typedefs and
+operator overloads required by a @ref CBRNG "CBRNG" class.
+
+As described in
+<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers:  As Easy as 1, 2, 3</i> </a>,
+the Philox family of counter-based RNGs uses integer multiplication, xor and permutation of W-bit words
+to scramble its N-word input key.  (Philox is a mnemonic for Product HI LO Xor.)
+
+
+@class r123::Philox2x32_R 
+@ingroup PhiloxNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Philox round
+function will be applied.
+
+As of November 2011, the authors know of no statistical flaws with
+ROUNDS=6 or more for Philox2x32.
+
+@typedef r123::Philox2x32
+@ingroup PhiloxNxW
+  Philox2x32 is equivalent to Philox2x32_R<10>.    With 10 rounds,
+  Philox2x32 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+
+
+
+@class r123::Philox2x64_R 
+@ingroup PhiloxNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Philox round
+function will be applied.
+
+As of September 2011, the authors know of no statistical flaws with
+ROUNDS=6 or more for Philox2x64.
+
+@typedef r123::Philox2x64
+@ingroup PhiloxNxW
+  Philox2x64 is equivalent to Philox2x64_R<10>.    With 10 rounds,
+  Philox2x64 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+
+
+
+@class r123::Philox4x32_R 
+@ingroup PhiloxNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Philox round
+function will be applied.
+
+In November 2011, the authors recorded some suspicious p-values (approximately 1.e-7) from
+some very long (longer than the default BigCrush length) SimpPoker tests.  Despite
+the fact that even longer tests reverted to "passing" p-values, a cloud remains over
+Philox4x32 with 7 rounds.  The authors know of no statistical flaws with
+ROUNDS=8 or more for Philox4x32.
+
+@typedef r123::Philox4x32
+@ingroup PhiloxNxW
+  Philox4x32 is equivalent to Philox4x32_R<10>.    With 10 rounds,
+  Philox4x32 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+
+
+
+@class r123::Philox4x64_R 
+@ingroup PhiloxNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Philox round
+function will be applied.
+
+As of September 2011, the authors know of no statistical flaws with
+ROUNDS=7 or more for Philox4x64.
+
+@typedef r123::Philox4x64
+@ingroup PhiloxNxW
+  Philox4x64 is equivalent to Philox4x64_R<10>.    With 10 rounds,
+  Philox4x64 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+*/
+
+#endif /* __cplusplus */
+
+#endif /* _philox_dot_h_ */
diff --git a/ext/random123/include/Random123/threefry.h b/ext/random123/include/Random123/threefry.h
new file mode 100644
index 0000000000000000000000000000000000000000..390ceffe6865e6d23d7c69b38fb8f022abc532f6
--- /dev/null
+++ b/ext/random123/include/Random123/threefry.h
@@ -0,0 +1,870 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef _threefry_dot_h_
+#define _threefry_dot_h_
+#include "features/compilerfeatures.h"
+#include "array.h"
+
+/** \cond HIDDEN_FROM_DOXYGEN */
+/* Significant parts of this file were copied
+   from:
+      Skein_FinalRnd/ReferenceImplementation/skein.h
+      Skein_FinalRnd/ReferenceImplementation/skein_block.c
+
+   in http://csrc.nist.gov/groups/ST/hash/sha-3/Round3/documents/Skein_FinalRnd.zip
+
+   This file has been modified so that it may no longer perform its originally
+   intended function.  If you're looking for a Skein or Threefish source code,
+   please consult the original file.
+
+   The original file had the following header:
+**************************************************************************
+**
+** Interface declarations and internal definitions for Skein hashing.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+***************************************************************************
+
+*/
+
+/* See comment at the top of philox.h for the macro pre-process
+   strategy. */
+
+/* Rotation constants: bit counts passed to RotL_64/RotL_32 by the threefry round macros. */
+enum r123_enum_threefry64x4 {
+    /* These are the R_256 constants from the Threefish reference sources
+       with names changed to R_64x4_<d>_<j>: round d (mod 8), mix j (0 or 1) within that round. */
+    R_64x4_0_0=14, R_64x4_0_1=16,
+    R_64x4_1_0=52, R_64x4_1_1=57,
+    R_64x4_2_0=23, R_64x4_2_1=40,
+    R_64x4_3_0= 5, R_64x4_3_1=37,
+    R_64x4_4_0=25, R_64x4_4_1=33,
+    R_64x4_5_0=46, R_64x4_5_1=12,
+    R_64x4_6_0=58, R_64x4_6_1=22,
+    R_64x4_7_0=32, R_64x4_7_1=32
+};
+
+enum r123_enum_threefry64x2 {
+    /* Rotation amounts for the 2x64 rounds: R_64x2_<d>_0 is used by round d (mod 8).
+    // Output from skein_rot_search: (srs64_B64-X1000)
+    // Random seed = 1. BlockSize = 128 bits. sampleCnt =  1024. rounds =  8, minHW_or=57
+    // Start: Tue Mar  1 10:07:48 2011
+    // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format   
+    */
+    R_64x2_0_0=16,
+    R_64x2_1_0=42,
+    R_64x2_2_0=12,
+    R_64x2_3_0=31,
+    R_64x2_4_0=16,
+    R_64x2_5_0=32,
+    R_64x2_6_0=24,
+    R_64x2_7_0=21
+    /* 4 rounds: minHW =  4  [  4  4  4  4 ]
+    // 5 rounds: minHW =  8  [  8  8  8  8 ]
+    // 6 rounds: minHW = 16  [ 16 16 16 16 ]
+    // 7 rounds: minHW = 32  [ 32 32 32 32 ]
+    // 8 rounds: minHW = 64  [ 64 64 64 64 ]
+    // 9 rounds: minHW = 64  [ 64 64 64 64 ]
+    //10 rounds: minHW = 64  [ 64 64 64 64 ]
+    //11 rounds: minHW = 64  [ 64 64 64 64 ] */
+};
+
+enum r123_enum_threefry32x4 {
+    /* Rotation amounts for the 4x32 rounds (R_32x4_<d>_<j>: round d mod 8, mix j). Output from skein_rot_search: (srs-B128-X5000.out)
+    // Random seed = 1. BlockSize = 64 bits. sampleCnt =  1024. rounds =  8, minHW_or=28
+    // Start: Mon Aug 24 22:41:36 2009
+    // ...
+    // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. blkSize= 128].format    */
+    R_32x4_0_0=10, R_32x4_0_1=26,
+    R_32x4_1_0=11, R_32x4_1_1=21,
+    R_32x4_2_0=13, R_32x4_2_1=27,
+    R_32x4_3_0=23, R_32x4_3_1= 5,
+    R_32x4_4_0= 6, R_32x4_4_1=20,
+    R_32x4_5_0=17, R_32x4_5_1=11,
+    R_32x4_6_0=25, R_32x4_6_1=10,
+    R_32x4_7_0=18, R_32x4_7_1=20
+
+    /* 4 rounds: minHW =  3  [  3  3  3  3 ]
+    // 5 rounds: minHW =  7  [  7  7  7  7 ]
+    // 6 rounds: minHW = 12  [ 13 12 13 12 ]
+    // 7 rounds: minHW = 22  [ 22 23 22 23 ]
+    // 8 rounds: minHW = 31  [ 31 31 31 31 ]
+    // 9 rounds: minHW = 32  [ 32 32 32 32 ]
+    //10 rounds: minHW = 32  [ 32 32 32 32 ]
+    //11 rounds: minHW = 32  [ 32 32 32 32 ] */
+
+};
+
+enum r123_enum_threefry32x2 {
+    /* Rotation amounts for the 2x32 rounds (R_32x2_<d>_0: round d mod 8). Output from skein_rot_search (srs32x2-X5000.out)
+    // Random seed = 1. BlockSize = 64 bits. sampleCnt =  1024. rounds =  8, minHW_or=28
+    // Start: Tue Jul 12 11:11:33 2011
+    // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize=  64].format   */
+    R_32x2_0_0=13,
+    R_32x2_1_0=15,
+    R_32x2_2_0=26,
+    R_32x2_3_0= 6,
+    R_32x2_4_0=17,
+    R_32x2_5_0=29,
+    R_32x2_6_0=16,
+    R_32x2_7_0=24
+
+    /* 4 rounds: minHW =  4  [  4  4  4  4 ]
+    // 5 rounds: minHW =  6  [  6  8  6  8 ]
+    // 6 rounds: minHW =  9  [  9 12  9 12 ]
+    // 7 rounds: minHW = 16  [ 16 24 16 24 ]
+    // 8 rounds: minHW = 32  [ 32 32 32 32 ]
+    // 9 rounds: minHW = 32  [ 32 32 32 32 ]
+    //10 rounds: minHW = 32  [ 32 32 32 32 ]
+    //11 rounds: minHW = 32  [ 32 32 32 32 ] */
+    };
+
+enum r123_enum_threefry_wcnt {
+    WCNT2=2,  /* words per block in the 2xW variants */
+    WCNT4=4   /* words per block in the 4xW variants (cf. the "X.v[WCNT4-1] += r" comments in _threefry4x_tpl) */
+};
+
+#if R123_USE_64BIT
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N));
+R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N)
+{   /* Rotate x left by N bits. Both shift counts are masked to 0..63, so N == 0 (or any multiple of 64) returns x without ever shifting by the full width. */
+    return (x << (N & 63)) | (x >> ((64-N) & 63));
+}
+#endif /* R123_USE_64BIT */
+
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N));
+R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N)
+{   /* Rotate x left by N bits. Both shift counts are masked to 0..31, so N == 0 (or any multiple of 32) returns x without ever shifting by the full width. */
+    return (x << (N & 31)) | (x >> ((32-N) & 31));
+}
+
+#define SKEIN_MK_64(hi32,lo32)  ((lo32) + (((uint64_t) (hi32)) << 32)) /* build a 64-bit value from two 32-bit halves */
+#define SKEIN_KS_PARITY64         SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) /* seed of the extra key-schedule word when W=64 (selected via SKEIN_KS_PARITY##W) */
+#define SKEIN_KS_PARITY32         0x1BD11BDA /* W=32 counterpart: the high half of the 64-bit constant */
+
+/** \endcond */
+
+#ifndef THREEFRY2x32_DEFAULT_ROUNDS /* define before including this header to override */
+#define THREEFRY2x32_DEFAULT_ROUNDS 20 /* becomes threefry2x32_rounds, the Nrounds used by threefry2x32() */
+#endif
+
+#ifndef THREEFRY2x64_DEFAULT_ROUNDS /* define before including this header to override */
+#define THREEFRY2x64_DEFAULT_ROUNDS 20 /* becomes threefry2x64_rounds, the Nrounds used by threefry2x64() */
+#endif
+
+#ifndef THREEFRY4x32_DEFAULT_ROUNDS /* define before including this header to override */
+#define THREEFRY4x32_DEFAULT_ROUNDS 20
+#endif
+
+#ifndef THREEFRY4x64_DEFAULT_ROUNDS /* define before including this header to override */
+#define THREEFRY4x64_DEFAULT_ROUNDS 20
+#endif
+
+#define _threefry2x_tpl(W) /* Emits, for word width W: the threefry2x<W> ctr/key/ukey typedefs, keyinit, the Nrounds-parameterised _R function and the default-rounds wrapper. */ \
+typedef struct r123array2x##W threefry2x##W##_ctr_t;                          \
+typedef struct r123array2x##W threefry2x##W##_key_t;                          \
+typedef struct r123array2x##W threefry2x##W##_ukey_t;                          \
+R123_CUDA_DEVICE R123_STATIC_INLINE threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) { return uk; } \
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
+R123_CUDA_DEVICE R123_STATIC_INLINE                                          \
+threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
+    threefry2x##W##_ctr_t X;                                              \
+    uint##W##_t ks[2+1];                                          \
+    int  i; /* avoid size_t to avoid need for stddef.h */                   \
+    R123_ASSERT(Nrounds<=32); /* only rounds 0..31 are unrolled below */ \
+    ks[2] =  SKEIN_KS_PARITY##W; /* extra key word: parity constant XOR k.v[0] XOR k.v[1] (XORs happen in the loop) */ \
+    for (i=0;i < 2; i++)                                        \
+        {                                                               \
+            ks[i] = k.v[i];                                             \
+            X.v[i]  = in.v[i];                                          \
+            ks[2] ^= k.v[i];                                    \
+        }                                                               \
+                                                                        \
+    /* Insert initial key before round 0 */                             \
+    X.v[0] += ks[0]; X.v[1] += ks[1];                                   \
+    /* Each "if" below is one round (add, rotate, xor); the rotation index cycles 0..7 and a key is injected after every 4th round. */ \
+    if(Nrounds>0){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>1){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>2){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>3){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>3){                                                      \
+        /* InjectKey(r=1): X.v[i] += ks[(r+i)%3], then X.v[1] += r */   \
+        X.v[0] += ks[1]; X.v[1] += ks[2];                               \
+        X.v[1] += 1;     /* X.v[2-1] += r  */                   \
+    }                                                                   \
+    if(Nrounds>4){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>5){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>6){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>7){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>7){                                                      \
+        /* InjectKey(r=2) */                                            \
+        X.v[0] += ks[2]; X.v[1] += ks[0];                               \
+        X.v[1] += 2;                                                    \
+    }                                                                   \
+    if(Nrounds>8){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>9){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>10){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>11){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>11){                                                     \
+        /* InjectKey(r=3) */                                            \
+        X.v[0] += ks[0]; X.v[1] += ks[1];                               \
+        X.v[1] += 3;                                                    \
+    }                                                                   \
+    if(Nrounds>12){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>13){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>14){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>15){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>15){                                                     \
+        /* InjectKey(r=4) */                                            \
+        X.v[0] += ks[1]; X.v[1] += ks[2];                               \
+        X.v[1] += 4;                                                    \
+    }                                                                   \
+    if(Nrounds>16){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>17){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>18){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>19){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>19){                                                     \
+        /* InjectKey(r=5) */                                            \
+        X.v[0] += ks[2]; X.v[1] += ks[0];                               \
+        X.v[1] += 5;                                                    \
+    }                                                                   \
+    if(Nrounds>20){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>21){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>22){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>23){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>23){                                                     \
+        /* InjectKey(r=6) */                                            \
+        X.v[0] += ks[0]; X.v[1] += ks[1];                               \
+        X.v[1] += 6;                                                    \
+    }                                                                   \
+    if(Nrounds>24){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>25){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>26){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>27){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>27){                                                     \
+        /* InjectKey(r=7) */                                            \
+        X.v[0] += ks[1]; X.v[1] += ks[2];                               \
+        X.v[1] += 7;                                                    \
+    }                                                                   \
+    if(Nrounds>28){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>29){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>30){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>31){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
+    if(Nrounds>31){                                                     \
+        /* InjectKey(r=8) */                                            \
+        X.v[0] += ks[2]; X.v[1] += ks[0];                               \
+        X.v[1] += 8;                                                    \
+    }                                                                   \
+    return X;                                                           \
+}                                                                       \
+ /** @ingroup ThreefryNxW */                                            \
+enum r123_enum_threefry2x##W { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS };       \
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
+R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
+threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
+    return threefry2x##W##_R(threefry2x##W##_rounds, in, k);            \
+}
+
+
+#define _threefry4x_tpl(W)                                              \
+typedef struct r123array4x##W threefry4x##W##_ctr_t;                        \
+typedef struct r123array4x##W threefry4x##W##_key_t;                        \
+typedef struct r123array4x##W threefry4x##W##_ukey_t;                        \
+R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
+R123_CUDA_DEVICE R123_STATIC_INLINE                                          \
+threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
+    threefry4x##W##_ctr_t X;                                            \
+    uint##W##_t ks[4+1];                                            \
+    int  i; /* avoid size_t to avoid need for stddef.h */                   \
+    R123_ASSERT(Nrounds<=72);                                           \
+    ks[4] =  SKEIN_KS_PARITY##W;                                    \
+    for (i=0;i < 4; i++)                                            \
+        {                                                               \
+            ks[i] = k.v[i];                                             \
+            X.v[i]  = in.v[i];                                          \
+            ks[4] ^= k.v[i];                                        \
+        }                                                               \
+                                                                        \
+    /* Insert initial key before round 0 */                             \
+    X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
+                                                                        \
+    if(Nrounds>0){                                                      \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>1){                                                      \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>2){                                                      \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>3){                                                      \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>3){                                                      \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
+        X.v[4-1] += 1;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>4){                                                      \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>5){                                                      \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>6){                                                      \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>7){                                                      \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>7){                                                      \
+        /* InjectKey(r=2) */                                            \
+        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
+        X.v[4-1] += 2;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>8){                                                      \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>9){                                                      \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>10){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>11){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>11){                                                     \
+        /* InjectKey(r=3) */                                            \
+        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
+        X.v[4-1] += 3;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>12){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>13){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>14){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>15){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>15){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
+        X.v[4-1] += 4;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>16){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>17){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>18){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>19){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>19){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
+        X.v[4-1] += 5;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>20){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>21){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>22){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>23){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>23){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
+        X.v[4-1] += 6;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>24){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>25){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>26){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>27){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>27){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
+        X.v[4-1] += 7;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>28){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>29){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>30){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>31){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>31){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
+        X.v[4-1] += 8;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>32){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>33){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>34){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>35){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>35){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
+        X.v[4-1] += 9;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>36){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>37){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>38){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>39){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>39){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
+        X.v[4-1] += 10;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>40){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>41){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>42){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>43){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>43){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
+        X.v[4-1] += 11;     /* X.v[WCNT4-1] += r  */                \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>44){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>45){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>46){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>47){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>47){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
+        X.v[4-1] += 12;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>48){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>49){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>50){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>51){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>51){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
+        X.v[4-1] += 13;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>52){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>53){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>54){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>55){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>55){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
+        X.v[4-1] += 14;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>56){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>57){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>58){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>59){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>59){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
+        X.v[4-1] += 15;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>60){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>61){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>62){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>63){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>63){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
+        X.v[4-1] += 16;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>64){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>65){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>66){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>67){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>67){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
+        X.v[4-1] += 17;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    if(Nrounds>68){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>69){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>70){                                                     \
+        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
+        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>71){                                                     \
+        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
+        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
+    }                                                                   \
+    if(Nrounds>71){                                                     \
+        /* InjectKey(r=1) */                                            \
+        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
+        X.v[4-1] += 18;     /* X.v[WCNT4-1] += r  */                 \
+    }                                                                   \
+                                                                        \
+    return X;                                                           \
+}                                                                       \
+                                                                        \
+ /** @ingroup ThreefryNxW */                                            \
+enum r123_enum_threefry4x##W { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS };       \
+R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
+R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
+threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
+    return threefry4x##W##_R(threefry4x##W##_rounds, in, k);            \
+}
+
+#if R123_USE_64BIT /* 64-bit-word variants are only instantiated when R123_USE_64BIT is nonzero */
+_threefry2x_tpl(64)
+_threefry4x_tpl(64)
+#endif /* R123_USE_64BIT */
+_threefry2x_tpl(32)
+_threefry4x_tpl(32)
+
+/* Function-like macros that call threefryNxW_R with threefryNxW_rounds.  gcc4.5 and 4.6
+   seem to optimize a macro-ized threefryNxW better than a static inline function.  Why?  */
+#define threefry2x32(c,k) threefry2x32_R(threefry2x32_rounds, c, k)
+#define threefry4x32(c,k) threefry4x32_R(threefry4x32_rounds, c, k)
+#define threefry2x64(c,k) threefry2x64_R(threefry2x64_rounds, c, k)
+#define threefry4x64(c,k) threefry4x64_R(threefry4x64_rounds, c, k)
+
+#if defined(__cplusplus)
+#define _threefryNxWclass_tpl(NxW) /* emits functor r123::Threefry{NxW}_R and typedef r123::Threefry{NxW} */ \
+namespace r123{                                                     \
+template<unsigned int ROUNDS> /* ROUNDS: number of rounds passed to the free function */ \
+ struct Threefry##NxW##_R{                                              \
+    typedef threefry##NxW##_ctr_t ctr_type;                             \
+    typedef threefry##NxW##_key_t key_type;                             \
+    typedef threefry##NxW##_key_t ukey_type; /* same underlying type as key_type */ \
+    static const R123_METAL_CONSTANT_ADDRESS_SPACE unsigned int rounds=ROUNDS;                            \
+   inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)){ \
+        R123_STATIC_ASSERT(ROUNDS<=72, "threefry is only unrolled up to 72 rounds\n"); \
+        return threefry##NxW##_R(ROUNDS, ctr, key); /* forwards to the free function of the same name */ \
+    }                                                                   \
+};                                                                      \
+ typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW; /* instantiated with the _rounds enum value */ \
+} // namespace r123
+
+_threefryNxWclass_tpl(2x32)
+_threefryNxWclass_tpl(4x32)
+#if R123_USE_64BIT /* 64-bit-word class wrappers are only emitted when R123_USE_64BIT is nonzero */
+_threefryNxWclass_tpl(2x64)
+_threefryNxWclass_tpl(4x64)
+#endif /* R123_USE_64BIT */
+
+/* The _tpl macros don't quite work to do string-pasting inside comments,
+   so we just write out the boilerplate documentation four times... */
+
+/** 
+@defgroup ThreefryNxW Threefry Classes and Typedefs
+
+The ThreefryNxW classes export the member functions, typedefs and
+operator overloads required by a @ref CBRNG "CBRNG" class.
+
+As described in  
+<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers:  As Easy as 1, 2, 3</i> </a>, 
+the Threefry family is closely related to the Threefish block cipher from
+<a href="http://www.skein-hash.info/"> Skein Hash Function</a>.  
+Threefry is \b not suitable for cryptographic use.
+
+Threefry uses integer addition, bitwise rotation, xor and permutation of words to randomize its output.
+
+@class r123::Threefry2x32_R 
+@ingroup ThreefryNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Threefry round
+function will be applied.
+
+As of September 2011, the authors know of no statistical flaws with
+ROUNDS=13 or more for Threefry2x32.
+
+@typedef r123::Threefry2x32
+@ingroup ThreefryNxW
+  Threefry2x32 is equivalent to Threefry2x32_R<20>.    With 20 rounds,
+  Threefry2x32 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+
+@class r123::Threefry2x64_R 
+@ingroup ThreefryNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Threefry round
+function will be applied.
+
+In November 2011, the authors discovered that 13 rounds of
+Threefry2x64 sequenced by strided, interleaved key and counter
+increments failed a very long (longer than the default BigCrush
+length) WeightDistrib test.  At the same time, it was confirmed that
+14 rounds passes much longer tests (up to 5x10^12 samples) of a
+similar nature.  The authors know of no statistical flaws with
+ROUNDS=14 or more for Threefry2x64.
+
+@typedef r123::Threefry2x64
+@ingroup ThreefryNxW
+  Threefry2x64 is equivalent to Threefry2x64_R<20>.    With 20 rounds,
+  Threefry2x64 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+
+
+
+@class r123::Threefry4x32_R 
+@ingroup ThreefryNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Threefry round
+function will be applied.
+
+As of September 2011, the authors know of no statistical flaws with
+ROUNDS=12 or more for Threefry4x32.
+
+@typedef r123::Threefry4x32
+@ingroup ThreefryNxW
+  Threefry4x32 is equivalent to Threefry4x32_R<20>.    With 20 rounds,
+  Threefry4x32 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+
+
+
+@class r123::Threefry4x64_R 
+@ingroup ThreefryNxW
+
+exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
+
+The template argument, ROUNDS, is the number of times the Threefry round
+function will be applied.
+
+As of September 2011, the authors know of no statistical flaws with
+ROUNDS=12 or more for Threefry4x64.
+
+@typedef r123::Threefry4x64
+@ingroup ThreefryNxW
+  Threefry4x64 is equivalent to Threefry4x64_R<20>.    With 20 rounds,
+  Threefry4x64 has a considerable safety margin over the minimum number
+  of rounds with no known statistical flaws, but still has excellent
+   performance. 
+*/
+
+#endif
+
+#endif
diff --git a/ext/random123/include/Random123/u01fixedpt.h b/ext/random123/include/Random123/u01fixedpt.h
new file mode 100644
index 0000000000000000000000000000000000000000..2058f8b57efcbb14a82f7d14def1066c24732dc9
--- /dev/null
+++ b/ext/random123/include/Random123/u01fixedpt.h
@@ -0,0 +1,200 @@
+/*
+Copyright 2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef _random123_ufixed01_dot_h_
+#define _random123_ufixed01_dot_h_
+
+#include "features/compilerfeatures.h"
+
+/** @defgroup u01fixedpt The u01fixedpt conversion functions
+
+    These functions convert unsigned W-bit integers to uniformly
+    spaced real values (float or double) between 0.0 and 1.0 with
+    mantissas of M bits.
+
+    PLEASE THINK CAREFULLY BEFORE USING THESE FUNCTIONS.  THEY MAY
+    NOT BE WHAT YOU WANT.  YOU MAY BE MUCH BETTER SERVED BY THE
+    FUNCTIONS IN ./uniform.hpp.
+
+    These functions produce a finite number of *uniformly spaced* values
+    in the range from 0.0 to 1.0 with uniform probability.  The price
+    of uniform spacing is that they may not utilize the entire space
+    of possible outputs.  E.g., u01fixedpt_closed_open_32_float will never
+    produce a non-zero value less than 2^-24, even though such values
+    are representable in single-precision floating point.
+
+    There are 12 functions, corresponding to the following choices:
+
+     -  W = 32 or 64
+     -  M = 24 (float) or 53 (double)
+     -  open0 or closed0 : whether the output is open or closed at 0.0
+     -  open1 or closed1 : whether the output is open or closed at 1.0 
+
+    The W=64 M=24 cases are not implemented.  To obtain an M=24 float
+    from a uint64_t, use a cast (possibly with right-shift and bitwise
+    and) to convert some of the bits of the uint64_t to a uint32_t and
+    then use u01fixedpt_x_y_32_float.  Note that the 64-bit random integers
+    produced by the Random123 library are random in "all the bits", so
+    with a little extra effort you can obtain two floats this way --
+    one from the high bits and one from the low bits of the 64-bit
+    value.
+
+    If the output is open at one end, then the extreme
+    value (0.0 or 1.0) will never be returned.  Conversely, if the output
+    is closed at one end, then the extreme value is a possible
+    return value.
+
+    The values returned are as follows.  All values are returned
+    with equal frequency, except as noted in the closed_closed case:
+
+     closed_open:  Let P=min(M,W)
+        there are 2^P possible output values:
+        {0, 1, 2, ..., 2^P-1}/2^P
+
+     open_closed:  Let P=min(M,W)
+        there are 2^P possible values:
+        {1, 2, ..., 2^P}/2^P
+
+     open_open:   Let P=min(M, W+1) 
+        there are 2^(P-1) possible values:
+        {1, 3, 5, ..., 2^P-1}/2^P
+
+     closed_closed:  Let P=min(M, W-1)
+        there are 1+2^P possible values:
+        {0, 1, 2, ... 2^P}/2^P
+        The extreme values (0.0 and 1.0) are
+        returned with half the frequency of
+        all others.
+    
+    On x86 hardware, especially on 32bit machines, the use of
+    internal 80bit x87-style floating point may result in
+    'bonus' precision, which may cause closed intervals to not
+    be really closed, i.e. the conversions below might not
+    convert UINT{32,64}_MAX to 1.0.  This sort of issue is
+    likely to occur when storing the output of a u01fixedpt_*_32_float
+    function in a double, though one can imagine getting extra
+    precision artifacts when going from 64_53 as well.  Other
+    artifacts may exist on some GPU hardware.  The tests in
+    kat_u01_main.h try to expose such issues, but caveat emptor.
+
+    @cond HIDDEN_FROM_DOXYGEN
+ */
+
+/* Hex float literals were standardized by C in 1999, but weren't
+   standardized by C++ until C++17.  So, we're obliged to write out our
+   constants in decimal, even though they're most naturally expressed in
+   binary.  We cross our fingers and hope that the compiler does the
+   compile-time constant arithmetic properly.
+*/
+#define R123_0x1p_31f (1.f/(1024.f*1024.f*1024.f*2.f)) /* 2^-31 (float) */
+#define R123_0x1p_24f (128.f*R123_0x1p_31f) /* 2^-24 (float) */
+#define R123_0x1p_23f (256.f*R123_0x1p_31f) /* 2^-23 (float) */
+#define R123_0x1p_32  (1./(1024.*1024.*1024.*4.)) /* 2^-32 (double) */
+#define R123_0x1p_63 (2.*R123_0x1p_32*R123_0x1p_32) /* 2^-63 (double) */
+#define R123_0x1p_53 (1024.*R123_0x1p_63) /* 2^-53 (double) */
+#define R123_0x1p_52 (2048.*R123_0x1p_63) /* 2^-52 (double) */
+
+/** @endcond */
+
+#ifndef R123_USE_U01_DOUBLE
+#define R123_USE_U01_DOUBLE 1
+#endif
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+/* narrowing conversions:  uint32_t to float */
+R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_closed_closed_32_float(uint32_t i){
+    /* N.B.  we ignore the high bit (bit 31), so output is not monotonic in i.  Bits 6..30 are kept and bit 6 is added a second time, giving a multiple of 0x80 in [0, 2^31]. */
+    return ((i&0x7fffffc0) + (i&0x40))*R123_0x1p_31f; /* scaled by 0x1.p-31f: {0, 1, ..., 2^24}/2^24 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_closed_open_32_float(uint32_t i){
+    return (i>>8)*R123_0x1p_24f; /* top 24 bits of i, scaled by 0x1.0p-24f: {0, 1, ..., 2^24-1}/2^24 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_open_closed_32_float(uint32_t i){
+    return (1+(i>>8))*R123_0x1p_24f; /* (top 24 bits of i) + 1, scaled by 0x1.0p-24f: {1, 2, ..., 2^24}/2^24 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_open_open_32_float(uint32_t i){
+    return (0.5f+(i>>9))*R123_0x1p_23f; /* (top 23 bits of i) + 0.5, scaled by 0x1.p-23f: {1, 3, ..., 2^24-1}/2^24 */
+}
+
+#if R123_USE_U01_DOUBLE
+/* narrowing conversions:  uint64_t to double */
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_closed_64_double(uint64_t i){
+    /* N.B.  we ignore the high bit (bit 63), so output is not monotonic in i.  Bits 9..62 are kept and bit 9 is added a second time, giving a multiple of 0x400 in [0, 2^63]. */
+    return ((i&R123_64BIT(0x7ffffffffffffe00)) + (i&0x200))*R123_0x1p_63; /* scaled by 0x1.p-63: {0, 1, ..., 2^53}/2^53 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_open_64_double(uint64_t i){
+    return (i>>11)*R123_0x1p_53; /* top 53 bits of i, scaled by 0x1.0p-53: {0, 1, ..., 2^53-1}/2^53 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_closed_64_double(uint64_t i){
+    return (1+(i>>11))*R123_0x1p_53; /* (top 53 bits of i) + 1, scaled by 0x1.0p-53: {1, 2, ..., 2^53}/2^53 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_open_64_double(uint64_t i){
+    return (0.5+(i>>12))*R123_0x1p_52; /* (top 52 bits of i) + 0.5, scaled by 0x1.0p-52: {1, 3, ..., 2^53-1}/2^53 */
+}
+
+/* widening conversions:  u32 to double */
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_closed_32_double(uint32_t i){
+    /* j = i+(i&1) rounds odd i up to the next even value, so j takes on 2^31+1
+      possible values with a 'trapezoid' distribution (p_j = relative frequency):
+      p_j =  1 0 2 0 2 .... 2 0 2 0 1
+      j   =  0 1 2 3 4 ....        2^32
+      by converting to double *before* doing the add, we don't wrap the high bit. */
+    return (((double)(i&1)) + i)*R123_0x1p_32; /* scaled by 0x1.p-32: {0, 1, ..., 2^31}/2^31 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_open_32_double(uint32_t i){
+    return i*R123_0x1p_32; /* all 32 bits of i, scaled by 0x1.p-32: {0, 1, ..., 2^32-1}/2^32 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_closed_32_double(uint32_t i){
+    return (1.+i)*R123_0x1p_32; /* i + 1 computed in double (no wrap), scaled by 0x1.p-32: {1, 2, ..., 2^32}/2^32 */
+}
+
+R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_open_32_double(uint32_t i){
+    return (0.5+i)*R123_0x1p_32; /* i + 0.5 computed in double, scaled by 0x1.p-32: {1, 3, ..., 2^33-1}/2^33 */
+}
+#endif /* R123_USE_U01_DOUBLE */
+
+#ifdef __cplusplus
+}
+#endif
+
+/** @} */
+#endif
diff --git a/ext/random123/include/Random123/uniform.hpp b/ext/random123/include/Random123/uniform.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a815066ae8d302f7fba8c57c1feec56979fc5bd8
--- /dev/null
+++ b/ext/random123/include/Random123/uniform.hpp
@@ -0,0 +1,310 @@
+/*
+Copyright 2010-2011, D. E. Shaw Research.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions, and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions, and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of D. E. Shaw Research nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __r123_uniform_dot_hpp
+#define __r123_uniform_dot_hpp
+
+/** @defgroup uniform Uniform distribution scalar conversion functions
+
+This file provides some simple functions that can be used to convert
+integers of various widths to floats and doubles with various
+characteristics.  It can be used to generate real-valued, uniformly
+distributed random variables from the random integers produced by
+the Random123 CBRNGs.
+
+There are three templated functions:
+
+ - u01:  output is as dense as possible in (0,1}, never 0.0.  May
+    return 1.0 if and only if the number of output mantissa bits
+    is less than the width of the input.
+
+ - uneg11:  output is as dense as possible in {-1,1}, never 0.0.  May
+    return 1.0 or -1.0 if and only if the number of output mantissa bits
+    is less than the width of the input.
+
+ - u01fixedpt:  output is "fixed point", equispaced, open at both ends, 
+     and is never 0.0, 0.5 nor 1.0.
+
+The behavior of u01 and uneg11 depends on the pre-processor symbol:
+R123_UNIFORM_FLOAT_STORE.  When #defined to a non-zero value, u01
+and uneg11 declare a volatile intermediate result, with the
+intention of forcing architectures that have "extra bits" in their
+floating point registers to more closely conform to IEEE
+arithmetic.  When compiled this way, u01 and uneg11 will be
+significantly slower, as they will incur a memory write and read on
+every call.  Without it, they may fail the "known answer test"
+implemented in ut_uniform_IEEEkat.cpp even though they perform
+perfectly reasonable int to float conversions.  We have used
+this option to get 32-bit x86 to produce the same results as
+64-bit x86-64 code, but we do not recommend it for normal
+use.
+
+Three additional functions are defined when C++11 or newer is in use:
+
+ - u01all
+ - uneg11all
+ - u01fixedptall
+
+These functions apply the corresponding conversion to every
+element of their argument, which must be a statically sized
+array, e.g., an r123array or a std::array of an integer type.
+
+This file may not be as portable, and has not been tested as
+rigorously as other files in the library, e.g., the generators.
+Nevertheless, we hope it is useful and we encourage developers to
+copy it and modify it for their own use.  We invite comments and
+improvements.
+*/
+
+#include <Random123/features/compilerfeatures.h>
+#include <limits>
+#if R123_USE_CXX11_TYPE_TRAITS
+#include <type_traits>
+#endif
+#if __cplusplus >= 201103L
+#include <array>
+#endif
+
+namespace r123{
+/**
+@{
+@cond HIDDEN_FROM_DOXYGEN
+*/
+
+#if R123_USE_CXX11_TYPE_TRAITS
+using std::make_signed;
+using std::make_unsigned;
+#else
+// Fallback used when R123_USE_CXX11_TYPE_TRAITS is off: rather than hunting
+// for another <type_traits> (boost, TR1), define minimal make_signed and
+// make_unsigned here in the r123 namespace, for fixed-width integers only.
+template <typename T> struct make_signed{};
+template <typename T> struct make_unsigned{};
+#define R123_MK_SIGNED_UNSIGNED(ST, UT)                 \
+template<> struct make_signed<ST>{ typedef ST type; }; \
+template<> struct make_signed<UT>{ typedef ST type; }; \
+template<> struct make_unsigned<ST>{ typedef UT type; }; \
+template<> struct make_unsigned<UT>{ typedef UT type; }
+
+R123_MK_SIGNED_UNSIGNED(int8_t, uint8_t);
+R123_MK_SIGNED_UNSIGNED(int16_t, uint16_t);
+R123_MK_SIGNED_UNSIGNED(int32_t, uint32_t);
+R123_MK_SIGNED_UNSIGNED(int64_t, uint64_t);
+#if R123_USE_GNU_UINT128
+R123_MK_SIGNED_UNSIGNED(__int128_t, __uint128_t);
+#endif
+#undef R123_MK_SIGNED_UNSIGNED
+#endif
+
+#if defined(__CUDACC__) || defined(_LIBCPP_HAS_NO_CONSTEXPR)
+// Amazing! cuda thinks numeric_limits::max() is a __host__ function, so
+// we can't use it in a device function.
+//
+// The _LIBCPP_HAS_NO_CONSTEXPR test catches situations where the libc++
+// library thinks that the compiler doesn't support constexpr, but we
+// think it does.  As a consequence, the library declares
+// numeric_limits::max without constexpr.  This workaround should only
+// affect a narrow range of compiler/library pairings.
+//
+// In both cases, max() is all-ones in the unsigned counterpart uT, shifted
+// right by is_signed; the uT() cast undoes integer promotion of narrow types.
+template <typename T>
+R123_CONSTEXPR R123_STATIC_INLINE R123_CUDA_DEVICE T maxTvalue(){
+    typedef typename make_unsigned<T>::type uT;
+    return uT(~uT(0)) >> std::numeric_limits<T>::is_signed;
+ }
+#else
+template <typename T>
+R123_CONSTEXPR R123_STATIC_INLINE T maxTvalue(){
+    return std::numeric_limits<T>::max(); // usual case: the standard library's max() is usable here
+}
+#endif
+/** @endcond
+    @}
+ */
+
+//! Return a uniform real value in (0, 1]
+/**
+    @ingroup uniform
+     Input is a W-bit integer (signed or unsigned).  It is cast to
+     a W-bit unsigned integer, multiplied by Ftype(2^-W) and added to
+     Ftype(2^(-W-1)).  A good compiler should optimize it down to an
+     int-to-float conversion followed by a multiply and an add, which
+     might be fused, depending on the architecture.
+
+    If the input is a uniformly distributed integer, and if Ftype
+    arithmetic follows IEEE754 round-to-nearest rules, then the
+    result is a uniformly distributed floating point number in (0, 1].
+
+-    The result is never exactly 0.0.
+-    The smallest value returned is 2^(-W-1), obtained for an input of 0.
+-    Let M be the number of mantissa bits in Ftype (typically 24 or 53).
+  -    If W>M  then the largest value returned is 1.0.
+  -    If W<=M then the largest value returned is Ftype(1.0 - 2^(-W-1)).
+*/
+template <typename Ftype, typename Itype>
+R123_CUDA_DEVICE R123_STATIC_INLINE Ftype u01(Itype in){
+    typedef typename make_unsigned<Itype>::type Utype;
+    R123_CONSTEXPR Ftype factor = Ftype(1.)/(maxTvalue<Utype>() + Ftype(1.)); // 2^-W
+    R123_CONSTEXPR Ftype halffactor = Ftype(0.5)*factor; // 2^(-W-1)
+#if R123_UNIFORM_FLOAT_STORE
+    volatile Ftype x = Utype(in)*factor; return x+halffactor; // volatile intermediate: see R123_UNIFORM_FLOAT_STORE in the file header
+#else
+    return Utype(in)*factor + halffactor;
+#endif
+}
+
+//! Return a signed value in [-1,1]
+/**
+    @ingroup uniform
+   The argument is converted to a W-bit signed integer, multiplied by Ftype(2^-(W-1)) and
+   then added to Ftype(2^-W).  A good compiler should optimize
+   it down to an int-to-float conversion followed by a multiply and
+   an add, which might be fused, depending on the architecture.
+
+ If the input is a uniformly distributed integer, and if Ftype
+ arithmetic follows IEEE754 round-to-nearest rules, then the
+ output is a uniformly distributed floating point number in [-1, 1].
+
+- The result is never exactly 0.0.
+- The smallest absolute value returned is 2^-W
+- Let M be the number of mantissa bits in Ftype.
+  - If W>M  then the largest value returned is 1.0 and the smallest is -1.0.
+  - If W<=M then the largest value returned is the Ftype(1.0 - 2^-W)
+    and the smallest value returned is -Ftype(1.0 - 2^-W).
+*/
+template <typename Ftype, typename Itype>
+R123_CUDA_DEVICE R123_STATIC_INLINE Ftype uneg11(Itype in){
+    typedef typename make_signed<Itype>::type Stype;
+    R123_CONSTEXPR Ftype factor = Ftype(1.)/(maxTvalue<Stype>() + Ftype(1.)); // 2^-(W-1)
+    R123_CONSTEXPR Ftype halffactor = Ftype(0.5)*factor; // 2^-W
+#if R123_UNIFORM_FLOAT_STORE
+    volatile Ftype x = Stype(in)*factor; return x+halffactor; // volatile intermediate: see R123_UNIFORM_FLOAT_STORE in the file header
+#else
+    return Stype(in)*factor + halffactor;
+#endif
+}
+
+//! Return a value in (0,1) chosen from a set of equally spaced fixed-point values
+/**
+    @ingroup uniform
+   Let:
+     - W = width of Itype, e.g., 32 or 64, regardless of signedness.
+     - M = mantissa bits of Ftype, e.g., 24, 53 or 64
+     - B = min(M, W+1)   (when W < M the conversion is delegated to u01)
+
+   Then the 2^(B-1) possible output values are: 2^-B*{1, 3, 5, ..., 2^B - 1}
+
+   The smallest output is: 2^-B
+
+   The largest output is:  1 - 2^-B
+
+   The output is never exactly 0.0, nor 0.5, nor 1.0.
+
+   The 2^(B-1) possible outputs:
+     - are equally likely,
+     - are uniformly spaced by 2^-(B-1),
+     - are balanced around 0.5
+*/
+template <typename Ftype, typename Itype>
+R123_CUDA_DEVICE R123_STATIC_INLINE Ftype u01fixedpt(Itype in){
+    typedef typename make_unsigned<Itype>::type Utype;
+    R123_CONSTEXPR int excess = std::numeric_limits<Utype>::digits - std::numeric_limits<Ftype>::digits; // W - M
+    if(excess>=0){
+        R123_CONSTEXPR int ex_nowarn = (excess>=0) ? excess : 0; // never negative, so the shift counts below stay valid even where this branch is dead
+        R123_CONSTEXPR Ftype factor = Ftype(1.)/(Ftype(1.) + ((maxTvalue<Utype>()>>ex_nowarn)));
+        return (1 | (Utype(in)>>ex_nowarn)) * factor; // keep the top M bits of the input and force the low bit on (odd numerator)
+    }else
+        return u01<Ftype>(in); // Itype has fewer bits than Ftype's mantissa: defer to u01
+}
+
+#if R123_USE_CXX11_STD_ARRAY
+
+//! Convert every element of a fixed-size integer collection with u01
+/** @ingroup uniform
+ * Compiled only when R123_USE_CXX11_STD_ARRAY is enabled.
+ * CollType must be iterable with a range-for and expose a constant static_size
+ * member (e.g., an r123array); the result is a std::array<Ftype, static_size>.
+ */
+template <typename Ftype, typename CollType>
+static inline
+std::array<Ftype, CollType::static_size> u01all(CollType in)
+{
+    std::array<Ftype, CollType::static_size> converted;
+    auto dst = converted.begin();
+    for(auto word : in){
+        *dst++ = u01<Ftype>(word);
+    }
+    return converted;
+}
+
+//! Convert every element of a fixed-size integer collection with uneg11
+/** @ingroup uniform
+ * Compiled only when R123_USE_CXX11_STD_ARRAY is enabled.
+ * CollType must be iterable with a range-for and expose a constant static_size
+ * member (e.g., an r123array); the result is a std::array<Ftype, static_size>.
+ */
+template <typename Ftype, typename CollType>
+static inline
+std::array<Ftype, CollType::static_size> uneg11all(CollType in)
+{
+    std::array<Ftype, CollType::static_size> converted;
+    auto dst = converted.begin();
+    for(auto word : in){
+        *dst++ = uneg11<Ftype>(word);
+    }
+    return converted;
+}
+
+//! Convert every element of a fixed-size integer collection with u01fixedpt
+/** @ingroup uniform
+ * Compiled only when R123_USE_CXX11_STD_ARRAY is enabled.
+ * CollType must be iterable with a range-for and expose a constant static_size
+ * member (e.g., an r123array); the result is a std::array<Ftype, static_size>.
+*/
+template <typename Ftype, typename CollType>
+static inline
+std::array<Ftype, CollType::static_size> u01fixedptall(CollType in)
+{
+    std::array<Ftype, CollType::static_size> converted;
+    auto dst = converted.begin();
+    for(auto word : in){
+        *dst++ = u01fixedpt<Ftype>(word);
+    }
+    return converted;
+}
+#endif // R123_USE_CXX11_STD_ARRAY
+
+} // namespace r123
+
+#endif
+
diff --git a/test/unit/test_morph_expr.cpp b/test/unit/test_morph_expr.cpp
index 60d104e992458d8b64e0ad760c0f0e17b4d3317f..5a06a27586ea7c5e24b260f12830b9b3ea21ee39 100644
--- a/test/unit/test_morph_expr.cpp
+++ b/test/unit/test_morph_expr.cpp
@@ -24,6 +24,22 @@ namespace arb {
     }
 }
 
+// gtest predicate for mlocation equality: branch ids must be identical and
+// positions must satisfy gtest's FloatingPoint<double>::AlmostEquals.
+::testing::AssertionResult mlocation_eq(mlocation a, mlocation b) {
+    using FP = testing::internal::FloatingPoint<double>;
+    const bool same_branch = a.branch==b.branch;
+    const bool same_pos = FP(a.pos).AlmostEquals(FP(b.pos));
+    if (same_branch && same_pos) {
+        return ::testing::AssertionSuccess();
+    }
+
+    // Either kind of mismatch is reported against mlocations: the operands
+    // are locations, not cables.
+    return ::testing::AssertionFailure()
+            << "mlocations " << a << " and " << b << " differ";
+}
+
 ::testing::AssertionResult cable_eq(mcable a, mcable b) {
     if (a.branch!=b.branch) {
         return ::testing::AssertionFailure()
@@ -54,6 +70,20 @@ namespace arb {
     return ::testing::AssertionSuccess();
 }
 
+// gtest predicate: two mlocation lists match when they have the same length
+// and every pair of corresponding entries satisfies mlocation_eq.
+::testing::AssertionResult mloctionlist_eq(const mlocation_list& as, const mlocation_list& bs) {
+    bool same = as.size()==bs.size();
+    if (same) {
+        for (auto i: util::count_along(as)) {
+            if (!mlocation_eq(as[i], bs[i])) { same = false; break; }
+        }
+    }
+    if (same) return ::testing::AssertionSuccess();
+    return ::testing::AssertionFailure()
+            << "mlocation lists " << as << " and " << bs << " differ";
+}
+
 TEST(region, expr_repn) {
     using util::to_string;
 
@@ -268,9 +298,91 @@ TEST(locset, thingify) {
         // In the absence of a spherical root, there is no branch 4.
         EXPECT_THROW(thingify(begb4, mp), no_such_branch);
     }
+    {
+        mprovider mp(morphology(sm, false));
+
+        auto all = reg::all();
+        auto ls0 = thingify(ls::uniform(all,  0,  9, 12), mp);
+        auto ls1 = thingify(ls::uniform(all,  0,  9, 12), mp);
+        auto ls2 = thingify(ls::uniform(all, 10, 19, 12), mp);
+        auto ls3 = thingify(ls::uniform(all,  0,  9, 13), mp);
+        auto ls4 = thingify(ls::uniform(all,  5,  6, 12), mp);
+        auto ls5 = thingify(ls::uniform(all,  2,  5, 12), mp);
+        auto ls6 = thingify(ls::uniform(all,  5, 11, 12), mp);
+
+        EXPECT_EQ(ls0, ls1);
+
+        bool found_none = true;
+        for (auto l: ls2) {
+            auto it = std::find(ls0.begin(), ls0.end(), l);
+            if (it != ls0.end()) {
+                found_none = false;
+            }
+        }
+        EXPECT_TRUE(found_none);
+
+        found_none = true;
+        for (auto l: ls3) {
+            auto it = std::find(ls0.begin(), ls0.end(), l);
+            if (it != ls0.end()) {
+                found_none = false;
+            }
+        }
+        EXPECT_TRUE(found_none);
+
+        bool found_all = true;
+        for (auto l: ls4) {
+            auto it = std::find(ls0.begin(), ls0.end(), l);
+            if (it == ls0.end()) {
+                found_all = false;
+            }
+        }
+        EXPECT_TRUE(found_all);
+
+        int found = 0;
+        for (auto l: ls5) {
+            auto it = std::find(ls4.begin(), ls4.end(), l);
+            if (it != ls4.end()) found++;
+        }
+        EXPECT_TRUE(found == 1);
+
+        found = 0;
+        for (auto l: ls6) {
+            auto it = std::find(ls4.begin(), ls4.end(), l);
+            if (it != ls4.end()) found++;
+        }
+        EXPECT_TRUE(found == 2);
+    }
+    {
+        mprovider mp(morphology(sm, false));
+        auto sub_reg = join(reg::cable(0, 0.2, 0.7), reg::cable(1, 0.1, 1), reg::cable(3, 0.5, 0.6));
+
+        auto ls0 = thingify(ls::uniform(sub_reg, 0, 10000, 72), mp);
+        for (auto l: ls0) {
+            switch(l.branch) {
+                case 0: {
+                    if (l.pos < 0.2 || l.pos > 0.7) FAIL();
+                    break;
+                }
+                case 1: {
+                    if (l.pos < 0.1 || l.pos > 1) FAIL();
+                    break;
+                }
+                case 3: {
+                    if (l.pos < 0.5 || l.pos > 0.6) FAIL();
+                    break;
+                }
+                default: {
+                    FAIL();
+                    break;
+                }
+            }
+            SUCCEED();
+        }
+    }
 }
 
-TEST(region, thingify) {
+TEST(region, thingify_simple_morphologies) {
     using pvec = std::vector<msize_t>;
     using svec = std::vector<msample>;
     using cl = mcable_list;
@@ -352,16 +464,45 @@ TEST(region, thingify) {
         sample_tree sm(samples, parents);
         mprovider mp(morphology(sm, true));
 
+        using ls::location;
         using reg::tagged;
+        using reg::distal_interval;
+        using reg::proximal_interval;
         using reg::branch;
+        using reg::cable;
         using reg::all;
 
+        locset mid0_   = location(0,0.5);
+        locset start1_ = location(1,0);
+        locset end1_   = location(1,1);
+
+        auto reg0_ = distal_interval(start1_, 45);
+        auto reg1_ = distal_interval(mid0_,   74);
+        auto reg2_ = proximal_interval(end1_, 45);
+        auto reg3_ = proximal_interval(end1_, 91);
+        auto reg4_ = distal_interval(end1_, 0);
+        auto reg5_ = distal_interval(start1_, 0);
+        auto reg6_ = proximal_interval(start1_, 0);
+
         EXPECT_EQ(thingify(tagged(1), mp), (mcable_list{{0,0,1}}));
         EXPECT_EQ(thingify(tagged(2), mp), (mcable_list{{2,0,1}}));
         EXPECT_EQ(thingify(tagged(3), mp), (mcable_list{{1,0,1}}));
         EXPECT_EQ(thingify(join(tagged(1), tagged(2), tagged(3)), mp), (mcable_list{{0,0,1}, {1,0,1}, {2,0,1}}));
         EXPECT_EQ(thingify(join(tagged(1), tagged(2), tagged(3)), mp), thingify(all(), mp));
+        EXPECT_EQ(thingify(reg0_, mp), (mcable_list{{1,0,0.5}}));
+        EXPECT_EQ(thingify(reg1_, mp), (mcable_list{{0,0.5,1}, {1,0,0.8}, {2,0,0.8}}));
+        EXPECT_EQ(thingify(reg2_, mp), (mcable_list{{1,0.5,1}}));
+        EXPECT_EQ(thingify(reg3_, mp), (mcable_list{{0, 0.75, 1}, {1,0,1}}));
+        EXPECT_EQ(thingify(reg4_, mp), (mcable_list{{1,1,1}}));
+        EXPECT_EQ(thingify(reg5_, mp), (mcable_list{{0,1,1}}));
+        EXPECT_EQ(thingify(reg6_, mp), (mcable_list{{0,1,1}}));
     }
+}
+
+TEST(region, thingify_moderate_morphologies) {
+    using pvec = std::vector<msize_t>;
+    using svec = std::vector<msample>;
+    using cl = mcable_list;
 
     // Test multi-level morphologies.
     //
@@ -382,21 +523,28 @@ TEST(region, thingify) {
     {
         pvec parents = {mnpos, 0, 1, 0, 3, 4, 4, 6};
         svec samples = {
-            {{  0,  0,  0,  2}, 1},
-            {{ 10,  0,  0,  2}, 3},
-            {{100,  0,  0,  2}, 3},
-            {{  0, 10,  0,  2}, 2},
-            {{  0,100,  0,  2}, 2},
+            {{  0,  0,  0,  1}, 1},
+            {{ 10,  0,  0,  1}, 3},
+            {{100,  0,  0,  3}, 3},
+            {{  0, 10,  0,  1}, 2},
+            {{  0,100,  0,  5}, 2},
             {{100,100,  0,  2}, 4},
-            {{  0,200,  0,  2}, 3},
-            {{  0,300,  0,  2}, 3},
+            {{  0,200,  0,  1}, 3},
+            {{  0,300,  0,  3}, 3},
         };
         sample_tree sm(samples, parents);
 
         // Without spherical root
         mprovider mp(morphology(sm, false));
 
+        using ls::location;
         using reg::tagged;
+        using reg::distal_interval;
+        using reg::proximal_interval;
+        using reg::radius_lt;
+        using reg::radius_le;
+        using reg::radius_gt;
+        using reg::radius_ge;
         using reg::branch;
         using reg::all;
         using reg::cable;
@@ -419,11 +567,10 @@ TEST(region, thingify) {
         mcable b3_{3,0,1};
         cl all_  = {b0_,b1_,b2_,b3_};
 
-        mcable end1_{1,1,1};
-        mcable root_{0,0,0};
+        mcable c_end1_{1,1,1};
+        mcable c_root_{0,0,0};
 
         EXPECT_EQ(thingify(all(), mp), all_);
-        EXPECT_EQ(thingify(soma, mp), empty_);
         EXPECT_EQ(thingify(axon, mp), (cl{b1_}));
         EXPECT_EQ(thingify(dend, mp), (cl{b0_,b3_}));
         EXPECT_EQ(thingify(apic, mp), (cl{b2_}));
@@ -432,9 +579,60 @@ TEST(region, thingify) {
 
         // Test that intersection correctly generates zero-length cables at
         // parent-child interfaces.
-        EXPECT_EQ(thingify(intersect(apic, dend), mp), (cl{end1_}));
-        EXPECT_EQ(thingify(intersect(apic, axon), mp), (cl{end1_}));
-        EXPECT_EQ(thingify(intersect(axon, dend), mp), (cl{root_, end1_}));
+        EXPECT_EQ(thingify(intersect(apic, dend), mp), (cl{c_end1_}));
+        EXPECT_EQ(thingify(intersect(apic, axon), mp), (cl{c_end1_}));
+        EXPECT_EQ(thingify(intersect(axon, dend), mp), (cl{c_root_, c_end1_}));
+
+        // Test distal and proximal intervals
+        auto start0_         = location(0, 0   );
+        auto quar_1_         = location(1, 0.25);
+        auto mid1_           = location(1, 0.5 );
+        auto end1_           = location(1, 1   );
+        auto mid2_           = location(2, 0.5 );
+        auto end2_           = location(2, 1   );
+        auto mid3_           = location(3, 0.5 );
+        auto loc_3_0_        = location(3, 0.4 );
+        auto loc_3_1_        = location(3, 0.65);
+        auto mid_3_          = location(3, 0.5 );
+        auto reg_a_ = join(cable(0,0.1,0.4), cable(2,0,1), cable(3,0.1,0.4));
+        auto reg_b_ = join(cable(0,0.1,0.4), cable(2,0,1), cable(3,0.1,0.3));
+        auto reg_c_ = join(cable(0,0,0.7), cable(2,0,0.5), cable(3,0.1,0.4), cable(3,0.9,1));
+        auto reg_d_ = join(cable(0,0,0.7), cable(2,0,0.5), cable(3,0.1,0.9));
+
+        // Distal from point and/or interval
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(start0_, 1000), mp), (mcable_list{{0,0,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(quar_1_,  150), mp), (mcable_list{{1,0.25,1}, {2,0,0.75}, {3,0,0.375}})));
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(mid1_,   1000), mp), (mcable_list{{1,0.5,1}, {2,0,1}, {3,0,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(mid1_,    150), mp), (mcable_list{{1,0.5,1}, {2,0,1}, {3,0,0.5}})));
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(end1_,    100), mp), (mcable_list{{2,0,1},{3,0,0.5}})));
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(join(quar_1_, mid1_),    150), mp), (mcable_list{{1,0.25,1}, {2,0,1}, {3,0,0.5}})));
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(join(quar_1_, loc_3_1_), 150), mp), (mcable_list{{1,0.25,1}, {2,0,0.75}, {3,0,0.375}, {3,0.65,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(distal_interval(join(quar_1_, loc_3_1_), 150), mp), (mcable_list{{1,0.25,1}, {2,0,0.75}, {3,0,0.375}, {3,0.65,1}})));
+
+        // Proximal from point and/or interval
+        EXPECT_TRUE(cablelist_eq(thingify(proximal_interval(mid3_, 100), mp), (mcable_list{{3,0,0.5}})));
+        EXPECT_TRUE(cablelist_eq(thingify(proximal_interval(mid3_, 150), mp), (mcable_list{{1,0.5,1}, {3,0,0.5}})));
+        EXPECT_TRUE(cablelist_eq(thingify(proximal_interval(end2_, 150), mp), (mcable_list{{1,0.5,1}, {2,0,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(proximal_interval(end2_, 500), mp), (mcable_list{{1,0,1}, {2,0,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(proximal_interval(loc_3_0_, 100), mp), (mcable_list{{1,0.8,1}, {3,0,0.4}})));
+        EXPECT_TRUE(cablelist_eq(thingify(proximal_interval(join(loc_3_0_, mid2_), 120), mp), (mcable_list{{1,0.3,1}, {2,0,0.5}, {3, 0, 0.4}})));
+
+        // Test radius_lt and radius_gt
+        EXPECT_TRUE(cablelist_eq(thingify(radius_lt(all(), 2), mp), (mcable_list{{0,0,0.55}, {1,0,0.325}, {3,0.375,0.75}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_lt(all(), 3), mp), (mcable_list{{0,0,1}, {1,0,0.55}, {2,6.0/9.0,1}, {3,0.25,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_gt(all(), 2), mp), (mcable_list{{0,0.55,1}, {1,0.325,1}, {2,0,1}, {3,0,0.375}, {3,0.75,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_gt(all(), 3), mp), (mcable_list{{1,0.55,1}, {2,0,6.0/9.0}, {3,0,0.25}})));
+
+        EXPECT_TRUE(cablelist_eq(thingify(radius_le(all(), 2), mp), (mcable_list{{0,0,0.55}, {1,0,0.325}, {2,1,1}, {3,0.375,0.75}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_le(all(), 3), mp), (mcable_list{{0,0,1}, {1,0,0.55}, {2,6.0/9.0,1}, {3,0.25,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_ge(all(), 2), mp), (mcable_list{{0,0.55,1}, {1,0.325,1}, {2,0,1}, {3,0,0.375}, {3,0.75,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_ge(all(), 3), mp), (mcable_list{{1,0.55,1}, {2,0,6.0/9.0}, {3,0,0.25}})));
+
+        EXPECT_TRUE(cablelist_eq(thingify(radius_lt(reg_a_, 2), mp), (mcable_list{{0,0.1,0.4},{3,0.375,0.4}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_gt(reg_a_, 2), mp), (mcable_list{{2,0,1},{3,0.1,0.375}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_lt(reg_b_, 2), mp), (mcable_list{{0,0.1,0.4}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_gt(reg_c_, 2), mp), (mcable_list{{0,0.55,0.7},{2,0,0.5},{3,0.1,0.375},{3,0.9,1}})));
+        EXPECT_TRUE(cablelist_eq(thingify(radius_gt(reg_d_, 2), mp), (mcable_list{{0,0.55,0.7},{2,0,0.5},{3,0.1,0.375},{3,0.75,0.9}})));
 
         // Test some more interesting intersections and unions.
 
@@ -570,6 +768,11 @@ TEST(region, thingify) {
         EXPECT_EQ(thingify(join(lhs, rhs), mp), ror);
 
     }
+}
+TEST(region, thingify_complex_morphologies) {
+    using pvec = std::vector<msize_t>;
+    using svec = std::vector<msample>;
+    using cl = mcable_list;
     {
         pvec parents = {mnpos, 0, 1, 0, 3, 4, 5, 5, 7, 7, 4, 10};
         svec samples = {
@@ -588,8 +791,6 @@ TEST(region, thingify) {
         };
         sample_tree sm(samples, parents);
         auto m = morphology(sm, false);
-        std::cout << m.branch_parent(7);
-
         {
             auto in = cl{{0,0,0},{1,0,0.5},{1,1,1},{2,0,1},{2,1,1},{3,1,1},{4,0,1},{5,1,1},{7,0,1}};
             auto out = reg::remove_covered_points(in, m);
@@ -604,5 +805,104 @@ TEST(region, thingify) {
             auto expected = cl{{1,0,0.5},{3,1,1},{4,0,1},{5,1,1},{7,0,1}};
             EXPECT_TRUE(cablelist_eq(out, expected));
         }
+        {
+            mprovider mp(m);
+            using reg::cable;
+            using ls::most_distal;
+            using ls::most_proximal;
+
+            auto reg_a_ = join(cable(0,0.1,0.4), cable(0,0,0.9), cable(1,0.1,0.4));
+            auto reg_b_ = join(cable(0,0.1,0.4), cable(0,0,0.9), cable(1,0.1,0.4), cable(1,0.2,0.5));
+            auto reg_c_ = join(cable(0,0.1,0.4), cable(0,0,0.9), cable(1,0.1,0.4), cable(2,0.2,0.5));
+            auto reg_d_ = join(cable(2,0,0.9), cable(3,0.1,0.1), cable(4,0.1,0.6));
+            auto reg_e_ = join(cable(2,0,0.9), cable(4,0.1,0.1), cable(5,0.1,0.6));
+            auto reg_f_ = join(cable(7,0,1), cable(2,0,0.9), cable(4,0.1,0.1), cable(5,0.1,0.6));
+
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_distal(reg_a_), mp), mlocation_list{{0,0.9},{1,0.4}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_distal(reg_b_), mp), mlocation_list{{0,0.9},{1,0.5}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_distal(reg_c_), mp), mlocation_list{{0,0.9},{2,0.5}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_distal(reg_d_), mp), mlocation_list{{3,0.1},{4,0.6}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_distal(reg_e_), mp), mlocation_list{{5,0.6}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_distal(reg_f_), mp), mlocation_list{{5,0.6},{7,1}}));
+
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_proximal(reg_a_), mp), mlocation_list{{0,0}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_proximal(reg_b_), mp), mlocation_list{{0,0}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_proximal(reg_c_), mp), mlocation_list{{0,0}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_proximal(reg_d_), mp), mlocation_list{{2,0}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_proximal(reg_e_), mp), mlocation_list{{2,0}}));
+            EXPECT_TRUE(mloctionlist_eq(thingify(most_proximal(reg_f_), mp), mlocation_list{{2,0}}));
+        }
+    }
+    {
+        pvec parents = {mnpos, 0, 1, 1, 2, 3, 0, 6, 7, 8, 7};
+        svec samples = {
+                {{  0, 10, 10,  1}, 1},
+                {{  0, 30, 30,  1}, 2},
+                {{  0, 60,-20,  1}, 2},
+                {{  0, 90, 70,  1}, 2},
+                {{  0, 80,-10,  1}, 2},
+                {{  0,100,-40,  1}, 2},
+                {{  0,-50,-50,  1}, 2},
+                {{  0, 20,-30,  2}, 2},
+                {{  0, 40,-80,  2}, 2},
+                {{  0,-30,-80,  3}, 2},
+                {{  0, 90,-70,  5}, 2}
+        };
+        sample_tree sm(samples, parents);
+
+        // Without spherical root
+        mprovider mp(morphology(sm, false));
+
+        using reg::all;
+        using reg::z_dist_from_soma_lt;
+        using reg::z_dist_from_soma_le;
+        using reg::z_dist_from_soma_gt;
+        using reg::z_dist_from_soma_ge;
+        using reg::cable;
+
+        // Test projection
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_lt(0), mp), (mcable_list{})));
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_ge(0), mp), thingify(all(), mp)));
+
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_le(100), mp), thingify(all(), mp)));
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_gt(100), mp), (mcable_list{})));
+
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_le(90), mp), thingify(all(), mp)));
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_gt(90), mp), (mcable_list{})));
+
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_lt(20), mp),
+                                (mcable_list{{0,0,1},
+                                             {1,0,0.578250901781922829},
+                                             {2,0.61499300915417734997,0.8349970039232188642},
+                                             {3,0,0.179407353580315756}
+                                })));
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_ge(20), mp),
+                                (mcable_list{{0,1,1},
+                                             {1,0.578250901781922829,1},
+                                             {2,0,0.61499300915417734997},
+                                             {2,0.8349970039232188642,1},
+                                             {3,0.179407353580315756,1},
+                                             {4,0,1},
+                                             {5,0,1}
+                                })));
+        EXPECT_TRUE(cablelist_eq(thingify(join(z_dist_from_soma_lt(20), z_dist_from_soma_ge(20)), mp), thingify(all(), mp)));
+
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_le(50), mp),
+                                (mcable_list{{0,0,1},
+                                             {1,0,1},
+                                             {2,0,0.2962417607888518767},
+                                             {2,0.4499900130773962142,1},
+                                             {3,0,0.4485183839507893905},
+                                             {3,0.7691110303704736343,1},
+                                             {4,0,0.0869615364994152821},
+                                             {5,0,0.25}
+                                })));
+        EXPECT_TRUE(cablelist_eq(thingify(z_dist_from_soma_gt(50), mp),
+                                (mcable_list{{2,0.2962417607888518767,0.4499900130773962142},
+                                             {3,0.4485183839507893905,0.7691110303704736343},
+                                             {4,0.0869615364994152821,1},
+                                             {5,0.25,1}})));
+
+        EXPECT_TRUE(cablelist_eq(thingify(join(z_dist_from_soma_le(50), z_dist_from_soma_gt(50)), mp), thingify(all(), mp)));
     }
 }