1 | //
|
---|
2 | // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
|
---|
3 | //
|
---|
4 | // The contents of this file are covered under the licence agreement in the
|
---|
5 | // file "LICENCE" distributed with Cforall.
|
---|
6 | //
|
---|
7 | // PersistentDisjointSet.h --
|
---|
8 | //
|
---|
9 | // Author : Aaron B. Moss
|
---|
10 | // Created On : Wed Jun 13 16:31:00 2018
|
---|
11 | // Last Modified By : Aaron B. Moss
|
---|
12 | // Last Modified On : Wed Jun 13 16:31:00 2018
|
---|
13 | // Update Count : 1
|
---|
14 | //
|
---|
15 |
|
---|
16 | #pragma once
|
---|
17 |
|
---|
18 | #include <cassert>
|
---|
19 | #include <functional>
|
---|
20 | #include <unordered_map>
|
---|
21 | #include <utility>
|
---|
22 |
|
---|
23 | #include "GC.h"
|
---|
24 |
|
---|
25 | /// Persistent disjoint-set data structure based on the persistent array in
|
---|
26 | /// Conchon & Filliatre "A Persistent Union-Find Data Structure". Path
|
---|
27 | /// compression is not performed (to lower cost of persistent rollback).
|
---|
28 | /// Auxiliary lists are kept for efficient retrieval of class members.
|
---|
29 | /// Find root should still operate in O(log k), for k the size of an
|
---|
30 | /// equivalence class.
|
---|
31 |
|
---|
32 | template<typename Elm, typename Hash = std::hash<Elm>, typename Eq = std::equal_to<Elm>>
|
---|
33 | class PersistentDisjointSet : public GC_Object {
|
---|
34 | public:
|
---|
35 | /// Type of this class
|
---|
36 | using Self = PersistentDisjointSet<Elm, Hash, Eq>;
|
---|
37 |
|
---|
38 | /// Types of version nodes
|
---|
39 | enum Mode {
|
---|
40 | BASE, ///< Root node of version tree
|
---|
41 | ADD, ///< Add key to set
|
---|
42 | REM, ///< Reverse add operation
|
---|
43 | ADDTO, ///< Merge one class root under another
|
---|
44 | REMFROM ///< Reverse addTo operation
|
---|
45 | };
|
---|
46 |
|
---|
47 | private:
|
---|
48 | /// Type of node height
|
---|
49 | using Height = unsigned char;
|
---|
50 |
|
---|
51 | /// Disjoint-set node
|
---|
52 | struct Node {
|
---|
53 | Elm parent; ///< Parent node in equivalence class
|
---|
54 | Elm next; ///< Next node in equivalence class
|
---|
55 | Height height; ///< Tree height of the node
|
---|
56 |
|
---|
57 | template<typename E>
|
---|
58 | Node(E&& e) : parent(e), next(std::forward<E>(e)), height(0) {}
|
---|
59 |
|
---|
60 | template<typename E, typename F>
|
---|
61 | Node(E&& p, F&& n, Height h)
|
---|
62 | : parent(std::forward<E>(p)), next(std::forward<F>(n)), height(h) {}
|
---|
63 | };
|
---|
64 |
|
---|
65 | /// Type of class map
|
---|
66 | using Base = std::unordered_map<Elm, Node, Hash, Eq>;
|
---|
67 |
|
---|
68 | /// Node inserted into underlying map as new equivalence class
|
---|
69 | struct Add {
|
---|
70 | Self* base; ///< Modified map
|
---|
71 | Elm root; ///< Element added
|
---|
72 |
|
---|
73 | template<typename E>
|
---|
74 | Add(Self* b, E&& r) : base(b), root(std::forward<E>(r)) {}
|
---|
75 | };
|
---|
76 |
|
---|
77 | /// Two classes merged
|
---|
78 | struct AddTo {
|
---|
79 | Self* base; ///< Modified map
|
---|
80 | Elm root; ///< Root node
|
---|
81 | Elm child; ///< Child node, formerly root of own class
|
---|
82 | bool new_height; ///< Did the root node's height change?
|
---|
83 |
|
---|
84 | template<typename R, typename C>
|
---|
85 | AddTo(Self* b, R&& r, C&& c, bool h)
|
---|
86 | : base(b), root(std::forward<R>(r)), child(std::forward<C>(c)), new_height(h) {}
|
---|
87 | };
|
---|
88 |
|
---|
89 | /// Underlying storage
|
---|
90 | union Data {
|
---|
91 | char none;
|
---|
92 | Base base;
|
---|
93 | Add add;
|
---|
94 | AddTo add_to;
|
---|
95 |
|
---|
96 | Data() : none('\0') {}
|
---|
97 | ~Data() {}
|
---|
98 | } data;
|
---|
99 |
|
---|
100 | /// Type of version node
|
---|
101 | mutable Mode mode;
|
---|
102 |
|
---|
103 | /// get mutable reference as T
|
---|
104 | template<typename T>
|
---|
105 | T& as() { return reinterpret_cast<T&>(data); }
|
---|
106 |
|
---|
107 | /// get const reference as T
|
---|
108 | template<typename T>
|
---|
109 | const T& as() const { return reinterpret_cast<const T&>(data); }
|
---|
110 |
|
---|
111 | /// get rvalue reference as T
|
---|
112 | template<typename T>
|
---|
113 | T&& take_as() { return std::move(as<T>()); }
|
---|
114 |
|
---|
115 | /// initialize as T
|
---|
116 | template<typename T, typename... Args>
|
---|
117 | void init( Args&&... args ) {
|
---|
118 | new( &as<T>() ) T { std::forward<Args>(args)... };
|
---|
119 | }
|
---|
120 |
|
---|
121 | /// reset according to current mode
|
---|
122 | void reset() {
|
---|
123 | switch( mode ) {
|
---|
124 | case BASE: as<Base>().~Base(); break;
|
---|
125 | case ADD: case REM: as<Add>().~Add(); break;
|
---|
126 | case ADDTO: case REMFROM: as<AddTo>().~AddTo(); break;
|
---|
127 | default: assertf(false, "invalid mode");
|
---|
128 | }
|
---|
129 | }
|
---|
130 |
|
---|
131 | /// reset as base
|
---|
132 | void reset_as_base() {
|
---|
133 | assertf( mode == BASE, "can only reset_as_base() on BASE" );
|
---|
134 | as<Base>().~Base();
|
---|
135 | }
|
---|
136 |
|
---|
137 | /// Non-initializing constructor; should call init() before use
|
---|
138 | PersistentDisjointSet( Mode m ) : data(), mode(m) {}
|
---|
139 |
|
---|
140 | PersistentDisjointSet( Mode m, Base&& b ) : data(), mode(m) {
|
---|
141 | assertf(m == BASE, "invalid mode");
|
---|
142 | init<Base>(std::move(b));
|
---|
143 | }
|
---|
144 |
|
---|
145 | template<typename R>
|
---|
146 | PersistentDisjointSet( Mode m, const Self* b, R&& r ) : data(), mode(m) {
|
---|
147 | assertf(m == ADD || m == REM, "invalid mode");
|
---|
148 | init<Add>(b, std::forward<R>(r));
|
---|
149 | }
|
---|
150 |
|
---|
151 | template<typename R, typename C>
|
---|
152 | PersistentDisjointSet( Mode m, const Self* b, R&& r, C&& c, bool h ) : data(), mode(m) {
|
---|
153 | assertf(m == ADDTO || m == REMFROM, "invalid mode");
|
---|
154 | init<AddTo>(b, std::forward<R>(r), std::forward<C>(c), h);
|
---|
155 | }
|
---|
156 |
|
---|
157 | /// Adds (also removes) graph edges.
|
---|
158 | /// * `from.parent` updated to `new_root`,
|
---|
159 | /// * `from.next` and `to.next` swapped (splices or un-splices class lists)
|
---|
160 | /// * `to.height` adjusted by change
|
---|
161 | template<typename R>
|
---|
162 | static void addEdge( Node& from, Node& to, R&& new_root, Height change ) {
|
---|
163 | from.parent = std::forward<R>(new_root);
|
---|
164 | std::swap(from.next, to.next);
|
---|
165 | to.height += change;
|
---|
166 | }
|
---|
167 |
|
---|
168 | protected:
|
---|
169 | void trace( const GC& gc ) const {
|
---|
170 | switch( mode ) {
|
---|
171 | case BASE: {
|
---|
172 | for (const auto& entry : as<Base>()) {
|
---|
173 | gc.maybe_trace( entry.first );
|
---|
174 | }
|
---|
175 | return;
|
---|
176 | }
|
---|
177 | case ADD: case REM: {
|
---|
178 | const Add& self = as<Add>();
|
---|
179 | gc << self.base;
|
---|
180 | gc.maybe_trace( self.root );
|
---|
181 | return;
|
---|
182 | }
|
---|
183 | case ADDTO: case REMFROM: {
|
---|
184 | const AddTo& self = as<AddTo>();
|
---|
185 | gc << self.base;
|
---|
186 | gc.maybe_trace( self.root, self.child );
|
---|
187 | return;
|
---|
188 | }
|
---|
189 | default: assertf(false, "invalid mode");
|
---|
190 | }
|
---|
191 | }
|
---|
192 |
|
---|
193 | public:
|
---|
194 | using size_type = std::size_t;
|
---|
195 |
|
---|
196 | using iterator = typename Base::const_iterator;
|
---|
197 |
|
---|
198 | PersistentDisjointSet() : data(), mode(BASE) { init<Base>(); }
|
---|
199 |
|
---|
200 | PersistentDisjointSet( const Self& o ) = delete;
|
---|
201 |
|
---|
202 | Self& operator= ( const Self& o ) = delete;
|
---|
203 |
|
---|
204 | ~PersistentDisjointSet() { reset(); }
|
---|
205 |
|
---|
206 | /// reroot persistent data structure at current node
|
---|
207 | void reroot() const {
|
---|
208 | if ( mode == BASE ) return;
|
---|
209 |
|
---|
210 | // reroot base
|
---|
211 | Self* mut_this = const_cast<Self*>(this);
|
---|
212 | Self* base = ( mode == ADD || mode == REM ) ?
|
---|
213 | mut_this->as<Add>().base :
|
---|
214 | mut_this->as<AddTo>().base;
|
---|
215 | base->reroot();
|
---|
216 | assertf(base->mode == BASE, "reroot results in base");
|
---|
217 |
|
---|
218 | // take map out of base
|
---|
219 | Base base_map = base->take_as<Base>();
|
---|
220 | base->reset_as_base();
|
---|
221 |
|
---|
222 | // switch base to inverse of self and mutate base map
|
---|
223 | switch ( mode ) {
|
---|
224 | case ADD: {
|
---|
225 | Add& self = mut_this->as<Add>();
|
---|
226 |
|
---|
227 | base->init<Add>( mut_this, self.root );
|
---|
228 | base->mode = REM;
|
---|
229 |
|
---|
230 | base_map.emplace( self.root, Node{ std::move(self.root) } );
|
---|
231 | } break;
|
---|
232 | case REM: {
|
---|
233 | Add& self = mut_this->as<Add>();
|
---|
234 |
|
---|
235 | base->init<Add>( mut_this, self.root );
|
---|
236 | base->mode = ADD;
|
---|
237 |
|
---|
238 | base_map.erase( self.root );
|
---|
239 | } break;
|
---|
240 | case ADDTO: {
|
---|
241 | AddTo& self = mut_this->as<AddTo>();
|
---|
242 |
|
---|
243 | base->init<AddTo>( mut_this, self.root, self.child, self.new_height );
|
---|
244 | base->mode = REMFROM;
|
---|
245 |
|
---|
246 | auto child_it = base_map.find( self.child );
|
---|
247 | auto root_it = base_map.find( self.root );
|
---|
248 | assertf(child_it != base_map.end() && root_it != base_map.end(),
|
---|
249 | "nodes must exist in base");
|
---|
250 | Node& child = child_it->second;
|
---|
251 | Node& root = root_it->second;
|
---|
252 |
|
---|
253 | addEdge( child, root, std::move(self.root), Height(self.new_height) );
|
---|
254 | } break;
|
---|
255 | case REMFROM: {
|
---|
256 | AddTo& self = mut_this->as<AddTo>();
|
---|
257 |
|
---|
258 | base->init<AddTo>( mut_this, self.root, self.child, self.new_height );
|
---|
259 | base->mode = ADDTO;
|
---|
260 |
|
---|
261 | auto child_it = base_map.find( self.child );
|
---|
262 | auto root_it = base_map.find( self.root );
|
---|
263 | assertf(child_it != base_map.end() && root_it != base_map.end(),
|
---|
264 | "nodes must exist in base");
|
---|
265 | Node& child = child_it->second;
|
---|
266 | Node& root = root_it->second;
|
---|
267 |
|
---|
268 | addEdge( child, root, std::move(self.child), Height(-1 * self.new_height) );
|
---|
269 | } break;
|
---|
270 | default: assertf(false, "invalid mode");
|
---|
271 | }
|
---|
272 |
|
---|
273 | // set base map into self
|
---|
274 | mut_this->reset();
|
---|
275 | mut_this->init<Base>( std::move(base_map) );
|
---|
276 | mode = BASE;
|
---|
277 | }
|
---|
278 |
|
---|
279 | private:
|
---|
280 | /// Gets the base after rerooting at the current node
|
---|
281 | const Base& rerooted() const {
|
---|
282 | reroot();
|
---|
283 | assertf(mode == BASE, "reroot results in base");
|
---|
284 | return as<Base>();
|
---|
285 | }
|
---|
286 |
|
---|
287 | public:
|
---|
288 | /// true if the set of sets is empty
|
---|
289 | bool empty() const { return rerooted().empty(); }
|
---|
290 |
|
---|
291 | /// Get number of entries in the map
|
---|
292 | size_type size() const { return rerooted().size(); }
|
---|
293 |
|
---|
294 | /// Get begin iterator for map; may be invalidated by calls to non-iteration functions
|
---|
295 | /// or functions on other maps in the same chain
|
---|
296 | iterator begin() const { return rerooted().begin(); }
|
---|
297 |
|
---|
298 | /// Get end iterator for map; may be invalidated by calls to non-iteration functions
|
---|
299 | /// or functions on other maps in the same chain
|
---|
300 | iterator end() const { return rerooted().end(); }
|
---|
301 |
|
---|
302 | /// Check if value is present
|
---|
303 | size_type count(Elm i) const { return rerooted().count( i ); }
|
---|
304 |
|
---|
305 | /// Finds root for element i, undefined behaviour if i is not present
|
---|
306 | Elm find(Elm i) const {
|
---|
307 | const Base& self = rerooted();
|
---|
308 |
|
---|
309 | auto it = self.find( i );
|
---|
310 | while (true) {
|
---|
311 | assertf(it != self.end(), "find target not present");
|
---|
312 |
|
---|
313 | if ( it->first == it->second.parent ) return it->first;
|
---|
314 |
|
---|
315 | it = self.find( it->second.parent );
|
---|
316 | }
|
---|
317 | }
|
---|
318 |
|
---|
319 | /// Finds root for element i, or default if i is not present
|
---|
320 | template<typename E>
|
---|
321 | Elm find_or_default(Elm i, E&& d) const {
|
---|
322 | const Base& self = rerooted();
|
---|
323 |
|
---|
324 | auto it = self.find( i );
|
---|
325 | if ( it == self.end() ) return d;
|
---|
326 |
|
---|
327 | while ( it->first != it->second.parent ) {
|
---|
328 | it = self.find( it->second.parent );
|
---|
329 |
|
---|
330 | assertf(it != self.end(), "find target not present");
|
---|
331 | }
|
---|
332 | return it->first;
|
---|
333 | }
|
---|
334 |
|
---|
335 | /// Adds fresh class including only one item; returns updated map (or self if no change)
|
---|
336 | template<typename E>
|
---|
337 | Self* add(E&& i) {
|
---|
338 | reroot();
|
---|
339 |
|
---|
340 | // add new element to node
|
---|
341 | Base base_map = take_as<Base>();
|
---|
342 | bool added = base_map.emplace( i, Node{ i } ).second;
|
---|
343 |
|
---|
344 | // fail early on node already present
|
---|
345 | if ( ! added ) {
|
---|
346 | as<Base>() = std::move(base_map);
|
---|
347 | return this;
|
---|
348 | }
|
---|
349 |
|
---|
350 | // make new return node and reset self as REM node
|
---|
351 | Self* ret = new Self{ BASE, std::move(base_map) };
|
---|
352 | reset_as_base();
|
---|
353 | init<Add>( ret, i );
|
---|
354 | mode = REM;
|
---|
355 |
|
---|
356 | return ret;
|
---|
357 | }
|
---|
358 |
|
---|
359 | /// Merges two classes given by their roots; returns updated map.
|
---|
360 | /// If two classes have same height, `i` is new root.
|
---|
361 | Self* merge(Elm i, Elm j) {
|
---|
362 | reroot();
|
---|
363 |
|
---|
364 | // transfer map to new node
|
---|
365 | Self* ret = new Self{ BASE, take_as<Base>() };
|
---|
366 | reset_as_base();
|
---|
367 |
|
---|
368 | // find set nodes
|
---|
369 | Base& base_map = ret->as<Base>();
|
---|
370 | auto it = base_map.find( i );
|
---|
371 | auto jt = base_map.find( j );
|
---|
372 | assertf(it != base_map.end() && jt != base_map.end(), "nodes must exist in base");
|
---|
373 | Node& in = it->second;
|
---|
374 | Node& jn = jt->second;
|
---|
375 |
|
---|
376 | // update returned map and set self to appropriate REMFROM node
|
---|
377 | if ( in.height < jn.height ) {
|
---|
378 | addEdge( in, jn, j, 0 );
|
---|
379 | init<AddTo>( ret, j, i, false );
|
---|
380 | } else if ( jn.height < in.height ) {
|
---|
381 | addEdge( jn, in, i, 0 );
|
---|
382 | init<AddTo>( ret, i, j, false );
|
---|
383 | } else /* if ( jn.height == in.height ) */ {
|
---|
384 | addEdge( jn, in, i, 1 );
|
---|
385 | init<AddTo>( ret, i, j, true );
|
---|
386 | }
|
---|
387 | mode = REMFROM;
|
---|
388 |
|
---|
389 | return ret;
|
---|
390 | }
|
---|
391 |
|
---|
392 | private:
|
---|
393 | /// Removes the children of the tree rooted at `root` as `root_node`, returning a new
|
---|
394 | /// uninitialized node and editing `next_edit` to point toward this new node.
|
---|
395 | static Self* remove_children(Elm root, Node& root_node, Base& base_map, Self* next_edit) {
|
---|
396 | // Invariant: root.next is *always* a pointer to a leaf of the most-recently added
|
---|
397 | // subtree.
|
---|
398 | //
|
---|
399 | // Proof: By induction on height:
|
---|
400 | // * height == 0: root.next == root, trivially true
|
---|
401 | // * added.height < root.height: true by ind. hyp.
|
---|
402 | // * added.height == root.height: no node may have a child the same height, ergo
|
---|
403 | // property holds for added, therefore root.
|
---|
404 | //
|
---|
405 | // Corollary: next of most-recently-added subtree root is previously added subtree
|
---|
406 |
|
---|
407 | // remove all subtrees
|
---|
408 | while ( root != root_node.next ) {
|
---|
409 | // find added child
|
---|
410 | auto it = base_map.find( root_node.next );
|
---|
411 | while ( it->second.parent != root ) it = base_map.find( it->second.parent );
|
---|
412 | Elm added = it->first;
|
---|
413 | Node& added_node = it->second;
|
---|
414 |
|
---|
415 | // unlink subtree and set up previous map as ADDTO new map
|
---|
416 | std::swap( root_node.next, added_node.next );
|
---|
417 | Self* new_edit = new Self{ BASE };
|
---|
418 | next_edit->init<AddTo>( new_edit, root, added, false ); // assume unchanged root height
|
---|
419 | next_edit->mode = ADDTO;
|
---|
420 |
|
---|
421 | // remove subtree children from map
|
---|
422 | next_edit = remove_children( added, added_node, base_map, new_edit );
|
---|
423 |
|
---|
424 | // remove subtree root from map and set up previous map as ADD to new map
|
---|
425 | base_map.erase( added );
|
---|
426 | new_edit = new Self{ BASE };
|
---|
427 | next_edit->init<Add>( new_edit, added );
|
---|
428 | next_edit->mode = ADD;
|
---|
429 |
|
---|
430 | next_edit = new_edit;
|
---|
431 | }
|
---|
432 |
|
---|
433 | return next_edit;
|
---|
434 | }
|
---|
435 |
|
---|
436 | public:
|
---|
437 | /// Removes all elements of a class given by its root; returns updated map.
|
---|
438 | Self* remove_class(Elm root) {
|
---|
439 | reroot();
|
---|
440 |
|
---|
441 | // remove map from self
|
---|
442 | Base base_map = take_as<Base>();
|
---|
443 | reset_as_base();
|
---|
444 |
|
---|
445 | // find root node and remove children
|
---|
446 | auto it = base_map.find( root );
|
---|
447 | assertf(it != base_map.end(), "root node must exist in map");
|
---|
448 | Self* next_edit = remove_children( root, it->second, base_map, this);
|
---|
449 |
|
---|
450 | // root is alone in class, remove from map
|
---|
451 | base_map.erase( root );
|
---|
452 | Self* ret = new Self{ BASE, std::move(base_map) };
|
---|
453 | // reinitialize previous node as ADD to new map
|
---|
454 | next_edit->init<Add>( ret, root );
|
---|
455 | next_edit->mode = ADD;
|
---|
456 |
|
---|
457 | return ret;
|
---|
458 | }
|
---|
459 |
|
---|
460 | /// Applies `f` to all members of a class containing `i` (undefined behaviour if not present).
|
---|
461 | /// `f` should take members by const reference or copy
|
---|
462 | template<typename F>
|
---|
463 | void for_class(Elm i, F&& f) const {
|
---|
464 | const Base& self = rerooted();
|
---|
465 |
|
---|
466 | // exit early if class not present
|
---|
467 | auto it = self.find( i );
|
---|
468 | if ( it == self.end() ) return;
|
---|
469 |
|
---|
470 | // apply f to each member of class
|
---|
471 | f( i );
|
---|
472 | for ( Elm crnt = it->second.next; crnt != i; crnt = it->second.next ) {
|
---|
473 | f( crnt );
|
---|
474 | it = self.find( crnt );
|
---|
475 | assertf( it != self.end(), "current node must exist in base" );
|
---|
476 | }
|
---|
477 | }
|
---|
478 |
|
---|
479 | /// Get version node type
|
---|
480 | Mode get_mode() const { return mode; }
|
---|
481 |
|
---|
482 | /// Get next version up the revision tree (self if base node)
|
---|
483 | const Self* get_base() const {
|
---|
484 | switch ( mode ) {
|
---|
485 | case BASE: return this;
|
---|
486 | case ADD: case REM: return as<Add>().base;
|
---|
487 | case ADDTO: case REMFROM: return as<AddTo>().base;
|
---|
488 | default: assertf(false, "invalid mode");
|
---|
489 | }
|
---|
490 | }
|
---|
491 |
|
---|
492 | /// Get root of new class formed/removed/split (undefined if called on base)
|
---|
493 | Elm get_root() const {
|
---|
494 | switch ( mode ) {
|
---|
495 | case ADD: case REM: return as<Add>().root;
|
---|
496 | case ADDTO: case REMFROM: return as<AddTo>().root;
|
---|
497 | default: assertf(false, "invalid mode for get_root()");
|
---|
498 | }
|
---|
499 | }
|
---|
500 |
|
---|
501 | /// Get child root of new class formed/split (undefined if called on base or add/remove node)
|
---|
502 | Elm get_child() const {
|
---|
503 | switch ( mode ) {
|
---|
504 | case ADDTO: case REMFROM: return as<AddTo>().child;
|
---|
505 | default: assertf(false, "invalid mode for get_child()");
|
---|
506 | }
|
---|
507 | }
|
---|
508 |
|
---|
509 | /// Gets acted-upon key for new class (root unless for add/remove node, child for add to/remove
|
---|
510 | /// from node, undefined otherwise)
|
---|
511 | Elm get_key() const {
|
---|
512 | switch ( mode ) {
|
---|
513 | case ADD: case REM: return as<Add>().root;
|
---|
514 | case ADDTO: case REMFROM: return as<AddTo>().child;
|
---|
515 | default: assertf(false, "invalid mode for get_key()");
|
---|
516 | }
|
---|
517 | }
|
---|
518 | };
|
---|
519 |
|
---|
520 | // Local Variables: //
|
---|
521 | // tab-width: 4 //
|
---|
522 | // mode: c++ //
|
---|
523 | // compile-command: "make install" //
|
---|
524 | // End: //
|
---|