source: src/Common/PersistentDisjointSet.h@ eff03a94

new-env
Last change on this file since eff03a94 was d318a18, checked in by Aaron Moss <a3moss@…>, 7 years ago

Fix assorted memory bugs with persistent-array environment

  • Property mode set to 100644
File size: 14.8 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// PersistentDisjointSet.h --
8//
9// Author : Aaron B. Moss
10// Created On : Wed Jun 13 16:31:00 2018
11// Last Modified By : Aaron B. Moss
12// Last Modified On : Wed Jun 13 16:31:00 2018
13// Update Count : 1
14//
15
16#pragma once
17
18#include <cassert>
19#include <functional>
20#include <unordered_map>
21#include <utility>
22
23#include "GC.h"
24
25/// Persistent disjoint-set data structure based on the persistent array in
26/// Conchon & Filliatre "A Persistent Union-Find Data Structure". Path
27/// compression is not performed (to lower cost of persistent rollback).
28/// Auxilliary lists are kept for efficient retrieval of class members.
29/// Find root should still operate in O(log k), for k the size of an
30/// equivalence class.
31
32template<typename Elm, typename Hash = std::hash<Elm>, typename Eq = std::equal_to<Elm>>
33class PersistentDisjointSet : public GC_Object {
34public:
35 /// Type of this class
36 using Self = PersistentDisjointSet<Elm, Hash, Eq>;
37
38 /// Types of version nodes
39 enum Mode {
40 BASE, ///< Root node of version tree
41 ADD, ///< Add key to set
42 REM, ///< Reverse add operation
43 ADDTO, ///< Merge one class root under another
44 REMFROM ///< Reverse addTo operation
45 };
46
47private:
48 /// Type of node height
49 using Height = unsigned char;
50
51 /// Disjoint-set node
52 struct Node {
53 Elm parent; ///< Parent node in equivalence class
54 Elm next; ///< Next node in equivalence class
55 Height height; ///< Tree height of the node
56
57 template<typename E>
58 Node(E&& e) : parent(e), next(std::forward<E>(e)), height(0) {}
59
60 template<typename E, typename F>
61 Node(E&& p, F&& n, Height h)
62 : parent(std::forward<E>(p)), next(std::forward<F>(n)), height(h) {}
63 };
64
65 /// Type of class map
66 using Base = std::unordered_map<Elm, Node, Hash, Eq>;
67
68 /// Node inserted into underlying map as new equivalence class
69 struct Add {
70 Self* base; ///< Modified map
71 Elm root; ///< Element added
72
73 template<typename E>
74 Add(Self* b, E&& r) : base(b), root(std::forward<E>(r)) {}
75 };
76
77 /// Two classes merged
78 struct AddTo {
79 Self* base; ///< Modified map
80 Elm root; ///< Root node
81 Elm child; ///< Child node, formerly root of own class
82 bool new_height; ///< Did the root node's height change?
83
84 template<typename R, typename C>
85 AddTo(Self* b, R&& r, C&& c, bool h)
86 : base(b), root(std::forward<R>(r)), child(std::forward<C>(c)), new_height(h) {}
87 };
88
89 /// Underlying storage
90 union Data {
91 char none;
92 Base base;
93 Add add;
94 AddTo add_to;
95
96 Data() : none('\0') {}
97 ~Data() {}
98 } data;
99
100 /// Type of version node
101 mutable Mode mode;
102
103 /// get mutable reference as T
104 template<typename T>
105 T& as() { return reinterpret_cast<T&>(data); }
106
107 /// get const reference as T
108 template<typename T>
109 const T& as() const { return reinterpret_cast<const T&>(data); }
110
111 /// get rvalue reference as T
112 template<typename T>
113 T&& take_as() { return std::move(as<T>()); }
114
115 /// initialize as T
116 template<typename T, typename... Args>
117 void init( Args&&... args ) {
118 new( &as<T>() ) T { std::forward<Args>(args)... };
119 }
120
121 /// reset according to current mode
122 void reset() {
123 switch( mode ) {
124 case BASE: as<Base>().~Base(); break;
125 case ADD: case REM: as<Add>().~Add(); break;
126 case ADDTO: case REMFROM: as<AddTo>().~AddTo(); break;
127 default: assertf(false, "invalid mode");
128 }
129 }
130
131 /// reset as base
132 void reset_as_base() {
133 assertf( mode == BASE, "can only reset_as_base() on BASE" );
134 as<Base>().~Base();
135 }
136
137 /// Non-initializing constructor; should call init() before use
138 PersistentDisjointSet( Mode m ) : data(), mode(m) {}
139
140 PersistentDisjointSet( Mode m, Base&& b ) : data(), mode(m) {
141 assertf(m == BASE, "invalid mode");
142 init<Base>(std::move(b));
143 }
144
145 template<typename R>
146 PersistentDisjointSet( Mode m, const Self* b, R&& r ) : data(), mode(m) {
147 assertf(m == ADD || m == REM, "invalid mode");
148 init<Add>(b, std::forward<R>(r));
149 }
150
151 template<typename R, typename C>
152 PersistentDisjointSet( Mode m, const Self* b, R&& r, C&& c, bool h ) : data(), mode(m) {
153 assertf(m == ADDTO || m == REMFROM, "invalid mode");
154 init<AddTo>(b, std::forward<R>(r), std::forward<C>(c), h);
155 }
156
157 /// Adds (also removes) graph edges.
158 /// * `from.parent` updated to `new_root`,
159 /// * `from.next` and `to.next` swapped (splices or un-splices class lists)
160 /// * `to.height` adjusted by change
161 template<typename R>
162 static void addEdge( Node& from, Node& to, R&& new_root, Height change ) {
163 from.parent = std::forward<R>(new_root);
164 std::swap(from.next, to.next);
165 to.height += change;
166 }
167
168protected:
169 void trace( const GC& gc ) const {
170 switch( mode ) {
171 case BASE: {
172 for (const auto& entry : as<Base>()) {
173 gc.maybe_trace( entry.first );
174 }
175 return;
176 }
177 case ADD: case REM: {
178 const Add& self = as<Add>();
179 gc << self.base;
180 gc.maybe_trace( self.root );
181 return;
182 }
183 case ADDTO: case REMFROM: {
184 const AddTo& self = as<AddTo>();
185 gc << self.base;
186 gc.maybe_trace( self.root, self.child );
187 return;
188 }
189 default: assertf(false, "invalid mode");
190 }
191 }
192
193public:
194 using size_type = std::size_t;
195
196 using iterator = typename Base::const_iterator;
197
198 PersistentDisjointSet() : data(), mode(BASE) { init<Base>(); }
199
200 PersistentDisjointSet( const Self& o ) = delete;
201
202 Self& operator= ( const Self& o ) = delete;
203
204 ~PersistentDisjointSet() { reset(); }
205
206 /// reroot persistent data structure at current node
207 void reroot() const {
208 if ( mode == BASE ) return;
209
210 // reroot base
211 Self* mut_this = const_cast<Self*>(this);
212 Self* base = ( mode == ADD || mode == REM ) ?
213 mut_this->as<Add>().base :
214 mut_this->as<AddTo>().base;
215 base->reroot();
216 assertf(base->mode == BASE, "reroot results in base");
217
218 // take map out of base
219 Base base_map = base->take_as<Base>();
220 base->reset_as_base();
221
222 // switch base to inverse of self and mutate base map
223 switch ( mode ) {
224 case ADD: {
225 Add& self = mut_this->as<Add>();
226
227 base->init<Add>( mut_this, self.root );
228 base->mode = REM;
229
230 base_map.emplace( self.root, Node{ std::move(self.root) } );
231 } break;
232 case REM: {
233 Add& self = mut_this->as<Add>();
234
235 base->init<Add>( mut_this, self.root );
236 base->mode = ADD;
237
238 base_map.erase( self.root );
239 } break;
240 case ADDTO: {
241 AddTo& self = mut_this->as<AddTo>();
242
243 base->init<AddTo>( mut_this, self.root, self.child, self.new_height );
244 base->mode = REMFROM;
245
246 auto child_it = base_map.find( self.child );
247 auto root_it = base_map.find( self.root );
248 assertf(child_it != base_map.end() && root_it != base_map.end(),
249 "nodes must exist in base");
250 Node& child = child_it->second;
251 Node& root = root_it->second;
252
253 addEdge( child, root, std::move(self.root), Height(self.new_height) );
254 } break;
255 case REMFROM: {
256 AddTo& self = mut_this->as<AddTo>();
257
258 base->init<AddTo>( mut_this, self.root, self.child, self.new_height );
259 base->mode = ADDTO;
260
261 auto child_it = base_map.find( self.child );
262 auto root_it = base_map.find( self.root );
263 assertf(child_it != base_map.end() && root_it != base_map.end(),
264 "nodes must exist in base");
265 Node& child = child_it->second;
266 Node& root = root_it->second;
267
268 addEdge( child, root, std::move(self.child), Height(-1 * self.new_height) );
269 } break;
270 default: assertf(false, "invalid mode");
271 }
272
273 // set base map into self
274 mut_this->reset();
275 mut_this->init<Base>( std::move(base_map) );
276 mode = BASE;
277 }
278
279private:
280 /// Gets the base after rerooting at the current node
281 const Base& rerooted() const {
282 reroot();
283 assertf(mode == BASE, "reroot results in base");
284 return as<Base>();
285 }
286
287public:
288 /// true if the set of sets is empty
289 bool empty() const { return rerooted().empty(); }
290
291 /// Get number of entries in the map
292 size_type size() const { return rerooted().size(); }
293
294 /// Get begin iterator for map; may be invalidated by calls to non-iteration functions
295 /// or functions on other maps in the same chain
296 iterator begin() const { return rerooted().begin(); }
297
298 /// Get end iterator for map; may be invalidated by calls to non-iteration functions
299 /// or functions on other maps in the same chain
300 iterator end() const { return rerooted().end(); }
301
302 /// Check if value is present
303 size_type count(Elm i) const { return rerooted().count( i ); }
304
305 /// Finds root for element i, undefined behaviour if i is not present
306 Elm find(Elm i) const {
307 const Base& self = rerooted();
308
309 auto it = self.find( i );
310 while (true) {
311 assertf(it != self.end(), "find target not present");
312
313 if ( it->first == it->second.parent ) return it->first;
314
315 it = self.find( it->second.parent );
316 }
317 }
318
319 /// Finds root for element i, or default if i is not present
320 template<typename E>
321 Elm find_or_default(Elm i, E&& d) const {
322 const Base& self = rerooted();
323
324 auto it = self.find( i );
325 if ( it == self.end() ) return d;
326
327 while ( it->first != it->second.parent ) {
328 it = self.find( it->second.parent );
329
330 assertf(it != self.end(), "find target not present");
331 }
332 return it->first;
333 }
334
335 /// Adds fresh class including only one item; returns updated map (or self if no change)
336 template<typename E>
337 Self* add(E&& i) {
338 reroot();
339
340 // add new element to node
341 Base base_map = take_as<Base>();
342 bool added = base_map.emplace( i, Node{ i } ).second;
343
344 // fail early on node already present
345 if ( ! added ) {
346 as<Base>() = std::move(base_map);
347 return this;
348 }
349
350 // make new return node and reset self as REM node
351 Self* ret = new Self{ BASE, std::move(base_map) };
352 reset_as_base();
353 init<Add>( ret, i );
354 mode = REM;
355
356 return ret;
357 }
358
359 /// Merges two classes given by their roots; returns updated map.
360 /// If two classes have same height, `i` is new root.
361 Self* merge(Elm i, Elm j) {
362 reroot();
363
364 // transfer map to new node
365 Self* ret = new Self{ BASE, take_as<Base>() };
366 reset_as_base();
367
368 // find set nodes
369 Base& base_map = ret->as<Base>();
370 auto it = base_map.find( i );
371 auto jt = base_map.find( j );
372 assertf(it != base_map.end() && jt != base_map.end(), "nodes must exist in base");
373 Node& in = it->second;
374 Node& jn = jt->second;
375
376 // update returned map and set self to appropriate REMFROM node
377 if ( in.height < jn.height ) {
378 addEdge( in, jn, j, 0 );
379 init<AddTo>( ret, j, i, false );
380 } else if ( jn.height < in.height ) {
381 addEdge( jn, in, i, 0 );
382 init<AddTo>( ret, i, j, false );
383 } else /* if ( jn.height == in.height ) */ {
384 addEdge( jn, in, i, 1 );
385 init<AddTo>( ret, i, j, true );
386 }
387 mode = REMFROM;
388
389 return ret;
390 }
391
392private:
393 /// Removes the children of the tree rooted at `root` as `root_node`, returning a new
394 /// uninitialized node and editing `next_edit` to point toward this new node.
395 static Self* remove_children(Elm root, Node& root_node, Base& base_map, Self* next_edit) {
396 // Invariant: root.next is *always* a pointer to a leaf of the most-recently added
397 // subtree.
398 //
399 // Proof: By induction on height:
400 // * height == 0: root.next == root, trivially true
401 // * added.height < root.height: true by ind. hyp.
402 // * added.height == root.height: no node may have a child the same height, ergo
403 // property holds for added, therefore root.
404 //
405 // Corollary: next of most-recently-added subtree root is previously added subtree
406
407 // remove all subtrees
408 while ( root != root_node.next ) {
409 // find added child
410 auto it = base_map.find( root_node.next );
411 while ( it->second.parent != root ) it = base_map.find( it->second.parent );
412 Elm added = it->first;
413 Node& added_node = it->second;
414
415 // unlink subtree and set up previous map as ADDTO new map
416 std::swap( root_node.next, added_node.next );
417 Self* new_edit = new Self{ BASE };
418 next_edit->init<AddTo>( new_edit, root, added, false ); // assume unchanged root height
419 next_edit->mode = ADDTO;
420
421 // remove subtree children from map
422 next_edit = remove_children( added, added_node, base_map, new_edit );
423
424 // remove subtree root from map and set up previous map as ADD to new map
425 base_map.erase( added );
426 new_edit = new Self{ BASE };
427 next_edit->init<Add>( new_edit, added );
428 next_edit->mode = ADD;
429
430 next_edit = new_edit;
431 }
432
433 return next_edit;
434 }
435
436public:
437 /// Removes all elements of a class given by its root; returns updated map.
438 Self* remove_class(Elm root) {
439 reroot();
440
441 // remove map from self
442 Base base_map = take_as<Base>();
443 reset_as_base();
444
445 // find root node and remove children
446 auto it = base_map.find( root );
447 assertf(it != base_map.end(), "root node must exist in map");
448 Self* next_edit = remove_children( root, it->second, base_map, this);
449
450 // root is alone in class, remove from map
451 base_map.erase( root );
452 Self* ret = new Self{ BASE, std::move(base_map) };
453 // reinitialize previous node as ADD to new map
454 next_edit->init<Add>( ret, root );
455 next_edit->mode = ADD;
456
457 return ret;
458 }
459
460 /// Applies `f` to all members of a class containing `i` (undefined behaviour if not present).
461 /// `f` should take members by const reference or copy
462 template<typename F>
463 void for_class(Elm i, F&& f) const {
464 const Base& self = rerooted();
465
466 // exit early if class not present
467 auto it = self.find( i );
468 if ( it == self.end() ) return;
469
470 // apply f to each member of class
471 f( i );
472 for ( Elm crnt = it->second.next; crnt != i; crnt = it->second.next ) {
473 f( crnt );
474 it = self.find( crnt );
475 assertf( it != self.end(), "current node must exist in base" );
476 }
477 }
478
479 /// Get version node type
480 Mode get_mode() const { return mode; }
481
482 /// Get next version up the revision tree (self if base node)
483 const Self* get_base() const {
484 switch ( mode ) {
485 case BASE: return this;
486 case ADD: case REM: return as<Add>().base;
487 case ADDTO: case REMFROM: return as<AddTo>().base;
488 default: assertf(false, "invalid mode");
489 }
490 }
491
492 /// Get root of new class formed/removed/split (undefined if called on base)
493 Elm get_root() const {
494 switch ( mode ) {
495 case ADD: case REM: return as<Add>().root;
496 case ADDTO: case REMFROM: return as<AddTo>().root;
497 default: assertf(false, "invalid mode for get_root()");
498 }
499 }
500
501 /// Get child root of new class formed/split (undefined if called on base or add/remove node)
502 Elm get_child() const {
503 switch ( mode ) {
504 case ADDTO: case REMFROM: return as<AddTo>().child;
505 default: assertf(false, "invalid mode for get_child()");
506 }
507 }
508
509 /// Gets acted-upon key for new class (root unless for add/remove node, child for add to/remove
510 /// from node, undefined otherwise)
511 Elm get_key() const {
512 switch ( mode ) {
513 case ADD: case REM: return as<Add>().root;
514 case ADDTO: case REMFROM: return as<AddTo>().child;
515 default: assertf(false, "invalid mode for get_key()");
516 }
517 }
518};
519
520// Local Variables: //
521// tab-width: 4 //
522// mode: c++ //
523// compile-command: "make install" //
524// End: //
Note: See TracBrowser for help on using the repository browser.