source: src/Common/PersistentDisjointSet.h@ 184557e

new-env
Last change on this file since 184557e was 184557e, checked in by Aaron Moss <a3moss@…>, 7 years ago

First draft of persistent-hash-based TypeEnvironment

  • Property mode set to 100644
File size: 14.3 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// PersistentDisjointSet.h --
8//
9// Author : Aaron B. Moss
10// Created On : Wed Jun 13 16:31:00 2018
11// Last Modified By : Aaron B. Moss
12// Last Modified On : Wed Jun 13 16:31:00 2018
13// Update Count : 1
14//
15
16#pragma once
17
18#include <cassert>
19#include <functional>
20#include <unordered_map>
21#include <utility>
22
23#include "GC.h"
24
25/// Persistent disjoint-set data structure based on the persistent array in
26/// Conchon & Filliatre "A Persistent Union-Find Data Structure". Path
27/// compression is not performed (to lower cost of persistent rollback).
28/// Auxiliary lists are kept for efficient retrieval of class members.
29/// Find root should still operate in O(log k), for k the size of an
30/// equivalence class.
31
32template<typename Elm, typename Hash = std::hash<Elm>, typename Eq = std::equal_to<Elm>>
33class PersistentDisjointSet : public GC_Object {
34public:
35 /// Type of this class
36 using Self = PersistentDisjointSet<Elm, Hash, Eq>;
37
38 /// Types of version nodes
39 enum Mode {
40 BASE, ///< Root node of version tree
41 ADD, ///< Add key to set
42 REM, ///< Reverse add operation
43 ADDTO, ///< Merge one class root under another
44 REMFROM ///< Reverse addTo operation
45 };
46
47private:
48 /// Type of node height
49 using Height = unsigned char;
50
51 /// Disjoint-set node
52 struct Node {
53 Elm parent; ///< Parent node in equivalence class
54 Elm next; ///< Next node in equivalence class
55 Height height; ///< Tree height of the node
56
57 template<typename E>
58 Node(E&& e) : parent(e), next(std::forward<E>(e)), height(0) {}
59
60 template<typename E, typename F>
61 Node(E&& p, F&& n, Height h)
62 : parent(std::forward<E>(p)), next(std::forward<F>(n)), height(h) {}
63 };
64
65 /// Type of class map
66 using Base = std::unordered_map<Elm, Node, Hash, Eq>;
67
68 /// Node inserted into underlying map as new equivalence class
69 struct Add {
70 Self* base; ///< Modified map
71 Elm root; ///< Element added
72
73 template<typename E>
74 Add(Self* b, E&& r) : base(b), root(std::forward<E>(r)) {}
75 };
76
77 /// Two classes merged
78 struct AddTo {
79 Self* base; ///< Modified map
80 Elm root; ///< Root node
81 Elm child; ///< Child node, formerly root of own class
82 bool new_height; ///< Did the root node's height change?
83
84 template<typename R, typename C>
85 AddTo(Self* b, R&& r, C&& c, bool h)
86 : base(b), root(std::forward<R>(r)), child(std::forward<C>(c)), new_height(h) {}
87 };
88
89 /// Underlying storage
90 union Data {
91 char none;
92 Base base;
93 Add add;
94 AddTo add_to;
95
96 Data() : none('\0') {}
97 ~Data() {}
98 } data;
99
100 /// Type of version node
101 mutable Mode mode;
102
103 /// get mutable reference as T
104 template<typename T>
105 T& as() { return reinterpret_cast<T&>(data); }
106
107 /// get const reference as T
108 template<typename T>
109 const T& as() const { return reinterpret_cast<const T&>(data); }
110
111 /// get rvalue reference as T
112 template<typename T>
113 T&& take_as() { return std::move(as<T>()); }
114
115 /// initialize as T
116 template<typename T, typename... Args>
117 void init( Args&&... args ) {
118 new( &as<T>() ) T { std::forward<Args>(args)... };
119 }
120
121 /// reset according to current mode
122 void reset() {
123 switch( mode ) {
124 case BASE: as<Base>().~Base(); break;
125 case ADD: case REM: as<Add>().~Add(); break;
126 case ADDTO: case REMFROM: as<AddTo>().~AddTo(); break;
127 default: assertf(false, "invalid mode");
128 }
129 }
130
131 /// Non-initializing constructor; should call init() before use
132 PersistentDisjointSet( Mode m ) : data(), mode(m) {}
133
134 PersistentDisjointSet( Mode m, Base&& b ) : data(), mode(m) {
135 assertf(m == BASE, "invalid mode");
136 init<Base>(std::move(b));
137 }
138
139 template<typename R>
140 PersistentDisjointSet( Mode m, const Self* b, R&& r ) : data(), mode(m) {
141 assertf(m == ADD || m == REM, "invalid mode");
142 init<Add>(b, std::forward<R>(r));
143 }
144
145 template<typename R, typename C>
146 PersistentDisjointSet( Mode m, const Self* b, R&& r, C&& c, bool h ) : data(), mode(m) {
147 assertf(m == ADDTO || m == REMFROM, "invalid mode");
148 init<AddTo>(b, std::forward<R>(r), std::forward<C>(c), h);
149 }
150
151 /// Adds (also removes) graph edges.
152 /// * `from.parent` updated to `new_root`,
153 /// * `from.next` and `to.next` swapped (splices or un-splices class lists)
154 /// * `to.height` adjusted by change
155 template<typename R>
156 static void addEdge( Node& from, Node& to, R&& new_root, Height change ) {
157 from.parent = std::forward<R>(new_root);
158 std::swap(from.next, to.next);
159 to.height += change;
160 }
161
162protected:
163 void trace( const GC& gc ) const {
164 switch( mode ) {
165 case BASE: {
166 for (const auto& entry : as<Base>()) {
167 gc << entry.first;
168 }
169 return;
170 }
171 case ADD: case REM: {
172 const Add& self = as<Add>();
173 gc << self.base << self.root;
174 return;
175 }
176 case ADDTO: case REMFROM: {
177 const AddTo& self = as<AddTo>();
178 gc << self.base << self.root << self.child;
179 return;
180 }
181 default: assertf(false, "invalid mode");
182 }
183 }
184
185public:
186 using size_type = std::size_t;
187
188 using iterator = typename Base::const_iterator;
189
190 PersistentDisjointSet() : data(), mode(BASE) { init<Base>(); }
191
192 PersistentDisjointSet( const Self& o ) = delete;
193
194 Self& operator= ( const Self& o ) = delete;
195
196 ~PersistentDisjointSet() { reset(); }
197
198 /// reroot persistent data structure at current node
199 void reroot() const {
200 if ( mode == BASE ) return;
201
202 // reroot base
203 Self* mut_this = const_cast<Self*>(this);
204 Self* base = ( mode == ADD || mode == REM ) ?
205 mut_this->as<Add>().base :
206 mut_this->as<AddTo>().base;
207 base->reroot();
208 assertf(base->mode == BASE, "reroot results in base");
209
210 // take map out of base
211 Base base_map = base->take_as<Base>();
212 base->reset();
213
214 // switch base to inverse of self and mutate base map
215 switch ( mode ) {
216 case ADD: {
217 Add& self = mut_this->as<Add>();
218
219 base->init<Add>( mut_this, self.root );
220 base->mode = REM;
221
222 base_map.emplace( self.root, Node{ std::move(self.root) } );
223 } break;
224 case REM: {
225 Add& self = mut_this->as<Add>();
226
227 base->init<Add>( mut_this, self.root );
228 base->mode = ADD;
229
230 base_map.erase( self.root );
231 } break;
232 case ADDTO: {
233 AddTo& self = mut_this->as<AddTo>();
234
235 base->init<AddTo>( mut_this, self.root, self.child, self.new_height );
236 base->mode = REMFROM;
237
238 auto child_it = base_map.find( self.child );
239 auto root_it = base_map.find( self.root );
240 assertf(child_it != base_map.end() && root_it != base_map.end(),
241 "nodes must exist in base");
242 Node& child = child_it->second;
243 Node& root = root_it->second;
244
245 addEdge( child, root, std::move(self.root), Height(self.new_height) );
246 } break;
247 case REMFROM: {
248 AddTo& self = mut_this->as<AddTo>();
249
250 base->init<AddTo>( mut_this, self.root, self.child, self.new_height );
251 base->mode = ADDTO;
252
253 auto child_it = base_map.find( self.child );
254 auto root_it = base_map.find( self.root );
255 assertf(child_it != base_map.end() && root_it != base_map.end(),
256 "nodes must exist in base");
257 Node& child = child_it->second;
258 Node& root = root_it->second;
259
260 addEdge( child, root, std::move(self.child), Height(-1 * self.new_height) );
261 } break;
262 default: assertf(false, "invalid mode");
263 }
264
265 // set base map into self
266 mut_this->reset();
267 mut_this->init<Base>( std::move(base_map) );
268 mode = BASE;
269 }
270
271private:
272 /// Gets the base after rerooting at the current node
273 const Base& rerooted() const {
274 reroot();
275 return as<Base>();
276 }
277
278public:
279 /// true if the set of sets is empty
280 bool empty() const { return rerooted().empty(); }
281
282 /// Get number of entries in the map
283 size_type size() const { return rerooted().size(); }
284
285 /// Get begin iterator for map; may be invalidated by calls to non-iteration functions
286 /// or functions on other maps in the same chain
287 iterator begin() const { return rerooted().begin(); }
288
289 /// Get end iterator for map; may be invalidated by calls to non-iteration functions
290 /// or functions on other maps in the same chain
291 iterator end() const { return rerooted().end(); }
292
293 /// Check if value is present
294 size_type count(Elm i) const { return rerooted().count( i ); }
295
296 /// Finds root for element i, undefined behaviour if i is not present
297 Elm find(Elm i) const {
298 const Base& self = rerooted();
299
300 auto it = self.find( i );
301 while (true) {
302 assertf(it != self.end(), "find target not present");
303
304 if ( it->first == it->second.parent ) return it->first;
305
306 it = self.find( it->second.parent );
307 }
308 }
309
310 /// Finds root for element i, or default if i is not present
311 template<typename E>
312 Elm find_or_default(Elm i, E&& d) const {
313 const Base& self = rerooted();
314
315 auto it = self.find( i );
316 if ( it == self.end() ) return d;
317
318 while ( it->first != it->second.parent ) {
319 it = self.find( it->second.parent );
320
321 assertf(it != self.end(), "find target not present");
322 }
323 return it->first;
324 }
325
326 /// Adds fresh class including only one item; returns updated map
327 template<typename E>
328 Self* add(E&& i) {
329 reroot();
330
331 // transfer map to new node
332 Self* ret = new Self{ BASE, take_as<Base>() };
333 reset();
334
335 // set self to REM node
336 init<Add>( ret, i );
337 mode = REM;
338
339 // add element in returned map
340 Base& base_map = ret->as<Base>();
341 bool added = base_map.emplace( i, Node{ std::forward<E>(i) } ).second;
342 assertf(added, "added element already present in map");
343
344 return ret;
345 }
346
347 /// Merges two classes given by their roots; returns updated map.
348 /// If two classes have same height, `i` is new root.
349 Self* merge(Elm i, Elm j) {
350 reroot();
351
352 // transfer map to new node
353 Self* ret = new Self{ BASE, take_as<Base>() };
354 reset();
355
356 // find set nodes
357 Base& base_map = ret->as<Base>();
358 auto it = base_map.find( i );
359 auto jt = base_map.find( j );
360 assertf(it != base_map.end() && jt != base_map.end(), "nodes must exist in base");
361 Node& in = it->second;
362 Node& jn = jt->second;
363
364 // update returned map and set self to appropriate REMFROM node
365 if ( in.height < jn.height ) {
366 addEdge( in, jn, j, 0 );
367 init<AddTo>( ret, j, i, false );
368 } else if ( jn.height < in.height ) {
369 addEdge( jn, in, i, 0 );
370 init<AddTo>( ret, i, j, false );
371 } else /* if ( jn.height == in.height ) */ {
372 addEdge( jn, in, i, 1 );
373 init<AddTo>( ret, i, j, true );
374 }
375 mode = REMFROM;
376
377 return ret;
378 }
379
380private:
381 /// Removes the children of the tree rooted at `root` as `root_node`, returning a new
382 /// uninitialized node and editing `next_edit` to point toward this new node.
383 static Self* remove_children(Elm root, Node& root_node, Base& base_map, Self* next_edit) {
384 // Invariant: root.next is *always* a pointer to a leaf of the most-recently added
385 // subtree.
386 //
387 // Proof: By induction on height:
388 // * height == 0: root.next == root, trivially true
389 // * added.height < root.height: true by ind. hyp.
390 // * added.height == root.height: no node may have a child the same height, ergo
391 // property holds for added, therefore root.
392 //
393 // Corollary: next of most-recently-added subtree root is previously added subtree
394
395 // remove all subtrees
396 while ( root != root_node.next ) {
397 // find added child
398 auto it = base_map.find( root_node.next );
399 while ( it->second.parent != root ) it = base_map.find( it->second.parent );
400 Elm added = it->first;
401 Node& added_node = it->second;
402
403 // unlink subtree and set up previous map as ADDTO new map
404 std::swap( root_node.next, added_node.next );
405 Self* new_edit = new Self{ BASE };
406 next_edit->init<AddTo>( new_edit, root, added, false ); // assume unchanged root height
407 next_edit->mode = ADDTO;
408
409 // remove subtree children from map
410 next_edit = remove_children( added, added_node, base_map, new_edit );
411
412 // remove subtree root from map and set up previous map as ADD to new map
413 base_map.erase( added );
414 new_edit = new Self{ BASE };
415 next_edit->init<Add>( new_edit, added );
416 next_edit->mode = ADD;
417
418 next_edit = new_edit;
419 }
420
421 return next_edit;
422 }
423
424public:
425 /// Removes all elements of a class given by its root; returns updated map.
426 Self* remove_class(Elm root) {
427 reroot();
428
429 // remove map from self
430 Base base_map = take_as<Base>();
431 reset_as_base();
432
433 // find root node and remove children
434 auto it = base_map.find( root );
435 assertf(it != base_map.end(), "root node must exist in map");
436 Self* next_edit = remove_children( root, it->second, base_map, this);
437
438 // root is alone in class, remove from map
439 base_map.erase( root );
440 Self* ret = new Self{ BASE, std::move(base_map) };
441 // reinitialize previous node as ADD to new map
442 next_edit->init<Add>( ret, root );
443 next_edit->mode = ADD;
444
445 return ret;
446 }
447
448 /// Applies `f` to all members of a class containing `i` (undefined behaviour if not present).
449 /// `f` should take members by const reference or copy
450 template<typename F>
451 void apply_to_class(Elm i, F&& f) const {
452 const Base& self = rerooted();
453
454 Elm crnt = i;
455 do {
456 f( crnt );
457 auto it = self.find( crnt );
458 assertf( it != self.end(), "current node must exist in base" );
459 crnt = it->second.next;
460 } while ( crnt != i );
461 }
462
463 /// Get version node type
464 Mode get_mode() const { return mode; }
465
466 /// Get next version up the revision tree (self if base node)
467 const Self* get_base() const {
468 switch ( mode ) {
469 case BASE: return this;
470 case ADD: case REM: return as<Add>().base;
471 case ADDTO: case REMFROM: return as<AddTo>().base;
472 default: assertf(false, "invalid mode");
473 }
474 }
475
476 /// Get root of new class formed/removed/split (undefined if called on base)
477 Elm get_root() const {
478 switch ( mode ) {
479 case ADD: case REM: return as<Add>().root;
480 case ADDTO: case REMFROM: return as<AddTo>().root;
481 default: assertf(false, "invalid mode for get_root()");
482 }
483 }
484
485 /// Get child root of new class formed/split (undefined if called on base or add/remove node)
486 Elm get_child() const {
487 switch ( mode ) {
488 case ADDTO: case REMFROM: return as<AddTo>().child;
489 default: assertf(false, "invalid mode for get_child()");
490 }
491 }
492
493 /// Gets acted-upon key for new class (root unless for add/remove node, child for add to/remove
494 /// from node, undefined otherwise)
495 Elm get_key() const {
496 switch ( mode ) {
497 case ADD: case REM: return as<Add>().root;
498 case ADDTO: case REMFROM: return as<AddTo>().child;
499 default: assertf(false, "invalid mode for get_key()");
500 }
501 }
502};
503
504// Local Variables: //
505// tab-width: 4 //
506// mode: c++ //
507// compile-command: "make install" //
508// End: //
Note: See TracBrowser for help on using the repository browser.