Index: libcfa/src/concurrency/actor.hfa
===================================================================
--- libcfa/src/concurrency/actor.hfa	(revision 858350aeda11d90bc7dba1d54c8df962b4643ed5)
+++ libcfa/src/concurrency/actor.hfa	(revision f23d34db26cec6c6f32cf80f98f35530197c7526)
@@ -175,5 +175,7 @@
     volatile unsigned long long stamp;
     #ifdef STATS
-    size_t stolen_from;
+    size_t stolen_from, try_steal, stolen, failed_swaps, msgs_stolen;
+    unsigned long long processed;
+    size_t gulps;
     #endif
 };
@@ -181,12 +183,18 @@
     #ifdef STATS
     this.stolen_from = 0;
+    this.try_steal = 0;                             // attempts to steal
+    this.stolen = 0;                                // successful steals
+    this.processed = 0;                             // requests processed
+    this.gulps = 0;                                 // number of gulps
+    this.failed_swaps = 0;                          // steal swap failures
+    this.msgs_stolen = 0;                           // number of messages stolen
     #endif
     this.stamp = rdtscl();
 }
 
-#ifdef STATS
-unsigned int * stolen_arr;
-unsigned int * replaced_queue;
-#endif
+// #ifdef STATS
+// unsigned int * stolen_arr;
+// unsigned int * replaced_queue;
+// #endif
 thread worker {
     work_queue ** request_queues;
@@ -195,15 +203,10 @@
     unsigned int start, range;
     int id;
-    #ifdef STATS
-    size_t try_steal, stolen, failed_swaps, msgs_stolen;
-    unsigned long long processed;
-    size_t gulps;
-    #endif
 };
 
 #ifdef STATS
 // aggregate counters for statistics
-size_t total_tries = 0, total_stolen = 0, total_workers, all_gulps = 0,
-    total_failed_swaps = 0, all_processed = 0, __num_actors_stats = 0, all_msgs_stolen = 0;
+size_t __total_tries = 0, __total_stolen = 0, __total_workers, __all_gulps = 0,
+    __total_failed_swaps = 0, __all_processed = 0, __num_actors_stats = 0, __all_msgs_stolen = 0;
 #endif
 static inline void ?{}( worker & this, cluster & clu, work_queue ** request_queues, copy_queue * current_queue, executor * executor_,
@@ -216,12 +219,4 @@
     this.range = range;                             // size of worker's subrange of request_queues
     this.id = id;                                   // worker's id and index in array of workers
-    #ifdef STATS
-    this.try_steal = 0;                             // attempts to steal
-    this.stolen = 0;                                // successful steals
-    this.processed = 0;                             // requests processed
-    this.gulps = 0;                                 // number of gulps
-    this.failed_swaps = 0;                          // steal swap failures
-    this.msgs_stolen = 0;                           // number of messages stolen
-    #endif
 }
 
@@ -244,10 +239,10 @@
 static inline void ^?{}( worker & mutex this ) with(this) { 
     #ifdef STATS
-    __atomic_add_fetch(&all_gulps, gulps,__ATOMIC_SEQ_CST);
-    __atomic_add_fetch(&all_processed, processed,__ATOMIC_SEQ_CST);
-    __atomic_add_fetch(&all_msgs_stolen, msgs_stolen,__ATOMIC_SEQ_CST);
-    __atomic_add_fetch(&total_tries, try_steal, __ATOMIC_SEQ_CST);
-    __atomic_add_fetch(&total_stolen, stolen, __ATOMIC_SEQ_CST);
-    __atomic_add_fetch(&total_failed_swaps, failed_swaps, __ATOMIC_SEQ_CST);
+    __atomic_add_fetch(&__all_gulps, executor_->w_infos[id].gulps,__ATOMIC_SEQ_CST);
+    __atomic_add_fetch(&__all_processed, executor_->w_infos[id].processed,__ATOMIC_SEQ_CST);
+    __atomic_add_fetch(&__all_msgs_stolen, executor_->w_infos[id].msgs_stolen,__ATOMIC_SEQ_CST);
+    __atomic_add_fetch(&__total_tries, executor_->w_infos[id].try_steal, __ATOMIC_SEQ_CST);
+    __atomic_add_fetch(&__total_stolen, executor_->w_infos[id].stolen, __ATOMIC_SEQ_CST);
+    __atomic_add_fetch(&__total_failed_swaps, executor_->w_infos[id].failed_swaps, __ATOMIC_SEQ_CST);
 
     // per worker steal stats (uncomment alongside the lock above this routine to print)
@@ -281,7 +276,7 @@
     
     #ifdef STATS
-    stolen_arr = aalloc( nrqueues );
-    replaced_queue = aalloc( nrqueues );
-    total_workers = nworkers;
+    // stolen_arr = aalloc( nrqueues );
+    // replaced_queue = aalloc( nrqueues );
+    __total_workers = nworkers;
     #endif
 
@@ -350,6 +345,6 @@
         misses += worker_req_queues[i]->missed;
     }
-    adelete( stolen_arr );
-    adelete( replaced_queue );
+    // adelete( stolen_arr );
+    // adelete( replaced_queue );
     #endif
 
@@ -364,11 +359,11 @@
     #ifdef STATS
     printf("    Actor System Stats:\n");
-    printf("\tActors Created:\t\t\t\t%lu\n\tMessages Sent:\t\t\t\t%lu\n", __num_actors_stats, all_processed);
-    size_t avg_gulps = all_gulps == 0 ? 0 : all_processed / all_gulps;
-    printf("\tGulps:\t\t\t\t\t%lu\n\tAverage Gulp Size:\t\t\t%lu\n\tMissed gulps:\t\t\t\t%lu\n", all_gulps, avg_gulps, misses);
+    printf("\tActors Created:\t\t\t\t%lu\n\tMessages Sent:\t\t\t\t%lu\n", __num_actors_stats, __all_processed);
+    size_t avg_gulps = __all_gulps == 0 ? 0 : __all_processed / __all_gulps;
+    printf("\tGulps:\t\t\t\t\t%lu\n\tAverage Gulp Size:\t\t\t%lu\n\tMissed gulps:\t\t\t\t%lu\n", __all_gulps, avg_gulps, misses);
     printf("\tSteal attempts:\t\t\t\t%lu\n\tSteals:\t\t\t\t\t%lu\n\tSteal failures (no candidates):\t\t%lu\n\tSteal failures (failed swaps):\t\t%lu\n", 
-        total_tries, total_stolen, total_tries - total_stolen - total_failed_swaps, total_failed_swaps);
-    size_t avg_steal = total_stolen == 0 ? 0 : all_msgs_stolen / total_stolen;
-    printf("\tMessages stolen:\t\t\t%lu\n\tAverage steal size:\t\t\t%lu\n", all_msgs_stolen, avg_steal);
+        __total_tries, __total_stolen, __total_tries - __total_stolen - __total_failed_swaps, __total_failed_swaps);
+    size_t avg_steal = __total_stolen == 0 ? 0 : __all_msgs_stolen / __total_stolen;
+    printf("\tMessages stolen:\t\t\t%lu\n\tAverage steal size:\t\t\t%lu\n", __all_msgs_stolen, avg_steal);
     #endif
         
@@ -443,6 +438,7 @@
 
 static inline void check_message( message & this ) {
+    CFA_DEBUG( this.allocation_ = Finished; )       // NOTE(review): overwrites allocation_ before the switch below reads it, so (presumably in debug builds) the Delete/Destroy cases can never be selected -- confirm intended
     switch ( this.allocation_ ) {						// analyze message status
-        case Nodelete: CFA_DEBUG( this.allocation_ = Finished; ) break;
+        case Nodelete: break;
         case Delete: delete( &this ); break;
         case Destroy: ^?{}(this); break;
@@ -512,11 +508,11 @@
         curr_steal_queue = try_swap_queues( this, i + vic_start, swap_idx );
         if ( curr_steal_queue ) {
-            msgs_stolen += curr_steal_queue->c_queue->count;
-            stolen++;
-            __atomic_add_fetch(&executor_->w_infos[victim_id].stolen_from, 1, __ATOMIC_RELAXED);
-            replaced_queue[swap_idx]++;
-            __atomic_add_fetch(&stolen_arr[ i + vic_start ], 1, __ATOMIC_RELAXED);
+            executor_->w_infos[id].msgs_stolen += curr_steal_queue->c_queue->count;
+            executor_->w_infos[id].stolen++;
+            // __atomic_add_fetch(&executor_->w_infos[victim_id].stolen_from, 1, __ATOMIC_RELAXED);
+            // replaced_queue[swap_idx]++;
+            // __atomic_add_fetch(&stolen_arr[ i + vic_start ], 1, __ATOMIC_RELAXED);
         } else {
-            failed_swaps++;
+            executor_->w_infos[id].failed_swaps++;
         }
         #else
@@ -554,10 +550,10 @@
 
 void main( worker & this ) with(this) {
-    #ifdef STATS
-    for ( i; executor_->nrqueues ) {
-        replaced_queue[i] = 0;
-        __atomic_store_n( &stolen_arr[i], 0, __ATOMIC_SEQ_CST );
-    }
-    #endif
+    // #ifdef STATS
+    // for ( i; executor_->nrqueues ) {
+    //     replaced_queue[i] = 0;
+    //     __atomic_store_n( &stolen_arr[i], 0, __ATOMIC_SEQ_CST );
+    // }
+    // #endif
 
     // threshold of empty queues we see before we go stealing
@@ -586,5 +582,5 @@
         transfer( *curr_work_queue, &current_queue );
         #ifdef STATS
-        gulps++;
+        executor_->w_infos[id].gulps++;
         #endif // STATS
         #ifdef __STEAL
@@ -598,5 +594,5 @@
             
             #ifdef STATS
-            try_steal++;
+            executor_->w_infos[id].try_steal++;
             #endif // STATS
             
@@ -607,5 +603,5 @@
         while ( ! isEmpty( *current_queue ) ) {
             #ifdef STATS
-            processed++;
+            executor_->w_infos[id].processed++;
             #endif
             &req = &remove( *current_queue );
@@ -633,5 +629,18 @@
 }
 
+static inline void __reset_stats() {                // zero the aggregate statistics counters; does nothing unless STATS is defined
+    #ifdef STATS
+    __total_tries = 0;                              // summed per-worker steal attempts
+    __total_stolen = 0;                             // summed per-worker successful steals
+    __all_gulps = 0;                                // summed per-worker gulps
+    __total_failed_swaps = 0;                       // summed per-worker steal swap failures
+    __all_processed = 0;                            // summed per-worker requests processed
+    __num_actors_stats = 0;                         // reported as "Actors Created" in the stats printout
+    __all_msgs_stolen = 0;                          // summed per-worker messages stolen
+    #endif                                          // __total_workers is not reset here; it is assigned from nworkers elsewhere
+}
+
 static inline void start_actor_system( size_t num_thds ) {
+    __reset_stats();
     __actor_executor_thd = active_thread();
     __actor_executor_ = alloc();
@@ -639,5 +648,5 @@
 }
 
-// TODO: potentially getting revisit number of processors
+// TODO: potentially revisit getting number of processors
 //  ( currently the value stored in active_cluster()->procs.total is often stale 
 //  and doesn't reflect how many procs are allocated )
@@ -646,4 +655,5 @@
 
 static inline void start_actor_system( executor & this ) {
+    __reset_stats();
     __actor_executor_thd = active_thread();
     __actor_executor_ = &this;
