Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ libcfa/src/bits/defs.hfa	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -26,4 +26,5 @@
 
 typedef void (* fptr_t)();
+typedef uint64_t func_id_t;
 typedef int_fast16_t __lock_size_t;
 
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ libcfa/src/concurrency/invoke.h	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -159,4 +159,7 @@
 		// last function that acquired monitors
 		fptr_t func;
+
+		// hash-based function identity for cross-TU matching
+		func_id_t func_id;
 	};
 
@@ -288,10 +291,12 @@
 			(this.size){0};
 			(this.func){NULL};
-		}
-
-		static inline void ?{}(__monitor_group_t & this, struct monitor$ ** data, __lock_size_t size, fptr_t func) {
+			(this.func_id){0};
+		}
+
+		static inline void ?{}(__monitor_group_t & this, struct monitor$ ** data, __lock_size_t size, fptr_t func, func_id_t func_id) {
 			(this.data){data};
 			(this.size){size};
 			(this.func){func};
+			(this.func_id){func_id};
 		}
 
@@ -299,5 +304,12 @@
 			if( (lhs.data != 0) != (rhs.data != 0) ) return false;
 			if( lhs.size != rhs.size ) return false;
-			if( lhs.func != rhs.func ) return false;
+			// Use hash-based comparison when both sides have a valid func_id,
+			// otherwise fall back to function pointer comparison (for library code
+			// like join() that cannot compute the hash at compile time).
+			if( lhs.func_id != 0 && rhs.func_id != 0 ) {
+				if( lhs.func_id != rhs.func_id ) return false;
+			} else {
+				if( lhs.func != rhs.func ) return false;
+			}
 
 			// Check that all the monitors match
@@ -314,4 +326,5 @@
 			lhs.size = rhs.size;
 			lhs.func = rhs.func;
+			lhs.func_id = rhs.func_id;
 		}
 	}
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -517,5 +517,5 @@
 	doregister(curr_cluster, this);
 
-	monitors{ &self_mon_p, 1, (fptr_t)0 };
+	monitors{ &self_mon_p, 1, (fptr_t)0, (func_id_t)0 };
 }
 
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ libcfa/src/concurrency/monitor.cfa	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -140,5 +140,5 @@
 }
 
-static void __dtor_enter( monitor$ * this, fptr_t func, bool join ) {
+static void __dtor_enter( monitor$ * this, fptr_t func, func_id_t func_id, bool join ) {
 	thread$ * thrd = active_thread();
 	#if defined( __CFA_WITH_VERIFY__ )
@@ -190,5 +190,5 @@
 	__lock_size_t count = 1;
 	monitor$ ** monitors = &this;
-	__monitor_group_t group = { &this, 1, func };
+	__monitor_group_t group = { &this, 1, func, func_id };
 	if ( is_accepted( this, group) ) {
 		__cfaabi_dbg_print_safe( "Kernel : mon accepts dtor, block and signal it \n" );
@@ -341,5 +341,5 @@
 // Ctor for monitor guard
 // Sorts monitors before entering
-void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count, fptr_t func ) libcfa_public {
+void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count, fptr_t func, func_id_t func_id ) libcfa_public {
 	thread$ * thrd = active_thread();
 
@@ -355,10 +355,10 @@
 
 	// Update thread context (needed for conditions)
-	(thrd->monitors){m, count, func};
+	(thrd->monitors){m, count, func, func_id};
 
 	// __cfaabi_dbg_print_safe( "MGUARD : enter %d\n", count);
 
 	// Enter the monitors in order
-	__monitor_group_t group = {this.m, this.count, func};
+	__monitor_group_t group = {this.m, this.count, func, func_id};
 	enter( group );
 
@@ -367,5 +367,5 @@
 
 void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count ) libcfa_public {
-	this{ m, count, 0p };
+	this{ m, count, 0p, 0 };
 }
 
@@ -386,5 +386,5 @@
 // Ctor for monitor guard
 // Sorts monitors before entering
-void ?{}( monitor_dtor_guard_t & this, monitor$ * m [], fptr_t func, bool join ) libcfa_public {
+void ?{}( monitor_dtor_guard_t & this, monitor$ * m [], fptr_t func, func_id_t func_id, bool join ) libcfa_public {
 	// optimization
 	thread$ * thrd = active_thread();
@@ -400,7 +400,7 @@
 
 	// Update thread context (needed for conditions)
-	(thrd->monitors){m, 1, func};
-
-	__dtor_enter( this.m, func, join );
+	(thrd->monitors){m, 1, func, func_id};
+
+	__dtor_enter( this.m, func, func_id, join );
 }
 
Index: libcfa/src/concurrency/monitor.hfa
===================================================================
--- libcfa/src/concurrency/monitor.hfa	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ libcfa/src/concurrency/monitor.hfa	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -48,5 +48,5 @@
 };
 
-void ?{}( monitor_guard_t & this, monitor$ ** m, __lock_size_t count, void (*func)() );
+void ?{}( monitor_guard_t & this, monitor$ ** m, __lock_size_t count, void (*func)(), func_id_t func_id );
 void ?{}( monitor_guard_t & this, monitor$ ** m, __lock_size_t count );
 void ^?{}( monitor_guard_t & this );
@@ -58,5 +58,5 @@
 };
 
-void ?{}( monitor_dtor_guard_t & this, monitor$ ** m, void (*func)(), bool join );
+void ?{}( monitor_dtor_guard_t & this, monitor$ ** m, void (*func)(), func_id_t func_id, bool join );
 void ^?{}( monitor_dtor_guard_t & this );
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ libcfa/src/concurrency/thread.cfa	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -61,5 +61,5 @@
 
 	doregister(curr_cluster, this);
-	monitors{ &self_mon_p, 1, (fptr_t)0 };
+	monitors{ &self_mon_p, 1, (fptr_t)0, (func_id_t)0 };
 }
 
@@ -93,5 +93,5 @@
     | { EHM_DEFAULT_VTABLE(ThreadCancelled(T)); })
 void ?{}( thread_dtor_guard_t & this,
-		T & thrd, void(*cancelHandler)(ThreadCancelled(T) &)) {
+		T & thrd, func_id_t func_id, void(*cancelHandler)(ThreadCancelled(T) &)) {
 	monitor$ * m = get_monitor(thrd);
 	thread$ * desc = get_thread(thrd);
@@ -100,5 +100,6 @@
 	void (*dtor)(T& mutex this) = ^?{};
 	bool join = cancelHandler != (void(*)(ThreadCancelled(T)&))0;
-	(this.mg){&m, (void(*)())dtor, join};
+	this.func_id = func_id;
+	(this.mg){&m, (void(*)())dtor, func_id, join};
 
 
@@ -172,5 +173,5 @@
 	| { EHM_DEFAULT_VTABLE(ThreadCancelled(T)); })
 T & join( T & this ) {
-	thread_dtor_guard_t guard = { this, defaultResumptionHandler };
+	thread_dtor_guard_t guard = { this, (func_id_t)0, defaultResumptionHandler };
 	return this;
 }
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ libcfa/src/concurrency/thread.hfa	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -83,9 +83,10 @@
 struct thread_dtor_guard_t {
 	monitor_dtor_guard_t mg;
+	func_id_t func_id;
 };
 
 forall( T & | is_thread(T) | IS_EXCEPTION(ThreadCancelled(T))
 	| { EHM_DEFAULT_VTABLE(ThreadCancelled(T)); })
-void ?{}( thread_dtor_guard_t & this, T & thrd, void(*)(ThreadCancelled(T) &) );
+void ?{}( thread_dtor_guard_t & this, T & thrd, func_id_t func_id, void(*)(ThreadCancelled(T) &) );
 void ^?{}( thread_dtor_guard_t & this );
 
Index: src/Concurrency/Keywords.cpp
===================================================================
--- src/Concurrency/Keywords.cpp	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ src/Concurrency/Keywords.cpp	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -29,4 +29,5 @@
 #include "Common/Examine.hpp"
 #include "Common/Utility.hpp"
+#include "Concurrency/MutexFuncHash.hpp"
 #include "Common/UniqueName.hpp"
 #include "ControlStruct/LabelGenerator.hpp"
@@ -1077,5 +1078,5 @@
 
 	// In reverse order:
-	// monitor_dtor_guard_t __guard = { __monitor, func, false };
+	// monitor_dtor_guard_t __guard = { __monitor, func, func_id, false };
 	mutBody->push_front(
 		new ast::DeclStmt( location, new ast::ObjectDecl(
@@ -1094,4 +1095,6 @@
 							generic_func,
 							ast::ExplicitCast ) ),
+					new ast::SingleInit( location,
+						Concurrency::hashMangleExpr( location, func ) ),
 					new ast::SingleInit( location,
 						ast::ConstantExpr::from_bool( location, false ) ),
@@ -1175,4 +1178,6 @@
 						ast::ExplicitCast
 					) ),
+					new ast::SingleInit( location,
+						Concurrency::hashMangleExpr( location, func ) ),
 				},
 				{},
@@ -1475,5 +1480,5 @@
 
 ast::CompoundStmt * MutexKeyword::addThreadDtorStatements(
-		const ast::FunctionDecl*, const ast::CompoundStmt * body,
+		const ast::FunctionDecl* func, const ast::CompoundStmt * body,
 		const std::vector<const ast::DeclWithType * > & args ) {
 	assert( args.size() == 1 );
@@ -1487,5 +1492,5 @@
 	const CodeLocation & location = mutBody->location;
 
-	// thread_dtor_guard_t __guard = { this, intptr( 0 ) };
+	// thread_dtor_guard_t __guard = { this, func_id, intptr( 0 ) };
 	mutBody->push_front( new ast::DeclStmt(
 		location,
@@ -1500,4 +1505,6 @@
 						new ast::CastExpr( location,
 							new ast::VariableExpr( location, arg ), argType ) ),
+					new ast::SingleInit( location,
+						Concurrency::hashMangleExpr( location, func ) ),
 					new ast::SingleInit(
 						location,
Index: src/Concurrency/MutexFuncHash.hpp
===================================================================
--- src/Concurrency/MutexFuncHash.hpp	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
+++ src/Concurrency/MutexFuncHash.hpp	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -0,0 +1,50 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// MutexFuncHash.hpp -- Hash utility for mutex function identity.
+//
+// Author           : Matthew Au-Yeung
+// Created On       : Tue Jan 28 2026
+//
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "AST/Decl.hpp"
+#include "AST/Expr.hpp"
+#include "AST/Type.hpp"
+#include "SymTab/Mangler.hpp"
+
+namespace Concurrency {
+
+// FNV-1a (64-bit) hash of a declaration's mangled name; 0 is never returned.
+// Identifies mutex functions across translation units, where the address of
+// a static inline function may differ from one unit to the next.
+static inline uint64_t hashMangle( const ast::DeclWithType * decl ) {
+	std::string name = Mangle::mangle( decl );
+	uint64_t hash = 14695981039346656037ULL; // FNV offset basis
+	for ( char c : name ) {
+		hash ^= static_cast<unsigned char>( c ); // fold the byte value; plain char may sign-extend
+		hash *= 1099511628211ULL; // FNV prime
+	}
+	return hash != 0 ? hash : 1; // 0 is reserved: the runtime reads it as "no func_id"
+}
+
+// Build the unsigned long long literal that carries a declaration's identity
+// hash; the "ull" suffix avoids C compiler warnings about large unsigned constants.
+static inline ast::ConstantExpr * hashMangleExpr(
+		const CodeLocation & location, const ast::DeclWithType * decl ) {
+	const unsigned long long value = hashMangle( decl );
+	return new ast::ConstantExpr{
+		location,
+		new ast::BasicType{ ast::BasicKind::LongLongUnsignedInt },
+		std::to_string( value ) + "ull",
+		value };
+}
+
+} // namespace Concurrency
Index: src/Concurrency/Waitfor.cpp
===================================================================
--- src/Concurrency/Waitfor.cpp	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ src/Concurrency/Waitfor.cpp	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -22,4 +22,5 @@
 #include "InitTweak/InitTweak.hpp"
 #include "ResolvExpr/Resolver.hpp"
+#include "Concurrency/MutexFuncHash.hpp"
 
 #include "AST/Print.hpp"
@@ -331,4 +332,8 @@
 		makeAccStmt( location, acceptables, index, "func",
 			funcExpr, context ),
+		makeAccStmt( location, acceptables, index, "func_id",
+			Concurrency::hashMangleExpr( location,
+				variableExpr->var.strict_as<ast::DeclWithType>() ),
+			context ),
 		makeAccStmt( location, acceptables, index, "data",
 			new ast::VariableExpr( location, monitors ), context ),
Index: src/Validate/Autogen.cpp
===================================================================
--- src/Validate/Autogen.cpp	(revision 507fc974654efc19152e0b3edf2539a9bf5c720f)
+++ src/Validate/Autogen.cpp	(revision a30fceb1a73c4ef2bbee39a2b5406da881f51111)
@@ -402,17 +402,5 @@
 	}
 
-	ast::FunctionDecl * decl = genProto( "^?{}", { dst }, {} );
-	// For concurrent types, remove static storage and inline specifier, and add
-	// cfa_linkonce attribute so the destructor has external linkage with linkonce
-	// semantics. This is required for waitfor to work correctly across translation
-	// units - the function pointer must be the same everywhere, and cfa_linkonce
-	// ensures only one definition survives linking.
-	if ( isConcurrentType() ) {
-		auto mut = ast::mutate( decl );
-		mut->storage = ast::Storage::Classes();
-		mut->funcSpec = ast::Function::Specs();
-		mut->attributes.push_back( new ast::Attribute( "cfa_linkonce" ) );
-	}
-	return decl;
+	return genProto( "^?{}", { dst }, {} );
 }
 
