Changeset 58fe85a for libcfa


Timestamp: Jan 7, 2021, 3:27:00 PM
Author: Thierry Delisle <tdelisle@…>
Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children: 2b4daf2, 64aeca0
Parents: 3c64c668 (diff), eef8dfb (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
Message: Merge branch 'master' into park_unpark
Location: libcfa
Files: 43 added, 18 deleted, 47 edited, 1 moved

Legend: unchanged lines are shown unmarked; lines removed in this changeset are prefixed with "-", lines added are prefixed with "+", and "..." marks elided unchanged lines.
  • libcfa/configure.ac (r3c64c668 → r58fe85a)

      AC_PREREQ([2.68])
    - AC_INIT([cfa-cc],[1.0.0.0],[cforall@plg.uwaterloo.ca])
    + AC_INIT([cfa-cc],[1.0.0],[cforall@plg.uwaterloo.ca])
      AC_CONFIG_AUX_DIR([automake])
      AC_CONFIG_MACRO_DIRS([automake])
      AM_SILENT_RULES([yes])

    - m4_include([../automake/cfa.m4])
    + m4_include([../tools/build/cfa.m4])

      AM_INIT_AUTOMAKE([subdir-objects])
    ...
      	[  --enable-distcc     whether or not to enable distributed compilation],
      	enable_distcc=$enableval, enable_distcc=no)
    +
    + AC_ARG_WITH(bwlimit,
    + 	[  --with-bwlimit=RATE     RATE the maximum rate at which rsync will be limited when using distributed builds],
    + 	DIST_BWLIMIT=$withval, DIST_BWLIMIT=0)

      echo -n "checking for distributated build... "
    ...
      AC_SUBST(CFADIR_HASH)
      AC_SUBST(CFA_VERSION)
    + AC_SUBST(DIST_BWLIMIT)

      #==============================================================================
    ...
      AM_CONDITIONAL([BUILDLIB], [test "x${CONFIG_BUILDLIB}" = "xyes"])

    + AM_T='$(T)'
    + AC_SUBST(AM_T)
    +
      #==============================================================================
      #Trasforming cc1 will break compilation
    ...

      # Checks for programs.
    - LT_INIT
    + LT_INIT([disable-static])

      AC_PROG_CXX
    ...
      AC_PROG_MAKE_SET

    +
    +
    + #io_uring 5.4 and earlier uses defines
    + #io_uring 5.5 uses enum values
    + #io_uring 5.6 and later uses probes
    +
    + AH_TEMPLATE([CFA_HAVE_LINUX_IO_URING_H],[Defined if io_uring support is present when compiling libcfathread.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_NOP],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_NOP.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_READV],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_READV.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_WRITEV],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_WRITEV.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_FSYNC],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_FSYNC.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_READ_FIXED],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_READ_FIXED.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_WRITE_FIXED],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_WRITE_FIXED.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_POLL_ADD],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_POLL_ADD.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_POLL_REMOVE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_POLL_REMOVE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_SYNC_FILE_RANGE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_SYNC_FILE_RANGE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_SENDMSG],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_SENDMSG.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_RECVMSG],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_RECVMSG.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_TIMEOUT],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_TIMEOUT.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_TIMEOUT_REMOVE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_TIMEOUT_REMOVE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_ACCEPT],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_ACCEPT.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_ASYNC_CANCEL],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_ASYNC_CANCEL.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_LINK_TIMEOUT],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_LINK_TIMEOUT.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_CONNECT],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_CONNECT.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_FALLOCATE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_FALLOCATE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_OPENAT],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_OPENAT.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_CLOSE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_CLOSE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_FILES_UPDATE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_FILES_UPDATE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_STATX],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_STATX.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_READ],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_READ.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_WRITE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_WRITE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_FADVISE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_FADVISE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_MADVISE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_MADVISE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_SEND],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_SEND.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_RECV],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_RECV.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_OPENAT2],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_OPENAT2.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_EPOLL_CTL],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_EPOLL_CTL.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_SPLICE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_SPLICE.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_PROVIDE_BUFFERS],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_PROVIDE_BUFFERS.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_REMOVE_BUFFER],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_REMOVE_BUFFER.])
    + AH_TEMPLATE([CFA_HAVE_IORING_OP_TEE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_TEE.])
    + AH_TEMPLATE([CFA_HAVE_IOSQE_FIXED_FILE],[Defined if io_uring support is present when compiling libcfathread and supports the flag FIXED_FILE.])
    + AH_TEMPLATE([CFA_HAVE_IOSQE_IO_DRAIN],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_DRAIN.])
    + AH_TEMPLATE([CFA_HAVE_IOSQE_ASYNC],[Defined if io_uring support is present when compiling libcfathread and supports the flag ASYNC.])
    + AH_TEMPLATE([CFA_HAVE_IOSQE_IO_LINK],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_LINK.])
    + AH_TEMPLATE([CFA_HAVE_IOSQE_IO_HARDLINK],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_HARDLINK.])
    + AH_TEMPLATE([CFA_HAVE_SPLICE_F_FD_IN_FIXED],[Defined if io_uring support is present when compiling libcfathread and supports the flag SPLICE_F_FD_IN_FIXED.])
    + AH_TEMPLATE([CFA_HAVE_IORING_SETUP_ATTACH_WQ],[Defined if io_uring support is present when compiling libcfathread and supports the flag IORING_SETUP_ATTACH_WQ.])
    + AH_TEMPLATE([CFA_HAVE_PREADV2],[Defined if preadv2 support is present when compiling libcfathread.])
    + AH_TEMPLATE([CFA_HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.])
    + AH_TEMPLATE([CFA_HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.])
    + AH_TEMPLATE([CFA_HAVE_STATX],[Defined if statx support is present when compiling libcfathread.])
    + AH_TEMPLATE([CFA_HAVE_OPENAT2],[Defined if openat2 support is present when compiling libcfathread.])
    + AH_TEMPLATE([__CFA_NO_STATISTICS__],[Defined if libcfathread was compiled without support for statistics.])
    +
    + define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER,IORING_OP_TEE])
    + define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_ASYNC,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,SPLICE_F_FD_IN_FIXED,IORING_SETUP_ATTACH_WQ])
    +
    + define(ioring_from_decls, [
    + 	m4_foreach([op], [ioring_ops], [
    + 		AC_CHECK_DECL(op, [AC_DEFINE([CFA_HAVE_]op)], [], [[#include <linux/io_uring.h>]])
    + 	])
    + ])
    +
    + AC_CHECK_HEADERS([linux/io_uring.h], [
    + 	AC_DEFINE(CFA_HAVE_LINUX_IO_URING_H)
    + 	AC_CHECK_HEADER([liburing.h], [
    + 		AC_CHECK_LIB([uring], [io_uring_get_probe], [
    + 			m4_foreach([op], [ioring_ops], [
    + 				AC_CHECK_DECL(op, [
    + 					AC_RUN_IFELSE([
    + 						AC_LANG_PROGRAM(
    + 							[[#include <liburing.h>]],
    + 							[[int main() {]]
    + 							[[	struct io_uring_probe *probe = io_uring_get_probe();]]
    + 							[[	if(io_uring_opcode_supported(probe, ]]op[[))]]
    + 							[[		return 0;]]
    + 							[[	else]]
    + 							[[		return 1;]]
    + 							[[}]]
    + 						)
    + 					],[
    + 						AC_DEFINE([CFA_HAVE_]op)
    + 					],[
    + 						AC_MSG_FAILURE([Check support for] op [ with liburing failed])
    + 					])
    + 				], [], [[#include <linux/io_uring.h>]])
    + 			])
    + 		], [
    + 			ioring_from_decls
    + 		])
    + 	], [
    + 		ioring_from_decls
    + 	])
    +
    + 	# check support for various io_uring flags
    + 	m4_foreach([op], [ioring_flags], [
    + 		AC_CHECK_DECL(op, [AC_DEFINE([CFA_HAVE_]op)], [], [[#include <linux/io_uring.h>]])
    + 	])
    + ])
    + AC_CHECK_FUNC([preadv2], [AC_DEFINE([CFA_HAVE_PREADV2])])
    + AC_CHECK_FUNC([pwritev2], [AC_DEFINE([CFA_HAVE_PWRITEV2])])
    +
      AC_CONFIG_FILES([
      	Makefile
    ...
      	prelude/Makefile
      	])
    + AC_CONFIG_FILES([src/concurrency/io/call.cfa], [python3 ${srcdir}/src/concurrency/io/call.cfa.in > src/concurrency/io/call.cfa])
    +
    + AC_CONFIG_HEADERS(prelude/defines.hfa)

      AC_OUTPUT()
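    All of these probes funnel into the generated prelude/defines.hfa (via AC_CONFIG_HEADERS above), so the library can be built against whatever io_uring surface the kernel headers expose. As a rough sketch of the consumer side (an illustration, not code from this changeset), libcfathread-style code could gate its fast path like this:

        /* Hedged sketch: gate io_uring usage on the probed configuration.
         * The CFA_HAVE_* macros come from the generated prelude/defines.hfa;
         * CFA_URING_READ is an illustrative name, not part of libcfa. */
        #include "defines.hfa"

        #if defined(CFA_HAVE_LINUX_IO_URING_H) && defined(CFA_HAVE_IORING_OP_READ)
                /* kernel advertises IORING_OP_READ: submit reads through io_uring */
                #define CFA_URING_READ 1
        #else
                /* older kernel: fall back to plain read(2) */
                #define CFA_URING_READ 0
        #endif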
  • libcfa/prelude/Makefile.am (r3c64c668 → r58fe85a)

      # put into lib for now
      cfalibdir = ${CFA_LIBDIR}
    - cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c
    + cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
    +
    + EXTRA_DIST = bootloader.cf builtins.c builtins.def extras.c extras.regx extras.regx2 prelude-gen.cc prototypes.awk prototypes.c prototypes.sed sync-builtins.cf

      CC = @LOCAL_CFACC@
    ...

      MOSTLYCLEANFILES = bootloader.c builtins.cf extras.cf gcc-builtins.c gcc-builtins.cf prelude.cfa
    + DISTCLEANFILES = $(DEPDIR)/builtins.Po
      MAINTAINERCLEANFILES = ${addprefix ${libdir}/,${cfalib_DATA}} ${addprefix ${libdir}/,${lib_LIBRARIES}}

      if ENABLE_DISTCC
      distribution: @LOCAL_CFACC@ @LOCAL_CC1@ @CFACPP@ gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c $(srcdir)/../../tools/build/push2dist.sh
    - 	${AM_V_GEN}$(srcdir)/../../tools/build/push2dist.sh @CFADIR_HASH@
    + 	${AM_V_GEN}$(srcdir)/../../tools/build/push2dist.sh @CFADIR_HASH@ @DIST_BWLIMIT@
      	@echo "Dummy file to track distribution to remote hosts" > ${@}
  • libcfa/prelude/bootloader.cf (r3c64c668 → r58fe85a)

      extern "C" { static inline int invoke_main(int argc, char* argv[], char* envp[]); }
    + int cfa_args_argc;
    + char ** cfa_args_argv;
    + char ** cfa_args_envp;

      int main(int argc, char* argv[], char* envp[]) {
    + 	cfa_args_argc = argc;
    + 	cfa_args_argv = argv;
    + 	cfa_args_envp = envp;
      	return invoke_main(argc, argv, envp);
      }
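    These three globals snapshot the program arguments before invoke_main runs, making them reachable from anywhere in the runtime without threading argc/argv through every call (the parseargs.hfa header added in src/Makefile.am below is a plausible consumer, though this changeset does not show one). A hypothetical C reader:

        /* Hedged sketch: program_name() is illustrative, not part of libcfa. */
        extern int cfa_args_argc;
        extern char ** cfa_args_argv;
        extern char ** cfa_args_envp;

        static const char * program_name(void) {
                return cfa_args_argc > 0 ? cfa_args_argv[0] : "unknown";
        }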
  • libcfa/prelude/builtins.c (r3c64c668 → r58fe85a)

      // Author           : Peter A. Buhr
      // Created On       : Fri Jul 21 16:21:03 2017
    - // Last Modified By : Peter A. Buhr
    - // Last Modified On : Thu Nov 21 16:31:39 2019
    - // Update Count     : 101
    + // Last Modified By : Andrew Beach
    + // Last Modified On : Tue Oct 27 14:42:00 2020
    + // Update Count     : 111
      //
    +
    + #define __cforall_builtins__

      // type that wraps a pointer and a destructor-like function - used in generating implicit destructor calls for struct members in user-defined functions
    ...
      void abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));

    + forall(dtype T)
    + static inline T & identity(T & i) {
    + 	return i;
    + }
    +
    + // generator support
    + struct $generator {
    + 	inline int;
    + };
    +
    + static inline void  ?{}($generator & this) { ((int&)this) = 0; }
    + static inline void ^?{}($generator &) {}
    +
    + trait is_generator(dtype T) {
    +       void main(T & this);
    +       $generator * get_generator(T & this);
    + };
    +
    + forall(dtype T | is_generator(T))
    + static inline T & resume(T & gen) {
    + 	main(gen);
    + 	return gen;
    + }
    +
      // implicit increment, decrement if += defined, and implicit not if != defined

    ...
      // universal typed pointer constant
      static inline forall( dtype DT ) DT * intptr( uintptr_t addr ) { return (DT *)addr; }
    + static inline forall( ftype FT ) FT * intptr( uintptr_t addr ) { return (FT *)addr; }
    +
    + #if defined(__SIZEOF_INT128__)
    + // constructor for 128-bit numbers (all constants are unsigned as +/- are operators)
    + static inline void ?{}( unsigned int128 & this, unsigned long int h, unsigned long int l ) {
    + 	this = (unsigned int128)h << 64 | (unsigned int128)l;
    + } // ?{}
    + #endif // __SIZEOF_INT128__

      // exponentiation operator implementation
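    The new 128-bit constructor simply splices two 64-bit halves into one value. The same computation as a worked plain-C example (make_u128 is an illustrative name, not part of the prelude):

        #include <stdint.h>

        #if defined(__SIZEOF_INT128__)
        /* high word shifted into the top 64 bits, low word or'd into the bottom */
        static inline unsigned __int128 make_u128( uint64_t h, uint64_t l ) {
                return ( (unsigned __int128)h << 64 ) | l;
        }
        #endif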
  • libcfa/src/Makefile.am (r3c64c668 → r58fe85a)

      ## Created On       : Sun May 31 08:54:01 2015
      ## Last Modified By : Peter A. Buhr
    - ## Last Modified On : Mon Jul 15 22:43:27 2019
    - ## Update Count     : 241
    + ## Last Modified On : Wed Dec  9 22:46:14 2020
    + ## Update Count     : 250
      ###############################################################################

    ...
      ACLOCAL_AMFLAGS  = -I automake

    - include $(srcdir)/../../src/cfa.make
    + include $(top_srcdir)/../tools/build/cfa.make

      libdir = ${CFA_LIBDIR}
    ...
      # AM_CFAFLAGS for only cfa source
      # use -no-include-stdhdr to prevent rebuild cycles
    - # The built sources must not depend on the installed headers
    - AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    - AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
    + # The built sources must not depend on the installed inst_headers_src
    + AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr -I$(srcdir)/concurrency $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    + AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
      AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
      CFACC = @CFACC@
    ...
      #----------------------------------------------------------------------------------------------------------------
      if BUILDLIB
    - headers_nosrc = math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa
    - headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
    -           containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
    -
    - libsrc = startup.cfa interpose.cfa bits/debug.cfa assert.cfa exception.c virtual.c heap.cfa ${headers:.hfa=.cfa}
    + inst_headers_nosrc = \
    + 	bitmanip.hfa \
    + 	clock.hfa \
    + 	exception.hfa \
    + 	exception.h \
    + 	gmp.hfa \
    + 	math.hfa \
    + 	time_t.hfa \
    + 	bits/align.hfa \
    + 	bits/containers.hfa \
    + 	bits/debug.hfa \
    + 	bits/defs.hfa \
    + 	bits/locks.hfa \
    + 	bits/collection.hfa \
    + 	bits/stack.hfa \
    + 	bits/queue.hfa \
    + 	bits/sequence.hfa \
    + 	concurrency/iofwd.hfa \
    + 	containers/list.hfa \
    + 	containers/stackLockFree.hfa \
    + 	vec/vec.hfa \
    + 	vec/vec2.hfa \
    + 	vec/vec3.hfa \
    + 	vec/vec4.hfa
    +
    + inst_headers_src = \
    + 	common.hfa \
    + 	fstream.hfa \
    + 	heap.hfa \
    + 	iostream.hfa \
    + 	iterator.hfa \
    + 	limits.hfa \
    + 	memory.hfa \
    + 	parseargs.hfa \
    + 	rational.hfa \
    + 	stdlib.hfa \
    + 	time.hfa \
    + 	containers/maybe.hfa \
    + 	containers/pair.hfa \
    + 	containers/result.hfa \
    + 	containers/vector.hfa
    +
    + libsrc = ${inst_headers_src} ${inst_headers_src:.hfa=.cfa} \
    + 	assert.cfa \
    + 	bits/algorithm.hfa \
    + 	bits/debug.cfa \
    + 	exception.c \
    + 	interpose.cfa \
    + 	lsda.h \
    + 	startup.cfa \
    + 	startup.hfa \
    + 	virtual.c \
    + 	virtual.h

      # not all platforms support concurrency, add option do disable it
    - thread_headers_nosrc = concurrency/invoke.h
    - thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    - thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa ${thread_headers:.hfa=.cfa}
    + inst_thread_headers_nosrc = \
    + 	bits/random.hfa \
    + 	concurrency/clib/cfathread.h \
    + 	concurrency/invoke.h \
    + 	concurrency/future.hfa \
    + 	concurrency/kernel/fwd.hfa
    +
    + inst_thread_headers_src = \
    + 	concurrency/coroutine.hfa \
    + 	concurrency/exception.hfa \
    + 	concurrency/kernel.hfa \
    + 	concurrency/locks.hfa \
    + 	concurrency/monitor.hfa \
    + 	concurrency/mutex.hfa \
    + 	concurrency/thread.hfa
    +
    + thread_libsrc = ${inst_thread_headers_src} ${inst_thread_headers_src:.hfa=.cfa} \
    + 	bits/signal.hfa \
    + 	concurrency/alarm.cfa \
    + 	concurrency/alarm.hfa \
    + 	concurrency/clib/cfathread.cfa \
    + 	concurrency/CtxSwitch-@ARCHITECTURE@.S \
    + 	concurrency/invoke.c \
    + 	concurrency/io.cfa \
    + 	concurrency/io/setup.cfa \
    + 	concurrency/io/types.hfa \
    + 	concurrency/io/call.cfa \
    + 	concurrency/iofwd.hfa \
    + 	concurrency/kernel_private.hfa \
    + 	concurrency/kernel/startup.cfa \
    + 	concurrency/preemption.cfa \
    + 	concurrency/preemption.hfa \
    + 	concurrency/ready_queue.cfa \
    + 	concurrency/ready_subqueue.hfa \
    + 	concurrency/snzi.hfa \
    + 	concurrency/stats.cfa \
    + 	concurrency/stats.hfa \
    + 	concurrency/stats.hfa
    +
      else
    - headers =
    - thread_headers =
    - headers_nosrc =
    - thread_headers_nosrc =
    + inst_headers_src =
    + inst_thread_headers_src =
    + inst_headers_nosrc =
    + inst_thread_headers_nosrc =
      libsrc =
      endif
    ...

      prelude.o : prelude.cfa extras.cf gcc-builtins.cf builtins.cf @LOCAL_CFACC@ @CFACPP@
    - 	${AM_V_GEN}$(CFACOMPILE) -quiet -XCFA -l ${<} -c -o ${@}
    + 	${AM_V_GEN}$(CFACOMPILE) -quiet -XCFA,-l ${<} -c -o ${@}

      prelude.lo: prelude.cfa extras.cf gcc-builtins.cf builtins.cf @LOCAL_CFACC@ @CFACPP@
      	${AM_V_GEN}$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile \
    - 	$(CFACOMPILE) -quiet -XCFA -l ${<} -c -o ${@}
    -
    - #----------------------------------------------------------------------------------------------------------------
    - libcfa_la_SOURCES = prelude.cfa ${libsrc}
    + 	$(CFACOMPILE) -quiet -XCFA,-l ${<} -c -o ${@}
    +
    + #----------------------------------------------------------------------------------------------------------------
    + libcfa_la_SOURCES = ${libsrc}
    + nodist_libcfa_la_SOURCES = prelude.cfa
      libcfa_la_LDFLAGS = -version-info @CFA_VERSION@

    ...
      cfa_includedir = $(CFA_INCDIR)
    - nobase_cfa_include_HEADERS = ${stdhdr} ${headers} ${headers_nosrc} ${thread_headers} ${thread_headers_nosrc}
    + nobase_cfa_include_HEADERS = ${stdhdr} ${inst_headers_src} ${inst_headers_nosrc} ${inst_thread_headers_src} ${inst_thread_headers_nosrc}
    + EXTRA_DIST = stdhdr

      #----------------------------------------------------------------------------------------------------------------
      maintainer-clean-local:
      	-rm -rf ${CFA_INCDIR} ${CFA_LIBDIR}
    +
    + distclean-local:
    + 	find ${builddir} -path '*.Plo' -delete
  • libcfa/src/bits/containers.hfa (r3c64c668 → r58fe85a)

      #include "bits/align.hfa"
      #include "bits/defs.hfa"
    -
    + #include <stdio.h>
      //-----------------------------------------------------------------------------
      // Array
    ...
      	#define __small_array_t(T) __small_array(T)
      #else
    - 	#define __small_array_t(T) struct __small_array
    + 	#define __small_array_t(T) __small_array
      #endif

    ...
      	static inline forall( dtype T | is_node(T) ) {
      		void ?{}( __queue(T) & this ) with( this ) {
    - 			head{ 1p };
    - 			tail{ &head };
    - 			verify(*tail == 1p);
    + 			(this.head){ 1p };
    + 			(this.tail){ &this.head };
    + 			verify(*this.tail == 1p);
      		}

      		void append( __queue(T) & this, T * val ) with( this ) {
    - 			verify(tail != 0p);
    - 			verify(*tail == 1p);
    - 			*tail = val;
    - 			tail = &get_next( *val );
    - 			*tail = 1p;
    + 			verify(this.tail != 0p);
    + 			verify(*this.tail == 1p);
    + 			*this.tail = val;
    + 			this.tail = &get_next( *val );
    + 			*this.tail = 1p;
    + 		}
    +
    + 		T * peek( __queue(T) & this ) {
    + 			verify(*this.tail == 1p);
    + 			T * front = this.head;
    + 			if( front != 1p ) {
    + 				verify(*this.tail == 1p);
    + 				return front;
    + 			}
    + 			verify(*this.tail == 1p);
    + 			return 0p;
      		}

      		T * pop_head( __queue(T) & this ) {
      			verify(*this.tail == 1p);
    - 			T * head = this.head;
    - 			if( head != 1p ) {
    - 				this.head = get_next( *head );
    - 				if( get_next( *head ) == 1p ) {
    + 			T * _head = this.head;
    + 			if( _head != 1p ) {
    + 				this.head = get_next( *_head );
    + 				if( get_next( *_head ) == 1p ) {
      					this.tail = &this.head;
      				}
    - 				get_next( *head ) = 0p;
    + 				get_next( *_head ) = 0p;
      				verify(*this.tail == 1p);
    - 				return head;
    + 				verify( get_next(*_head) == 0p );
    + 				return _head;
      			}
      			verify(*this.tail == 1p);
    ...
      			(*it) = get_next( *val );

    - 			if( tail == &get_next( *val ) ) {
    - 				tail = it;
    + 			if( this.tail == &get_next( *val ) ) {
    + 				this.tail = it;
      			}

      			get_next( *val ) = 0p;

    - 			verify( (head == 1p) == (&head == tail) );
    - 			verify( *tail == 1p );
    + 			verify( (this.head == 1p) == (&this.head == this.tail) );
    + 			verify( *this.tail == 1p );
      			return val;
      		}

      		int ?!=?( const __queue(T) & this, __attribute__((unused)) zero_t zero ) {
    - 			return this.head != 0;
    + 			return this.head != 1p;
      		}
      	}
    ...
      	forall(dtype T )
      	static inline [void] ?{}( __dllist(T) & this, * [T * & next, T * & prev] ( T & ) __get ) {
    - 		this.head{ 0p };
    + 		(this.head){ 0p };
      		this.__get = __get;
      	}
    ...
      		void push_front( __dllist(T) & this, T & node ) with( this ) {
      			verify(__get);
    - 			if ( head ) {
    - 				__get( node ).next = head;
    - 				__get( node ).prev = __get( *head ).prev;
    + 			if ( this.head ) {
    + 				__get( node ).next = this.head;
    + 				__get( node ).prev = __get( *this.head ).prev;
      				// inserted node must be consistent before it is seen
      				// prevent code movement across barrier
      				asm( "" : : : "memory" );
    - 				__get( *head ).prev = &node;
    + 				__get( *this.head ).prev = &node;
      				T & _prev = *__get( node ).prev;
      				__get( _prev ).next = &node;
    ...
      			// prevent code movement across barrier
      			asm( "" : : : "memory" );
    - 			head = &node;
    + 			this.head = &node;
      		}

      		void remove( __dllist(T) & this, T & node ) with( this ) {
      			verify(__get);
    - 			if ( &node == head ) {
    - 				if ( __get( *head ).next == head ) {
    - 					head = 0p;
    + 			if ( &node == this.head ) {
    + 				if ( __get( *this.head ).next == this.head ) {
    + 					this.head = 0p;
      				} else {
    - 					head = __get( *head ).next;
    + 					this.head = __get( *this.head ).next;
      				}
      			}
    ...
      			return this.head != 0;
      		}
    +
    + 		void move_to_front( __dllist(T) & src, __dllist(T) & dst, T & node ) {
    + 			remove    (src, node);
    + 			push_front(dst, node);
    + 		}
      	}
      	#undef next
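    Every change in the __queue functions above preserves one invariant: head equals the sentinel pointer 1p when the queue is empty, and tail always points at the next-field that currently holds the sentinel, which is also why ?!=? now tests against 1p instead of 0. A plain-C sketch of the same intrusive-queue convention (all names here are illustrative, not from libcfa):

        #define SENTINEL ((struct node *)1)     /* plays the role of CFA's 1p */

        struct node  { struct node * next; };
        struct queue { struct node * head; struct node ** tail; };

        static void queue_init( struct queue * q ) {
                q->head = SENTINEL;             /* empty: head holds the sentinel */
                q->tail = &q->head;             /* tail points at that slot */
        }

        static void queue_append( struct queue * q, struct node * n ) {
                *q->tail = n;                   /* link n after the last element */
                q->tail  = &n->next;            /* tail now tracks n's next-field */
                *q->tail = SENTINEL;            /* re-establish the invariant */
        }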
  • libcfa/src/bits/debug.cfa (r3c64c668 → r58fe85a)

      // Created On       : Thu Mar 30 12:30:01 2017
      // Last Modified By : Peter A. Buhr
    - // Last Modified On : Tue Feb  4 13:03:16 2020
    - // Update Count     : 11
    + // Last Modified On : Wed Jun 17 11:07:13 2020
    + // Update Count     : 12
      //

    - extern "C" {
      #include <stdio.h>
      #include <stdlib.h>
    ...
      #include <stdarg.h>
      #include <unistd.h>
    - }

      enum { buffer_size = 4096 };
  • libcfa/src/bits/debug.hfa (r3c64c668 → r58fe85a)

      // Author           : Thierry Delisle
      // Created On       : Mon Nov 28 12:27:26 2016
    - // Last Modified By : Peter A. Buhr
    - // Last Modified On : Tue Feb  4 12:29:21 2020
    - // Update Count     : 9
    + // Last Modified By : Andrew Beach
    + // Last Modified On : Mon Apr 27 10:15:00 2020
    + // Update Count     : 10
      //

      #pragma once
    +
    + #include <assert.h>

      #ifdef __CFA_DEBUG__
    ...
      	#define __cfaabi_dbg_ctx_param const char caller[]
      	#define __cfaabi_dbg_ctx_param2 , const char caller[]
    + 	#define __cfaabi_dbg_ctx_fwd caller
    + 	#define __cfaabi_dbg_ctx_fwd2 , caller
      #else
      	#define __cfaabi_dbg_debug_do(...)
    ...
      	#define __cfaabi_dbg_ctx_param
      	#define __cfaabi_dbg_ctx_param2
    + 	#define __cfaabi_dbg_ctx_fwd
    + 	#define __cfaabi_dbg_ctx_fwd2
      #endif

    ...
      #endif
      	#include <stdarg.h>
    - 	#include <stdio.h>

      	extern void __cfaabi_bits_write( int fd, const char buffer[], int len );
    ...
      	extern void __cfaabi_bits_print_vararg( int fd, const char fmt[], va_list arg );
      	extern void __cfaabi_bits_print_buffer( int fd, char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 4, 5) ));
    +
    + #if defined(__CFA_DEBUG_PRINT__) \
    + 		|| defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__) \
    + 		|| defined(__CFA_DEBUG_PRINT_MONITOR__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__) \
    + 		|| defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__) \
    + 		|| defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
    + 	#include <stdio.h>
    + 	#include <unistd.h>
    + #endif
      #ifdef __cforall
      }
      #endif

    + // Deprecated: Use the versions with the new module names.
      #ifdef __CFA_DEBUG_PRINT__
      	#define __cfaabi_dbg_write( buffer, len )         __cfaabi_bits_write( STDERR_FILENO, buffer, len )
      	#define __cfaabi_dbg_acquire()                    __cfaabi_bits_acquire()
      	#define __cfaabi_dbg_release()                    __cfaabi_bits_release()
    - 	#define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   (__VA_ARGS__)
    - 	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock (__VA_ARGS__)
    - 	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer (__VA_ARGS__)
    - 	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len );
    - 	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( __dbg_text, __dbg_len );
    + 	#define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   ( STDERR_FILENO, __VA_ARGS__ )
    + 	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock ( STDERR_FILENO, __VA_ARGS__ )
    + 	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer ( STDERR_FILENO, __VA_ARGS__ )
    + 	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
    + 	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( STDERR_FILENO, __dbg_text, __dbg_len );
      #else
      	#define __cfaabi_dbg_write(...)               ((void)0)
    ...
      #endif

    + // Debug print functions and statements:
    + // Most are wrappers around the bits printing function but are not always used.
    + // If they are used depends if the group (first argument) is active or not. The group must be one
    + // defined belowe. The other arguments depend on the wrapped function.
    + #define __cfadbg_write(group, buffer, len) \
    + 	__CFADBG_PRINT_GROUP_##group(__cfaabi_bits_write(STDERR_FILENO, buffer, len))
    + #define __cfadbg_acquire(group) \
    + 	__CFADBG_PRINT_GROUP_##group(__cfaabi_bits_acquire())
    + #define __cfadbg_release(group) \
    + 	__CFADBG_PRINT_GROUP_##group(__cfaabi_bits_release())
    + #define __cfadbg_print_safe(group, ...) \
    + 	__CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_safe(STDERR_FILENO, __VA_ARGS__))
    + #define __cfadbg_print_nolock(group, ...) \
    + 	__CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_nolock(STDERR_FILENO, __VA_ARGS__))
    + #define __cfadbg_print_buffer(group, ...) \
    + 	__CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_buffer(STDERR_FILENO, __VA_ARGS__))
    + #define __cfadbg_print_buffer_decl(group, ...) \
    + 	__CFADBG_PRINT_GROUP_##group(char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len ))
    + #define __cfadbg_print_buffer_local(group, ...) \
    + 	__CFADBG_PRINT_GROUP_##group(__dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write(STDERR_FILENO, __dbg_text, __dbg_len))
    +
    + // The debug print groups:
    + #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__)
    + #	define __CFADBG_PRINT_GROUP_io(...) __VA_ARGS__
    + #else
    + #	define __CFADBG_PRINT_GROUP_io(...) ((void)0)
    + #endif
    + #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__)
    + #	define __CFADBG_PRINT_GROUP_io_core(...) __VA_ARGS__
    + #else
    + #	define __CFADBG_PRINT_GROUP_io_core(...) ((void)0)
    + #endif
    + #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_MONITOR__)
    + #	define __CFADBG_PRINT_GROUP_monitor(...) __VA_ARGS__
    + #else
    + #	define __CFADBG_PRINT_GROUP_monitor(...) ((void)0)
    + #endif
    + #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__)
    + #	define __CFADBG_PRINT_GROUP_preemption(...) __VA_ARGS__
    + #else
    + #	define __CFADBG_PRINT_GROUP_preemption(...) ((void)0)
    + #endif
    + #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__)
    + #	define __CFADBG_PRINT_GROUP_runtime_core(...) __VA_ARGS__
    + #else
    + #	define __CFADBG_PRINT_GROUP_runtime_core(...) ((void)0)
    + #endif
    + #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
    + #	define __CFADBG_PRINT_GROUP_ready_queue(...) __VA_ARGS__
    + #else
    + #	define __CFADBG_PRINT_GROUP_ready_queue(...) ((void)0)
    + #endif
    + #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
    + #	define __CFADBG_PRINT_GROUP_exception(...) __VA_ARGS__
    + #else
    + #	define __CFADBG_PRINT_GROUP_exception(...) ((void)0)
    + #endif
    +
      // Local Variables: //
      // mode: c //
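    The new __cfadbg_* macros route every call through a per-group gate: when the group is inactive the statement expands to ((void)0), so disabled debug output costs nothing at runtime or in the binary. A stripped-down C sketch of the pattern (the DBG_* names are hypothetical stand-ins for the macros above):

        #include <stdio.h>

        #if defined(DEBUG_PRINT_IO)
                #define DBG_GROUP_io(...) __VA_ARGS__      /* splice statement through */
        #else
                #define DBG_GROUP_io(...) ((void)0)        /* swallow it entirely */
        #endif

        #define dbg_print( group, ... ) DBG_GROUP_##group( fprintf( stderr, __VA_ARGS__ ) )

        /* usage: dbg_print( io, "submitted %d requests\n", n );
         * compiles to a fprintf only when DEBUG_PRINT_IO is defined. */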
  • libcfa/src/bits/defs.hfa (r3c64c668 → r58fe85a)

      // Created On       : Thu Nov  9 13:24:10 2017
      // Last Modified By : Peter A. Buhr
    - // Last Modified On : Tue Jan 28 22:38:27 2020
    - // Update Count     : 9
    + // Last Modified On : Sat Oct 24 10:53:15 2020
    + // Update Count     : 21
      //

      #pragma once

    - #include <stdbool.h>
    - #include <stddef.h>
      #include <stdint.h>
    + #include <assert.h>

      #define likely(x)   __builtin_expect(!!(x), 1)
    ...
      #define __cfa_anonymous_object(x) inline struct x
      #else
    - #define __cfa_anonymous_object(x) x __cfa_anonymous_object
    + #define __cfa_anonymous_object(x) struct x __cfa_anonymous_object
      #endif

    ...
      #endif

    - static inline long long rdtscl(void) {
    -     unsigned int lo, hi;
    -     __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    -     return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
    + static inline long long int rdtscl(void) {
    + 	#if defined( __i386 ) || defined( __x86_64 )
    + 	unsigned int lo, hi;
    + 	__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    + 	return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
    + 	#elif defined( __aarch64__ ) || defined( __arm__ )
    + 	// https://github.com/google/benchmark/blob/v1.1.0/src/cycleclock.h#L116
    + 	long long int virtual_timer_value;
    + 	asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
    + 	return virtual_timer_value;
    + 	#else
    + 		#error unsupported hardware architecture
    + 	#endif
      }
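    rdtscl now also compiles on ARM, reading the cntvct_el0 virtual counter instead of the x86 TSC; the two tick at different rates, so the result is only meaningful as a relative delta on one machine. A small usage sketch (assuming the header above is reachable as bits/defs.hfa):

        #include <stdio.h>
        #include "bits/defs.hfa"        /* assumption: provides the rdtscl() above */

        int main( void ) {
                long long int start = rdtscl();
                for ( volatile int i = 0; i < 1000000; i += 1 ) {}   /* work being timed */
                printf( "elapsed ticks: %lld\n", rdtscl() - start );
                return 0;
        }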
  • libcfa/src/bits/locks.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Oct 31 15:14:38 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 13:03:19 2020
    13 // Update Count     : 11
     12// Last Modified On : Wed Aug 12 14:18:07 2020
     13// Update Count     : 13
    1414//
    1515
     
    2727
    2828// pause to prevent excess processor bus usage
    29 #if defined( __sparc )
    30         #define Pause() __asm__ __volatile__ ( "rd %ccr,%g0" )
    31 #elif defined( __i386 ) || defined( __x86_64 )
     29#if defined( __i386 ) || defined( __x86_64 )
    3230        #define Pause() __asm__ __volatile__ ( "pause" : : : )
    3331#elif defined( __ARM_ARCH )
    34         #define Pause() __asm__ __volatile__ ( "nop" : : : )
     32        #define Pause() __asm__ __volatile__ ( "YIELD" : : : )
    3533#else
    3634        #error unsupported architecture
     
    5452
    5553                #ifdef __CFA_DEBUG__
    56                         void __cfaabi_dbg_record(__spinlock_t & this, const char prev_name[]);
     54                        void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]);
    5755                #else
    58                         #define __cfaabi_dbg_record(x, y)
     56                        #define __cfaabi_dbg_record_lock(x, y)
    5957                #endif
    6058        }
     
    6967                bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0);
    7068                if( result ) {
    71                         __cfaabi_dbg_record( this, caller );
     69                        __cfaabi_dbg_record_lock( this, caller );
    7270                } else {
    7371                        enable_interrupts_noPoll();
     
    9997                        #endif
    10098                }
    101                 __cfaabi_dbg_record( this, caller );
     99                __cfaabi_dbg_record_lock( this, caller );
    102100        }
    103101
     
    112110        #endif
    113111
     112        extern "C" {
     113                char * strerror(int);
     114        }
     115        #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); }
     116
    114117        struct __bin_sem_t {
    115                 bool                    signaled;
    116118                pthread_mutex_t         lock;
    117119                pthread_cond_t          cond;
     120                int                     val;
    118121        };
    119122
    120123        static inline void ?{}(__bin_sem_t & this) with( this ) {
    121                 signaled = false;
    122                 pthread_mutex_init(&lock, NULL);
    123                 pthread_cond_init (&cond, NULL);
     124                // Create the mutex with error checking
     125                pthread_mutexattr_t mattr;
     126                pthread_mutexattr_init( &mattr );
     127                pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP);
     128                pthread_mutex_init(&lock, &mattr);
     129
     130                pthread_cond_init (&cond, (const pthread_condattr_t *)0p);  // workaround trac#208: cast should not be required
     131                val = 0;
    124132        }
    125133
    126134        static inline void ^?{}(__bin_sem_t & this) with( this ) {
    127                 pthread_mutex_destroy(&lock);
    128                 pthread_cond_destroy (&cond);
     135                CHECKED( pthread_mutex_destroy(&lock) );
     136                CHECKED( pthread_cond_destroy (&cond) );
    129137        }
    130138
    131139        static inline void wait(__bin_sem_t & this) with( this ) {
    132140                verify(__cfaabi_dbg_in_kernel());
    133                 pthread_mutex_lock(&lock);
    134                         if(!signaled) {   // this must be a loop, not if!
     141                CHECKED( pthread_mutex_lock(&lock) );
     142                        while(val < 1) {
    135143                                pthread_cond_wait(&cond, &lock);
    136144                        }
    137                         signaled = false;
    138                 pthread_mutex_unlock(&lock);
    139         }
    140 
    141         static inline void post(__bin_sem_t & this) with( this ) {
    142                 verify(__cfaabi_dbg_in_kernel());
    143 
    144                 pthread_mutex_lock(&lock);
    145                         bool needs_signal = !signaled;
    146                         signaled = true;
    147                 pthread_mutex_unlock(&lock);
    148 
    149                 if (needs_signal)
    150                         pthread_cond_signal(&cond);
     145                        val -= 1;
     146                CHECKED( pthread_mutex_unlock(&lock) );
     147        }
     148
     149        static inline bool post(__bin_sem_t & this) with( this ) {
     150                bool needs_signal = false;
     151
     152                CHECKED( pthread_mutex_lock(&lock) );
     153                        if(val < 1) {
     154                                val += 1;
     155                                pthread_cond_signal(&cond);
     156                                needs_signal = true;
     157                        }
     158                CHECKED( pthread_mutex_unlock(&lock) );
     159
     160                return needs_signal;
     161        }
     162
     163        #undef CHECKED
     164
     165        struct $thread;
     166        extern void park( void );
     167        extern void unpark( struct $thread * this );
     168        static inline struct $thread * active_thread ();
     169
     170        // Semaphore which only supports a single thread
     171        struct single_sem {
     172                struct $thread * volatile ptr;
     173        };
     174
     175        static inline {
     176                void  ?{}(single_sem & this) {
     177                        this.ptr = 0p;
     178                }
     179
     180                void ^?{}(single_sem &) {}
     181
     182                bool wait(single_sem & this) {
     183                        for() {
     184                                struct $thread * expected = this.ptr;
     185                                if(expected == 1p) {
     186                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     187                                                return false;
     188                                        }
     189                                }
     190                                else {
     191                                        /* paranoid */ verify( expected == 0p );
     192                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     193                                                park();
     194                                                return true;
     195                                        }
     196                                }
     197
     198                        }
     199                }
     200
     201                bool post(single_sem & this) {
     202                        for() {
     203                                struct $thread * expected = this.ptr;
     204                                if(expected == 1p) return false;
     205                                if(expected == 0p) {
     206                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 1p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     207                                                return false;
     208                                        }
     209                                }
     210                                else {
     211                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     212                                                unpark( expected );
     213                                                return true;
     214                                        }
     215                                }
     216                        }
     217                }
     218        }
     219
     220        // Synchronozation primitive which only supports a single thread and one post
     221        // Similar to a binary semaphore with a 'one shot' semantic
     222        // is expected to be discarded after each party call their side
     223        struct oneshot {
     224                // Internal state :
     225                //     0p     : is initial state (wait will block)
     226                //     1p     : fulfilled (wait won't block)
     227                // any thread : a thread is currently waiting
     228                struct $thread * volatile ptr;
     229        };
     230
     231        static inline {
     232                void  ?{}(oneshot & this) {
     233                        this.ptr = 0p;
     234                }
     235
     236                void ^?{}(oneshot &) {}
     237
     238                // Wait for the post, return immidiately if it already happened.
     239                // return true if the thread was parked
     240                bool wait(oneshot & this) {
     241                        for() {
     242                                struct $thread * expected = this.ptr;
     243                                if(expected == 1p) return false;
     244                                /* paranoid */ verify( expected == 0p );
     245                                if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     246                                        park();
     247                                        /* paranoid */ verify( this.ptr == 1p );
     248                                        return true;
     249                                }
     250                        }
     251                }
     252
     253                // Mark as fulfilled, wake thread if needed
     254                // return true if a thread was unparked
     255                bool post(oneshot & this) {
     256                        struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
     257                        if( got == 0p ) return false;
     258                        unpark( got );
     259                        return true;
     260                }
     261        }
     262
     263        // base types for future to build upon
     264        // It is based on the 'oneshot' type to allow multiple futures
     265        // to block on the same instance, permitting users to block a single
     266        // thread on "any of" [a given set of] futures.
     267        // does not support multiple threads waiting on the same future
     268        struct future_t {
     269                // Internal state :
     270                //     0p      : is initial state (wait will block)
     271                //     1p      : fulfilled (wait won't block)
     272                //     2p      : in progress ()
     273                //     3p      : abandoned, server should delete
     274                // any oneshot : a context has been setup to wait, a thread could wait on it
     275                struct oneshot * volatile ptr;
     276        };
     277
     278        static inline {
     279                void  ?{}(future_t & this) {
     280                        this.ptr = 0p;
     281                }
     282
     283                void ^?{}(future_t &) {}
     284
     285                void reset(future_t & this) {
     286                        // needs to be in 0p or 1p
     287                        __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);
     288                }
     289
     290                // check if the future is available
     291                bool available( future_t & this ) {
     292                        return this.ptr == 1p;
     293                }
     294
     295                // Prepare the future to be waited on
     296                // intented to be use by wait, wait_any, waitfor, etc. rather than used directly
     297                bool setup( future_t & this, oneshot & wait_ctx ) {
     298                        /* paranoid */ verify( wait_ctx.ptr == 0p );
     299                        // The future needs to set the wait context
     300                        for() {
     301                                struct oneshot * expected = this.ptr;
     302                                // Is the future already fulfilled?
     303                                if(expected == 1p) return false; // Yes, just return false (didn't block)
     304
     305                                // The future is not fulfilled, try to setup the wait context
     306                                /* paranoid */ verify( expected == 0p );
     307                                if(__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     308                                        return true;
     309                                }
     310                        }
     311                }
     312
     313                // Stop waiting on a future
     314                // When multiple futures are waited for together in "any of" pattern
     315                // futures that weren't fulfilled before the thread woke up
     316                // should retract the wait ctx
     317                // intented to be use by wait, wait_any, waitfor, etc. rather than used directly
     318                void retract( future_t & this, oneshot & wait_ctx ) {
     319                        // Remove the wait context
     320                        struct oneshot * got = __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);
     321
     322                        // got == 0p: future was never actually setup, just return
     323                        if( got == 0p ) return;
     324
     325                        // got == wait_ctx: since fulfil does an atomic_swap,
     326                        // if we got back the original then no one else saw context
     327                        // It is safe to delete (which could happen after the return)
     328                        if( got == &wait_ctx ) return;
     329
     330                        // got == 1p: the future is ready and the context was fully consumed
     331                        // the server won't use the pointer again
     332                        // It is safe to delete (which could happen after the return)
     333                        if( got == 1p ) return;
     334
     335                        // got == 2p: the future is ready but the context hasn't fully been consumed
     336                        // spin until it is safe to move on
     337                        if( got == 2p ) {
     338                                while( this.ptr != 1p ) Pause();
     339                                return;
     340                        }
     341
      342                        // got == anything else: something went wrong here, abort
     343                        abort("Future in unexpected state");
     344                }
     345
     346                // Mark the future as abandoned, meaning it will be deleted by the server
     347                bool abandon( future_t & this ) {
     348                        /* paranoid */ verify( this.ptr != 3p );
     349
      350                        // Mark the future as abandoned
     351                        struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST);
     352
     353                        // If the future isn't already fulfilled, let the server delete it
     354                        if( got == 0p ) return false;
     355
     356                        // got == 2p: the future is ready but the context hasn't fully been consumed
     357                        // spin until it is safe to move on
     358                        if( got == 2p ) {
     359                                while( this.ptr != 1p ) Pause();
     360                                got = 1p;
     361                        }
     362
      363                        // The future is completed, delete it now
     364                        /* paranoid */ verify( this.ptr != 1p );
     365                        free( &this );
     366                        return true;
     367                }
     368
     369                // from the server side, mark the future as fulfilled
     370                // delete it if needed
     371                bool fulfil( future_t & this ) {
     372                        for() {
     373                                struct oneshot * expected = this.ptr;
     374                                // was this abandoned?
     375                                #if defined(__GNUC__) && __GNUC__ >= 7
     376                                        #pragma GCC diagnostic push
     377                                        #pragma GCC diagnostic ignored "-Wfree-nonheap-object"
     378                                #endif
     379                                        if( expected == 3p ) { free( &this ); return false; }
     380                                #if defined(__GNUC__) && __GNUC__ >= 7
     381                                        #pragma GCC diagnostic pop
     382                                #endif
     383
     384                                /* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen
      385                                /* paranoid */ verify( expected != 2p ); // Future is being fulfilled by someone else, this is even less supported than the previous case.
     386
     387                                // If there is a wait context, we need to consume it and mark it as consumed after
     388                                // If there is no context then we can skip the in progress phase
     389                                struct oneshot * want = expected == 0p ? 1p : 2p;
     390                                if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
     391                                        if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return false; }
     392                                        bool ret = post( *expected );
     393                                        __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
     394                                        return ret;
     395                                }
     396                        }
     397
     398                }
     399
     400                // Wait for the future to be fulfilled
     401                bool wait( future_t & this ) {
     402                        oneshot temp;
     403                        if( !setup(this, temp) ) return false;
     404
      405                        // Wait context is set up, just wait on it
     406                        bool ret = wait( temp );
     407
      408                        // Wait for any in-progress fulfilment to fully complete
     409                        while( this.ptr == 2p ) Pause();
     410                        // Make sure the state makes sense
      411                        // Should be fulfilled; a stale read could still show in progress,
      412                        // but in that case the oneshot was already fulfilled (unparking this thread)
      413                        // and the oneshot should not be needed any more
     414                        __attribute__((unused)) struct oneshot * was = this.ptr;
     415                        /* paranoid */ verifyf( was == 1p, "Expected this.ptr to be 1p, was %p\n", was );
     416
     417                        // Mark the future as fulfilled, to be consistent
      418                        // with potential calls to available
     419                        // this.ptr = 1p;
     420                        return ret;
     421                }
    151422        }
    152423#endif
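Note: future_t above pairs a single waiting thread with a single server. A minimal usage sketch, assuming the oneshot/post machinery defined earlier in this file; serve() is a hypothetical routine that hands the future to a server which eventually calls fulfil():

	future_t f;
	serve( f );                        // hypothetical: give &f to a server thread
	if( !available( f ) ) wait( f );   // parks this thread until the server calls fulfil
	reset( f );                        // back to the 0p state, ready for reuse

	// server side, once the result is ready:
	fulfil( f );                       // wakes the waiter, or frees the future if abandoned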
  • libcfa/src/bits/signal.hfa

    r3c64c668 r58fe85a  
    1919#include "bits/defs.hfa"
    2020
    21 extern "C" {
    2221#include <errno.h>
    2322#define __USE_GNU
     
    2625#include <stdlib.h>
    2726#include <string.h>
    28 }
    2927
    3028// Short hands for signal context information
     
    5452                        sig, handler, flags, errno, strerror( errno )
    5553                );
    56                 _exit( EXIT_FAILURE );
     54                _Exit( EXIT_FAILURE );
    5755        } // if
    5856}
  • libcfa/src/common.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Wed Jul 11 17:54:36 2018
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jul 12 08:02:18 2018
    13 // Update Count     : 5
     12// Last Modified On : Sat Aug 15 08:51:29 2020
     13// Update Count     : 14
    1414//
    1515
     
    6767
    6868static inline {
     69        char min( char t1, char t2 ) { return t1 < t2 ? t1 : t2; } // optimization
     70        intptr_t min( intptr_t t1, intptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization
     71        uintptr_t min( uintptr_t t1, uintptr_t t2 ) { return t1 < t2 ? t1 : t2; } // optimization
    6972        forall( otype T | { int ?<?( T, T ); } )
    7073        T min( T t1, T t2 ) { return t1 < t2 ? t1 : t2; }
    7174
     75        char max( char t1, char t2 ) { return t1 > t2 ? t1 : t2; } // optimization
     76        intptr_t max( intptr_t t1, intptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization
     77        uintptr_t max( uintptr_t t1, uintptr_t t2 ) { return t1 > t2 ? t1 : t2; } // optimization
    7278        forall( otype T | { int ?>?( T, T ); } )
    7379        T max( T t1, T t2 ) { return t1 > t2 ? t1 : t2; }
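Note: the new char/intptr_t/uintptr_t overloads are monomorphic fast paths (the "// optimization" comments); the forall versions remain for any otype providing ?<? or ?>?. A small sketch of the intended resolution, assumed from those comments:

	char     c = min( 'a', 'z' );      // can bind to the new monomorphic char overload
	intptr_t p = min( p1, p2 );        // monomorphic, avoids passing the ?<? assertion
	double   d = max( 1.5, 2.5 );      // still uses the polymorphic version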
  • libcfa/src/concurrency/CtxSwitch-arm32.S

    r3c64c668 r58fe85a  
    1         @ 32 bit ARM context switch
    2         @ This function assumes that r9 has no special meaning on the platform it's
    3         @ being built on.
    4         @ If r9 is special, uncomment the following line and it will be left alone
     1        # 32 bit ARM context switch
     2        # This function assumes that r9 has no special meaning on the platform it's
     3        # being built on.
     4        # If r9 is special, uncomment the following line and it will be left alone
    55
    6         @ #define R9_SPECIAL
     6        # #define R9_SPECIAL
    77
    88        #define PTR_BYTE        4
     
    1717
    1818__cfactx_switch:
    19         @ save callee-saved registers: r4-r8, r10, r11, r13(sp) (plus r9 depending on platform specification)
    20         @ I've seen reference to 31 registers on 64-bit, if this is the case, more need to be saved
    21         @ save thread state registers: r14(lr)
    22         @ r12(ip) is intra-procedure-call scratch register, does not need saving between function calls
     19        # save callee-saved registers: r4-r8, r10, r11, r13(sp) (plus r9 depending on platform specification)
     20        # I've seen reference to 31 registers on 64-bit, if this is the case, more need to be saved
     21        # save thread state registers: r14(lr)
     22        # r12(ip) is intra-procedure-call scratch register, does not need saving between function calls
    2323
    2424        #ifdef R9_SPECIAL
     
    2828        #endif // R9_SPECIAL
    2929
    30         @ save floating point registers: s16-s31
     30        # save floating point registers: s16-s31
    3131        vstmdb r13!, {s16-s31}
    3232
    33         @ save frame pointer and stack pointer to outgoing datastructure
     33        # save frame pointer and stack pointer to outgoing datastructure
    3434        str sp, [r0, #SP_OFFSET]
    3535        str fp, [r0, #FP_OFFSET]
    3636
    37         @ restore frame pointer and stack pointer from incoming datastructure
     37        # restore frame pointer and stack pointer from incoming datastructure
    3838        ldr fp, [r1, #FP_OFFSET]
    3939        ldr sp, [r1, #SP_OFFSET]
    4040
    41         @ restore floating point registers: s16-s31
     41        # restore floating point registers: s16-s31
    4242        vldm r13!, {s16-s31}
    43         @ restore r14(lr)
    44         @ restore 64-bit extra registers?
    45         @ restore callee-saved registers: r4-r8, r10, r11, r13
     43        # restore r14(lr)
     44        # restore 64-bit extra registers?
     45        # restore callee-saved registers: r4-r8, r10, r11, r13
    4646
    4747        #ifdef R9_SPECIAL
    4848        ldmfd r13!, {r4-r8,r10,r11,r15}
    4949        #else
    50         ldmfd r13!, {r4-r11,r14}    @ loading r14 back into r15 returns
     50        ldmfd r13!, {r4-r11,r14}    # loading r14 back into r15 returns
    5151
    5252        mov r15, r14
  • libcfa/src/concurrency/CtxSwitch-i386.S

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Dec 6 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Jul 21 22:29:25 2017
    13 // Update Count     : 1
    14 //
    15 // This  library is free  software; you  can redistribute  it and/or  modify it
    16 // under the terms of the GNU Lesser General Public License as published by the
    17 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
    18 // option) any later version.
    19 //
    20 // This library is distributed in the  hope that it will be useful, but WITHOUT
    21 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
    22 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
    23 // for more details.
    24 //
    25 // You should  have received a  copy of the  GNU Lesser General  Public License
    26 // along  with this library.
     12// Last Modified On : Sun Sep  6 18:23:37 2020
     13// Update Count     : 5
    2714//
    2815
    29 // This context switch routine depends on the fact that the stack of a new
    30 // thread has been set up to look like the thread has saved its context in
    31 // the normal manner.
    32 //
    33 // void CtxSwitch( machine_context *from, machine_context *to );
      16// The context switch routine requires the initial stack of a thread to
     17// look like the thread has saved its context in the normal manner.
    3418
    35 // Offsets in the context structure. This needs to be synchronized with the
    36 // high level code a little better.
      19// Offsets must be synchronized with the __stack_context_t in invoke.h.
    3720
    3821#define PTR_BYTE        4
    3922#define SP_OFFSET       ( 0 * PTR_BYTE )
    4023#define FP_OFFSET       ( 1 * PTR_BYTE )
    41 #define PC_OFFSET       ( 2 * PTR_BYTE )
    4224
     25// Context switch between coroutines/tasks.
     26//   void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) ;
      27// Arguments "from" at stack location 4(%esp), "to" at 20(%esp)
     28
     29        .file "CtxSwitch-i386.S"
    4330        .text
    4431        .align 2
    45         .globl __cfactx_switch
    46         .type  __cfactx_switch, @function
     32        .global __cfactx_switch
     33        .type __cfactx_switch, @function
    4734__cfactx_switch:
    4835
    4936        // Copy the "from" context argument from the stack to register eax
    50         // Return address is at 0(%esp), with parameters following
     37        // Return address is at 0(%esp), with parameters following.
    5138
    5239        movl 4(%esp),%eax
     
    6350        movl %ebp,FP_OFFSET(%eax)
    6451
    65         // Copy the "to" context argument from the stack to register eax
    66         // Having pushed three words (= 12 bytes) on the stack, the
    67         // argument is now at 8 + 12 = 20(%esp)
     52        // Copy the "to" context argument from the stack to register eax. Having
     53        // pushed 3 words (= 12 bytes) on the stack, the argument is now at
     54        // 8 + 12 = 20(%esp).
    6855
    6956        movl 20(%esp),%eax
     
    8370
    8471        ret
    85         .size  __cfactx_switch, .-__cfactx_switch
     72        .size __cfactx_switch, .-__cfactx_switch
    8673
    8774// Local Variables: //
  • libcfa/src/concurrency/CtxSwitch-x86_64.S

    r3c64c668 r58fe85a  
    77// CtxSwitch-x86_64.S --
    88//
    9 // Author           : Thierry Delisle
    10 // Created On       : Mon Nov 28 12:27:26 2016
     9// Author           : Peter A. Buhr
     10// Created On       : Mon Aug 10 08:10:26 2020
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Jul 21 22:28:11 2017
    13 // Update Count     : 1
    14 //
    15 // This  library is free  software; you  can redistribute  it and/or  modify it
    16 // under the terms of the GNU Lesser General Public License as published by the
    17 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
    18 // option) any later version.
    19 //
    20 // This library is distributed in the  hope that it will be useful, but WITHOUT
    21 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
    22 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
    23 // for more details.
    24 //
    25 // You should  have received a  copy of the  GNU Lesser General  Public License
    26 // along  with this library.
     12// Last Modified On : Sat Oct 24 14:36:25 2020
     13// Update Count     : 10
    2714//
    2815
    29 // This context switch routine depends on the fact that the stack of a new
    30 // thread has been set up to look like the thread has saved its context in
    31 // the normal manner.
    32 //
    33 // void CtxSwitch( machine_context *from, machine_context *to );
      16// The context switch routine requires the initial stack of a thread to
     17// look like the thread has saved its context in the normal manner.
    3418
    35 // Offsets in the context structure. This needs to be synchronized with the
    36 // high level code a little better.
      19// Offsets must be synchronized with the __stack_context_t in invoke.h.
    3720
    3821#define PTR_BYTE        8
     
    4023#define FP_OFFSET       ( 1 * PTR_BYTE )
    4124
    42 //-----------------------------------------------------------------------------
    43 // Regular context switch routine which enables switching from one context to anouther
     25// Context switch between coroutines/tasks.
     26//   void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) ;
     27// Arguments "from" in register rdi, "to" in register rsi.
     28
     29        .file "CtxSwitch-x86_64.S"
    4430        .text
    4531        .align 2
    46         .globl __cfactx_switch
    47         .type  __cfactx_switch, @function
     32        .global __cfactx_switch
     33        .type __cfactx_switch, @function
    4834__cfactx_switch:
    4935
     
    7763
    7864        ret
    79         .size  __cfactx_switch, .-__cfactx_switch
     65        .size __cfactx_switch, .-__cfactx_switch
    8066
    81 //-----------------------------------------------------------------------------
    82 // Stub used to create new stacks which are ready to be context switched to
     67// Stub to create new stacks which can be context switched to
     68//   void __cfactx_invoke_stub( void );
     69
    8370        .text
    8471        .align 2
    85         .globl __cfactx_invoke_stub
    86         .type    __cfactx_invoke_stub, @function
     72        .global __cfactx_invoke_stub
     73        .type __cfactx_invoke_stub, @function
    8774__cfactx_invoke_stub:
    88         movq %rbx, %rdi
     75        movq %rbx, %rdi                                         // move main and this to first two arguments
    8976        movq %r12, %rsi
    90         jmp *%r13
    91         .size  __cfactx_invoke_stub, .-__cfactx_invoke_stub
     77        jmp *%r13                                                       // jmp to invoke
     78        .size __cfactx_invoke_stub, .-__cfactx_invoke_stub
    9279
    9380// Local Variables: //
    94 // mode: c //
     81// mode: asm //
    9582// tab-width: 4 //
    9683// End: //
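Note: both assembler files now state that SP_OFFSET/FP_OFFSET must be synchronized with __stack_context_t in invoke.h. A sketch of the correspondence, with the field order inferred from the offsets and from the uses of context.SP/context.FP in invoke.c below (not copied from invoke.h):

	struct __stack_context_t {
		void * SP;                     // SP_OFFSET = 0 * PTR_BYTE
		void * FP;                     // FP_OFFSET = 1 * PTR_BYTE
	};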
  • libcfa/src/concurrency/alarm.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Fri Jun 2 11:31:25 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Jan  5 08:41:36 2020
    13 // Update Count     : 69
     12// Last Modified On : Wed Jun 17 16:11:35 2020
     13// Update Count     : 75
    1414//
    1515
    1616#define __cforall_thread__
    1717
    18 extern "C" {
    1918#include <errno.h>
    2019#include <stdio.h>
     20#include <unistd.h>
    2121#include <string.h>
    22 #include <unistd.h>
    2322#include <sys/time.h>
    24 }
    2523
    2624#include "alarm.hfa"
    27 #include "kernel_private.hfa"
     25#include "kernel/fwd.hfa"
    2826#include "preemption.hfa"
    2927
     
    4745//=============================================================================================
    4846
    49 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ) with( this ) {
      47void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ) with( this ) {
    5048        this.thrd = thrd;
    5149        this.alarm = alarm;
    5250        this.period = period;
    53         next = 0;
    5451        set = false;
    55         kernel_alarm = false;
     52        type = User;
    5653}
    5754
    58 void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period ) with( this ) {
     55void ?{}( alarm_node_t & this, processor * proc, Time alarm, Duration period ) with( this ) {
    5956        this.proc = proc;
    6057        this.alarm = alarm;
    6158        this.period = period;
    62         next = 0;
    6359        set = false;
    64         kernel_alarm = true;
     60        type = Kernel;
     61}
     62void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period ) with( this ) {
     63        this.alarm = alarm;
     64        this.period = period;
     65        this.callback = callback;
     66        set = false;
     67        type = Callback;
    6568}
    6669
     
    7174}
    7275
    73 #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
    74 bool validate( alarm_list_t * this ) {
    75         alarm_node_t ** it = &this->head;
    76         while( (*it) ) {
    77                 it = &(*it)->next;
     76void insert( alarm_list_t * this, alarm_node_t * n ) {
     77        alarm_node_t * it = & (*this)`first;
     78        while( it && (n->alarm > it->alarm) ) {
     79                it = & (*it)`next;
     80        }
     81        if ( it ) {
     82                insert_before( *it, *n );
     83        } else {
     84                insert_last(*this, *n);
    7885        }
    7986
    80         return it == this->tail;
    81 }
    82 #endif
    83 
    84 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
    85         verify( !n->next );
    86         if( p == this->tail ) {
    87                 this->tail = &n->next;
    88         }
    89         else {
    90                 n->next = *p;
    91         }
    92         *p = n;
    93 
    94         verify( validate( this ) );
    95 }
    96 
    97 void insert( alarm_list_t * this, alarm_node_t * n ) {
    98         alarm_node_t ** it = &this->head;
    99         while( (*it) && (n->alarm > (*it)->alarm) ) {
    100                 it = &(*it)->next;
    101         }
    102 
    103         insert_at( this, n, it );
    104 
    105         verify( validate( this ) );
     87        verify( validate( *this ) );
    10688}
    10789
    10890alarm_node_t * pop( alarm_list_t * this ) {
    109         alarm_node_t * head = this->head;
     91        verify( validate( *this ) );
     92        alarm_node_t * head = & (*this)`first;
    11093        if( head ) {
    111                 this->head = head->next;
    112                 if( !head->next ) {
    113                         this->tail = &this->head;
    114                 }
    115                 head->next = 0p;
     94                remove(*head);
    11695        }
    117         verify( validate( this ) );
     96        verify( validate( *this ) );
    11897        return head;
    11998}
    12099
    121 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
    122         verify( it );
    123         verify( (*it) == n );
    124 
    125         (*it) = n->next;
    126         if( !n-> next ) {
    127                 this->tail = it;
    128         }
    129         n->next = 0p;
    130 
    131         verify( validate( this ) );
    132 }
    133 
    134 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
    135         alarm_node_t ** it = &this->head;
    136         while( (*it) && (*it) != n ) {
    137                 it = &(*it)->next;
    138         }
    139 
    140         verify( validate( this ) );
    141 
    142         if( *it ) { remove_at( this, n, it ); }
    143 
    144         verify( validate( this ) );
    145 }
    146 
    147100void register_self( alarm_node_t * this ) {
    148         alarm_list_t * alarms = &event_kernel->alarms;
     101        alarm_list_t & alarms = event_kernel->alarms;
    149102
    150103        disable_interrupts();
     
    152105        {
    153106                verify( validate( alarms ) );
    154                 bool first = !alarms->head;
     107                bool first = ! & alarms`first;
    155108
    156                 insert( alarms, this );
     109                insert( &alarms, this );
    157110                if( first ) {
    158                         __kernel_set_timer( alarms->head->alarm - __kernel_get_time() );
     111                        __kernel_set_timer( alarms`first.alarm - __kernel_get_time() );
    159112                }
    160113        }
     
    168121        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
    169122        {
    170                 verify( validate( &event_kernel->alarms ) );
    171                 remove( &event_kernel->alarms, this );
     123                verify( validate( event_kernel->alarms ) );
     124                remove( *this );
    172125        }
    173126        unlock( event_kernel->lock );
     
    176129}
    177130
     131//=============================================================================================
     132// Utilities
     133//=============================================================================================
     134
     135void sleep( Duration duration ) {
     136        alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s };
     137
     138        register_self( &node );
     139        park();
     140
     141        /* paranoid */ verify( !node.set );
     142        /* paranoid */ verify( & node`next == 0p );
     143        /* paranoid */ verify( & node`prev == 0p );
     144}
     145
    178146// Local Variables: //
    179147// mode: c //
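Note: the new sleep() is built directly on the alarm list: it registers a one-shot user alarm for the current thread and parks; the preemption handler unparks the thread when the alarm fires. Usage is simply (duration literals from time.hfa):

	sleep( 2`s );                      // park the calling thread for ~2 seconds
	sleep( 500`ms );                   // any Duration works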
  • libcfa/src/concurrency/alarm.hfa

    r3c64c668 r58fe85a  
    2323#include "time.hfa"
    2424
     25#include "containers/list.hfa"
     26
    2527struct $thread;
    2628struct processor;
     
    3739//=============================================================================================
    3840
      41enum alarm_type { Kernel = 0, User = 1, Callback = 2 };
     42
     43struct alarm_node_t;
     44
     45typedef void (*Alarm_Callback)(alarm_node_t & );
     46
    3947struct alarm_node_t {
    4048        Time alarm;                             // time when alarm goes off
    4149        Duration period;                        // if > 0 => period of alarm
    42         alarm_node_t * next;            // intrusive link list field
     50
     51        DLISTED_MGD_IMPL_IN(alarm_node_t)
    4352
    4453        union {
    45                 $thread * thrd; // thrd who created event
    46                 processor * proc;               // proc who created event
     54                $thread * thrd;                                 // thrd who created event
     55                processor * proc;                               // proc who created event
     56                Alarm_Callback callback;                // callback to handle event
    4757        };
    4858
     4959        bool set                :1;             // whether or not the alarm has been registered
    50         bool kernel_alarm       :1;             // true if this is not a user defined alarm
      60        enum alarm_type type;           // kernel, user, or callback alarm
    5161};
    52 
    53 typedef alarm_node_t ** __alarm_it_t;
     62DLISTED_MGD_IMPL_OUT(alarm_node_t)
    5463
    5564void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
    5665void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
     66void ?{}( alarm_node_t & this, Alarm_Callback callback, Time alarm, Duration period );
    5767void ^?{}( alarm_node_t & this );
    5868
    59 struct alarm_list_t {
    60         alarm_node_t * head;
    61         __alarm_it_t tail;
    62 };
    63 
    64 static inline void ?{}( alarm_list_t & this ) with( this ) {
    65         head = 0;
    66         tail = &head;
    67 }
     69typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
    6870
    6971void insert( alarm_list_t * this, alarm_node_t * n );
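Note: the new Callback variant lets the event kernel run arbitrary code when an alarm fires, without a thread or processor to wake. A hypothetical sketch; on_timeout is illustrative, and the registration path is assumed to be the same register_self() used in alarm.cfa:

	void on_timeout( alarm_node_t & node ) {
		// invoked by the event kernel when the alarm fires;
		// a non-zero period would make it repeat
	}
	alarm_node_t node = { on_timeout, __kernel_get_time() + 1`s, 0`s };
	register_self( &node );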
  • libcfa/src/concurrency/coroutine.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Mon Nov 28 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 12:29:25 2020
    13 // Update Count     : 16
     12// Last Modified On : Tue Dec 15 12:06:04 2020
     13// Update Count     : 23
    1414//
    1515
     
    1818#include "coroutine.hfa"
    1919
    20 extern "C" {
    2120#include <stddef.h>
    2221#include <malloc.h>
     
    2423#include <string.h>
    2524#include <unistd.h>
    26 // use this define to make unwind.h play nice, definetely a hack
    27 #define HIDE_EXPORTS
     25#include <sys/mman.h>                                                                   // mprotect
    2826#include <unwind.h>
    29 #undef HIDE_EXPORTS
    30 #include <sys/mman.h>
    31 }
    3227
    3328#include "kernel_private.hfa"
     29#include "exception.hfa"
     30#include "math.hfa"
     31
     32#define CFA_COROUTINE_USE_MMAP 0
    3433
    3534#define __CFA_INVOKE_PRIVATE__
     
    4746
    4847//-----------------------------------------------------------------------------
     48FORALL_DATA_INSTANCE(CoroutineCancelled, (dtype coroutine_t), (coroutine_t))
     49
     50forall(dtype T)
     51void mark_exception(CoroutineCancelled(T) *) {}
     52
     53forall(dtype T)
     54void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) {
     55        dst->virtual_table = src->virtual_table;
     56        dst->the_coroutine = src->the_coroutine;
     57        dst->the_exception = src->the_exception;
     58}
     59
     60forall(dtype T)
     61const char * msg(CoroutineCancelled(T) *) {
     62        return "CoroutineCancelled(...)";
     63}
     64
     65// This code should not be inlined. It is the error path on resume.
     66forall(dtype T | is_coroutine(T))
     67void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ) {
     68        verify( desc->cancellation );
     69        desc->state = Cancelled;
     70        exception_t * except = __cfaehm_cancellation_exception( desc->cancellation );
     71
      72        // TODO: Remove explicit vtable set once trac#186 is fixed.
     73        CoroutineCancelled(T) except;
     74        except.virtual_table = &get_exception_vtable(&except);
     75        except.the_coroutine = &cor;
     76        except.the_exception = except;
     77        throwResume except;
     78
     79        except->virtual_table->free( except );
     80        free( desc->cancellation );
     81        desc->cancellation = 0p;
     82}
     83
     84//-----------------------------------------------------------------------------
    4985// Global state variables
    5086
    5187// minimum feasible stack size in bytes
    52 #define MinStackSize 1000
     88static const size_t MinStackSize = 1000;
    5389extern size_t __page_size;                              // architecture pagesize HACK, should go in proper runtime singleton
     90extern int __map_prot;
    5491
    5592void __stack_prepare( __stack_info_t * this, size_t create_size );
     93void __stack_clean  ( __stack_info_t * this );
    5694
    5795//-----------------------------------------------------------------------------
     
    74112        bool userStack = ((intptr_t)this.storage & 0x1) != 0;
    75113        if ( ! userStack && this.storage ) {
    76                 __attribute__((may_alias)) intptr_t * istorage = (intptr_t *)&this.storage;
    77                 *istorage &= (intptr_t)-1;
    78 
    79                 void * storage = this.storage->limit;
    80                 __cfaabi_dbg_debug_do(
    81                         storage = (char*)(storage) - __page_size;
    82                         if ( mprotect( storage, __page_size, PROT_READ | PROT_WRITE ) == -1 ) {
    83                                 abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
    84                         }
    85                 );
    86                 __cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
    87                 free( storage );
     114                __stack_clean( &this );
    88115        }
    89116}
     
    101128void ^?{}($coroutine& this) {
    102129        if(this.state != Halted && this.state != Start && this.state != Primed) {
    103                 $coroutine * src = TL_GET( this_thread )->curr_cor;
     130                $coroutine * src = active_coroutine();
    104131                $coroutine * dst = &this;
    105132
     
    134161        assert(__page_size != 0l);
    135162        size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
     163        size = ceiling(size, __page_size);
    136164
    137165        // If we are running debug, we also need to allocate a guardpage to catch stack overflows.
    138166        void * storage;
    139         __cfaabi_dbg_debug_do(
    140                 storage = memalign( __page_size, size + __page_size );
    141         );
    142         __cfaabi_dbg_no_debug_do(
    143                 storage = (void*)malloc(size);
    144         );
    145 
     167        #if CFA_COROUTINE_USE_MMAP
     168                storage = mmap(0p, size + __page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
     169                if(storage == ((void*)-1)) {
     170                        abort( "coroutine stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
     171                }
     172                if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
     173                        abort( "coroutine stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
     174                } // if
     175                storage = (void *)(((intptr_t)storage) + __page_size);
     176        #else
     177                __cfaabi_dbg_debug_do(
     178                        storage = memalign( __page_size, size + __page_size );
     179                );
     180                __cfaabi_dbg_no_debug_do(
     181                        storage = (void*)malloc(size);
     182                );
     183
     184                __cfaabi_dbg_debug_do(
     185                        if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
     186                                abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
     187                        }
     188                        storage = (void *)(((intptr_t)storage) + __page_size);
     189                );
     190        #endif
    146191        __cfaabi_dbg_print_safe("Kernel : Created stack %p of size %zu\n", storage, size);
    147         __cfaabi_dbg_debug_do(
    148                 if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
    149                         abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
    150                 }
    151                 storage = (void *)(((intptr_t)storage) + __page_size);
    152         );
    153192
    154193        verify( ((intptr_t)storage & (libAlign() - 1)) == 0ul );
    155194        return [storage, size];
     195}
     196
     197void __stack_clean  ( __stack_info_t * this ) {
     198        size_t size = ((intptr_t)this->storage->base) - ((intptr_t)this->storage->limit) + sizeof(__stack_t);
     199        void * storage = this->storage->limit;
     200
     201        #if CFA_COROUTINE_USE_MMAP
     202                storage = (void *)(((intptr_t)storage) - __page_size);
     203                if(munmap(storage, size + __page_size) == -1) {
     204                        abort( "coroutine stack destruction : internal error, munmap failure, error(%d) %s.", errno, strerror( errno ) );
     205                }
     206        #else
     207                __cfaabi_dbg_debug_do(
     208                        storage = (char*)(storage) - __page_size;
     209                        if ( mprotect( storage, __page_size, __map_prot ) == -1 ) {
     210                                abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
     211                        }
     212                );
     213
     214                free( storage );
     215        #endif
     216        __cfaabi_dbg_print_safe("Kernel : Deleting stack %p\n", storage);
    156217}
    157218
     
    175236                size = libFloor(create_size - stack_data_size - diff, libAlign());
    176237        } // if
    177         assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
    178 
    179         this->storage = (__stack_t *)((intptr_t)storage + size);
     238        assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %zd bytes for a stack.", size, MinStackSize );
     239
     240        this->storage = (__stack_t *)((intptr_t)storage + size - sizeof(__stack_t));
    180241        this->storage->limit = storage;
    181         this->storage->base  = (void*)((intptr_t)storage + size);
     242        this->storage->base  = (void*)((intptr_t)storage + size - sizeof(__stack_t));
     243        this->storage->exception_context.top_resume = 0p;
     244        this->storage->exception_context.current_exception = 0p;
    182245        __attribute__((may_alias)) intptr_t * istorage = (intptr_t*)&this->storage;
    183246        *istorage |= userStack ? 0x1 : 0x0;
     
    205268
    206269        struct $coroutine * __cfactx_cor_finish(void) {
    207                 struct $coroutine * cor = kernelTLS.this_thread->curr_cor;
     270                struct $coroutine * cor = active_coroutine();
    208271
    209272                if(cor->state == Primed) {
    210                         suspend();
     273                        __cfactx_suspend();
    211274                }
    212275
  • libcfa/src/concurrency/coroutine.hfa

    r3c64c668 r58fe85a  
    1818#include <assert.h>
    1919#include "invoke.h"
     20#include "../exception.hfa"
     21
     22//-----------------------------------------------------------------------------
     23// Exception thrown from resume when a coroutine stack is cancelled.
     24FORALL_DATA_EXCEPTION(CoroutineCancelled, (dtype coroutine_t), (coroutine_t)) (
     25        coroutine_t * the_coroutine;
     26        exception_t * the_exception;
     27);
     28
     29forall(dtype T)
     30void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src);
     31
     32forall(dtype T)
     33const char * msg(CoroutineCancelled(T) *);
    2034
    2135//-----------------------------------------------------------------------------
     
    2337// Anything that implements this trait can be resumed.
    2438// Anything that is resumed is a coroutine.
    25 trait is_coroutine(dtype T) {
    26       void main(T & this);
    27       $coroutine * get_coroutine(T & this);
     39trait is_coroutine(dtype T | IS_RESUMPTION_EXCEPTION(CoroutineCancelled, (T))) {
     40        void main(T & this);
     41        $coroutine * get_coroutine(T & this);
    2842};
    2943
     
    4660//-----------------------------------------------------------------------------
    4761// Public coroutine API
    48 static inline void suspend(void);
    49 
    50 forall(dtype T | is_coroutine(T))
    51 static inline T & resume(T & cor);
    52 
    5362forall(dtype T | is_coroutine(T))
    5463void prime(T & cor);
    5564
    56 static inline struct $coroutine * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
     65static inline struct $coroutine * active_coroutine() { return active_thread()->curr_cor; }
    5766
    5867//-----------------------------------------------------------------------------
     
    7584static inline void $ctx_switch( $coroutine * src, $coroutine * dst ) __attribute__((nonnull (1, 2))) {
    7685        // set state of current coroutine to inactive
    77         src->state = src->state == Halted ? Halted : Inactive;
     86        src->state = src->state == Halted ? Halted : Blocked;
    7887
    7988        // set new coroutine that task is executing
    80         TL_GET( this_thread )->curr_cor = dst;
     89        active_thread()->curr_cor = dst;
    8190
    8291        // context switch to specified coroutine
     
    93102}
    94103
    95 extern void __stack_prepare   ( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
     104extern void __stack_prepare( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
     105extern void __stack_clean  ( __stack_info_t * this );
     106
    96107
    97108// Suspend implementation inlined for performance
    98 static inline void suspend(void) {
    99         // optimization : read TLS once and reuse it
    100         // Safety note: this is preemption safe since if
    101         // preemption occurs after this line, the pointer
    102         // will also migrate which means this value will
    103         // stay in syn with the TLS
    104         $coroutine * src = TL_GET( this_thread )->curr_cor;
     109extern "C" {
     110        static inline void __cfactx_suspend(void) {
     111                // optimization : read TLS once and reuse it
     112                // Safety note: this is preemption safe since if
     113                // preemption occurs after this line, the pointer
     114                // will also migrate which means this value will
      115                // stay in sync with the TLS
     116                $coroutine * src = active_coroutine();
    105117
    106         assertf( src->last != 0,
    107                 "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
    108                 "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
    109                 src->name, src );
    110         assertf( src->last->state != Halted,
    111                 "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
    112                 "Possible cause is terminated coroutine's main routine has already returned.",
    113                 src->name, src, src->last->name, src->last );
     118                assertf( src->last != 0,
     119                        "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
     120                        "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
     121                        src->name, src );
     122                assertf( src->last->state != Halted,
     123                        "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
     124                        "Possible cause is terminated coroutine's main routine has already returned.",
     125                        src->name, src, src->last->name, src->last );
    114126
    115         $ctx_switch( src, src->last );
     127                $ctx_switch( src, src->last );
     128        }
    116129}
     130
     131forall(dtype T | is_coroutine(T))
     132void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc );
    117133
    118134// Resume implementation inlined for performance
     
    124140        // will also migrate which means this value will
     125141        // stay in sync with the TLS
    126         $coroutine * src = TL_GET( this_thread )->curr_cor;
     142        $coroutine * src = active_coroutine();
    127143        $coroutine * dst = get_coroutine(cor);
    128144
    129145        if( unlikely(dst->context.SP == 0p) ) {
    130                 TL_GET( this_thread )->curr_cor = dst;
    131146                __stack_prepare(&dst->stack, 65000);
    132147                __cfactx_start(main, dst, cor, __cfactx_invoke_coroutine);
    133                 TL_GET( this_thread )->curr_cor = src;
    134148        }
    135149
     
    148162        // always done for performance testing
    149163        $ctx_switch( src, dst );
     164        if ( unlikely(dst->cancellation) ) {
     165                __cfaehm_cancelled_coroutine( cor, dst );
     166        }
    150167
    151168        return cor;
     
    158175        // will also migrate which means this value will
     159176        // stay in sync with the TLS
    160         $coroutine * src = TL_GET( this_thread )->curr_cor;
     177        $coroutine * src = active_coroutine();
    161178
    162179        // not resuming self ?
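Note: the user-visible changes here: suspend() is no longer a plain library routine (the suspend statement now lowers to the extern "C" __cfactx_suspend), and resuming a cancelled coroutine raises CoroutineCancelled. A minimal sketch of the usual CFA coroutine pattern, assuming the standard coroutine keyword:

	coroutine Counter { int value; };
	void main( Counter & c ) {
		for() {
			c.value += 1;
			suspend;               // lowered to a call to __cfactx_suspend
		}
	}

	Counter c;
	resume( c );                       // runs main up to the next suspend
	// if c's stack has been cancelled, this resume raises CoroutineCancelled(Counter)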
  • libcfa/src/concurrency/invoke.c

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Jan 17 12:27:26 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Feb  9 16:37:42 2018
    13 // Update Count     : 5
     12// Last Modified On : Sat Oct 24 14:35:28 2020
     13// Update Count     : 32
    1414//
    1515
     
    109109
    110110        struct FakeStack {
    111             void *fixedRegisters[3];              // fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
    112             void *rturn;                          // where to go on return from uSwitch
    113             void *dummyReturn;                    // fake return compiler would have pushed on call to uInvoke
    114             void *argument[3];                    // for 16-byte ABI, 16-byte alignment starts here
    115             void *padding;                        // padding to force 16-byte alignment, as "base" is 16-byte aligned
     111            void *fixedRegisters[3];                                            // fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
     112            void *rturn;                                                                        // where to go on return from uSwitch
     113            void *dummyReturn;                                                          // fake return compiler would have pushed on call to uInvoke
     114            void *argument[3];                                                          // for 16-byte ABI, 16-byte alignment starts here
     115            void *padding;                                                                      // padding to force 16-byte alignment, as "base" is 16-byte aligned
    116116        };
    117117
     
    122122
    123123        fs->dummyReturn = NULL;
    124         fs->argument[0] = main;     // argument to invoke
    125         fs->argument[1] = this;     // argument to invoke
     124        fs->argument[0] = main;                                                         // argument to invoke
     125        fs->argument[1] = this;                                                         // argument to invoke
    126126        fs->rturn = invoke;
    127127
     
    129129
    130130        struct FakeStack {
    131                 void *fixedRegisters[5];            // fixed registers rbx, r12, r13, r14, r15
    132                 void *rturn;                        // where to go on return from uSwitch
    133                 void *dummyReturn;                  // NULL return address to provide proper alignment
     131                void *fixedRegisters[5];                                                // fixed registers rbx, r12, r13, r14, r15
     132                void *rturn;                                                                    // where to go on return from uSwitch
     133                void *dummyReturn;                                                              // NULL return address to provide proper alignment
    134134        };
    135135
    136136        cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
    137         cor->context.FP = NULL;         // terminate stack with NULL fp
     137        cor->context.FP = NULL;                                                         // terminate stack with NULL fp
    138138
    139139        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
     
    141141        fs->dummyReturn = NULL;
    142142        fs->rturn = __cfactx_invoke_stub;
    143         fs->fixedRegisters[0] = main;
    144         fs->fixedRegisters[1] = this;
     143        fs->fixedRegisters[0] = main;                                           // argument to invoke
     144        fs->fixedRegisters[1] = this;                                           // argument to invoke
    145145        fs->fixedRegisters[2] = invoke;
    146146
    147 #elif defined( __ARM_ARCH )
    148 #error ARM needs to be upgrade to use to parameters like X86/X64 (A.K.A. : I broke this and do not know how to fix it)
     147#elif defined( __ARM_ARCH_32 )
      148#error ARM needs to be upgraded to use two parameters like X86/X64 (A.K.A. : I broke this and do not know how to fix it)
     149        // More details about the error:
     150        // To avoid the thunk problem, I changed the invoke routine to pass the main explicitly
     151        // instead of relying on an assertion. This effectively hoists any required thunk one level
     152        // which was enough to get to global scope in most cases.
     153        // This means that __cfactx_invoke_... now takes two parameters and the FakeStack needs
     154        // to be adjusted as a consequence of that.
     155        // I don't know how to do that for ARM, hence the #error
     156
    149157        struct FakeStack {
    150                 float fpRegs[16];                       // floating point registers
    151                 void *intRegs[9];                       // integer/pointer registers
    152                 void *arg[2];                           // placeholder for this pointer
     158                float fpRegs[16];                                                               // floating point registers
     159                void * intRegs[9];                                                              // integer/pointer registers
     160                void * arg[2];                                                                  // placeholder for this pointer
    153161        };
    154162
     
    162170        fs->arg[1] = invoke;
    163171
     172#elif defined( __ARM_ARCH )
     173        struct FakeStack {
     174                void * intRegs[12];                                                             // x19-x30 integer registers
     175                double fpRegs[8];                                                               // v8-v15 floating point
     176        };
     177
     178        cor->context.SP = (char *)stack->base - sizeof( struct FakeStack );
     179        cor->context.FP = NULL;
     180
     181        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
     182
     183        fs->intRegs[0] = main;                                                          // argument to invoke x19 => x0
     184        fs->intRegs[1] = this;                                                          // argument to invoke x20 => x1
     185        fs->intRegs[2] = invoke;
     186        fs->intRegs[11] = __cfactx_invoke_stub;                         // link register x30 => ret moves to pc
    164187#else
     165188        #error unknown hardware architecture
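Note: the FakeStack layouts make a never-run coroutine look like it already context-switched out. On x86-64, the first __cfactx_switch to it pops the five fixedRegisters and "returns" into rturn = __cfactx_invoke_stub (see CtxSwitch-x86_64.S above), which moves rbx/r12 into the argument registers and jumps to invoke. A sketch of the initial stack, lowest address (SP) first:

	// SP -> fixedRegisters[0] = main               popped into rbx
	//       fixedRegisters[1] = this               popped into r12
	//       fixedRegisters[2] = invoke             popped into r13
	//       fixedRegisters[3..4]                   r14/r15, values unimportant
	//       rturn = __cfactx_invoke_stub           target of the ret in __cfactx_switch
	//       dummyReturn = NULL                     fake return address for alignment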
  • libcfa/src/concurrency/invoke.h

    r3c64c668 r58fe85a  
    1717#include "bits/defs.hfa"
    1818#include "bits/locks.hfa"
     19#include "kernel/fwd.hfa"
    1920
    2021#ifdef __cforall
     
    2627#define _INVOKE_H_
    2728
    28 #ifdef __ARM_ARCH
    29         // function prototypes are only really used by these macros on ARM
    30         void disable_global_interrupts();
    31         void enable_global_interrupts();
    32 
    33         #define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
    34                 disable_global_interrupts(); \
    35                 target = kernelTLS.member; \
    36                 enable_global_interrupts(); \
    37                 target; } )
    38         #define TL_SET( member, value ) disable_global_interrupts(); \
    39                 kernelTLS.member = value; \
    40                 enable_global_interrupts();
    41 #else
    42         #define TL_GET( member ) kernelTLS.member
    43         #define TL_SET( member, value ) kernelTLS.member = value;
    44 #endif
    45 
    46         #ifdef __cforall
    47         extern "Cforall" {
    48                 extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
    49                         struct $thread    * volatile this_thread;
    50                         struct processor      * volatile this_processor;
    51 
    52                         struct {
    53                                 volatile unsigned short disable_count;
    54                                 volatile bool enabled;
    55                                 volatile bool in_progress;
    56                         } preemption_state;
    57 
    58                         uint32_t rand_seed;
    59                 } kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
    60         }
    61         #endif
     29        struct __cfaehm_try_resume_node;
     30        struct __cfaehm_base_exception_t;
     31        struct exception_context_t {
     32                struct __cfaehm_try_resume_node * top_resume;
     33                struct __cfaehm_base_exception_t * current_exception;
     34        };
    6235
    6336        struct __stack_context_t {
     
    8558                // base of stack
    8659                void * base;
     60
     61                // Information for exception handling.
     62                struct exception_context_t exception_context;
    8763        };
    8864
     
    9268        };
    9369
    94         enum coroutine_state { Halted, Start, Primed, Inactive, Active, Rerun };
    95         enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };
     70        enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active, Cancelled, Halting };
    9671
    9772        struct $coroutine {
     
    10681
    10782                // current execution status for coroutine
    108                 enum coroutine_state state;
     83                enum __Coroutine_State state;
    10984
    11085                // first coroutine to resume this one
     
    11893
    11994        };
     95        // Wrapper for gdb
     96        struct cfathread_coroutine_t { struct $coroutine debug; };
     97
     98        static inline struct __stack_t * __get_stack( struct $coroutine * cor ) {
     99                return (struct __stack_t*)(((uintptr_t)cor->stack.storage) & ((uintptr_t)-2));
     100        }
    120101
    121102        // struct which calls the monitor is accepting
     
    150131                struct __condition_node_t * dtor_node;
    151132        };
     133        // Wrapper for gdb
     134        struct cfathread_monitor_t { struct $monitor debug; };
    152135
    153136        struct __monitor_group_t {
     
    157140                // last function that acquired monitors
    158141                fptr_t func;
     142        };
     143
      144        // Link list fields
      145        // intrusive link field for threads
     146        struct __thread_desc_link {
     147                struct $thread * next;
     148                struct $thread * prev;
     149                volatile unsigned long long ts;
     150                int preferred;
    159151        };
    160152
     
    165157
    166158                // current execution status for coroutine
    167                 volatile int state;
    168                 enum __Preemption_Reason preempted;
     159                // Possible values are:
     160                //    - TICKET_BLOCKED (-1) thread is blocked
     161                //    - TICKET_RUNNING ( 0) thread is running
     162                //    - TICKET_UNBLOCK ( 1) thread should ignore next block
     163                volatile int ticket;
     164                enum __Coroutine_State state:8;
     165                enum __Preemption_Reason preempted:8;
    169166
     170167                //SKULLDUGGERY errno is not saved in the thread data structure because returnToKernel appears to be the only function to require saving and restoring it
     168
     169                // pointer to the cluster on which the thread is running
     170                struct cluster * curr_cluster;
     171
      172        // Link list fields
      173        // intrusive link field for threads
     174                struct __thread_desc_link link;
    171175
    172176                // coroutine body used to store context
     
    182186                struct $monitor *  self_mon_p;
    183187
    184                 // pointer to the cluster on which the thread is running
    185                 struct cluster * curr_cluster;
    186 
    187188                // monitors currently held by this thread
    188189                struct __monitor_group_t monitors;
    189190
     190                 // Linked-list fields
     191                 // intrusive link field for threads
    192                 struct $thread * next;
     191                // used to put threads on user data structures
     192                struct {
     193                        struct $thread * next;
     194                        struct $thread * back;
     195                } seqable;
    193196
    194197                struct {
     
    196199                        struct $thread * prev;
    197200                } node;
    198         };
     201
     202                #if defined( __CFA_WITH_VERIFY__ )
     203                        void * canary;
     204                #endif
     205        };
     206        // Wrapper for gdb
     207        struct cfathread_thread_t { struct $thread debug; };
     208
     209        #ifdef __CFA_DEBUG__
     210                void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]);
     211        #else
     212                #define __cfaabi_dbg_record_thrd(x, y, z)
     213        #endif
    199214
    200215        #ifdef __cforall
    201216        extern "Cforall" {
     217
    202218                static inline $thread *& get_next( $thread & this ) __attribute__((const)) {
    203                         return this.next;
     219                        return this.link.next;
    204220                }
    205221
    206222                static inline [$thread *&, $thread *& ] __get( $thread & this ) __attribute__((const)) {
    207223                        return this.node.[next, prev];
     224                }
     225
     226                static inline $thread *& Back( $thread * this ) __attribute__((const)) {
     227                        return this->seqable.back;
     228                }
     229
     230                static inline $thread *& Next( $thread * this ) __attribute__((const)) {
     231                        return this->seqable.next;
     232                }
     233
     234                static inline bool listed( $thread * this ) {
     235                        return this->seqable.next != 0p;
    208236                }
    209237
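
The ticket field introduced above turns the park/unpark handshake into a single atomic counter: blocking decrements it, waking increments it, and whichever side sees the other's value owns the next step. A minimal C sketch of that protocol, using C11 atomics and hypothetical do_run_again/do_schedule helpers that stand in for the scheduler:

        #include <stdatomic.h>

        #define TICKET_BLOCKED (-1)   /* thread is blocked */
        #define TICKET_RUNNING ( 0)   /* thread is running */
        #define TICKET_UNBLOCK ( 1)   /* thread should ignore next block */

        struct toy_thread { atomic_int ticket; };

        void do_run_again(struct toy_thread *);   /* hypothetical: rerun without queueing   */
        void do_schedule (struct toy_thread *);   /* hypothetical: push on the ready queue  */

        /* Block side: runs after the thread has context-switched away. */
        void toy_block(struct toy_thread * t) {
                int old = atomic_fetch_sub(&t->ticket, 1);
                if (old == TICKET_RUNNING) return;            /* truly blocked, ticket is now -1       */
                if (old == TICKET_UNBLOCK) do_run_again(t);   /* an unpark raced us, ticket back to 0  */
        }

        /* Wake side: what unpark does. */
        void toy_unpark(struct toy_thread * t) {
                int old = atomic_fetch_add(&t->ticket, 1);
                if (old == TICKET_BLOCKED) do_schedule(t);    /* wake lost the race, so it owns the thread    */
                /* old == TICKET_RUNNING: wake won the race, the thread reruns itself */
        }

Exactly this pairing appears below in kernel.cfa: the fetch_sub in __run_thread and the fetch_add in unpark.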
  • libcfa/src/concurrency/kernel.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Jan 17 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 13:03:15 2020
    13 // Update Count     : 58
     12// Last Modified On : Mon Aug 31 07:08:20 2020
     13// Update Count     : 71
    1414//
    1515
    1616#define __cforall_thread__
     17// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    1718
    1819//C Includes
    19 #include <stddef.h>
    2020#include <errno.h>
    21 #include <string.h>
    22 extern "C" {
    2321#include <stdio.h>
    24 #include <fenv.h>
    25 #include <sys/resource.h>
    2622#include <signal.h>
    2723#include <unistd.h>
    28 #include <limits.h>                                                                             // PTHREAD_STACK_MIN
    29 #include <sys/mman.h>                                                                   // mprotect
    30 }
    3124
    3225//CFA Includes
    33 #include "time.hfa"
    3426#include "kernel_private.hfa"
    3527#include "preemption.hfa"
    36 #include "startup.hfa"
    3728
    3829//Private includes
     
    4031#include "invoke.h"
    4132
     33
    4234//-----------------------------------------------------------------------------
    4335// Some assembly required
    4436#if defined( __i386 )
    45         #define CtxGet( ctx )        \
    46                 __asm__ volatile (     \
    47                         "movl %%esp,%0\n"\
    48                         "movl %%ebp,%1\n"\
    49                         : "=rm" (ctx.SP),\
    50                                 "=rm" (ctx.FP) \
    51                 )
    52 
    5337        // mxcr : SSE Status and Control bits (control bits are preserved across function calls)
    5438        // fcw  : X87 FPU control word (preserved across function calls)
     
    7256
    7357#elif defined( __x86_64 )
    74         #define CtxGet( ctx )        \
    75                 __asm__ volatile (     \
    76                         "movq %%rsp,%0\n"\
    77                         "movq %%rbp,%1\n"\
    78                         : "=rm" (ctx.SP),\
    79                                 "=rm" (ctx.FP) \
    80                 )
    81 
    8258        #define __x87_store         \
    8359                uint32_t __mxcr;      \
     
    9874                )
    9975
    100 
    101 #elif defined( __ARM_ARCH )
    102 #define CtxGet( ctx ) __asm__ ( \
    103                 "mov %0,%%sp\n"   \
    104                 "mov %1,%%r11\n"   \
    105         : "=rm" (ctx.SP), "=rm" (ctx.FP) )
     76#elif defined( __arm__ )
     77        #define __x87_store
     78        #define __x87_load
     79
     80#elif defined( __aarch64__ )
     81        #define __x87_store              \
     82                uint32_t __fpcntl[2];    \
     83                __asm__ volatile (    \
     84                        "mrs x9, FPCR\n" \
     85                        "mrs x10, FPSR\n"  \
     86                        "stp x9, x10, %0\n"  \
     87                        : "=m" (__fpcntl) : : "x9", "x10" \
     88                )
     89
     90        #define __x87_load         \
     91                __asm__ volatile (    \
     92                        "ldp x9, x10, %0\n"  \
     93                        "msr FPSR, x10\n"  \
     94                        "msr FPCR, x9\n" \
     95                : "=m" (__fpcntl) : : "x9", "x10" \
     96                )
     97
    10698#else
    107         #error unknown hardware architecture
     99        #error unsupported hardware architecture
    108100#endif
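
The __x87_store/__x87_load pairs save and restore the floating-point control state (MXCSR and the x87 control word on x86, FPCR/FPSR on aarch64) around a context switch, because a thread may have changed rounding or exception modes. As a rough illustration only, the same effect can be approximated portably with C's fenv.h:

        #include <fenv.h>

        void run_with_saved_fp_state(void (*body)(void)) {
                fenv_t env;
                fegetenv(&env);   /* analogous to __x87_store */
                body();           /* code that may change FP rounding/exception modes */
                fesetenv(&env);   /* analogous to __x87_load */
        }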
    109101
     102extern $thread * mainThread;
     103extern processor * mainProcessor;
     104
    110105//-----------------------------------------------------------------------------
    111 //Start and stop routine for the kernel, declared first to make sure they run first
    112 static void __kernel_startup (void) __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
    113 static void __kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
    114 
    115 //-----------------------------------------------------------------------------
    116 // Kernel storage
    117 KERNEL_STORAGE(cluster,         mainCluster);
    118 KERNEL_STORAGE(processor,       mainProcessor);
    119 KERNEL_STORAGE($thread, mainThread);
    120 KERNEL_STORAGE(__stack_t,       mainThreadCtx);
    121 
    122 cluster     * mainCluster;
    123 processor   * mainProcessor;
    124 $thread * mainThread;
    125 
    126 extern "C" {
    127         struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
    128 }
    129 
    130 size_t __page_size = 0;
    131 
    132 //-----------------------------------------------------------------------------
    133 // Global state
    134 thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) = {
    135         NULL,                                                                                           // cannot use 0p
    136         NULL,
    137         { 1, false, false },
    138         6u //this should be seeded better but due to a bug calling rdtsc doesn't work
    139 };
    140 
    141 //-----------------------------------------------------------------------------
    142 // Struct to steal stack
    143 struct current_stack_info_t {
    144         __stack_t * storage;                                                            // pointer to stack object
    145         void * base;                                                                            // base of stack
    146         void * limit;                                                                           // stack grows towards stack limit
    147         void * context;                                                                         // address of cfa_context_t
    148 };
    149 
    150 void ?{}( current_stack_info_t & this ) {
    151         __stack_context_t ctx;
    152         CtxGet( ctx );
    153         this.base = ctx.FP;
    154 
    155         rlimit r;
    156         getrlimit( RLIMIT_STACK, &r);
    157         size_t size = r.rlim_cur;
    158 
    159         this.limit = (void *)(((intptr_t)this.base) - size);
    160         this.context = &storage_mainThreadCtx;
    161 }
    162 
    163 //-----------------------------------------------------------------------------
    164 // Main thread construction
    165 
    166 void ?{}( $coroutine & this, current_stack_info_t * info) with( this ) {
    167         stack.storage = info->storage;
    168         with(*stack.storage) {
    169                 limit     = info->limit;
    170                 base      = info->base;
    171         }
    172         __attribute__((may_alias)) intptr_t * istorage = (intptr_t*) &stack.storage;
    173         *istorage |= 0x1;
    174         name = "Main Thread";
    175         state = Start;
    176         starter = 0p;
    177         last = 0p;
    178         cancellation = 0p;
    179 }
    180 
    181 void ?{}( $thread & this, current_stack_info_t * info) with( this ) {
    182         state = Start;
    183         self_cor{ info };
    184         curr_cor = &self_cor;
    185         curr_cluster = mainCluster;
    186         self_mon.owner = &this;
    187         self_mon.recursion = 1;
    188         self_mon_p = &self_mon;
    189         next = 0p;
    190 
    191         node.next = 0p;
    192         node.prev = 0p;
    193         doregister(curr_cluster, this);
    194 
    195         monitors{ &self_mon_p, 1, (fptr_t)0 };
    196 }
    197 
    198 //-----------------------------------------------------------------------------
    199 // Processor coroutine
    200 void ?{}(processorCtx_t & this) {
    201 
    202 }
    203 
    204 // Construct the processor context of non-main processors
    205 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info) {
    206         (this.__cor){ info };
    207         this.proc = proc;
    208 }
    209 
    210 static void * __invoke_processor(void * arg);
    211 
    212 void ?{}(processor & this, const char name[], cluster & cltr) with( this ) {
    213         this.name = name;
    214         this.cltr = &cltr;
    215         terminated{ 0 };
    216         destroyer = 0p;
    217         do_terminate = false;
    218         preemption_alarm = 0p;
    219         pending_preemption = false;
    220         runner.proc = &this;
    221 
    222         idleLock{};
    223 
    224         __cfaabi_dbg_print_safe("Kernel : Starting core %p\n", &this);
    225 
    226         this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
    227 
    228         __cfaabi_dbg_print_safe("Kernel : core %p started\n", &this);
    229 }
    230 
    231 void ^?{}(processor & this) with( this ){
    232         if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
    233                 __cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);
    234 
    235                 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
    236                 wake( &this );
    237 
    238                 P( terminated );
    239                 verify( kernelTLS.this_processor != &this);
    240         }
    241 
    242         pthread_join( kernel_thread, 0p );
    243         free( this.stack );
    244 }
    245 
    246 void ?{}(cluster & this, const char name[], Duration preemption_rate) with( this ) {
    247         this.name = name;
    248         this.preemption_rate = preemption_rate;
    249         ready_queue{};
    250         ready_queue_lock{};
    251 
    252         procs{ __get };
    253         idles{ __get };
    254         threads{ __get };
    255 
    256         doregister(this);
    257 }
    258 
    259 void ^?{}(cluster & this) {
    260         unregister(this);
    261 }
     106// Kernel Scheduling logic
     107static $thread * __next_thread(cluster * this);
     108static $thread * __next_thread_slow(cluster * this);
     109static void __run_thread(processor * this, $thread * dst);
     110static void __wake_one(cluster * cltr);
     111
     112static void push  (__cluster_idles & idles, processor & proc);
     113static void remove(__cluster_idles & idles, processor & proc);
     114static [unsigned idle, unsigned total, * processor] query( & __cluster_idles idles );
     115
    262116
    263117//=============================================================================================
    264118// Kernel Scheduling logic
    265119//=============================================================================================
    266 static $thread * __next_thread(cluster * this);
    267 static void __run_thread(processor * this, $thread * dst);
    268 static void __halt(processor * this);
    269 
    270120//Main of the processor contexts
    271121void main(processorCtx_t & runner) {
     272122        // Because of a bug, we couldn't initialize the seed on construction
    273123        // Do it here
    274         kernelTLS.rand_seed ^= rdtscl();
     124        __cfaabi_tls.rand_seed ^= rdtscl();
     125        __cfaabi_tls.ready_rng.fwd_seed = 25214903917_l64u * (rdtscl() ^ (uintptr_t)&runner);
     126        __tls_rand_advance_bck();
    275127
    276128        processor * this = runner.proc;
    277129        verify(this);
    278130
    279         __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
    280 
    281         doregister(this->cltr, this);
     131        __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
     132        #if !defined(__CFA_NO_STATISTICS__)
     133                if( this->print_halts ) {
     134                        __cfaabi_bits_print_safe( STDOUT_FILENO, "Processor : %d - %s (%p)\n", this->id, this->name, (void*)this);
     135                }
     136        #endif
    282137
    283138        {
     
    285140                preemption_scope scope = { this };
    286141
    287                 __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
     142                __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
    288143
    289144                $thread * readyThread = 0p;
    290                 for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
     145                MAIN_LOOP:
     146                for() {
     147                        // Try to get the next thread
    291148                        readyThread = __next_thread( this->cltr );
    292149
    293                         if(readyThread) {
    294                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    295                                 /* paranoid */ verifyf( readyThread->state == Inactive || readyThread->state == Start || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
    296                                 /* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
    297 
    298                                 __run_thread(this, readyThread);
    299 
    300                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    301 
    302                                 spin_count = 0;
    303                         } else {
    304                                 // spin(this, &spin_count);
    305                                 __halt(this);
     150                        if( !readyThread ) {
     151                                readyThread = __next_thread_slow( this->cltr );
    306152                        }
    307                 }
    308 
    309                 __cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
    310         }
    311 
    312         unregister(this->cltr, this);
     153
     154                        HALT:
     155                        if( !readyThread ) {
     156                                // Don't block if we are done
     157                                if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     158
     159                                #if !defined(__CFA_NO_STATISTICS__)
     160                                        __tls_stats()->ready.sleep.halts++;
     161                                #endif
     162
     163                                // Push self to idle stack
     164                                push(this->cltr->idles, * this);
     165
     166                                // Confirm the ready-queue is empty
     167                                readyThread = __next_thread_slow( this->cltr );
     168                                if( readyThread ) {
     169                                        // A thread was found, cancel the halt
     170                                        remove(this->cltr->idles, * this);
     171
     172                                        #if !defined(__CFA_NO_STATISTICS__)
     173                                                __tls_stats()->ready.sleep.cancels++;
     174                                        #endif
     175
      176                                        // continue the main loop
     177                                        break HALT;
     178                                }
     179
     180                                #if !defined(__CFA_NO_STATISTICS__)
     181                                        if(this->print_halts) {
     182                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->id, rdtscl());
     183                                        }
     184                                #endif
     185
     186                                wait( this->idle );
     187
     188                                #if !defined(__CFA_NO_STATISTICS__)
     189                                        if(this->print_halts) {
     190                                                __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->id, rdtscl());
     191                                        }
     192                                #endif
     193
      194                                // We were woken up, remove self from the idle list
     195                                remove(this->cltr->idles, * this);
     196
     197                                // DON'T just proceed, start looking again
     198                                continue MAIN_LOOP;
     199                        }
     200
     201                        /* paranoid */ verify( readyThread );
     202
      203                        // We found a thread, run it
     204                        __run_thread(this, readyThread);
     205
     206                        // Are we done?
     207                        if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
     208                }
     209
     210                __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
     211        }
    313212
    314213        V( this->terminated );
    315214
    316         __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
     215        if(this == mainProcessor) {
     216                // HACK : the coroutine context switch expects this_thread to be set
      217                // and it makes sense for it to be set in all other cases except here
     218                // fake it
     219                __cfaabi_tls.this_thread = mainThread;
     220        }
     221
     222        __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
    317223}
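
The HALT block above implements a sleep-with-recheck protocol: the processor first publishes itself on the idle list, then re-polls the ready queue before blocking, so a thread pushed between those two steps cancels the halt instead of being stranded. A condensed C sketch of that ordering, assuming hypothetical try_pop/push_idle/remove_idle helpers and a POSIX semaphore as the idle lock:

        #include <semaphore.h>
        #include <stddef.h>

        struct thread;
        struct processor { sem_t idle; };

        struct thread * try_pop(struct processor *);     /* hypothetical ready-queue pop  */
        void push_idle  (struct processor *);            /* hypothetical idle-list insert */
        void remove_idle(struct processor *);            /* hypothetical idle-list remove */

        struct thread * next_thread(struct processor * proc) {
                for (;;) {
                        struct thread * t = try_pop(proc);
                        if (t) return t;                 /* fast path                     */

                        push_idle(proc);                 /* 1. publish that we are idle   */
                        t = try_pop(proc);               /* 2. recheck: did work arrive?  */
                        if (t) { remove_idle(proc); return t; }

                        sem_wait(&proc->idle);           /* 3. really sleep until a post  */
                        remove_idle(proc);               /* woken: start looking again    */
                }
        }

The recheck in step 2 closes the window where __wake_one would otherwise see no idle processor to post.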
    318224
     
    324230// from the processor coroutine to the target thread
    325231static void __run_thread(processor * this, $thread * thrd_dst) {
     232        /* paranoid */ verify( ! __preemption_enabled() );
     233        /* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
     234        /* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
     235        __builtin_prefetch( thrd_dst->context.SP );
     236
    326237        $coroutine * proc_cor = get_coroutine(this->runner);
    327 
    328         // Update global state
    329         kernelTLS.this_thread = thrd_dst;
    330238
    331239        // set state of processor coroutine to inactive
    332240        verify(proc_cor->state == Active);
    333         proc_cor->state = Inactive;
     241        proc_cor->state = Blocked;
    334242
    335243        // Actually run the thread
    336244        RUNNING:  while(true) {
    337                 if(unlikely(thrd_dst->preempted)) {
    338                         thrd_dst->preempted = __NO_PREEMPTION;
    339                         verify(thrd_dst->state == Active || thrd_dst->state == Rerun);
    340                 } else {
    341                         verify(thrd_dst->state == Start || thrd_dst->state == Primed || thrd_dst->state == Inactive);
    342                         thrd_dst->state = Active;
    343                 }
    344 
    345                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     245                thrd_dst->preempted = __NO_PREEMPTION;
     246                thrd_dst->state = Active;
     247
     248                // Update global state
     249                kernelTLS().this_thread = thrd_dst;
     250
     251                /* paranoid */ verify( ! __preemption_enabled() );
     252                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
     253                /* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
     254                /* paranoid */ verify( thrd_dst->context.SP );
     255                /* paranoid */ verify( thrd_dst->state != Halted );
     256                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
     257                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
     258                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
     259
     260
    346261
    347262                // set context switch to the thread that the processor is executing
    348                 verify( thrd_dst->context.SP );
    349263                __cfactx_switch( &proc_cor->context, &thrd_dst->context );
    350264                // when __cfactx_switch returns we are back in the processor coroutine
    351265
    352                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    353 
     266                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
     267                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
     268                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
     269                /* paranoid */ verify( thrd_dst->context.SP );
     270                /* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
     271                /* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
     272                /* paranoid */ verify( ! __preemption_enabled() );
     273
     274                // Reset global state
     275                kernelTLS().this_thread = 0p;
    354276
    355277                // We just finished running a thread, there are a few things that could have happened.
     356278                // 1 - Regular case : the thread has blocked and no one has scheduled it yet.
    357279                // 2 - Racy case    : the thread has blocked but someone has already tried to schedule it.
    358                 // 3 - Polite Racy case : the thread has blocked, someone has already tried to schedule it, but the thread is nice and wants to go through the ready-queue any way
    359280                // 4 - Preempted
    360281                // In case 1, we may have won a race so we can't write to the state again.
    361282                // In case 2, we lost the race so we now own the thread.
    362                 // In case 3, we lost the race but can just reschedule the thread.
    363283
    364284                if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
     
    368288                }
    369289
     290                if(unlikely(thrd_dst->state == Halting)) {
     291                        // The thread has halted, it should never be scheduled/run again
     292                        // finish the thread
     293                        __thread_finish( thrd_dst );
     294                        break RUNNING;
     295                }
     296
     297                /* paranoid */ verify( thrd_dst->state == Active );
     298                thrd_dst->state = Blocked;
     299
    370300                // set state of processor coroutine to active and the thread to inactive
    371                 static_assert(sizeof(thrd_dst->state) == sizeof(int));
    372                 enum coroutine_state old_state = __atomic_exchange_n(&thrd_dst->state, Inactive, __ATOMIC_SEQ_CST);
    373                 switch(old_state) {
    374                         case Halted:
    375                                 // The thread has halted, it should never be scheduled/run again, leave it back to Halted and move on
    376                                 thrd_dst->state = Halted;
    377 
    378                                 // We may need to wake someone up here since
    379                                 unpark( this->destroyer );
    380                                 this->destroyer = 0p;
    381                                 break RUNNING;
    382                         case Active:
     301                int old_ticket = __atomic_fetch_sub(&thrd_dst->ticket, 1, __ATOMIC_SEQ_CST);
     302                switch(old_ticket) {
     303                        case TICKET_RUNNING:
    383304                                // This is case 1, the regular case, nothing more is needed
    384305                                break RUNNING;
    385                         case Rerun:
     306                        case TICKET_UNBLOCK:
    386307                                // This is case 2, the racy case, someone tried to run this thread before it finished blocking
    387308                                // In this case, just run it again.
     
    389310                        default:
    390311                                // This makes no sense, something is wrong abort
    391                                 abort("Finished running a thread that was Inactive/Start/Primed %d\n", old_state);
     312                                abort();
    392313                }
    393314        }
     
    395316        // Just before returning to the processor, set the processor coroutine to active
    396317        proc_cor->state = Active;
     318
     319        /* paranoid */ verify( ! __preemption_enabled() );
    397320}
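
The new paranoid checks above also verify a stack canary (the canary field added to $thread) on both sides of every context switch, so a stack overrun that corrupts the descriptor is caught at the next switch rather than much later. A minimal C sketch of the technique; CANARY_VALUE mirrors the 0x0D15EA5E0D15EA5E constant used here:

        #include <assert.h>
        #include <stdint.h>

        #define CANARY_VALUE 0x0D15EA5E0D15EA5EULL

        struct toy_thread {
                uint64_t canary;   /* set once at creation, never legally changed */
                /* ... context, stack, ... */
        };

        void toy_thread_init(struct toy_thread * t) {
                t->canary = CANARY_VALUE;
        }

        /* Call before and after switching to/from t. */
        void check_canary(const struct toy_thread * t) {
                assert(t->canary == CANARY_VALUE && "thread descriptor corrupted");
        }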
    398321
    399322// KERNEL_ONLY
    400323void returnToKernel() {
    401         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    402         $coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
    403         $thread * thrd_src = kernelTLS.this_thread;
     324        /* paranoid */ verify( ! __preemption_enabled() );
     325        $coroutine * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
     326        $thread * thrd_src = kernelTLS().this_thread;
     327
     328        #if !defined(__CFA_NO_STATISTICS__)
     329                struct processor * last_proc = kernelTLS().this_processor;
     330        #endif
    404331
    405332        // Run the thread on this processor
     
    409336                        __x87_store;
    410337                #endif
    411                 verify( proc_cor->context.SP );
     338                /* paranoid */ verify( proc_cor->context.SP );
     339                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_src->canary );
    412340                __cfactx_switch( &thrd_src->context, &proc_cor->context );
     341                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_src->canary );
    413342                #if defined( __i386 ) || defined( __x86_64 )
    414343                        __x87_load;
     
    417346        }
    418347
    419         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    420 }
    421 
    422 // KERNEL_ONLY
    423 // Context invoker for processors
    424 // This is the entry point for processors (kernel threads)
    425 // It effectively constructs a coroutine by stealing the pthread stack
    426 static void * __invoke_processor(void * arg) {
    427         processor * proc = (processor *) arg;
    428         kernelTLS.this_processor = proc;
    429         kernelTLS.this_thread    = 0p;
    430         kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
    431         // SKULLDUGGERY: We want to create a context for the processor coroutine
    432         // which is needed for the 2-step context switch. However, there is no reason
    433         // to waste the perfectly valid stack create by pthread.
    434         current_stack_info_t info;
    435         __stack_t ctx;
    436         info.storage = &ctx;
    437         (proc->runner){ proc, &info };
    438 
    439         __cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
    440 
    441         //Set global state
    442         kernelTLS.this_thread = 0p;
    443 
    444         //We now have a proper context from which to schedule threads
    445         __cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
    446 
    447         // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
    448         // resume it to start it like it normally would, it will just context switch
    449         // back to here. Instead directly call the main since we already are on the
    450         // appropriate stack.
    451         get_coroutine(proc->runner)->state = Active;
    452         main( proc->runner );
    453         get_coroutine(proc->runner)->state = Halted;
    454 
    455         // Main routine of the core returned, the core is now fully terminated
    456         __cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
    457 
    458         return 0p;
    459 }
    460 
    461 static void Abort( int ret, const char func[] ) {
    462         if ( ret ) {                                                                            // pthread routines return errno values
    463                 abort( "%s : internal error, error(%d) %s.", func, ret, strerror( ret ) );
    464         } // if
    465 } // Abort
    466 
    467 void * __create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
    468         pthread_attr_t attr;
    469 
    470         Abort( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
    471 
    472         size_t stacksize;
    473         // default stack size, normally defined by shell limit
    474         Abort( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
    475         assert( stacksize >= PTHREAD_STACK_MIN );
    476 
    477         void * stack;
    478         __cfaabi_dbg_debug_do(
    479                 stack = memalign( __page_size, stacksize + __page_size );
    480                 // pthread has no mechanism to create the guard page in user supplied stack.
    481                 if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
    482                         abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
    483                 } // if
    484         );
    485         __cfaabi_dbg_no_debug_do(
    486                 stack = malloc( stacksize );
    487         );
    488 
    489         Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
    490 
    491         Abort( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
    492         return stack;
    493 }
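
The deleted __create_pthread shows the debug-build trick of handing pthread_create a user-supplied stack whose lowest page is mprotect'ed PROT_NONE, since pthreads adds no guard page to user stacks; overflowing then faults immediately instead of silently corrupting memory. A standalone C sketch of the allocation, rounding the size up to whole pages:

        #include <stdlib.h>
        #include <sys/mman.h>
        #include <unistd.h>

        /* Allocate stacksize usable bytes plus one inaccessible guard page below them. */
        void * alloc_guarded_stack(size_t stacksize) {
                size_t page  = (size_t)sysconf(_SC_PAGESIZE);
                size_t total = ((stacksize + page - 1) / page + 1) * page;
                void * stack = aligned_alloc(page, total);
                if (!stack) return NULL;
                /* Stacks grow down, so the lowest page becomes the guard. */
                if (mprotect(stack, page, PROT_NONE) == -1) { free(stack); return NULL; }
                return stack;
        }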
    494 
    495 // KERNEL_ONLY
    496 static void __kernel_first_resume( processor * this ) {
    497         $thread * src = mainThread;
    498         $coroutine * dst = get_coroutine(this->runner);
    499 
    500         verify( ! kernelTLS.preemption_state.enabled );
    501 
    502         kernelTLS.this_thread->curr_cor = dst;
    503         __stack_prepare( &dst->stack, 65000 );
    504         __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
    505 
    506         verify( ! kernelTLS.preemption_state.enabled );
    507 
    508         dst->last = &src->self_cor;
    509         dst->starter = dst->starter ? dst->starter : &src->self_cor;
    510 
    511         // set state of current coroutine to inactive
    512         src->state = src->state == Halted ? Halted : Inactive;
    513 
    514         // context switch to specified coroutine
    515         verify( dst->context.SP );
    516         __cfactx_switch( &src->context, &dst->context );
    517         // when __cfactx_switch returns we are back in the src coroutine
    518 
    519         mainThread->curr_cor = &mainThread->self_cor;
    520 
    521         // set state of new coroutine to active
    522         src->state = Active;
    523 
    524         verify( ! kernelTLS.preemption_state.enabled );
    525 }
    526 
    527 // KERNEL_ONLY
    528 static void __kernel_last_resume( processor * this ) {
    529         $coroutine * src = &mainThread->self_cor;
    530         $coroutine * dst = get_coroutine(this->runner);
    531 
    532         verify( ! kernelTLS.preemption_state.enabled );
    533         verify( dst->starter == src );
    534         verify( dst->context.SP );
    535 
    536         // context switch to the processor
    537         __cfactx_switch( &src->context, &dst->context );
     348        #if !defined(__CFA_NO_STATISTICS__)
     349                if(last_proc != kernelTLS().this_processor) {
     350                        __tls_stats()->ready.threads.migration++;
     351                }
     352        #endif
     353
     354        /* paranoid */ verify( ! __preemption_enabled() );
     355        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
     356        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
    538357}
    539358
     
    541360// Scheduler routines
    542361// KERNEL ONLY
    543 void __schedule_thread( $thread * thrd ) with( *thrd->curr_cluster ) {
    544         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     362void __schedule_thread( $thread * thrd ) {
     363        /* paranoid */ verify( ! __preemption_enabled() );
     364        /* paranoid */ verify( kernelTLS().this_proc_id );
     365        /* paranoid */ verify( thrd );
     366        /* paranoid */ verify( thrd->state != Halted );
     367        /* paranoid */ verify( thrd->curr_cluster );
    545368        /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
    546         /* paranoid */ if( thrd->state == Inactive || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
    547                           "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
    548         /* paranoid */ if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active || thrd->state == Rerun,
    549                           "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
     369        /* paranoid */  if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
     370                                        "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
     371        /* paranoid */  if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active,
     372                                        "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
    550373        /* paranoid */ #endif
    551         /* paranoid */ verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
    552 
    553         lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
    554         bool was_empty = !(ready_queue != 0);
    555         append( ready_queue, thrd );
    556         unlock( ready_queue_lock );
    557 
    558         if(was_empty) {
    559                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    560                 if(idles) {
    561                         wake_fast(idles.head);
    562                 }
    563                 unlock    (proc_list_lock);
    564         }
    565         else if( struct processor * idle = idles.head ) {
    566                 wake_fast(idle);
    567         }
    568 
    569         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     374        /* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
     375        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     376
     377
     378        if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
     379
     380        ready_schedule_lock();
      381                // Dereference the thread now because once we push it, there is no guarantee it's still valid.
     382                struct cluster * cl = thrd->curr_cluster;
     383
     384                // push the thread to the cluster ready-queue
     385                push( cl, thrd );
     386
     387                // variable thrd is no longer safe to use
     388
      389                // wake the cluster using the saved variable.
     390                __wake_one( cl );
     391        ready_schedule_unlock();
     392
     393        /* paranoid */ verify( ! __preemption_enabled() );
    570394}
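
__schedule_thread follows a strict push-then-wake shape under the ready lock, and the comment about dereferencing is load-bearing: once the thread is pushed, another processor may pop and run it, so the local pointer must not be touched again. A generic C sketch of the same shape, with a mutex and condition variable standing in for the CFA ready lock and idle semaphores:

        #include <pthread.h>
        #include <stddef.h>

        struct node { struct node * next; };
        struct ready_queue {
                pthread_mutex_t lock;
                pthread_cond_t  idle;
                struct node *   head;
        };

        void schedule(struct ready_queue * q, struct node * n) {
                pthread_mutex_lock(&q->lock);
                n->next = q->head;
                q->head = n;                     /* visible: n may already be running elsewhere */
                n = NULL;                        /* so stop dereferencing it                    */
                pthread_cond_signal(&q->idle);   /* wake at most one sleeping processor         */
                pthread_mutex_unlock(&q->lock);
        }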
    571395
    572396// KERNEL ONLY
    573 static $thread * __next_thread(cluster * this) with( *this ) {
    574         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    575 
    576         lock( ready_queue_lock __cfaabi_dbg_ctx2 );
    577         $thread * head = pop_head( ready_queue );
    578         unlock( ready_queue_lock );
    579 
    580         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    581         return head;
     397static inline $thread * __next_thread(cluster * this) with( *this ) {
     398        /* paranoid */ verify( ! __preemption_enabled() );
     399        /* paranoid */ verify( kernelTLS().this_proc_id );
     400
     401        ready_schedule_lock();
     402                $thread * thrd = pop( this );
     403        ready_schedule_unlock();
     404
     405        /* paranoid */ verify( kernelTLS().this_proc_id );
     406        /* paranoid */ verify( ! __preemption_enabled() );
     407        return thrd;
     408}
     409
     410// KERNEL ONLY
     411static inline $thread * __next_thread_slow(cluster * this) with( *this ) {
     412        /* paranoid */ verify( ! __preemption_enabled() );
     413        /* paranoid */ verify( kernelTLS().this_proc_id );
     414
     415        ready_schedule_lock();
     416                $thread * thrd = pop_slow( this );
     417        ready_schedule_unlock();
     418
     419        /* paranoid */ verify( kernelTLS().this_proc_id );
     420        /* paranoid */ verify( ! __preemption_enabled() );
     421        return thrd;
    582422}
    583423
     
    585425        if( !thrd ) return;
    586426
    587         disable_interrupts();
    588         static_assert(sizeof(thrd->state) == sizeof(int));
    589         enum coroutine_state old_state = __atomic_exchange_n(&thrd->state, Rerun, __ATOMIC_SEQ_CST);
    590         switch(old_state) {
    591                 case Active:
     427        int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
     428        switch(old_ticket) {
     429                case TICKET_RUNNING:
    592430                        // Wake won the race, the thread will reschedule/rerun itself
    593431                        break;
    594                 case Inactive:
     432                case TICKET_BLOCKED:
    595433                        /* paranoid */ verify( ! thrd->preempted != __NO_PREEMPTION );
    596 
    597                         // Wake lost the race,
    598                         thrd->state = Inactive;
    599                         __schedule_thread( thrd );
     434                        /* paranoid */ verify( thrd->state == Blocked );
     435
     436                        {
     437                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
     438                                bool full = publicTLS_get(this_proc_id)->full_proc;
     439                                if(full) disable_interrupts();
     440
     441                                /* paranoid */ verify( ! __preemption_enabled() );
     442
     443                                // Wake lost the race,
     444                                __schedule_thread( thrd );
     445
     446                                /* paranoid */ verify( ! __preemption_enabled() );
     447
     448                                if(full) enable_interrupts( __cfaabi_dbg_ctx );
     449                                /* paranoid */ verify( publicTLS_get(this_proc_id) );
     450                        }
     451
    600452                        break;
    601                 case Rerun:
    602                         abort("More than one thread attempted to schedule thread %p\n", thrd);
    603                         break;
    604                 case Halted:
    605                 case Start:
    606                 case Primed:
    607453                default:
    608454                        // This makes no sense, something is wrong abort
    609                         abort();
    610         }
     455                        abort("Thread %p (%s) has mismatch park/unpark\n", thrd, thrd->self_cor.name);
     456        }
     457}
     458
     459void park( void ) {
     460        /* paranoid */ verify( __preemption_enabled() );
     461        disable_interrupts();
     462        /* paranoid */ verify( ! __preemption_enabled() );
     463        /* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION );
     464
     465        returnToKernel();
     466
     467        /* paranoid */ verify( ! __preemption_enabled() );
    611468        enable_interrupts( __cfaabi_dbg_ctx );
    612 }
    613 
    614 void park( void ) {
    615         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    616         disable_interrupts();
    617         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    618         /* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
    619 
    620         returnToKernel();
    621 
    622         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    623         enable_interrupts( __cfaabi_dbg_ctx );
    624         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    625 
    626 }
    627 
    628 // KERNEL ONLY
    629 void __leave_thread() {
    630         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    631         returnToKernel();
    632         abort();
     469        /* paranoid */ verify( __preemption_enabled() );
     470
     471}
     472
     473extern "C" {
     474        // Leave the thread monitor
     475        // last routine called by a thread.
     476        // Should never return
     477        void __cfactx_thrd_leave() {
     478                $thread * thrd = active_thread();
     479                $monitor * this = &thrd->self_mon;
     480
     481                // Lock the monitor now
     482                lock( this->lock __cfaabi_dbg_ctx2 );
     483
     484                disable_interrupts();
     485
     486                /* paranoid */ verify( ! __preemption_enabled() );
     487                /* paranoid */ verify( thrd->state == Active );
     488                /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     489                /* paranoid */ verify( kernelTLS().this_thread == thrd );
     490                /* paranoid */ verify( thrd->context.SP );
     491                /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) > ((uintptr_t)__get_stack(thrd->curr_cor)->limit), "ERROR : $thread %p has been corrupted.\n StackPointer too large.\n", thrd );
     492                /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) < ((uintptr_t)__get_stack(thrd->curr_cor)->base ), "ERROR : $thread %p has been corrupted.\n StackPointer too small.\n", thrd );
     493
     494                thrd->state = Halting;
     495                if( TICKET_RUNNING != thrd->ticket ) { abort( "Thread terminated with pending unpark" ); }
     496                if( thrd != this->owner ) { abort( "Thread internal monitor has incorrect owner" ); }
     497                if( this->recursion != 1) { abort( "Thread internal monitor has unbalanced recursion" ); }
     498
     499                // Leave the thread
     500                returnToKernel();
     501
     502                // Control flow should never reach here!
     503                abort();
     504        }
    633505}
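
__cfactx_thrd_leave is the last code a thread ever executes: it locks its own monitor, validates the ticket and monitor invariants, marks itself Halting, and makes a final switch back to the kernel, which finishes it in __run_thread's Halting branch. A tiny C sketch of the never-returns convention, with a hypothetical switch_to_kernel standing in for returnToKernel:

        #include <stdlib.h>
        #include <stdnoreturn.h>

        extern void switch_to_kernel(void);   /* hypothetical final context switch */

        noreturn void thread_leave(void) {
                /* ... mark state Halting, validate invariants ... */
                switch_to_kernel();   /* the kernel reclaims this thread; control never comes back */
                abort();              /* unreachable: catches broken control flow, as above        */
        }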
    634506
    635507// KERNEL ONLY
    636508bool force_yield( __Preemption_Reason reason ) {
    637         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     509        /* paranoid */ verify( __preemption_enabled() );
    638510        disable_interrupts();
    639         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    640 
    641         $thread * thrd = kernelTLS.this_thread;
    642         /* paranoid */ verify(thrd->state == Active || thrd->state == Rerun);
     511        /* paranoid */ verify( ! __preemption_enabled() );
     512
     513        $thread * thrd = kernelTLS().this_thread;
     514        /* paranoid */ verify(thrd->state == Active);
    643515
    644516        // SKULLDUGGERY: It is possible that we are preempting this thread just before
     
    647519        // If that is the case, abandon the preemption.
    648520        bool preempted = false;
    649         if(thrd->next == 0p) {
     521        if(thrd->link.next == 0p) {
    650522                preempted = true;
    651523                thrd->preempted = reason;
     
    653525        }
    654526
    655         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     527        /* paranoid */ verify( ! __preemption_enabled() );
    656528        enable_interrupts_noPoll();
    657         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     529        /* paranoid */ verify( __preemption_enabled() );
    658530
    659531        return preempted;
     
    661533
    662534//=============================================================================================
    663 // Kernel Setup logic
     535// Kernel Idle Sleep
    664536//=============================================================================================
    665 //-----------------------------------------------------------------------------
    666 // Kernel boot procedures
    667 static void __kernel_startup(void) {
    668         verify( ! kernelTLS.preemption_state.enabled );
    669         __cfaabi_dbg_print_safe("Kernel : Starting\n");
    670 
    671         __page_size = sysconf( _SC_PAGESIZE );
    672 
    673         __cfa_dbg_global_clusters.list{ __get };
    674         __cfa_dbg_global_clusters.lock{};
    675 
    676         // Initialize the main cluster
    677         mainCluster = (cluster *)&storage_mainCluster;
    678         (*mainCluster){"Main Cluster"};
    679 
    680         __cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
    681 
    682         // Start by initializing the main thread
    683         // SKULLDUGGERY: the mainThread steals the process main thread
    684         // which will then be scheduled by the mainProcessor normally
    685         mainThread = ($thread *)&storage_mainThread;
    686         current_stack_info_t info;
    687         info.storage = (__stack_t*)&storage_mainThreadCtx;
    688         (*mainThread){ &info };
    689 
    690         __cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
    691 
    692 
    693 
    694         // Construct the processor context of the main processor
    695         void ?{}(processorCtx_t & this, processor * proc) {
    696                 (this.__cor){ "Processor" };
    697                 this.__cor.starter = 0p;
    698                 this.proc = proc;
    699         }
    700 
    701         void ?{}(processor & this) with( this ) {
    702                 name = "Main Processor";
    703                 cltr = mainCluster;
    704                 terminated{ 0 };
    705                 do_terminate = false;
    706                 preemption_alarm = 0p;
    707                 pending_preemption = false;
    708                 kernel_thread = pthread_self();
    709 
    710                 runner{ &this };
    711                 __cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
    712         }
    713 
    714         // Initialize the main processor and the main processor ctx
    715         // (the coroutine that contains the processing control flow)
    716         mainProcessor = (processor *)&storage_mainProcessor;
    717         (*mainProcessor){};
    718 
    719         //initialize the global state variables
    720         kernelTLS.this_processor = mainProcessor;
    721         kernelTLS.this_thread    = mainThread;
    722 
    723         // Enable preemption
    724         kernel_start_preemption();
    725 
    726         // Add the main thread to the ready queue
    727         // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
    728         __schedule_thread(mainThread);
    729 
    730         // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
    731         // context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
    732         // mainThread is on the ready queue when this call is made.
    733         __kernel_first_resume( kernelTLS.this_processor );
    734 
    735 
    736 
    737         // THE SYSTEM IS NOW COMPLETELY RUNNING
    738         __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
    739 
    740         verify( ! kernelTLS.preemption_state.enabled );
     537// Wake a thread from the front if there are any
     538static void __wake_one(cluster * this) {
     539        /* paranoid */ verify( ! __preemption_enabled() );
     540        /* paranoid */ verify( ready_schedule_islocked() );
     541
     542        // Check if there is a sleeping processor
     543        processor * p;
     544        unsigned idle;
     545        unsigned total;
     546        [idle, total, p] = query(this->idles);
     547
     548        // If no one is sleeping, we are done
     549        if( idle == 0 ) return;
     550
     551        // We found a processor, wake it up
     552        post( p->idle );
     553
     554        #if !defined(__CFA_NO_STATISTICS__)
     555                __tls_stats()->ready.sleep.wakes++;
     556        #endif
     557
     558        /* paranoid */ verify( ready_schedule_islocked() );
     559        /* paranoid */ verify( ! __preemption_enabled() );
     560
     561        return;
     562}
     563
      564        // Unconditionally wake a processor
     565void __wake_proc(processor * this) {
     566        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
     567
     568        disable_interrupts();
     569                /* paranoid */ verify( ! __preemption_enabled() );
     570                post( this->idle );
    741571        enable_interrupts( __cfaabi_dbg_ctx );
    742         verify( TL_GET( preemption_state.enabled ) );
    743 }
    744 
    745 static void __kernel_shutdown(void) {
    746         __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
    747 
    748         verify( TL_GET( preemption_state.enabled ) );
    749         disable_interrupts();
    750         verify( ! kernelTLS.preemption_state.enabled );
    751 
    752         // SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
    753         // When its coroutine terminates, it return control to the mainThread
    754         // which is currently here
    755         __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
    756         __kernel_last_resume( kernelTLS.this_processor );
    757         mainThread->self_cor.state = Halted;
    758 
    759         // THE SYSTEM IS NOW COMPLETELY STOPPED
    760 
    761         // Disable preemption
    762         kernel_stop_preemption();
    763 
    764         // Destroy the main processor and its context in reverse order of construction
    765         // These were manually constructed so we need manually destroy them
    766         ^(mainProcessor->runner){};
    767         ^(mainProcessor){};
    768 
    769         // Final step, destroy the main thread since it is no longer needed
     770         // Since we provided a stack to this task it will not destroy anything
    771         ^(mainThread){};
    772 
    773         ^(__cfa_dbg_global_clusters.list){};
    774         ^(__cfa_dbg_global_clusters.lock){};
    775 
    776         __cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
    777 }
    778 
    779 //=============================================================================================
    780 // Kernel Quiescing
    781 //=============================================================================================
    782 static void __halt(processor * this) with( *this ) {
    783         // verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
    784 
    785         with( *cltr ) {
    786                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    787                 remove    (procs, *this);
    788                 push_front(idles, *this);
    789                 unlock    (proc_list_lock);
    790         }
    791 
    792         __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
    793 
    794         wait( idleLock );
    795 
    796         __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
    797 
    798         with( *cltr ) {
    799                 lock      (proc_list_lock __cfaabi_dbg_ctx2);
    800                 remove    (idles, *this);
    801                 push_front(procs, *this);
    802                 unlock    (proc_list_lock);
     572}
     573
     574static void push  (__cluster_idles & this, processor & proc) {
     575        /* paranoid */ verify( ! __preemption_enabled() );
     576        lock( this );
     577                this.idle++;
     578                /* paranoid */ verify( this.idle <= this.total );
     579
     580                insert_first(this.list, proc);
     581        unlock( this );
     582        /* paranoid */ verify( ! __preemption_enabled() );
     583}
     584
     585static void remove(__cluster_idles & this, processor & proc) {
     586        /* paranoid */ verify( ! __preemption_enabled() );
     587        lock( this );
     588                this.idle--;
     589                /* paranoid */ verify( this.idle >= 0 );
     590
     591                remove(proc);
     592        unlock( this );
     593        /* paranoid */ verify( ! __preemption_enabled() );
     594}
     595
     596static [unsigned idle, unsigned total, * processor] query( & __cluster_idles this ) {
     597        for() {
     598                uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
     599                if( 1 == (l % 2) ) { Pause(); continue; }
     600                unsigned idle    = this.idle;
     601                unsigned total   = this.total;
     602                processor * proc = &this.list`first;
      603                // The compiler fence should be unnecessary, but gcc-8 and older incorrectly reorder the code without it
     604                asm volatile("": : :"memory");
     605                if(l != __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST)) { Pause(); continue; }
     606                return [idle, total, proc];
    803607        }
    804608}
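The query routine above is the read half of a sequence lock: the lock word is even when free and odd while a writer holds it, so a reader snapshots the fields and retries if the word was odd or changed underneath it. A minimal stand-alone sketch of the same read-validate pattern, using the same GCC builtins (the struct and names are illustrative, not the library's):

    #include <stdint.h>

    // Sketch of the optimistic read protocol used by query() above.
    struct idles_sketch {
        volatile uint64_t seq;          // even = no writer, odd = writer active
        unsigned idle, total;
    };

    static void read_idles( struct idles_sketch * this, unsigned * idle, unsigned * total ) {
        for(;;) {
            uint64_t l = __atomic_load_n( &this->seq, __ATOMIC_SEQ_CST );
            if( 1 == (l % 2) ) continue;                       // writer active, retry
            *idle  = this->idle;                               // speculative reads
            *total = this->total;
            asm volatile( "" : : : "memory" );                 // keep the reads before the re-check
            if( l == __atomic_load_n( &this->seq, __ATOMIC_SEQ_CST ) ) return;
        }
    }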
     
    814618        // the globalAbort flag is true.
    815619        lock( kernel_abort_lock __cfaabi_dbg_ctx2 );
     620
     621        // disable interrupts, it no longer makes sense to try to interrupt this processor
     622        disable_interrupts();
    816623
    817624        // first task to abort ?
     
    831638        }
    832639
    833         return kernelTLS.this_thread;
     640        return __cfaabi_tls.this_thread;
    834641}
    835642
    836643void kernel_abort_msg( void * kernel_data, char * abort_text, int abort_text_size ) {
    837         $thread * thrd = kernel_data;
     644        $thread * thrd = ( $thread * ) kernel_data;
    838645
    839646        if(thrd) {
     
    856663
    857664int kernel_abort_lastframe( void ) __attribute__ ((__nothrow__)) {
    858         return get_coroutine(kernelTLS.this_thread) == get_coroutine(mainThread) ? 4 : 2;
     665        return get_coroutine(kernelTLS().this_thread) == get_coroutine(mainThread) ? 4 : 2;
    859666}
    860667
     
    883690void ^?{}(semaphore & this) {}
    884691
    885 void P(semaphore & this) with( this ){
     692bool P(semaphore & this) with( this ){
    886693        lock( lock __cfaabi_dbg_ctx2 );
    887694        count -= 1;
    888695        if ( count < 0 ) {
    889696                // queue current task
    890                 append( waiting, kernelTLS.this_thread );
     697                append( waiting, active_thread() );
    891698
    892699                // atomically release spin lock and block
    893700                unlock( lock );
    894701                park();
     702                return true;
    895703        }
    896704        else {
    897705            unlock( lock );
    898         }
    899 }
    900 
    901 void V(semaphore & this) with( this ) {
     706            return false;
     707        }
     708}
     709
     710bool V(semaphore & this) with( this ) {
    902711        $thread * thrd = 0p;
    903712        lock( lock __cfaabi_dbg_ctx2 );
     
    912721        // make new owner
    913722        unpark( thrd );
    914 }
    915 
    916 //-----------------------------------------------------------------------------
    917 // Global Queues
    918 void doregister( cluster     & cltr ) {
    919         lock      ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    920         push_front( __cfa_dbg_global_clusters.list, cltr );
    921         unlock    ( __cfa_dbg_global_clusters.lock );
    922 }
    923 
    924 void unregister( cluster     & cltr ) {
    925         lock  ( __cfa_dbg_global_clusters.lock __cfaabi_dbg_ctx2);
    926         remove( __cfa_dbg_global_clusters.list, cltr );
    927         unlock( __cfa_dbg_global_clusters.lock );
    928 }
    929 
    930 void doregister( cluster * cltr, $thread & thrd ) {
    931         lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    932         cltr->nthreads += 1;
    933         push_front(cltr->threads, thrd);
    934         unlock    (cltr->thread_list_lock);
    935 }
    936 
    937 void unregister( cluster * cltr, $thread & thrd ) {
    938         lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    939         remove(cltr->threads, thrd );
    940         cltr->nthreads -= 1;
    941         unlock(cltr->thread_list_lock);
    942 }
    943 
    944 void doregister( cluster * cltr, processor * proc ) {
    945         lock      (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    946         cltr->nprocessors += 1;
    947         push_front(cltr->procs, *proc);
    948         unlock    (cltr->proc_list_lock);
    949 }
    950 
    951 void unregister( cluster * cltr, processor * proc ) {
    952         lock  (cltr->proc_list_lock __cfaabi_dbg_ctx2);
    953         remove(cltr->procs, *proc );
    954         cltr->nprocessors -= 1;
    955         unlock(cltr->proc_list_lock);
     723
     724        return thrd != 0p;
     725}
     726
     727bool V(semaphore & this, unsigned diff) with( this ) {
     729        lock( lock __cfaabi_dbg_ctx2 );
      730        int release = min(-count, (int)diff);   // wake at most diff waiters, never more than are blocked
     731        count += diff;
     732        for(release) {
     733                unpark( pop_head( waiting ) );
     734        }
     735
     736        unlock( lock );
     737
      738        return release > 0;   // true if any waiting thread was unparked
    956739}
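With the boolean results introduced here, callers can observe whether P actually blocked and whether a V handed its permit to a waiter. A small usage sketch (the initial count of 0 is illustrative):

    semaphore s = { 0 };                 // no permits: the first P must block
    // consumer side:
    bool had_to_park = P( s );           // true: this call parked until a V
    // producer side:
    bool woke_someone = V( s );          // true: a waiting thread was unparked
    if( ! woke_someone ) {
        // no thread was waiting; the permit is banked in the count
    }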
    957740
     
    960743__cfaabi_dbg_debug_do(
    961744        extern "C" {
    962                 void __cfaabi_dbg_record(__spinlock_t & this, const char prev_name[]) {
     745                void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
    963746                        this.prev_name = prev_name;
    964                         this.prev_thrd = kernelTLS.this_thread;
     747                        this.prev_thrd = kernelTLS().this_thread;
    965748                }
    966749        }
     
    972755        return true;
    973756}
     757
     758//-----------------------------------------------------------------------------
     759// Statistics
     760#if !defined(__CFA_NO_STATISTICS__)
     761        void print_halts( processor & this ) {
     762                this.print_halts = true;
     763        }
     764
     765        void print_stats_now( cluster & this, int flags ) {
     766                __print_stats( this.stats, this.print_stats, "Cluster", this.name, (void*)&this );
     767        }
     768
     769        extern int __print_alarm_stats;
     770        void print_alarm_stats() {
     771                __print_alarm_stats = -1;
     772        }
     773#endif
    974774// Local Variables: //
    975775// mode: c //
  • libcfa/src/concurrency/kernel.hfa

    r3c64c668 r58fe85a  
    1616#pragma once
    1717
    18 #include <stdbool.h>
    19 
    2018#include "invoke.h"
    2119#include "time_t.hfa"
    2220#include "coroutine.hfa"
    2321
     22#include "containers/list.hfa"
     23
    2424extern "C" {
    25 #include <pthread.h>
    26 #include <semaphore.h>
     25        #include <bits/pthreadtypes.h>
     26        #include <linux/types.h>
    2727}
    2828
     
    3737void  ?{}(semaphore & this, int count = 1);
    3838void ^?{}(semaphore & this);
    39 void   P (semaphore & this);
    40 void   V (semaphore & this);
     39bool   P (semaphore & this);
     40bool   V (semaphore & this);
     41bool   V (semaphore & this, unsigned count);
    4142
    4243
     
    4546extern struct cluster * mainCluster;
    4647
    47 // Processor
     48// Processor id, required for scheduling threads
     49struct __processor_id_t {
     50        unsigned id:24;
     51        bool full_proc:1;
     52
     53        #if !defined(__CFA_NO_STATISTICS__)
     54                struct __stats_t * stats;
     55        #endif
     56};
     57
    4858coroutine processorCtx_t {
    4959        struct processor * proc;
     
    5161
    5262// Wrapper around kernel threads
    53 struct processor {
     63struct __attribute__((aligned(128))) processor {
    5464        // Main state
     65        inline __processor_id_t;
     66
     67        // Cluster from which to get threads
     68        struct cluster * cltr;
     69
     70        // Set to true to notify the processor should terminate
     71        volatile bool do_terminate;
     72
     5573        // Coroutine ctx that keeps the state of the processor
    5674        struct processorCtx_t runner;
    5775
    58         // Cluster from which to get threads
    59         struct cluster * cltr;
    60 
    6176        // Name of the processor
    6277        const char * name;
     
    6479        // Handle to pthreads
    6580        pthread_t kernel_thread;
    66 
    67         // RunThread data
    68         // Action to do after a thread is ran
    69         $thread * destroyer;
    7081
    7182        // Preemption data
     
    7687        bool pending_preemption;
    7788
    78         // Idle lock
    79         __bin_sem_t idleLock;
    80 
    81         // Termination
    82         // Set to true to notify the processor should terminate
    83         volatile bool do_terminate;
    84 
    85         // Termination synchronisation
     89        // Idle lock (kernel semaphore)
     90        __bin_sem_t idle;
     91
     92        // Termination synchronisation (user semaphore)
    8693        semaphore terminated;
    8794
     
    9097
    9198        // Link lists fields
    92         struct __dbg_node_proc {
    93                 struct processor * next;
    94                 struct processor * prev;
    95         } node;
     99        DLISTED_MGD_IMPL_IN(processor)
     100
     101        #if !defined(__CFA_NO_STATISTICS__)
     102                int print_stats;
     103                bool print_halts;
     104        #endif
    96105
    97106#ifdef __CFA_DEBUG__
     
    108117static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }
    109118
    110 static inline [processor *&, processor *& ] __get( processor & this ) __attribute__((const)) { return this.node.[next, prev]; }
     119DLISTED_MGD_IMPL_OUT(processor)
     120
     121//-----------------------------------------------------------------------------
     122// I/O
     123struct __io_data;
     124
     125// IO poller user-thread
     126// Not using the "thread" keyword because we want to control
     127// more carefully when to start/stop it
     128struct $io_ctx_thread {
     129        struct __io_data * ring;
     130        single_sem sem;
     131        volatile bool done;
     132        $thread self;
     133};
     134
     135
     136struct io_context {
     137        $io_ctx_thread thrd;
     138};
     139
     140struct io_context_params {
     141        int num_entries;
     142        int num_ready;
     143        int submit_aff;
     144        bool eager_submits:1;
     145        bool poller_submits:1;
     146        bool poll_submit:1;
     147        bool poll_complete:1;
     148};
     149
     150void  ?{}(io_context_params & this);
     151
     152void  ?{}(io_context & this, struct cluster & cl);
     153void  ?{}(io_context & this, struct cluster & cl, const io_context_params & params);
     154void ^?{}(io_context & this);
     155
     156struct io_cancellation {
     157        __u64 target;
     158};
     159
     160static inline void  ?{}(io_cancellation & this) { this.target = -1u; }
     161static inline void ^?{}(io_cancellation &) {}
     162bool cancel(io_cancellation & this);
     163
     164//-----------------------------------------------------------------------------
     165// Cluster Tools
     166
      167// Intrusive lanes used by the relaxed ready queue
     168struct __attribute__((aligned(128))) __intrusive_lane_t;
     169void  ?{}(__intrusive_lane_t & this);
     170void ^?{}(__intrusive_lane_t & this);
     171
      172// Counter used to track whether or not the lanes are all empty
     173struct __attribute__((aligned(128))) __snzi_node_t;
     174struct __snzi_t {
     175        unsigned mask;
     176        int root;
     177        __snzi_node_t * nodes;
     178};
     179
     180void  ?{}( __snzi_t & this, unsigned depth );
     181void ^?{}( __snzi_t & this );
     182
     183//TODO adjust cache size to ARCHITECTURE
     184// Structure holding the relaxed ready queue
     185struct __ready_queue_t {
     186        // Data tracking how many/which lanes are used
     187        // Aligned to 128 for cache locality
     188        __snzi_t snzi;
     189
     190        // Data tracking the actual lanes
      191        // On a separate cacheline from the used struct since
     192        // used can change on each push/pop but this data
     193        // only changes on shrink/grow
     194        struct {
      195                // Array of lanes
     196                __intrusive_lane_t * volatile data;
     197
     198                // Number of lanes (empty or not)
     199                volatile size_t count;
     200        } lanes;
     201};
     202
     203void  ?{}(__ready_queue_t & this);
     204void ^?{}(__ready_queue_t & this);
     205
     206// Idle Sleep
     207struct __cluster_idles {
     208        // Spin lock protecting the queue
     209        volatile uint64_t lock;
     210
     211        // Total number of processors
     212        unsigned total;
     213
     214        // Total number of idle processors
     215        unsigned idle;
     216
     217        // List of idle processors
     218        dlist(processor, processor) list;
     219};
    111220
    112221//-----------------------------------------------------------------------------
    113222// Cluster
    114 struct cluster {
    115         // Ready queue locks
    116         __spinlock_t ready_queue_lock;
    117 
     223struct __attribute__((aligned(128))) cluster {
    118224        // Ready queue for threads
    119         __queue_t($thread) ready_queue;
     225        __ready_queue_t ready_queue;
    120226
    121227        // Name of the cluster
     
    125231        Duration preemption_rate;
    126232
    127         // List of processors
    128         __spinlock_t proc_list_lock;
    129         __dllist_t(struct processor) procs;
    130         __dllist_t(struct processor) idles;
    131         unsigned int nprocessors;
     233        // List of idle processors
     234        __cluster_idles idles;
    132235
    133236        // List of threads
     
    141244                cluster * prev;
    142245        } node;
     246
     247        struct {
     248                io_context * ctxs;
     249                unsigned cnt;
     250        } io;
     251
     252        #if !defined(__CFA_NO_STATISTICS__)
     253                struct __stats_t * stats;
     254                int print_stats;
     255        #endif
    143256};
    144257extern Duration default_preemption();
    145258
    146 void ?{} (cluster & this, const char name[], Duration preemption_rate);
     259void ?{} (cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params);
    147260void ^?{}(cluster & this);
    148261
    149 static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
    150 static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
    151 static inline void ?{} (cluster & this, const char name[])        { this{name, default_preemption()}; }
     262static inline void ?{} (cluster & this)                                            { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), 1, default_params}; }
     263static inline void ?{} (cluster & this, Duration preemption_rate)                  { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, 1, default_params}; }
     264static inline void ?{} (cluster & this, const char name[])                         { io_context_params default_params;    this{name, default_preemption(), 1, default_params}; }
     265static inline void ?{} (cluster & this, unsigned num_io)                           { io_context_params default_params;    this{"Anonymous Cluster", default_preemption(), num_io, default_params}; }
     266static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io) { io_context_params default_params;    this{"Anonymous Cluster", preemption_rate, num_io, default_params}; }
     267static inline void ?{} (cluster & this, const char name[], unsigned num_io)        { io_context_params default_params;    this{name, default_preemption(), num_io, default_params}; }
     268static inline void ?{} (cluster & this, const io_context_params & io_params)                                            { this{"Anonymous Cluster", default_preemption(), 1, io_params}; }
     269static inline void ?{} (cluster & this, Duration preemption_rate, const io_context_params & io_params)                  { this{"Anonymous Cluster", preemption_rate, 1, io_params}; }
     270static inline void ?{} (cluster & this, const char name[], const io_context_params & io_params)                         { this{name, default_preemption(), 1, io_params}; }
     271static inline void ?{} (cluster & this, unsigned num_io, const io_context_params & io_params)                           { this{"Anonymous Cluster", default_preemption(), num_io, io_params}; }
     272static inline void ?{} (cluster & this, Duration preemption_rate, unsigned num_io, const io_context_params & io_params) { this{"Anonymous Cluster", preemption_rate, num_io, io_params}; }
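This constructor family allows any combination of name, preemption rate, I/O context count, and I/O parameters to default. For example, a sketch that tunes only the io_uring submission queue size (the values are illustrative):

    io_context_params params;                   // default-initialized by ?{}(io_context_params &)
    params.num_entries = 256;                   // illustrative: larger submission queue
    cluster cl = { "io-cluster", 2, params };   // name, num_io, io_params; default preemption rate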
     273static inline void ?{} (cluster & this, const char name[], unsigned num_io, const io_context_params & io_params)        { this{name, default_preemption(), num_io, io_params}; }
    152274
    153275static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
    154276
    155 static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
    156 static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
     277static inline struct processor * active_processor() { return publicTLS_get( this_processor ); } // UNSAFE
     278static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
     279
     280#if !defined(__CFA_NO_STATISTICS__)
     281        void print_stats_now( cluster & this, int flags );
     282
     283        static inline void print_stats_at_exit( cluster & this, int flags ) {
     284                this.print_stats |= flags;
     285        }
     286
     287        static inline void print_stats_at_exit( processor & this, int flags ) {
     288                this.print_stats |= flags;
     289        }
     290
     291        void print_halts( processor & this );
     292#endif
    157293
    158294// Local Variables: //
  • libcfa/src/concurrency/kernel_private.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Mon Feb 13 12:27:26 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Nov 30 19:25:02 2019
    13 // Update Count     : 8
     12// Last Modified On : Wed Aug 12 08:21:33 2020
     13// Update Count     : 9
    1414//
    1515
     
    2020
    2121#include "alarm.hfa"
    22 
     22#include "stats.hfa"
    2323
    2424//-----------------------------------------------------------------------------
    2525// Scheduler
     26
     27struct __attribute__((aligned(128))) __scheduler_lock_id_t;
    2628
    2729extern "C" {
     
    3133}
    3234
    33 void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
    34 
    35 //Block current thread and release/wake-up the following resources
    36 void __leave_thread() __attribute__((noreturn));
     35void __schedule_thread( $thread * )
     36#if defined(NDEBUG) || (!defined(__CFA_DEBUG__) && !defined(__CFA_VERIFY__))
     37        __attribute__((nonnull (1)))
     38#endif
     39;
     40
     41extern bool __preemption_enabled();
     42
     43//release/wake-up the following resources
     44void __thread_finish( $thread * thrd );
    3745
    3846//-----------------------------------------------------------------------------
     
    4149
    4250void * __create_pthread( pthread_t *, void * (*)(void *), void * );
    43 
    44 static inline void wake_fast(processor * this) {
    45         __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
    46         post( this->idleLock );
    47 }
    48 
    49 static inline void wake(processor * this) {
    50         disable_interrupts();
    51         wake_fast(this);
    52         enable_interrupts( __cfaabi_dbg_ctx );
    53 }
    54 
    55 struct event_kernel_t {
    56         alarm_list_t alarms;
    57         __spinlock_t lock;
    58 };
    59 
    60 extern event_kernel_t * event_kernel;
    61 
    62 struct __cfa_kernel_preemption_state_t {
    63         bool enabled;
    64         bool in_progress;
    65         unsigned short disable_count;
    66 };
    67 
    68 extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
     51void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
     52
     53
     54
     55extern cluster * mainCluster;
    6956
    7057//-----------------------------------------------------------------------------
     
    7966)
    8067
     68#define TICKET_BLOCKED (-1) // thread is blocked
     69#define TICKET_RUNNING ( 0) // thread is running
     70#define TICKET_UNBLOCK ( 1) // thread should ignore next block
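The three values are consecutive integers on purpose: park can take one atomic step down, unpark one atomic step up, and the old value alone decides what to do. A sketch of that handshake only, assuming an illustrative per-thread integer field passed by pointer (the real routines live in kernel.cfa and may differ in detail):

    // Sketch: lost-wakeup-free park/unpark over the ticket states above.
    static void park_sketch( volatile int * ticket ) {
        int old = __atomic_fetch_sub( ticket, 1, __ATOMIC_SEQ_CST );
        if( old == TICKET_RUNNING ) {
            // ticket is now TICKET_BLOCKED: really block until an unpark
        } else /* old == TICKET_UNBLOCK */ {
            // an unpark raced ahead; ticket is back at TICKET_RUNNING, keep going
        }
    }

    static void unpark_sketch( volatile int * ticket ) {
        int old = __atomic_fetch_add( ticket, 1, __ATOMIC_SEQ_CST );
        if( old == TICKET_BLOCKED ) {
            // ticket is now TICKET_RUNNING: hand the thread back to the ready queue
        }
        // old == TICKET_RUNNING: the target has not parked yet, it will see TICKET_UNBLOCK
    }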
     71
    8172//-----------------------------------------------------------------------------
    8273// Utils
    83 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
    84 
    85 static inline uint32_t __tls_rand() {
    86         kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
    87         kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
    88         kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
    89         return kernelTLS.rand_seed;
    90 }
    91 
    92 
    93 void doregister( struct cluster & cltr );
    94 void unregister( struct cluster & cltr );
    95 
    9674void doregister( struct cluster * cltr, struct $thread & thrd );
    9775void unregister( struct cluster * cltr, struct $thread & thrd );
    9876
    99 void doregister( struct cluster * cltr, struct processor * proc );
    100 void unregister( struct cluster * cltr, struct processor * proc );
     77//-----------------------------------------------------------------------------
     78// I/O
     79void ^?{}(io_context & this, bool );
     80
     81//=======================================================================
     82// Cluster lock API
     83//=======================================================================
     84// Cells use by the reader writer lock
     85// while not generic it only relies on a opaque pointer
     86struct __attribute__((aligned(128))) __scheduler_lock_id_t {
     87        // Spin lock used as the underlying lock
     88        volatile bool lock;
     89
     90        // Handle pointing to the proc owning this cell
     91        // Used for allocating cells and debugging
     92        __processor_id_t * volatile handle;
     93
     94        #ifdef __CFA_WITH_VERIFY__
     95                // Debug, check if this is owned for reading
     96                bool owned;
     97        #endif
     98};
     99
     100static_assert( sizeof(struct __scheduler_lock_id_t) <= __alignof(struct __scheduler_lock_id_t));
     101
      102// Lock-free registering/unregistering of processors
     103// Register a processor to a given cluster and get its unique id in return
     104unsigned doregister( struct __processor_id_t * proc );
     105
     106// Unregister a processor from a given cluster using its id, getting back the original pointer
     107void     unregister( struct __processor_id_t * proc );
     108
     109//-----------------------------------------------------------------------
     110// Cluster idle lock/unlock
     111static inline void lock(__cluster_idles & this) {
     112        for() {
     113                uint64_t l = this.lock;
     114                if(
     115                        (0 == (l % 2))
     116                        && __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
     117                ) return;
     118                Pause();
     119        }
     120}
     121
     122static inline void unlock(__cluster_idles & this) {
     123        /* paranoid */ verify( 1 == (this.lock % 2) );
     124        __atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
     125}
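This pair is the writer half of the sequence lock whose reader is query() in kernel.cfa: lock spins until the word is even and bumps it to odd with a compare-and-swap, and unlock bumps it back to even, invalidating any concurrent reader snapshot. A writer-side usage sketch, mirroring push/remove:

    // Sketch: mutating the idle list under the writer lock
    lock( cltr->idles );                 // word becomes odd: readers will retry
        cltr->idles.idle += 1;           // illustrative mutation
    unlock( cltr->idles );               // word becomes even again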
     126
     127//=======================================================================
     128// Reader-writer lock implementation
     129// Concurrent with doregister/unregister,
     130//    i.e., threads can be added at any point during or between the entry/exit
     131
     132//-----------------------------------------------------------------------
     133// simple spinlock underlying the RWLock
     134// Blocking acquire
     135static inline void __atomic_acquire(volatile bool * ll) {
     136        while( __builtin_expect(__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST), false) ) {
     137                while(__atomic_load_n(ll, (int)__ATOMIC_RELAXED))
     138                        Pause();
     139        }
     140        /* paranoid */ verify(*ll);
     141}
     142
     143// Non-Blocking acquire
     144static inline bool __atomic_try_acquire(volatile bool * ll) {
     145        return !__atomic_exchange_n(ll, (bool)true, __ATOMIC_SEQ_CST);
     146}
     147
     148// Release
     149static inline void __atomic_unlock(volatile bool * ll) {
     150        /* paranoid */ verify(*ll);
     151        __atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
     152}
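Together these three helpers form a test-and-test-and-set spinlock over a single bool: the acquire path spins on plain loads, which stay cache-local, and only retries the expensive exchange once the lock looks free. A usage sketch:

    static volatile bool latch = false;          // illustrative lock word
    void critical_sketch( void ) {
        __atomic_acquire( &latch );              // blocking acquire
        /* ... critical section ... */
        __atomic_unlock( &latch );

        if( __atomic_try_acquire( &latch ) ) {   // non-blocking variant
            /* ... opportunistic work ... */
            __atomic_unlock( &latch );
        }
    }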
     153
     154//-----------------------------------------------------------------------
     155// Reader-Writer lock protecting the ready-queues
      156// while this lock is mostly generic, some aspects
      157// have been hard-coded for the ready-queue for
     158// simplicity and performance
     159struct __scheduler_RWLock_t {
     160        // total cachelines allocated
     161        unsigned int max;
     162
     163        // cachelines currently in use
     164        volatile unsigned int alloc;
     165
      166        // cachelines ready to iterate over
     167        // (!= to alloc when thread is in second half of doregister)
     168        volatile unsigned int ready;
     169
     170        // writer lock
     171        volatile bool lock;
     172
     173        // data pointer
     174        __scheduler_lock_id_t * data;
     175};
     176
     177void  ?{}(__scheduler_RWLock_t & this);
     178void ^?{}(__scheduler_RWLock_t & this);
     179
     180extern __scheduler_RWLock_t * __scheduler_lock;
     181
     182//-----------------------------------------------------------------------
     183// Reader side : acquire when using the ready queue to schedule but not
     184//  creating/destroying queues
     185static inline void ready_schedule_lock(void) with(*__scheduler_lock) {
     186        /* paranoid */ verify( ! __preemption_enabled() );
     187        /* paranoid */ verify( kernelTLS().this_proc_id );
     188
     189        unsigned iproc = kernelTLS().this_proc_id->id;
     190        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     191        /*paranoid*/ verify(iproc < ready);
     192
     193        // Step 1 : make sure no writer are in the middle of the critical section
     194        while(__atomic_load_n(&lock, (int)__ATOMIC_RELAXED))
     195                Pause();
     196
     197        // Fence needed because we don't want to start trying to acquire the lock
     198        // before we read a false.
     199        // Not needed on x86
     200        // std::atomic_thread_fence(std::memory_order_seq_cst);
     201
     202        // Step 2 : acquire our local lock
     203        __atomic_acquire( &data[iproc].lock );
     204        /*paranoid*/ verify(data[iproc].lock);
     205
     206        #ifdef __CFA_WITH_VERIFY__
     207                // Debug, check if this is owned for reading
     208                data[iproc].owned = true;
     209        #endif
     210}
     211
     212static inline void ready_schedule_unlock(void) with(*__scheduler_lock) {
     213        /* paranoid */ verify( ! __preemption_enabled() );
     214        /* paranoid */ verify( kernelTLS().this_proc_id );
     215
     216        unsigned iproc = kernelTLS().this_proc_id->id;
     217        /*paranoid*/ verify(data[iproc].handle == kernelTLS().this_proc_id);
     218        /*paranoid*/ verify(iproc < ready);
     219        /*paranoid*/ verify(data[iproc].lock);
     220        /*paranoid*/ verify(data[iproc].owned);
     221        #ifdef __CFA_WITH_VERIFY__
     222                // Debug, check if this is owned for reading
     223                data[iproc].owned = false;
     224        #endif
     225        __atomic_unlock(&data[iproc].lock);
     226}
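The reader side is taken with preemption already disabled and is meant to bracket short scheduler operations; a usage sketch of the expected pairing:

    // Sketch: reader-side critical section around a ready-queue operation
    disable_interrupts();
        ready_schedule_lock();
        // e.g. push( cltr, thrd ) or pop( cltr )
        ready_schedule_unlock();
    enable_interrupts( __cfaabi_dbg_ctx );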
     227
     228#ifdef __CFA_WITH_VERIFY__
     229        static inline bool ready_schedule_islocked(void) {
     230                /* paranoid */ verify( ! __preemption_enabled() );
     231                /*paranoid*/ verify( kernelTLS().this_proc_id );
     232                __processor_id_t * proc = kernelTLS().this_proc_id;
     233                return __scheduler_lock->data[proc->id].owned;
     234        }
     235
     236        static inline bool ready_mutate_islocked() {
     237                return __scheduler_lock->lock;
     238        }
     239#endif
     240
     241//-----------------------------------------------------------------------
     242// Writer side : acquire when changing the ready queue, e.g. adding more
     243//  queues or removing them.
     244uint_fast32_t ready_mutate_lock( void );
     245
     246void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
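The writer side excludes every reader at once and returns a value that must be handed back on unlock; a usage sketch, e.g. around a resize:

    // Sketch: writer-side usage when changing the number of lanes
    uint_fast32_t last = ready_mutate_lock();
        ready_queue_grow( cltr, target );    // all readers are excluded here
    ready_mutate_unlock( last );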
     247
     248//=======================================================================
     249// Ready-Queue API
     250//-----------------------------------------------------------------------
      251// query whether the ready queue of a cluster contains any threads
      252// returns false if empty
     253__attribute__((hot)) bool query(struct cluster * cltr);
     254
     255//-----------------------------------------------------------------------
     256// push thread onto a ready queue for a cluster
     257// returns true if the list was previously empty, false otherwise
     258__attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
     259
     260//-----------------------------------------------------------------------
     261// pop thread from the ready queue of a cluster
     262// returns 0p if empty
     263// May return 0p spuriously
     264__attribute__((hot)) struct $thread * pop(struct cluster * cltr);
     265
     266//-----------------------------------------------------------------------
     267// pop thread from the ready queue of a cluster
     268// returns 0p if empty
     269// guaranteed to find any threads added before this call
     270__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr);
     271
     272//-----------------------------------------------------------------------
     273// remove thread from the ready queue of a cluster
      274// returns false if it wasn't found
     275bool remove_head(struct cluster * cltr, struct $thread * thrd);
     276
     277//-----------------------------------------------------------------------
     278// Increase the width of the ready queue (number of lanes) by 4
     279void ready_queue_grow  (struct cluster * cltr, int target);
     280
     281//-----------------------------------------------------------------------
     282// Decrease the width of the ready queue (number of lanes) by 4
     283void ready_queue_shrink(struct cluster * cltr, int target);
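Since pop may fail spuriously while pop_slow is exhaustive, a scheduler loop typically retries the fast path a few times before falling back; a sketch of that composition (the retry count is illustrative):

    static struct $thread * find_work( struct cluster * cltr ) {
        for( unsigned i = 0; i < 4; i += 1 ) {
            struct $thread * t = pop( cltr );    // may spuriously return 0p
            if( t ) return t;
        }
        return pop_slow( cltr );                 // finds anything pushed before this call
    }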
     284
    101285
    102286// Local Variables: //
  • libcfa/src/concurrency/monitor.cfa

    r3c64c668 r58fe85a  
    8282// Enter single monitor
    8383static void __enter( $monitor * this, const __monitor_group_t & group ) {
     84        $thread * thrd = active_thread();
     85
    8486        // Lock the monitor spinlock
    8587        lock( this->lock __cfaabi_dbg_ctx2 );
    86         // Interrupts disable inside critical section
    87         $thread * thrd = kernelTLS.this_thread;
    8888
    8989        __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
    9090
    91         if( !this->owner ) {
     91        if( unlikely(0 != (0x1 & (uintptr_t)this->owner)) ) {
     92                abort( "Attempt by thread \"%.256s\" (%p) to access joined monitor %p.", thrd->self_cor.name, thrd, this );
     93        }
     94        else if( !this->owner ) {
    9295                // No one has the monitor, just take it
    9396                __set_owner( this, thrd );
     
    114117
    115118                // Some one else has the monitor, wait in line for it
    116                 /* paranoid */ verify( thrd->next == 0p );
     119                /* paranoid */ verify( thrd->link.next == 0p );
    117120                append( this->entry_queue, thrd );
    118                 /* paranoid */ verify( thrd->next == 1p );
     121                /* paranoid */ verify( thrd->link.next == 1p );
    119122
    120123                unlock( this->lock );
     
    123126                __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    124127
    125                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     128                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    126129                return;
    127130        }
     
    129132        __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    130133
    131         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     134        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    132135        /* paranoid */ verify( this->lock.lock );
    133136
     
    137140}
    138141
    139 static void __dtor_enter( $monitor * this, fptr_t func ) {
     142static void __dtor_enter( $monitor * this, fptr_t func, bool join ) {
     143        $thread * thrd = active_thread();
     144        #if defined( __CFA_WITH_VERIFY__ )
     145                bool is_thrd = this == &thrd->self_mon;
     146        #endif
     147
    140148        // Lock the monitor spinlock
    141149        lock( this->lock __cfaabi_dbg_ctx2 );
    142         // Interrupts disable inside critical section
    143         $thread * thrd = kernelTLS.this_thread;
    144150
    145151        __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
     
    152158                __set_owner( this, thrd );
    153159
    154                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     160                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     161                /* paranoid */ verify( !is_thrd || thrd->state == Halted || thrd->state == Cancelled );
    155162
    156163                unlock( this->lock );
    157164                return;
    158165        }
    159         else if( this->owner == thrd) {
     166        else if( this->owner == thrd && !join) {
    160167                // We already have the monitor... but where about to destroy it so the nesting will fail
    161168                // Abort!
    162169                abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
    163170        }
      171        // SKULLDUGGERY: join acts as a dtor, so it would normally trigger the above check
      172        // because join does not release the monitor after it executes.
      173        // To avoid that, it sets the owner to the special value thrd | 1p before exiting.
     174        else if( this->owner == ($thread*)(1 | (uintptr_t)thrd) ) {
     175                // restore the owner and just return
     176                __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
     177
     178                // No one has the monitor, just take it
     179                __set_owner( this, thrd );
     180
     181                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     182                /* paranoid */ verify( !is_thrd || thrd->state == Halted || thrd->state == Cancelled );
     183
     184                unlock( this->lock );
     185                return;
     186        }
     187
      188        // The monitor is busy; if this is a thread and the thread owns itself, it had better be active
     189        /* paranoid */ verify( !is_thrd || this->owner != thrd || (thrd->state != Halted && thrd->state != Cancelled) );
    164190
    165191        __lock_size_t count = 1;
     
    183209
    184210                // Release the next thread
    185                 /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     211                /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    186212                unpark( urgent->owner->waiting_thread );
    187213
     
    190216
     191217                // Someone was waiting for us, enter
    192                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     218                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     219
     220                __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
     221                return;
    193222        }
    194223        else {
     
    199228
     200229                // Someone else has the monitor, wait in line for it
    201                 /* paranoid */ verify( thrd->next == 0p );
     230                /* paranoid */ verify( thrd->link.next == 0p );
    202231                append( this->entry_queue, thrd );
    203                 /* paranoid */ verify( thrd->next == 1p );
     232                /* paranoid */ verify( thrd->link.next == 1p );
    204233                unlock( this->lock );
    205234
     
    207236                park();
    208237
    209                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     238                /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    210239                return;
    211240        }
    212 
    213         __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
    214 
    215241}
    216242
     
    220246        lock( this->lock __cfaabi_dbg_ctx2 );
    221247
    222         __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
    223 
    224         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     248        __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", active_thread(), this, this->owner);
     249
     250        /* paranoid */ verifyf( active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    225251
    226252        // Leaving a recursion level, decrement the counter
     
    251277
    252278// Leave single monitor for the last time
    253 void __dtor_leave( $monitor * this ) {
     279void __dtor_leave( $monitor * this, bool join ) {
    254280        __cfaabi_dbg_debug_do(
    255                 if( TL_GET( this_thread ) != this->owner ) {
    256                         abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
     281                if( active_thread() != this->owner ) {
     282                        abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, active_thread(), this->owner);
    257283                }
    258                 if( this->recursion != 1 ) {
     284                if( this->recursion != 1  && !join ) {
    259285                        abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    260286                }
    261287        )
    262 }
    263 
    264 extern "C" {
    265         // Leave the thread monitor
    266         // last routine called by a thread.
    267         // Should never return
    268         void __cfactx_thrd_leave() {
    269                 $thread * thrd = TL_GET( this_thread );
    270                 $monitor * this = &thrd->self_mon;
    271 
    272                 // Lock the monitor now
    273                 lock( this->lock __cfaabi_dbg_ctx2 );
    274 
    275                 disable_interrupts();
    276 
    277                 thrd->state = Halted;
    278 
    279                 /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
    280 
    281                 // Leaving a recursion level, decrement the counter
    282                 this->recursion -= 1;
    283 
    284                 // If we haven't left the last level of recursion
    285                 // it must mean there is an error
    286                 if( this->recursion != 0) { abort( "Thread internal monitor has unbalanced recursion" ); }
    287 
    288                 // Fetch the next thread, can be null
    289                 $thread * new_owner = next_thread( this );
    290 
    291                 // Release the monitor lock
    292                 unlock( this->lock );
    293 
    294                 // Unpark the next owner if needed
    295                 /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
    296                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    297                 /* paranoid */ verify( ! kernelTLS.this_processor->destroyer );
    298                 /* paranoid */ verify( thrd->state == Halted );
    299 
    300                 kernelTLS.this_processor->destroyer = new_owner;
    301 
    302                 // Leave the thread
    303                 __leave_thread();
    304 
    305                 // Control flow should never reach here!
    306         }
     288
     289        this->owner = ($thread*)(1 | (uintptr_t)this->owner);
     290}
     291
     292void __thread_finish( $thread * thrd ) {
     293        $monitor * this = &thrd->self_mon;
     294
     295        // Lock the monitor now
     296        /* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
     297        /* paranoid */ verify( this->lock.lock );
     298        /* paranoid */ verify( thrd->context.SP );
     299        /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) > ((uintptr_t)__get_stack(thrd->curr_cor)->limit), "ERROR : $thread %p has been corrupted.\n StackPointer too large.\n", thrd );
     300        /* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) < ((uintptr_t)__get_stack(thrd->curr_cor)->base ), "ERROR : $thread %p has been corrupted.\n StackPointer too small.\n", thrd );
     301        /* paranoid */ verify( ! __preemption_enabled() );
     302
     303        /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
     304        /* paranoid */ verify( thrd->state == Halting );
     305        /* paranoid */ verify( this->recursion == 1 );
     306
     307        // Leaving a recursion level, decrement the counter
     308        this->recursion -= 1;
     309        this->owner = 0p;
     310
     311        // Fetch the next thread, can be null
     312        $thread * new_owner = next_thread( this );
     313
     314        // Mark the state as fully halted
     315        thrd->state = Halted;
     316
     317        // Release the monitor lock
     318        unlock( this->lock );
     319
     320        // Unpark the next owner if needed
     321        /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     322        /* paranoid */ verify( ! __preemption_enabled() );
     323        /* paranoid */ verify( thrd->state == Halted );
     324        unpark( new_owner );
    307325}
    308326
     
    326344// Sorts monitors before entering
    327345void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
    328         $thread * thrd = TL_GET( this_thread );
     346        $thread * thrd = active_thread();
    329347
    330348        // Store current array
     
    361379
    362380        // Restore thread context
    363         TL_GET( this_thread )->monitors = this.prev;
     381        active_thread()->monitors = this.prev;
    364382}
    365383
    366384// Ctor for monitor guard
    367385// Sorts monitors before entering
    368 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func ) {
     386void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) {
    369387        // optimization
    370         $thread * thrd = TL_GET( this_thread );
     388        $thread * thrd = active_thread();
    371389
    372390        // Store current array
     
    376394        this.prev = thrd->monitors;
    377395
     396        // Save whether we are in a join or not
     397        this.join = join;
     398
    378399        // Update thread context (needed for conditions)
    379400        (thrd->monitors){m, 1, func};
    380401
    381         __dtor_enter( this.m, func );
     402        __dtor_enter( this.m, func, join );
    382403}
    383404
     
    385406void ^?{}( monitor_dtor_guard_t & this ) {
    386407        // Leave the monitors in order
    387         __dtor_leave( this.m );
     408        __dtor_leave( this.m, this.join );
    388409
    389410        // Restore thread context
    390         TL_GET( this_thread )->monitors = this.prev;
     411        active_thread()->monitors = this.prev;
    391412}
    392413
     
    428449
    429450        // Create the node specific to this wait operation
    430         wait_ctx( TL_GET( this_thread ), user_info );
     451        wait_ctx( active_thread(), user_info );
    431452
    432453        // Append the current wait operation to the ones already queued on the condition
     
    479500        //Some more checking in debug
    480501        __cfaabi_dbg_debug_do(
    481                 $thread * this_thrd = TL_GET( this_thread );
     502                $thread * this_thrd = active_thread();
    482503                if ( this.monitor_count != this_thrd->monitors.size ) {
    483504                        abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
     
    527548
    528549        // Create the node specific to this wait operation
    529         wait_ctx_primed( kernelTLS.this_thread, 0 )
     550        wait_ctx_primed( active_thread(), 0 )
    530551
    531552        //save contexts
     
    534555        //Find the thread to run
    535556        $thread * signallee = pop_head( this.blocked )->waiting_thread;
    536         /* paranoid */ verify( signallee->next == 0p );
    537557        __set_owner( monitors, count, signallee );
    538558
     
    627647
    628648                                // Create the node specific to this wait operation
    629                                 wait_ctx_primed( kernelTLS.this_thread, 0 );
     649                                wait_ctx_primed( active_thread(), 0 );
    630650
    631651                                // Save monitor states
     
    679699
    680700        // Create the node specific to this wait operation
    681         wait_ctx_primed( kernelTLS.this_thread, 0 );
     701        wait_ctx_primed( active_thread(), 0 );
    682702
    683703        monitor_save;
     
    685705
    686706        for( __lock_size_t i = 0; i < count; i++) {
    687                 verify( monitors[i]->owner == kernelTLS.this_thread );
     707                verify( monitors[i]->owner == active_thread() );
    688708        }
    689709
     
    721741static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
    722742        /* paranoid */ verify ( monitors[0]->lock.lock );
    723         /* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
     743        /* paranoid */ verifyf( monitors[0]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[0]->owner, monitors[0]->recursion, monitors[0] );
    724744        monitors[0]->owner        = owner;
    725745        monitors[0]->recursion    = 1;
    726746        for( __lock_size_t i = 1; i < count; i++ ) {
    727747                /* paranoid */ verify ( monitors[i]->lock.lock );
    728                 /* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
     748                /* paranoid */ verifyf( monitors[i]->owner == active_thread(), "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), monitors[i]->owner, monitors[i]->recursion, monitors[i] );
    729749                monitors[i]->owner        = owner;
    730750                monitors[i]->recursion    = 0;
     
    752772                //regardless of if we are ready to baton pass,
    753773                //we need to set the monitor as in use
    754                 /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     774                /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
    755775                __set_owner( this,  urgent->owner->waiting_thread );
    756776
     
    761781        // Get the next thread in the entry_queue
    762782        $thread * new_owner = pop_head( this->entry_queue );
    763         /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    764         /* paranoid */ verify( !new_owner || new_owner->next == 0p );
     783        /* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
     784        /* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
    765785        __set_owner( this, new_owner );
    766786
     
    884904        }
    885905
     886         __cfaabi_dbg_print_safe( "Kernel :  Running %i (%p)\n", ready2run, ready2run ? node->waiting_thread : 0p );
      906        __cfaabi_dbg_print_safe( "Kernel :  Running %i (%p)\n", ready2run, ready2run ? (thread*)node->waiting_thread : (thread*)0p );
    887907        return ready2run ? node->waiting_thread : 0p;
    888908}
    889909
    890910static inline void brand_condition( condition & this ) {
    891         $thread * thrd = TL_GET( this_thread );
     911        $thread * thrd = active_thread();
    892912        if( !this.monitors ) {
    893913                // __cfaabi_dbg_print_safe( "Branding\n" );
     
    908928        // For each thread in the entry-queue
    909929        for(    $thread ** thrd_it = &entry_queue.head;
    910                 *thrd_it != 1p;
    911                 thrd_it = &(*thrd_it)->next
     930                (*thrd_it) != 1p;
     931                thrd_it = &(*thrd_it)->link.next
    912932        ) {
    913933                // For each acceptable check if it matches
  • libcfa/src/concurrency/monitor.hfa

    r3c64c668 r58fe85a  
    5353        $monitor *    m;
    5454        __monitor_group_t prev;
     55        bool join;
    5556};
    5657
    57 void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)() );
     58void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)(), bool join );
    5859void ^?{}( monitor_dtor_guard_t & this );
    5960
     
    131132
    132133              void wait        ( condition & this, uintptr_t user_info = 0 );
     134static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
    133135              bool signal      ( condition & this );
    134136              bool signal_block( condition & this );
    135 static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
     137static inline bool signal_all  ( condition & this ) { bool ret = false; while(!is_empty(this)) { ret = signal(this) || ret; } return ret; }
    136138         uintptr_t front       ( condition & this );
    137139
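signal_all simply drains the condition by calling signal until is_empty holds, reporting whether anything was woken. A minimal monitor sketch using it (the Flag type is illustrative):

    monitor Flag { condition c; bool ready; };
    void wait_ready( Flag & mutex f ) {
        while( ! f.ready ) wait( f.c );      // blocks, releasing the monitor
    }
    void set_ready( Flag & mutex f ) {
        f.ready = true;
        signal_all( f.c );                   // wakes every current waiter
    }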
  • libcfa/src/concurrency/mutex.cfa

    r3c64c668 r58fe85a  
    3030        this.lock{};
    3131        this.blocked_threads{};
     32        this.is_locked = false;
    3233}
    3334
     
    3940        lock( lock __cfaabi_dbg_ctx2 );
    4041        if( is_locked ) {
    41                 append( blocked_threads, kernelTLS.this_thread );
     42                append( blocked_threads, active_thread() );
    4243                unlock( lock );
    4344                park();
     
    8586        lock( lock __cfaabi_dbg_ctx2 );
    8687        if( owner == 0p ) {
    87                 owner = kernelTLS.this_thread;
     88                owner = active_thread();
    8889                recursion_count = 1;
    8990                unlock( lock );
    9091        }
    91         else if( owner == kernelTLS.this_thread ) {
     92        else if( owner == active_thread() ) {
    9293                recursion_count++;
    9394                unlock( lock );
    9495        }
    9596        else {
    96                 append( blocked_threads, kernelTLS.this_thread );
     97                append( blocked_threads, active_thread() );
    9798                unlock( lock );
    9899                park();
     
    104105        lock( lock __cfaabi_dbg_ctx2 );
    105106        if( owner == 0p ) {
    106                 owner = kernelTLS.this_thread;
     107                owner = active_thread();
    107108                recursion_count = 1;
    108109                ret = true;
    109110        }
    110         else if( owner == kernelTLS.this_thread ) {
     111        else if( owner == active_thread() ) {
    111112                recursion_count++;
    112113                ret = true;
     
    158159void wait(condition_variable & this) {
    159160        lock( this.lock __cfaabi_dbg_ctx2 );
    160         append( this.blocked_threads, kernelTLS.this_thread );
     161        append( this.blocked_threads, active_thread() );
    161162        unlock( this.lock );
    162163        park();
     
    166167void wait(condition_variable & this, L & l) {
    167168        lock( this.lock __cfaabi_dbg_ctx2 );
    168         append( this.blocked_threads, kernelTLS.this_thread );
     169        append( this.blocked_threads, active_thread() );
    169170        unlock(l);
    170171        unlock(this.lock);
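
Reassembled from the hunks above, the recursive acquire path is a three-way test on the owner field. A sketch of the merged result (the enclosing lock type is elided from this hunk, so the name recursive_mutex_lock is assumed):

        void lock( recursive_mutex_lock & this ) with( this ) {
                lock( lock __cfaabi_dbg_ctx2 );
                if( owner == 0p ) {                          // free: claim it
                        owner = active_thread();
                        recursion_count = 1;
                        unlock( lock );
                } else if( owner == active_thread() ) {      // already ours: count the re-entry
                        recursion_count++;
                        unlock( lock );
                } else {                                     // contended: queue up and park
                        append( blocked_threads, active_thread() );
                        unlock( lock );
                        park();
                }
        }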
  • libcfa/src/concurrency/preemption.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Mon Jun 5 14:20:42 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Dec  5 16:34:05 2019
    13 // Update Count     : 43
     12// Last Modified On : Fri Nov  6 07:42:13 2020
     13// Update Count     : 54
    1414//
    1515
     
    1919#include <assert.h>
    2020
    21 extern "C" {
    2221#include <errno.h>
    2322#include <stdio.h>
     
    2524#include <unistd.h>
    2625#include <limits.h>                                                                             // PTHREAD_STACK_MIN
    27 }
    2826
    2927#include "bits/signal.hfa"
     28#include "kernel_private.hfa"
    3029
    3130#if !defined(__CFA_DEFAULT_PREEMPTION__)
     
    4342// FwdDeclarations : Signal handlers
    4443static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
     44static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
    4545static void sigHandler_segv     ( __CFA_SIGPARMS__ );
    4646static void sigHandler_ill      ( __CFA_SIGPARMS__ );
     
    5656#elif defined( __x86_64 )
    5757#define CFA_REG_IP gregs[REG_RIP]
    58 #elif defined( __ARM_ARCH )
     58#elif defined( __arm__ )
    5959#define CFA_REG_IP arm_pc
     60#elif defined( __aarch64__ )
     61#define CFA_REG_IP pc
    6062#else
    61 #error unknown hardware architecture
     63#error unsupported hardware architecture
    6264#endif
    6365
     
    8385// Get next expired node
    8486static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    85         if( !alarms->head ) return 0p;                                          // If no alarms return null
    86         if( alarms->head->alarm >= currtime ) return 0p;        // If alarms head not expired return null
     87        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
     88        if( (*alarms)`first.alarm >= currtime ) return 0p;      // If alarms head not expired return null
    8789        return pop(alarms);                                                                     // Otherwise just pop head
    8890}
    8991
    9092// Tick one frame of the Discrete Event Simulation for alarms
    91 static void tick_preemption() {
     93static void tick_preemption(void) {
    9294        alarm_node_t * node = 0p;                                                       // Used in the while loop but cannot be declared in the while condition
    9395        alarm_list_t * alarms = &event_kernel->alarms;          // Local copy for ease of reading
     
    9799        while( node = get_expired( alarms, currtime ) ) {
    98100                // __cfaabi_dbg_print_buffer_decl( " KERNEL: preemption tick.\n" );
     101                Duration period = node->period;
     102                if( period == 0) {
     103                        node->set = false;                  // Node is one-shot, just mark it as not pending
     104                }
    99105
    100106                // Check if this is a kernel
    101                 if( node->kernel_alarm ) {
     107                if( node->type == Kernel ) {
    102108                        preempt( node->proc );
    103109                }
     110                else if( node->type == User ) {
     111                        timeout( node->thrd );
     112                }
    104113                else {
    105                         timeout( node->thrd );
     114                        node->callback(*node);
    106115                }
    107116
    108117                // Check if this is a periodic alarm
    109                 Duration period = node->period;
    110118                if( period > 0 ) {
    111119                        // __cfaabi_dbg_print_buffer_local( " KERNEL: alarm period is %lu.\n", period.tv );
     
    113121                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    114122                }
    115                 else {
    116                         node->set = false;                  // Node is one-shot, just mark it as not pending
    117                 }
    118123        }
    119124
    120125        // If there are still alarms pending, reset the timer
    121         if( alarms->head ) {
    122                 __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
    123                 Duration delta = alarms->head->alarm - currtime;
    124                 Duration caped = max(delta, 50`us);
     126        if( & (*alarms)`first ) {
     127                __cfadbg_print_buffer_decl(preemption, " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
     128                Duration delta = (*alarms)`first.alarm - currtime;
     129                Duration capped = max(delta, 50`us);
    125130                // itimerval tim  = { caped };
    126131                // __cfaabi_dbg_print_buffer_local( "    Values are %lu, %lu, %lu %lu.\n", delta.tv, caped.tv, tim.it_value.tv_sec, tim.it_value.tv_usec);
    127132
    128                 __kernel_set_timer( caped );
     133                __kernel_set_timer( capped );
    129134        }
    130135}
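
Pieced together from the hunks above, the new tick loop dispatches three alarm kinds and clears one-shot nodes before dispatch (so a callback may safely re-arm its own node). A sketch of the merged result; the currtime fetch and the rearm arithmetic are elided from the diff and assumed here:

        static void tick_preemption(void) {
                alarm_node_t * node = 0p;
                alarm_list_t * alarms = &event_kernel->alarms;
                Time currtime = __kernel_get_time();                     // assumed
                while( node = get_expired( alarms, currtime ) ) {
                        Duration period = node->period;
                        if( period == 0 ) node->set = false;             // one-shot: clear before dispatch

                        if( node->type == Kernel )    preempt( node->proc );    // kernel-thread alarm
                        else if( node->type == User ) timeout( node->thrd );    // now unparks the thread
                        else                          node->callback( *node );  // arbitrary callback

                        if( period > 0 ) {
                                node->alarm = currtime + period;         // assumed rearm step
                                insert( alarms, node );
                        }
                }
        }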
     
    158163// Kernel Signal Tools
    159164//=============================================================================================
    160 
    161 __cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
      165// In a user-level threading system, there are a handful of thread-local variables where the following problem occurs on the ARM.
     166//
     167// For each kernel thread running user-level threads, there is a flag variable to indicate if interrupts are
     168// enabled/disabled for that kernel thread. Therefore, this variable is made thread local.
     169//
      170// For example, this code fragment sets the state of the "interrupts" variable in thread-local memory.
     171//
     172// _Thread_local volatile int interrupts;
     173// int main() {
      174//     interrupts = 0; /* disable interrupts */ }
     175//
     176// which generates the following code on the ARM
     177//
     178// (gdb) disassemble main
     179// Dump of assembler code for function main:
     180//    0x0000000000000610 <+0>:  mrs     x1, tpidr_el0
     181//    0x0000000000000614 <+4>:  mov     w0, #0x0                        // #0
     182//    0x0000000000000618 <+8>:  add     x1, x1, #0x0, lsl #12
     183//    0x000000000000061c <+12>: add     x1, x1, #0x10
     184//    0x0000000000000620 <+16>: str     wzr, [x1]
     185//    0x0000000000000624 <+20>: ret
     186//
     187// The mrs moves a pointer from coprocessor register tpidr_el0 into register x1.  Register w0 is set to 0. The two adds
      188// increase the TLS pointer with the displacement (offset) 0x10, which is the location in the TLS of variable
      189// "interrupts".  Finally, 0 is stored into "interrupts" through the pointer in register x1 that points into the
      190// TLS. Now once x1 has the pointer to the location of the TLS for kernel thread N, it can be preempted at a
     191// user-level and the user thread is put on the user-level ready-queue. When the preempted thread gets to the front of
      192// the user-level ready-queue, it is run on kernel thread M. It now stores 0 into "interrupts" back on kernel thread N,
      193// turning off interrupts on the wrong kernel thread.
     194//
     195// On the x86, the following code is generated for the same code fragment.
     196//
     197// (gdb) disassemble main
     198// Dump of assembler code for function main:
     199//    0x0000000000400420 <+0>:  movl   $0x0,%fs:0xfffffffffffffffc
     200//    0x000000000040042c <+12>: xor    %eax,%eax
     201//    0x000000000040042e <+14>: retq
     202//
      203// and there is base-displacement addressing used to atomically reset variable "interrupts" off of the TLS pointer in
     204// register "fs".
     205//
     206// Hence, the ARM has base-displacement address for the general purpose registers, BUT not to the coprocessor
     207// registers. As a result, generating the address for the write into variable "interrupts" is no longer atomic.
     208//
     209// Note this problem does NOT occur when just using multiple kernel threads because the preemption ALWAYS restarts the
     210// thread on the same kernel thread.
     211//
      212// The obvious question is why ARM uses a coprocessor register to store the TLS pointer given that coprocessor
     213// registers are second-class registers with respect to the instruction set. One possible answer is that they did not
     214// want to dedicate one of the general registers to hold the TLS pointer and there was a free coprocessor register
     215// available.
     216
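In C terms, the hazard described above is that one logical store decomposes into three separately interruptible steps; a hypothetical sketch (read_tpidr_el0 is illustrative, not a helper in this codebase):

        /* What "interrupts = 0;" becomes on the ARM, conceptually: */
        char * tls_base = read_tpidr_el0();            /* 1. TLS base of kernel thread N */
        int  * addr     = (int *)(tls_base + 0x10);    /* 2. still N's "interrupts" slot */
        /* user-level preemption may migrate the user thread to kernel thread M here */
        *addr = 0;                                     /* 3. clears N's flag, not M's    */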
     217//-----------------------------------------------------------------------------
     218// Some assembly required
     219#define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)
     220
     221//----------
     222// special case for preemption since used often
     223bool __preemption_enabled() {
      224        // create an assembler label before
     225        // marked as clobber all to avoid movement
     226        __cfaasm_label(check, before);
     227
     228        // access tls as normal
     229        bool enabled = __cfaabi_tls.preemption_state.enabled;
     230
      231        // create an assembler label after
     232        // marked as clobber all to avoid movement
     233        __cfaasm_label(check, after);
     234        return enabled;
     235}
     236
     237struct asm_region {
     238        void * before;
     239        void * after;
     240};
     241
     242static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
     243        return ip >= region.before && ip <= region.after;
     244}
     245
     246
     247//----------
     248// Get data from the TLS block
     249// struct asm_region __cfaasm_get;
     250uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
     251uintptr_t __cfatls_get( unsigned long int offset ) {
      252        // create an assembler label before
     253        // marked as clobber all to avoid movement
     254        __cfaasm_label(get, before);
     255
     256        // access tls as normal (except for pointer arithmetic)
     257        uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
     258
      259        // create an assembler label after
     260        // marked as clobber all to avoid movement
     261        __cfaasm_label(get, after);
     262        return val;
     263}
    162264
    163265extern "C" {
    164266        // Disable interrupts by incrementing the counter
    165267        void disable_interrupts() {
    166                 with( kernelTLS.preemption_state ) {
      268                // create an assembler label before
     269                // marked as clobber all to avoid movement
     270                __cfaasm_label(dsable, before);
     271
     272                with( __cfaabi_tls.preemption_state ) {
    167273                        #if GCC_VERSION > 50000
    168274                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     
    181287                        verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
    182288                }
     289
      290                // create an assembler label after
     291                // marked as clobber all to avoid movement
     292                __cfaasm_label(dsable, after);
     293
    183294        }
    184295
     
    186297        // If counter reaches 0, execute any pending __cfactx_switch
    187298        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
    188                 processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
    189 
    190                 with( kernelTLS.preemption_state ){
     299                // Cache the processor now since interrupts can start happening after the atomic store
     300                processor   * proc = __cfaabi_tls.this_processor;
     301                /* paranoid */ verify( proc );
     302
     303                with( __cfaabi_tls.preemption_state ){
    191304                        unsigned short prev = disable_count;
    192305                        disable_count -= 1;
    193                         verify( prev != 0u );                     // If this triggers someone is enabled already enabled interruptsverify( prev != 0u );
     306
      307                        // If this triggers, someone is enabling already-enabled interrupts.
     308                        /* paranoid */ verify( prev != 0u );
    194309
     195310                        // Check if we need to preempt the thread because an interrupt was missed
    196311                        if( prev == 1 ) {
    197312                                #if GCC_VERSION > 50000
    198                                 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
     313                                        static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
    199314                                #endif
    200315
     
    220335        // Don't execute any pending __cfactx_switch even if counter reaches 0
    221336        void enable_interrupts_noPoll() {
    222                 unsigned short prev = kernelTLS.preemption_state.disable_count;
    223                 kernelTLS.preemption_state.disable_count -= 1;
    224                 verifyf( prev != 0u, "Incremented from %u\n", prev );                     // If this triggers someone is enabled already enabled interrupts
     337                unsigned short prev = __cfaabi_tls.preemption_state.disable_count;
     338                __cfaabi_tls.preemption_state.disable_count -= 1;
      339        // If this triggers, someone is enabling already-enabled interrupts
     340                /* paranoid */ verifyf( prev != 0u, "Incremented from %u\n", prev );
    225341                if( prev == 1 ) {
    226342                        #if GCC_VERSION > 50000
    227                         static_assert(__atomic_always_lock_free(sizeof(kernelTLS.preemption_state.enabled), &kernelTLS.preemption_state.enabled), "Must be lock-free");
     343                                static_assert(__atomic_always_lock_free(sizeof(__cfaabi_tls.preemption_state.enabled), &__cfaabi_tls.preemption_state.enabled), "Must be lock-free");
    228344                        #endif
    229345                        // Set enabled flag to true
    230346                        // should be atomic to avoid preemption in the middle of the operation.
    231347                        // use memory order RELAXED since there is no inter-thread on this variable requirements
    232                         __atomic_store_n(&kernelTLS.preemption_state.enabled, true, __ATOMIC_RELAXED);
     348                        __atomic_store_n(&__cfaabi_tls.preemption_state.enabled, true, __ATOMIC_RELAXED);
    233349
    234350                        // Signal the compiler that a fence is needed but only for signal handlers
     
    237353        }
    238354}
     355
     356//-----------------------------------------------------------------------------
     357// Kernel Signal Debug
     358void __cfaabi_check_preemption() {
     359        bool ready = __preemption_enabled();
     360        if(!ready) { abort("Preemption should be ready"); }
     361
     362        __cfaasm_label(debug, before);
     363
     364                sigset_t oldset;
     365                int ret;
     366                ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
     367                if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
     368
     369                ret = sigismember(&oldset, SIGUSR1);
     370                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     371                if(ret == 1) { abort("ERROR SIGUSR1 is disabled"); }
     372
     373                ret = sigismember(&oldset, SIGALRM);
     374                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     375                if(ret == 0) { abort("ERROR SIGALRM is enabled"); }
     376
     377                ret = sigismember(&oldset, SIGTERM);
     378                if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
     379                if(ret == 1) { abort("ERROR SIGTERM is disabled"); }
     380
     381        __cfaasm_label(debug, after);
     382}
     383
     384#ifdef __CFA_WITH_VERIFY__
     385bool __cfaabi_dbg_in_kernel() {
     386        return !__preemption_enabled();
     387}
     388#endif
     389
     390#undef __cfaasm_label
     391
     392//-----------------------------------------------------------------------------
     393// Signal handling
    239394
    240395// sigprocmask wrapper : unblock a single signal
     
    256411
    257412        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
    258             abort( "internal error, pthread_sigmask" );
     413                abort( "internal error, pthread_sigmask" );
    259414        }
    260415}
     
    268423// reserved for future use
    269424static void timeout( $thread * this ) {
    270         //TODO : implement waking threads
    271 }
     425        unpark( this );
     426}
     427
     428//-----------------------------------------------------------------------------
     429// Some assembly required
     430#if defined( __i386 )
     431        #ifdef __PIC__
     432                #define RELOC_PRELUDE( label ) \
     433                        "calll   .Lcfaasm_prelude_" #label "$pb\n\t" \
     434                        ".Lcfaasm_prelude_" #label "$pb:\n\t" \
     435                        "popl    %%eax\n\t" \
     436                        ".Lcfaasm_prelude_" #label "_end:\n\t" \
     437                        "addl    $_GLOBAL_OFFSET_TABLE_+(.Lcfaasm_prelude_" #label "_end-.Lcfaasm_prelude_" #label "$pb), %%eax\n\t"
     438                #define RELOC_PREFIX ""
     439                #define RELOC_SUFFIX "@GOT(%%eax)"
     440        #else
     441                #define RELOC_PREFIX "$"
     442                #define RELOC_SUFFIX ""
     443        #endif
     444        #define __cfaasm_label( label ) struct asm_region label = \
     445                ({ \
     446                        struct asm_region region; \
     447                        asm( \
     448                                RELOC_PRELUDE( label ) \
     449                                "movl " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     450                                "movl " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     451                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     452                        ); \
     453                        region; \
     454                });
     455#elif defined( __x86_64 )
     456        #ifdef __PIC__
     457                #define RELOC_PREFIX ""
     458                #define RELOC_SUFFIX "@GOTPCREL(%%rip)"
     459        #else
     460                #define RELOC_PREFIX "$"
     461                #define RELOC_SUFFIX ""
     462        #endif
     463        #define __cfaasm_label( label ) struct asm_region label = \
     464                ({ \
     465                        struct asm_region region; \
     466                        asm( \
     467                                "movq " RELOC_PREFIX "__cfaasm_" #label "_before" RELOC_SUFFIX ", %[vb]\n\t" \
     468                                "movq " RELOC_PREFIX "__cfaasm_" #label "_after"  RELOC_SUFFIX ", %[va]\n\t" \
     469                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     470                        ); \
     471                        region; \
     472                });
     473#elif defined( __aarch64__ )
     474        #ifdef __PIC__
     475                // Note that this works only for gcc
     476                #define __cfaasm_label( label ) struct asm_region label = \
     477                ({ \
     478                        struct asm_region region; \
     479                        asm( \
     480                                "adrp %[vb], _GLOBAL_OFFSET_TABLE_"                              "\n\t" \
     481                                "ldr  %[vb], [%[vb], #:gotpage_lo15:__cfaasm_" #label "_before]" "\n\t" \
     482                                "adrp %[va], _GLOBAL_OFFSET_TABLE_"                              "\n\t" \
     483                                "ldr  %[va], [%[va], #:gotpage_lo15:__cfaasm_" #label "_after]"  "\n\t" \
     484                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     485                        ); \
     486                        region; \
     487                });
     488        #else
     489                #error this is not the right thing to do
     490                /*
     491                #define __cfaasm_label( label ) struct asm_region label = \
     492                ({ \
     493                        struct asm_region region; \
     494                        asm( \
     495                                "adrp %[vb], __cfaasm_" #label "_before"              "\n\t" \
     496                                "add  %[vb], %[vb], :lo12:__cfaasm_" #label "_before" "\n\t" \
     497                                "adrp %[va], :got:__cfaasm_" #label "_after"          "\n\t" \
     498                                "add  %[va], %[va], :lo12:__cfaasm_" #label "_after"  "\n\t" \
     499                                 : [vb]"=r"(region.before), [va]"=r"(region.after) \
     500                        ); \
     501                        region; \
     502                });
     503                */
     504        #endif
     505#else
     506        #error unknown hardware architecture
     507#endif
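
As a concrete instance, on x86_64 without -fPIC the macro above expands to roughly the following (an illustrative expansion derived from the macro text):

        struct asm_region check = ({
                struct asm_region region;
                asm( "movq $__cfaasm_check_before, %[vb]\n\t"
                     "movq $__cfaasm_check_after,  %[va]\n\t"
                     : [vb]"=r"(region.before), [va]"=r"(region.after) );
                region;
        });

The __cfaasm_check_before/__cfaasm_check_after symbols are the global labels emitted by the earlier __cfaasm_label(check, ...) bracketing, so preemption_ready can test whether the interrupted instruction pointer falls inside that TLS access.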
    272508
    273509// KERNEL ONLY
     
    275511// If true  : preemption is safe
    276512// If false : preemption is unsafe and marked as pending
    277 static inline bool preemption_ready() {
     513static inline bool preemption_ready( void * ip ) {
     514        // Get all the region for which it is not safe to preempt
     515        __cfaasm_label( get    );
     516        __cfaasm_label( check  );
     517        __cfaasm_label( dsable );
     518        __cfaasm_label( debug  );
     519
    278520        // Check if preemption is safe
    279         bool ready = kernelTLS.preemption_state.enabled && ! kernelTLS.preemption_state.in_progress;
    280 
     521        bool ready = true;
     522        if( __cfaasm_in( ip, get    ) ) { ready = false; goto EXIT; };
     523        if( __cfaasm_in( ip, check  ) ) { ready = false; goto EXIT; };
     524        if( __cfaasm_in( ip, dsable ) ) { ready = false; goto EXIT; };
     525        if( __cfaasm_in( ip, debug  ) ) { ready = false; goto EXIT; };
     526        if( !__cfaabi_tls.preemption_state.enabled) { ready = false; goto EXIT; };
     527        if( __cfaabi_tls.preemption_state.in_progress ) { ready = false; goto EXIT; };
     528
     529EXIT:
    281530        // Adjust the pending flag accordingly
    282         kernelTLS.this_processor->pending_preemption = !ready;
     531        __cfaabi_tls.this_processor->pending_preemption = !ready;
    283532        return ready;
    284533}
     
    290539// Startup routine to activate preemption
    291540// Called from kernel_startup
    292 void kernel_start_preemption() {
     541void __kernel_alarm_startup() {
    293542        __cfaabi_dbg_print_safe( "Kernel : Starting preemption\n" );
    294543
    295544        // Start with preemption disabled until ready
    296         kernelTLS.preemption_state.enabled = false;
    297         kernelTLS.preemption_state.disable_count = 1;
     545        __cfaabi_tls.preemption_state.enabled = false;
     546        __cfaabi_tls.preemption_state.disable_count = 1;
    298547
    299548        // Initialize the event kernel
     
    303552        // Setup proper signal handlers
    304553        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
     554        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
    305555
    306556        signal_block( SIGALRM );
     
    311561// Shutdown routine to deactivate preemption
    312562// Called from kernel_shutdown
    313 void kernel_stop_preemption() {
     563void __kernel_alarm_shutdown() {
    314564        __cfaabi_dbg_print_safe( "Kernel : Preemption stopping\n" );
    315565
     
    325575        // Wait for the preemption thread to finish
    326576
    327         pthread_join( alarm_thread, 0p );
    328         free( alarm_stack );
     577        __destroy_pthread( alarm_thread, alarm_stack, 0p );
    329578
    330579        // Preemption is now fully stopped
     
    352601// Kernel Signal Handlers
    353602//=============================================================================================
     603__cfaabi_dbg_debug_do( static thread_local void * last_interrupt = 0; )
    354604
    355605// Context switch signal handler
    356606// Receives SIGUSR1 signal and causes the current thread to yield
    357607static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ ) {
    358         __cfaabi_dbg_debug_do( last_interrupt = (void *)(cxt->uc_mcontext.CFA_REG_IP); )
     608        void * ip = (void *)(cxt->uc_mcontext.CFA_REG_IP);
     609        __cfaabi_dbg_debug_do( last_interrupt = ip; )
    359610
     360611        // SKULLDUGGERY: if a thread creates a processor and then immediately deletes it,
    361612        // the interrupt that is supposed to force the kernel thread to preempt might arrive
    362         // before the kernel thread has even started running. When that happens an iterrupt
    363         // we a null 'this_processor' will be caught, just ignore it.
    364         if(! kernelTLS.this_processor ) return;
     613        // before the kernel thread has even started running. When that happens, an interrupt
     614        // with a null 'this_processor' will be caught, just ignore it.
     615        if(! __cfaabi_tls.this_processor ) return;
    365616
    366617        choose(sfp->si_value.sival_int) {
    367618                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
    368                 case PREEMPT_TERMINATE: verify( __atomic_load_n( &kernelTLS.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
     619                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
    369620                default:
    370621                        abort( "internal error, signal value is %d", sfp->si_value.sival_int );
     
    372623
    373624        // Check if it is safe to preempt here
    374         if( !preemption_ready() ) { return; }
    375 
    376         __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", kernelTLS.this_processor, kernelTLS.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
     625        if( !preemption_ready( ip ) ) { return; }
     626
     627        __cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p @ %p).\n", __cfaabi_tls.this_processor, __cfaabi_tls.this_thread, (void *)(cxt->uc_mcontext.CFA_REG_IP) );
    377628
    378629        // Sync flag : prevent recursive calls to the signal handler
    379         kernelTLS.preemption_state.in_progress = true;
     630        __cfaabi_tls.preemption_state.in_progress = true;
    380631
    381632        // Clear sighandler mask before context switching.
     
    387638        }
    388639
    389         // TODO: this should go in finish action
    390640        // Clear the in progress flag
    391         kernelTLS.preemption_state.in_progress = false;
     641        __cfaabi_tls.preemption_state.in_progress = false;
    392642
    393643        // Preemption can occur here
     
    395645        force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
    396646}
     647
     648static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
     649        abort("SIGALRM should never reach the signal handler");
     650}
     651
     652#if !defined(__CFA_NO_STATISTICS__)
     653        int __print_alarm_stats = 0;
     654#endif
    397655
    398656// Main of the alarm thread
    399657// Waits on SIGALRM and send SIGUSR1 to whom ever needs it
    400658static void * alarm_loop( __attribute__((unused)) void * args ) {
     659        __processor_id_t id;
     660        id.full_proc = false;
     661        id.id = doregister(&id);
     662        __cfaabi_tls.this_proc_id = &id;
     663
     664        #if !defined(__CFA_NO_STATISTICS__)
     665                struct __stats_t local_stats;
     666                __cfaabi_tls.this_stats = &local_stats;
     667                __init_stats( &local_stats );
     668        #endif
     669
    401670        // Block sigalrms to control when they arrive
    402671        sigset_t mask;
     
    456725EXIT:
    457726        __cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
     727        unregister(&id);
     728
     729        #if !defined(__CFA_NO_STATISTICS__)
     730                if( 0 != __print_alarm_stats ) {
     731                        __print_stats( &local_stats, __print_alarm_stats, "Alarm", "Thread", 0p );
     732                }
     733        #endif
    458734        return 0p;
    459735}
    460 
    461 //=============================================================================================
    462 // Kernel Signal Debug
    463 //=============================================================================================
    464 
    465 void __cfaabi_check_preemption() {
    466         bool ready = kernelTLS.preemption_state.enabled;
    467         if(!ready) { abort("Preemption should be ready"); }
    468 
    469         sigset_t oldset;
    470         int ret;
    471         ret = pthread_sigmask(0, 0p, &oldset);
    472         if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
    473 
    474         ret = sigismember(&oldset, SIGUSR1);
    475         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    476         if(ret == 1) { abort("ERROR SIGUSR1 is disabled"); }
    477 
    478         ret = sigismember(&oldset, SIGALRM);
    479         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    480         if(ret == 0) { abort("ERROR SIGALRM is enabled"); }
    481 
    482         ret = sigismember(&oldset, SIGTERM);
    483         if(ret <  0) { abort("ERROR sigismember returned %d", ret); }
    484         if(ret == 1) { abort("ERROR SIGTERM is disabled"); }
    485 }
    486 
    487 #ifdef __CFA_WITH_VERIFY__
    488 bool __cfaabi_dbg_in_kernel() {
    489         return !kernelTLS.preemption_state.enabled;
    490 }
    491 #endif
    492736
    493737// Local Variables: //
  • libcfa/src/concurrency/preemption.hfa

    r3c64c668 r58fe85a  
    1616#pragma once
    1717
     18#include "bits/locks.hfa"
    1819#include "alarm.hfa"
    19 #include "kernel_private.hfa"
    2020
    21 void kernel_start_preemption();
    22 void kernel_stop_preemption();
     21struct event_kernel_t {
     22        alarm_list_t alarms;
     23        __spinlock_t lock;
     24};
     25
     26extern event_kernel_t * event_kernel;
     27
    2328void update_preemption( processor * this, Duration duration );
    2429
  • libcfa/src/concurrency/thread.cfa

    r3c64c668 r58fe85a  
    1919
    2020#include "kernel_private.hfa"
     21#include "exception.hfa"
    2122
    2223#define __CFA_INVOKE_PRIVATE__
     
    2829        context{ 0p, 0p };
    2930        self_cor{ name, storage, storageSize };
     31        ticket = TICKET_RUNNING;
    3032        state = Start;
    3133        preempted = __NO_PREEMPTION;
     
    3537        self_mon_p = &self_mon;
    3638        curr_cluster = &cl;
    37         next = 0p;
     39        link.next = 0p;
     40        link.prev = 0p;
     41        link.preferred = -1;
     42        #if defined( __CFA_WITH_VERIFY__ )
     43                canary = 0x0D15EA5E0D15EA5Ep;
     44        #endif
     45
     46        seqable.next = 0p;
     47        seqable.back = 0p;
    3848
    3949        node.next = 0p;
     
    4555
    4656void ^?{}($thread& this) with( this ) {
     57        #if defined( __CFA_WITH_VERIFY__ )
     58                canary = 0xDEADDEADDEADDEADp;
     59        #endif
    4760        unregister(curr_cluster, this);
    4861        ^self_cor{};
     62}
     63
     64FORALL_DATA_INSTANCE(ThreadCancelled, (dtype thread_t), (thread_t))
     65
     66forall(dtype T)
     67void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src) {
     68        dst->virtual_table = src->virtual_table;
     69        dst->the_thread = src->the_thread;
     70        dst->the_exception = src->the_exception;
     71}
     72
     73forall(dtype T)
     74const char * msg(ThreadCancelled(T) *) {
     75        return "ThreadCancelled";
     76}
     77
     78forall(dtype T)
     79static void default_thread_cancel_handler(ThreadCancelled(T) & ) {
     80        abort( "Unhandled thread cancellation.\n" );
     81}
     82
     83forall(dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)))
     84void ?{}( thread_dtor_guard_t & this,
     85                T & thrd, void(*defaultResumptionHandler)(ThreadCancelled(T) &)) {
     86        $monitor * m = get_monitor(thrd);
     87        $thread * desc = get_thread(thrd);
     88
     89        // Setup the monitor guard
     90        void (*dtor)(T& mutex this) = ^?{};
     91        bool join = defaultResumptionHandler != (void(*)(ThreadCancelled(T)&))0;
     92        (this.mg){&m, (void(*)())dtor, join};
     93
     94
     95        /* paranoid */ verifyf( Halted == desc->state || Cancelled == desc->state, "Expected thread to be Halted or Cancelled, was %d\n", (int)desc->state );
     96
     97        // After the guard set-up and any wait, check for cancellation.
     98        struct _Unwind_Exception * cancellation = desc->self_cor.cancellation;
     99        if ( likely( 0p == cancellation ) ) {
     100                return;
     101        } else if ( Cancelled == desc->state ) {
     102                return;
     103        }
     104        desc->state = Cancelled;
     105        if (!join) {
     106                defaultResumptionHandler = default_thread_cancel_handler;
     107        }
     108
     109        ThreadCancelled(T) except;
      110        // TODO: Remove explicit vtable set once trac#186 is fixed.
     111        except.virtual_table = &get_exception_vtable(&except);
     112        except.the_thread = &thrd;
     113        except.the_exception = __cfaehm_cancellation_exception( cancellation );
     114        throwResume except;
     115
     116        except.the_exception->virtual_table->free( except.the_exception );
     117        free( cancellation );
     118        desc->self_cor.cancellation = 0p;
     119}
     120
     121void ^?{}( thread_dtor_guard_t & this ) {
     122        ^(this.mg){};
    49123}
    50124
     
    59133
    60134        this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
    61         verify( this_thrd->context.SP );
     135        /* paranoid */ verify( this_thrd->context.SP );
    62136
    63         __schedule_thread(this_thrd);
     137        __schedule_thread( this_thrd );
    64138        enable_interrupts( __cfaabi_dbg_ctx );
    65139}
     
    84158}
    85159
     160//-----------------------------------------------------------------------------
     161forall(dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)))
     162T & join( T & this ) {
     163        thread_dtor_guard_t guard = { this, defaultResumptionHandler };
     164        return this;
     165}
     166
     167uint64_t thread_rand() {
     168        disable_interrupts();
     169        uint64_t ret = __tls_rand();
     170        enable_interrupts( __cfaabi_dbg_ctx );
     171        return ret;
     172}
     173
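With the guard in place, join blocks until the thread's main returns and then yields a reference to the joined object. A usage sketch (Worker is a hypothetical thread type, not from this changeset):

        thread Worker { int result; };
        void main( Worker & this ) { this.result = 42; }

        int run_one() {
                Worker w;                     // thread starts running at its declaration
                int r = join( w ).result;     // block until main( w ) completes
                return r;                     // w's destructor runs at scope exit
        }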
    86174// Local Variables: //
    87175// mode: c //
  • libcfa/src/concurrency/thread.hfa

    r3c64c668 r58fe85a  
    2222#include "kernel.hfa"
    2323#include "monitor.hfa"
     24#include "exception.hfa"
    2425
    2526//-----------------------------------------------------------------------------
    2627// thread trait
    2728trait is_thread(dtype T) {
    28       void ^?{}(T& mutex this);
    29       void main(T& this);
    30       $thread* get_thread(T& this);
     29        void ^?{}(T& mutex this);
     30        void main(T& this);
     31        $thread* get_thread(T& this);
    3132};
     33
     34FORALL_DATA_EXCEPTION(ThreadCancelled, (dtype thread_t), (thread_t)) (
     35        thread_t * the_thread;
     36        exception_t * the_exception;
     37);
     38
     39forall(dtype T)
     40void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src);
     41
     42forall(dtype T)
     43const char * msg(ThreadCancelled(T) *);
    3244
    3345// define that satisfies the trait without using the thread keyword
     
    6678static inline void ?{}($thread & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
    6779
     80struct thread_dtor_guard_t {
     81        monitor_dtor_guard_t mg;
     82};
     83
     84forall( dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) )
     85void ?{}( thread_dtor_guard_t & this, T & thrd, void(*)(ThreadCancelled(T) &) );
     86void ^?{}( thread_dtor_guard_t & this );
     87
    6888//-----------------------------------------------------------------------------
    6989// thread runner
     
    82102forall( dtype T | sized(T) | is_thread(T) )
    83103void ^?{}( scoped(T)& this );
    84 
    85 //-----------------------------------------------------------------------------
    86 // Thread getters
    87 static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
    88104
    89105//-----------------------------------------------------------------------------
     
    106122bool force_yield( enum __Preemption_Reason );
    107123
    108 static inline void yield() {
    109         force_yield(__MANUAL_PREEMPTION);
    110 }
     124//----------
     125// sleep: force thread to block and be rescheduled after Duration duration
     126void sleep( Duration duration );
    111127
    112 // Yield: yield N times
    113 static inline void yield( unsigned times ) {
    114         for( times ) {
    115                 yield();
    116         }
    117 }
     128//----------
     129// join
     130forall( dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) )
     131T & join( T & this );
    118132
    119133// Local Variables: //
  • libcfa/src/containers/vector.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Tue Jul  5 18:00:07 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Jul 22 10:01:18 2017
    13 // Update Count     : 3
     12// Last Modified On : Wed Jun 17 11:02:46 2020
     13// Update Count     : 4
    1414//
    1515
    1616#pragma once
    1717
    18 extern "C" {
    1918#include <stdbool.h>
    20 }
    2119
    2220//------------------------------------------------------------------------------
  • libcfa/src/exception.c

    r3c64c668 r58fe85a  
    99// Author           : Andrew Beach
    1010// Created On       : Mon Jun 26 15:13:00 2017
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Feb 22 18:17:34 2018
    13 // Update Count     : 11
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Tue Oct 27 16:27:00 2020
     13// Update Count     : 35
    1414//
    1515
     16// Normally we would get this from the CFA prelude.
    1617#include <stddef.h> // for size_t
    1718
     19#include <unwind.h> // for struct _Unwind_Exception {...};
     20
    1821#include "exception.h"
    19 
    20 // Implementation of the secret header.
    2122
    2223#include <stdlib.h>
    2324#include <stdio.h>
    24 #include <unwind.h>
    2525#include <bits/debug.hfa>
    26 
    27 // FIX ME: temporary hack to keep ARM build working
     26#include "concurrency/invoke.h"
     27#include "stdhdr/assert.h"
     28
     29#if defined( __ARM_ARCH )
     30#warning FIX ME: temporary hack to keep ARM build working
    2831#ifndef _URC_FATAL_PHASE1_ERROR
    29 #define _URC_FATAL_PHASE1_ERROR 2
     32#define _URC_FATAL_PHASE1_ERROR 3
    3033#endif // ! _URC_FATAL_PHASE1_ERROR
    3134#ifndef _URC_FATAL_PHASE2_ERROR
    3235#define _URC_FATAL_PHASE2_ERROR 2
    3336#endif // ! _URC_FATAL_PHASE2_ERROR
     37#endif // __ARM_ARCH
    3438
    3539#include "lsda.h"
    3640
     41/* The exception class for our exceptions. Because of the vendor component
     42 * its value would not be standard.
     43 * Vendor: UWPL
     44 * Language: CFA\0
     45 */
     46const _Unwind_Exception_Class __cfaehm_exception_class = 0x4c50575500414643;
    3747
    3848// Base exception vtable is abstract, you should not have base exceptions.
    39 struct __cfaabi_ehm__base_exception_t_vtable
    40                 ___cfaabi_ehm__base_exception_t_vtable_instance = {
     49struct __cfaehm_base_exception_t_vtable
     50                ___cfaehm_base_exception_t_vtable_instance = {
    4151        .parent = NULL,
    4252        .size = 0,
     
    4757
    4858
    49 // Temperary global exception context. Does not work with concurency.
    50 struct exception_context_t {
    51     struct __cfaabi_ehm__try_resume_node * top_resume;
    52     struct __cfaabi_ehm__try_resume_node * current_resume;
    53 
    54     exception_t * current_exception;
    55     int current_handler_index;
    56 } shared_stack = {NULL, NULL, 0, 0};
    57 
    5859// Get the current exception context.
    5960// There can be a single global until multithreading occurs, then each stack
    60 // needs its own. It will have to be updated to handle that.
    61 struct exception_context_t * this_exception_context() {
      61// needs its own. The threaded version comes from libcfathread (defined without the weak attribute).
     62__attribute__((weak)) struct exception_context_t * this_exception_context() {
     63        static struct exception_context_t shared_stack = {NULL, NULL};
    6264        return &shared_stack;
    6365}
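
Because the definition above is weak, the threaded runtime can substitute a per-stack context at link time simply by defining the same symbol strongly. A sketch of the shape such an override might take (the per-thread field shown is hypothetical):

        /* In libcfathread (sketch): defined without weak, so it wins at link time. */
        struct exception_context_t * this_exception_context() {
                return &active_thread()->self_cor.ehm_state;   /* hypothetical per-stack field */
        }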
    64 //#define SAVE_EXCEPTION_CONTEXT(to_name)
    65 //struct exception_context_t * to_name = this_exception_context();
    66 //exception * this_exception() {
    67 //    return this_exception_context()->current_exception;
    68 //}
    69 
    70 
    71 // This macro should be the only thing that needs to change across machines.
    72 // Used in the personality function, way down in termination.
    73 // struct _Unwind_Context * -> _Unwind_Reason_Code(*)(exception_t *)
    74 #define MATCHER_FROM_CONTEXT(ptr_to_context) \
    75         (*(_Unwind_Reason_Code(**)(exception_t *))(_Unwind_GetCFA(ptr_to_context) + 8))
    7666
    7767
    7868// RESUMPTION ================================================================
    7969
    80 void __cfaabi_ehm__throw_resume(exception_t * except) {
    81 
    82         __cfaabi_dbg_print_safe("Throwing resumption exception\n");
    83 
    84         struct __cfaabi_ehm__try_resume_node * original_head = shared_stack.current_resume;
    85         struct __cfaabi_ehm__try_resume_node * current =
    86                 (original_head) ? original_head->next : shared_stack.top_resume;
    87 
    88         for ( ; current ; current = current->next) {
    89                 shared_stack.current_resume = current;
    90                 if (current->handler(except)) {
    91                         shared_stack.current_resume = original_head;
    92                         return;
     70static void reset_top_resume(struct __cfaehm_try_resume_node ** store) {
     71        this_exception_context()->top_resume = *store;
     72}
     73
     74void __cfaehm_throw_resume(exception_t * except, void (*defaultHandler)(exception_t *)) {
     75        struct exception_context_t * context = this_exception_context();
     76
     77        __cfadbg_print_safe(exception, "Throwing resumption exception\n");
     78
     79        {
     80                __attribute__((cleanup(reset_top_resume)))
     81                struct __cfaehm_try_resume_node * original_head = context->top_resume;
     82                struct __cfaehm_try_resume_node * current = context->top_resume;
     83
     84                for ( ; current ; current = current->next) {
     85                        context->top_resume = current->next;
     86                        if (current->handler(except)) {
     87                                return;
     88                        }
    9389                }
    94         }
    95 
    96         __cfaabi_dbg_print_safe("Unhandled exception\n");
    97         shared_stack.current_resume = original_head;
    98 
    99         // Fall back to termination:
    100         __cfaabi_ehm__throw_terminate(except);
    101         // TODO: Default handler for resumption.
     90        } // End the search and return to the top of the stack.
     91
     92        // No handler found, fall back to the default operation.
     93        __cfadbg_print_safe(exception, "Unhandled exception\n");
     94        defaultHandler(except);
    10295}
    10396
     
    10699// be added after the node is built but before it is made the top node.
    107100
    108 void __cfaabi_ehm__try_resume_setup(struct __cfaabi_ehm__try_resume_node * node,
     101void __cfaehm_try_resume_setup(struct __cfaehm_try_resume_node * node,
    109102                        _Bool (*handler)(exception_t * except)) {
    110         node->next = shared_stack.top_resume;
     103        struct exception_context_t * context = this_exception_context();
     104        node->next = context->top_resume;
    111105        node->handler = handler;
    112         shared_stack.top_resume = node;
    113 }
    114 
    115 void __cfaabi_ehm__try_resume_cleanup(struct __cfaabi_ehm__try_resume_node * node) {
    116         shared_stack.top_resume = node->next;
    117 }
    118 
    119 
    120 // TERMINATION ===============================================================
    121 
    122 // MEMORY MANAGEMENT (still for integers)
    123 // May have to move to cfa for constructors and destructors (references).
    124 
    125 struct __cfaabi_ehm__node {
    126         struct __cfaabi_ehm__node * next;
    127 };
     106        context->top_resume = node;
     107}
     108
     109void __cfaehm_try_resume_cleanup(struct __cfaehm_try_resume_node * node) {
     110        struct exception_context_t * context = this_exception_context();
     111        context->top_resume = node->next;
     112}
     113
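A try statement with a catchResume clause compiles down to bracketing its body with these two calls. Roughly (an illustrative sketch of the generated shape):

        static _Bool my_handler( exception_t * except ) {
                /* return 1 if this clause handles except, 0 to continue the search */
                return 0;
        }

        void guarded_body(void) {
                struct __cfaehm_try_resume_node node;
                __cfaehm_try_resume_setup( &node, my_handler );  /* push onto top_resume  */
                /* ... try body: a resumption raise searches from top_resume downward ... */
                __cfaehm_try_resume_cleanup( &node );            /* pop on normal exit    */
        }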
     114
     115// MEMORY MANAGEMENT =========================================================
    128116
    129117#define NODE_TO_EXCEPT(node) ((exception_t *)(1 + (node)))
    130 #define EXCEPT_TO_NODE(except) ((struct __cfaabi_ehm__node *)(except) - 1)
     118#define EXCEPT_TO_NODE(except) ((struct __cfaehm_node *)(except) - 1)
     119#define UNWIND_TO_NODE(unwind) ((struct __cfaehm_node *)(unwind))
     120#define NULL_MAP(map, ptr) ((ptr) ? (map(ptr)) : NULL)
     121
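These macros encode the storage layout: each exception is allocated immediately after its bookkeeping node, so the two pointers are interconvertible by pointer arithmetic. Schematically (field order inferred from the initialization in __cfaehm_allocate_exception below; the header must start with the unwind exception for UNWIND_TO_NODE to hold):

        struct __cfaehm_node {                              /* sketch of the header      */
                struct _Unwind_Exception unwind_exception;  /* first, see UNWIND_TO_NODE */
                struct __cfaehm_node * next;
                int handler_index;
        };
        /* layout: [ __cfaehm_node | exception object ... ]                              */
        /*           ^node           ^NODE_TO_EXCEPT(node) == (exception_t *)(node + 1)  */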
     122// How to clean up an exception in various situations.
     123static void __cfaehm_exception_cleanup(
     124                _Unwind_Reason_Code reason,
     125                struct _Unwind_Exception * exception) {
     126        switch (reason) {
     127        case _URC_FOREIGN_EXCEPTION_CAUGHT:
     128                // This one we could clean-up to allow cross-language exceptions.
     129        case _URC_FATAL_PHASE1_ERROR:
     130        case _URC_FATAL_PHASE2_ERROR:
     131        default:
     132                abort();
     133        }
     134}
    131135
    132136// Creates a copy of the indicated exception and sets current_exception to it.
    133 static void __cfaabi_ehm__allocate_exception( exception_t * except ) {
     137static void __cfaehm_allocate_exception( exception_t * except ) {
    134138        struct exception_context_t * context = this_exception_context();
    135139
    136140        // Allocate memory for the exception.
    137         struct __cfaabi_ehm__node * store = malloc(
    138                 sizeof( struct __cfaabi_ehm__node ) + except->virtual_table->size );
     141        struct __cfaehm_node * store = malloc(
     142                sizeof( struct __cfaehm_node ) + except->virtual_table->size );
    139143
    140144        if ( ! store ) {
     
    143147        }
    144148
     149        // Initialize the node:
     150        exception_t * except_store = NODE_TO_EXCEPT(store);
     151        store->unwind_exception.exception_class = __cfaehm_exception_class;
     152        store->unwind_exception.exception_cleanup = __cfaehm_exception_cleanup;
     153        store->handler_index = 0;
     154        except->virtual_table->copy( except_store, except );
     155
    145156        // Add the node to the list:
    146         store->next = EXCEPT_TO_NODE(context->current_exception);
    147         context->current_exception = NODE_TO_EXCEPT(store);
    148 
    149         // Copy the exception to storage.
    150         except->virtual_table->copy( context->current_exception, except );
     157        store->next = NULL_MAP(EXCEPT_TO_NODE, context->current_exception);
     158        context->current_exception = except_store;
    151159}
    152160
     153161// Delete the provided exception, unsetting current_exception if relevant.
    154 static void __cfaabi_ehm__delete_exception( exception_t * except ) {
    155         struct exception_context_t * context = this_exception_context();
    156 
    157         __cfaabi_dbg_print_safe("Deleting Exception\n");
     162static void __cfaehm_delete_exception( exception_t * except ) {
     163        struct exception_context_t * context = this_exception_context();
     164
     165        __cfadbg_print_safe(exception, "Deleting Exception\n");
    158166
    159167        // Remove the exception from the list.
    160         struct __cfaabi_ehm__node * to_free = EXCEPT_TO_NODE(except);
    161         struct __cfaabi_ehm__node * node;
     168        struct __cfaehm_node * to_free = EXCEPT_TO_NODE(except);
     169        struct __cfaehm_node * node;
    162170
    163171        if ( context->current_exception == except ) {
    164172                node = to_free->next;
    165                 context->current_exception = (node) ? NODE_TO_EXCEPT(node) : 0;
     173                context->current_exception = NULL_MAP(NODE_TO_EXCEPT, node);
    166174        } else {
    167175                node = EXCEPT_TO_NODE(context->current_exception);
    168176                // It may always be in the first or second position.
    169                 while( to_free != node->next ) {
     177                while ( to_free != node->next ) {
    170178                        node = node->next;
    171179                }
     
    178186}
    179187
    180 // If this isn't a rethrow (*except==0), delete the provided exception.
    181 void __cfaabi_ehm__cleanup_terminate( void * except ) {
    182         if ( *(void**)except ) __cfaabi_ehm__delete_exception( *(exception_t **)except );
    183 }
    184 
    185 
    186 // We need a piece of storage to raise the exception
    187 struct _Unwind_Exception this_exception_storage;
     188// CANCELLATION ==============================================================
    188189
    189190// Function needed by force unwind
     
    192193                int version,
    193194                _Unwind_Action actions,
    194                 _Unwind_Exception_Class exceptionClass,
     195                _Unwind_Exception_Class exception_class,
    195196                struct _Unwind_Exception * unwind_exception,
    196                 struct _Unwind_Context * context,
    197                 void * some_param) {
    198         if( actions & _UA_END_OF_STACK  ) exit(1);
    199         if( actions & _UA_CLEANUP_PHASE ) return _URC_NO_REASON;
    200 
    201         return _URC_FATAL_PHASE2_ERROR;
     197                struct _Unwind_Context * unwind_context,
     198                void * stop_param) {
     199        // Verify actions follow the rules we expect.
     200        verify(actions & _UA_CLEANUP_PHASE);
     201        verify(actions & _UA_FORCE_UNWIND);
     202        verify(!(actions & _UA_SEARCH_PHASE));
     203        verify(!(actions & _UA_HANDLER_FRAME));
     204
     205        if ( actions & _UA_END_OF_STACK ) {
     206                abort();
     207        } else {
     208                return _URC_NO_REASON;
     209        }
     210}
     211
     212__attribute__((weak)) _Unwind_Reason_Code
     213__cfaehm_cancellation_unwind( struct _Unwind_Exception * exception ) {
     214        return _Unwind_ForcedUnwind( exception, _Stop_Fn, (void*)0x22 );
     215}
     216
      218// Cancel the current stack, performing appropriate clean-up and messaging.
     218void __cfaehm_cancel_stack( exception_t * exception ) {
     219        __cfaehm_allocate_exception( exception );
     220
     221        struct exception_context_t * context = this_exception_context();
     222        struct __cfaehm_node * node = EXCEPT_TO_NODE(context->current_exception);
     223
      224        // Perform clean-up of any extra active exceptions.
     225        while ( node->next ) {
     226                struct __cfaehm_node * to_free = node->next;
     227                node->next = to_free->next;
     228                exception_t * except = NODE_TO_EXCEPT( to_free );
     229                except->virtual_table->free( except );
     230            free( to_free );
     231        }
     232
     233        _Unwind_Reason_Code ret;
     234        ret = __cfaehm_cancellation_unwind( &node->unwind_exception );
     235        printf("UNWIND ERROR %d after force unwind\n", ret);
     236        abort();
     237}
     238
     239
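For reference, the cancellation machinery above is built on _Unwind_ForcedUnwind, which skips the search phase and instead calls the supplied stop function once per frame, and a final time with _UA_END_OF_STACK when it runs out of frames. A self-contained sketch of that contract using only the documented unwind API; the names stop_fn, storage and cancel_current_stack are invented for the example:

    #include <unwind.h>
    #include <stdlib.h>

    // Called by the unwinder for every frame during a forced unwind.
    static _Unwind_Reason_Code stop_fn(
            int version, _Unwind_Action actions,
            _Unwind_Exception_Class exception_class,
            struct _Unwind_Exception * unwind_exception,
            struct _Unwind_Context * unwind_context,
            void * stop_param) {
        if ( actions & _UA_END_OF_STACK ) abort(); // unwound everything: cannot return
        return _URC_NO_REASON;                     // keep going, cleanups run per frame
    }

    static struct _Unwind_Exception storage;       // must outlive the unwind

    void cancel_current_stack(void) {
        _Unwind_ForcedUnwind( &storage, stop_fn, (void *)0 );
        abort();                                   // only reached if the unwind fails
    }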
     240// TERMINATION ===============================================================
     241
     242// If this isn't a rethrow (*except==0), delete the provided exception.
     243void __cfaehm_cleanup_terminate( void * except ) {
     244        if ( *(void**)except ) __cfaehm_delete_exception( *(exception_t **)except );
     245}
     246
     247static void __cfaehm_cleanup_default( exception_t ** except ) {
     248        __cfaehm_delete_exception( *except );
     249        *except = NULL;
    202250}
    203251
    204252// The exception that is being thrown must already be stored.
    205 __attribute__((noreturn)) void __cfaabi_ehm__begin_unwind(void) {
    206         if ( ! this_exception_context()->current_exception ) {
     253static void __cfaehm_begin_unwind(void(*defaultHandler)(exception_t *)) {
     254        struct exception_context_t * context = this_exception_context();
     255        if ( NULL == context->current_exception ) {
    207256                printf("UNWIND ERROR missing exception in begin unwind\n");
    208257                abort();
    209258        }
    210 
     259        struct _Unwind_Exception * storage =
     260                &EXCEPT_TO_NODE(context->current_exception)->unwind_exception;
    211261
    212262        // Call stdlibc to raise the exception
    213         _Unwind_Reason_Code ret = _Unwind_RaiseException( &this_exception_storage );
     263        __cfadbg_print_safe(exception, "Begin unwinding (storage &p, context %p)\n", storage, context);
     264        _Unwind_Reason_Code ret = _Unwind_RaiseException( storage );
    214265
    215266        // If we reach here it means something happened. For resumption to work we need to find a way
     
    220271        // the whole stack.
    221272
    222         if( ret == _URC_END_OF_STACK ) {
    223                 // No proper handler was found. This can be handled in many ways, C++ calls std::terminate.
    224                 // Here we force unwind the stack, basically raising a cancellation.
    225                 printf("Uncaught exception %p\n", &this_exception_storage);
    226 
    227                 ret = _Unwind_ForcedUnwind( &this_exception_storage, _Stop_Fn, (void*)0x22 );
    228                 printf("UNWIND ERROR %d after force unwind\n", ret);
     273        // We did not simply reach the end of the stack without finding a handler. This is an error.
     274        if ( ret != _URC_END_OF_STACK ) {
     275                printf("UNWIND ERROR %d after raise exception\n", ret);
    229276                abort();
    230277        }
    231278
    232         // We did not simply reach the end of the stack without finding a handler. This is an error.
    233         printf("UNWIND ERROR %d after raise exception\n", ret);
     279        // No handler found, go to the default operation.
     280        __cfadbg_print_safe(exception, "Uncaught exception %p\n", storage);
     281
     282        __attribute__((cleanup(__cfaehm_cleanup_default)))
     283        exception_t * exception = context->current_exception;
     284        defaultHandler( exception );
     285}
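The __attribute__((cleanup(__cfaehm_cleanup_default))) above is what guarantees the exception is deleted once defaultHandler returns: gcc invokes the named function with a pointer to the variable whenever it leaves scope, on any exit path. A standalone illustration of the attribute (everything here is example code, not libcfa):

    #include <stdio.h>

    static void cleanup_int(int * p) {
        printf("cleaning up %d\n", *p);
    }

    int main(void) {
        __attribute__((cleanup(cleanup_int)))
        int x = 42;             // cleanup_int(&x) runs when x goes out of scope
        printf("body\n");       // prints "body", then "cleaning up 42"
        return 0;
    }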
     286
     287void __cfaehm_throw_terminate( exception_t * val, void (*defaultHandler)(exception_t *) ) {
     288        __cfadbg_print_safe(exception, "Throwing termination exception\n");
     289
     290        __cfaehm_allocate_exception( val );
     291        __cfaehm_begin_unwind( defaultHandler );
     292}
     293
     294static __attribute__((noreturn)) void __cfaehm_rethrow_adapter( exception_t * except ) {
     295        // TODO: Print some error message.
     296        (void)except;
    234297        abort();
    235298}
    236299
    237 void __cfaabi_ehm__throw_terminate( exception_t * val ) {
    238         __cfaabi_dbg_print_safe("Throwing termination exception\n");
    239 
    240         __cfaabi_ehm__allocate_exception( val );
    241         __cfaabi_ehm__begin_unwind();
    242 }
    243 
    244 void __cfaabi_ehm__rethrow_terminate(void) {
    245         __cfaabi_dbg_print_safe("Rethrowing termination exception\n");
    246 
    247         __cfaabi_ehm__begin_unwind();
    248 }
    249 
    250 #pragma GCC push_options
    251 #pragma GCC optimize("O0")
    252 
     300void __cfaehm_rethrow_terminate(void) {
     301        __cfadbg_print_safe(exception, "Rethrowing termination exception\n");
     302
     303        __cfaehm_begin_unwind( __cfaehm_rethrow_adapter );
     304        abort();
     305}
     306
     307#if defined( __x86_64 ) || defined( __i386 )
    253308// This is our personality routine. For every stack frame annotated with
    254309// ".cfi_personality 0x3,__gcfa_personality_v0" this function will be called twice when unwinding.
    255310//  Once in the search phase and once in the cleanup phase.
    256 _Unwind_Reason_Code __gcfa_personality_v0 (
    257                 int version, _Unwind_Action actions, unsigned long long exceptionClass,
    258                 struct _Unwind_Exception* unwind_exception,
    259                 struct _Unwind_Context* context)
     311_Unwind_Reason_Code __gcfa_personality_v0(
     312                int version,
     313                _Unwind_Action actions,
     314                unsigned long long exception_class,
     315                struct _Unwind_Exception * unwind_exception,
     316                struct _Unwind_Context * unwind_context)
    260317{
    261318
    262         //__cfaabi_dbg_print_safe("CFA: 0x%lx\n", _Unwind_GetCFA(context));
    263         __cfaabi_dbg_print_safe("Personality function (%d, %x, %llu, %p, %p):",
    264                         version, actions, exceptionClass, unwind_exception, context);
    265 
    266         // If we've reached the end of the stack then there is nothing much we can do...
    267         if( actions & _UA_END_OF_STACK ) return _URC_END_OF_STACK;
    268 
     319        //__cfadbg_print_safe(exception, "CFA: 0x%lx\n", _Unwind_GetCFA(context));
     320        __cfadbg_print_safe(exception, "Personality function (%d, %x, %llu, %p, %p):",
     321                        version, actions, exception_class, unwind_exception, unwind_context);
     322
     323        // Verify that actions follow the rules we expect.
     324        // This function should never be called at the end of the stack.
     325        verify(!(actions & _UA_END_OF_STACK));
     326        // Either only the search phase flag is set or...
    269327        if (actions & _UA_SEARCH_PHASE) {
    270                 __cfaabi_dbg_print_safe(" lookup phase");
    271         }
    272         else if (actions & _UA_CLEANUP_PHASE) {
    273                 __cfaabi_dbg_print_safe(" cleanup phase");
    274         }
    275         // Just in case, probably can't actually happen
    276         else {
    277                 printf(" error\n");
    278                 return _URC_FATAL_PHASE1_ERROR;
     328                verify(actions == _UA_SEARCH_PHASE);
     329                __cfadbg_print_safe(exception, " lookup phase");
     330        // ... we are in clean-up phase.
     331        } else {
     332                verify(actions & _UA_CLEANUP_PHASE);
     333                __cfadbg_print_safe(exception, " cleanup phase");
     334                // We shouldn't be the handler frame during forced unwind.
     335                if (actions & _UA_HANDLER_FRAME) {
     336                        verify(!(actions & _UA_FORCE_UNWIND));
     337                        __cfadbg_print_safe(exception, " (handler frame)");
     338                } else if (actions & _UA_FORCE_UNWIND) {
     339                        __cfadbg_print_safe(exception, " (force unwind)");
     340                }
    279341        }
    280342
    281343        // Get a pointer to the language specific data from which we will read what we need
    282         const unsigned char * lsd = (const unsigned char*) _Unwind_GetLanguageSpecificData( context );
    283 
    284         if( !lsd ) {    //Nothing to do, keep unwinding
     344        const unsigned char * lsd = _Unwind_GetLanguageSpecificData( unwind_context );
     345
     346        if ( !lsd ) {   //Nothing to do, keep unwinding
    285347                printf(" no LSD");
    286348                goto UNWIND;
     
     289351        // Get the instruction pointer and a reading pointer into the exception table
    290352        lsda_header_info lsd_info;
    291         const unsigned char * cur_ptr = parse_lsda_header(context, lsd, &lsd_info);
    292         _Unwind_Ptr instruction_ptr = _Unwind_GetIP( context );
     353        const unsigned char * cur_ptr = parse_lsda_header(unwind_context, lsd, &lsd_info);
     354        _Unwind_Ptr instruction_ptr = _Unwind_GetIP(unwind_context);
     355
     356        struct exception_context_t * context = this_exception_context();
    293357
    294358        // Linearly search the table for stuff to do
    295         while( cur_ptr < lsd_info.action_table ) {
     359        while ( cur_ptr < lsd_info.action_table ) {
    296360                _Unwind_Ptr callsite_start;
    297361                _Unwind_Ptr callsite_len;
     
    306370
     307371                // Have we reached the correct frame info yet?
    308                 if( lsd_info.Start + callsite_start + callsite_len < instruction_ptr ) {
     372                if ( lsd_info.Start + callsite_start + callsite_len < instruction_ptr ) {
    309373#ifdef __CFA_DEBUG_PRINT__
    310374                        void * ls = (void*)lsd_info.Start;
     
    314378                        void * ep = (void*)lsd_info.Start + callsite_start + callsite_len;
    315379                        void * ip = (void*)instruction_ptr;
    316                         __cfaabi_dbg_print_safe("\nfound %p - %p (%p, %p, %p), looking for %p\n",
     380                        __cfadbg_print_safe(exception, "\nfound %p - %p (%p, %p, %p), looking for %p\n",
    317381                                        bp, ep, ls, cs, cl, ip);
    318382#endif // __CFA_DEBUG_PRINT__
     
    321385
    322386                // Have we gone too far?
    323                 if( lsd_info.Start + callsite_start > instruction_ptr ) {
     387                if ( lsd_info.Start + callsite_start > instruction_ptr ) {
    324388                        printf(" gone too far");
    325389                        break;
    326390                }
    327391
    328                 // Something to do?
    329                 if( callsite_landing_pad ) {
    330                         // Which phase are we in
    331                         if (actions & _UA_SEARCH_PHASE) {
    332                                 // In search phase, these means we found a potential handler we must check.
    333 
    334                                 // We have arbitrarily decided that 0 means nothing to do and 1 means there is
    335                                 // a potential handler. This doesn't seem to conflict the gcc default behavior.
    336                                 if (callsite_action != 0) {
    337                                         // Now we want to run some code to see if the handler matches
    338                                         // This is the tricky part where we want to the power to run arbitrary code
    339                                         // However, generating a new exception table entry and try routine every time
    340                                         // is way more expansive than we might like
    341                                         // The information we have is :
    342                                         //  - The GR (Series of registers)
    343                                         //    GR1=GP Global Pointer of frame ref by context
    344                                         //  - The instruction pointer
    345                                         //  - The instruction pointer info (???)
    346                                         //  - The CFA (Canonical Frame Address)
    347                                         //  - The BSP (Probably the base stack pointer)
    348 
    349 
    350                                         // The current apprach uses one exception table entry per try block
    351                                         _uleb128_t imatcher;
    352                                         // Get the relative offset to the {...}?
    353                                         cur_ptr = read_uleb128(cur_ptr, &imatcher);
    354 
    355                                         _Unwind_Reason_Code (*matcher)(exception_t *) =
    356                                                 MATCHER_FROM_CONTEXT(context);
    357                                         int index = matcher(shared_stack.current_exception);
    358                                         _Unwind_Reason_Code ret = (0 == index)
    359                                                 ? _URC_CONTINUE_UNWIND : _URC_HANDLER_FOUND;
    360                                         shared_stack.current_handler_index = index;
    361 
    362                                         // Based on the return value, check if we matched the exception
    363                                         if( ret == _URC_HANDLER_FOUND) {
    364                                                 __cfaabi_dbg_print_safe(" handler found\n");
    365                                         } else {
    366                                                 __cfaabi_dbg_print_safe(" no handler\n");
    367                                         }
    368                                         return ret;
      392                // Check what we must do:
     393                if ( 0 == callsite_landing_pad ) {
     394                        // Nothing to do, move along
     395                        __cfadbg_print_safe(exception, " no landing pad");
     396                } else if (actions & _UA_SEARCH_PHASE) {
     397                        // In search phase, these means we found a potential handler we must check.
     398
     399                        // We have arbitrarily decided that 0 means nothing to do and 1 means there is
      400                        // a potential handler. This doesn't seem to conflict with the gcc default behavior.
     401                        if (callsite_action != 0) {
     402                                // Now we want to run some code to see if the handler matches
      403                                // This is the tricky part where we want the power to run arbitrary code.
      404                                // However, generating a new exception table entry and try routine every time
      405                                // is way more expensive than we might like.
      406                                // The information we have is:
     407                                //  - The GR (Series of registers)
     408                                //    GR1=GP Global Pointer of frame ref by context
     409                                //  - The instruction pointer
     410                                //  - The instruction pointer info (???)
     411                                //  - The CFA (Canonical Frame Address)
     412                                //  - The BSP (Probably the base stack pointer)
     413
      414                                // The current approach uses one exception table entry per try block
     415                                _uleb128_t imatcher;
     416                                // Get the relative offset to the {...}?
     417                                cur_ptr = read_uleb128(cur_ptr, &imatcher);
     418
     419                                _Unwind_Word match_pos =
     420#                               if defined( __x86_64 )
     421                                    _Unwind_GetCFA(unwind_context) + 8;
     422#                               elif defined( __i386 )
     423                                    _Unwind_GetCFA(unwind_context) + 24;
     424#                               elif defined( __ARM_ARCH )
     425#                                   warning FIX ME: check if anything needed for ARM
     426                                    42;
     427#                               endif
     428                                int (*matcher)(exception_t *) = *(int(**)(exception_t *))match_pos;
     429
     430                                int index = matcher(context->current_exception);
     431                                _Unwind_Reason_Code ret = (0 == index)
     432                                        ? _URC_CONTINUE_UNWIND : _URC_HANDLER_FOUND;
     433                                UNWIND_TO_NODE(unwind_exception)->handler_index = index;
     434
     435                                // Based on the return value, check if we matched the exception
     436                                if (ret == _URC_HANDLER_FOUND) {
     437                                        __cfadbg_print_safe(exception, " handler found\n");
     438                                } else {
      439                                        // TODO: Continue the search if there are more entries in the table.
     440                                        __cfadbg_print_safe(exception, " no handler\n");
    369441                                }
    370 
    371                                 // This is only a cleanup handler, ignore it
    372                                 __cfaabi_dbg_print_safe(" no action");
     442                                return ret;
    373443                        }
    374                         else if (actions & _UA_CLEANUP_PHASE) {
    375 
    376                                 if( (callsite_action != 0) && !(actions & _UA_HANDLER_FRAME) ){
    377                                         // If this is a potential exception handler
    378                                         // but not the one that matched the exception in the seach phase,
    379                                         // just ignore it
    380                                         goto UNWIND;
    381                                 }
    382 
    383                                 // We need to run some clean-up or a handler
    384                                 // These statment do the right thing but I don't know any specifics at all
    385                                 _Unwind_SetGR( context, __builtin_eh_return_data_regno(0), (_Unwind_Ptr) unwind_exception );
    386                                 _Unwind_SetGR( context, __builtin_eh_return_data_regno(1), 0 );
    387 
    388                                 // I assume this sets the instruction pointer to the adress of the landing pad
    389                                 // It doesn't actually set it, it only state the value that needs to be set once we return _URC_INSTALL_CONTEXT
    390                                 _Unwind_SetIP( context, ((lsd_info.LPStart) + (callsite_landing_pad)) );
    391 
    392                                 __cfaabi_dbg_print_safe(" action\n");
    393 
    394                                 // Return have some action to run
    395                                 return _URC_INSTALL_CONTEXT;
     444
     445                        // This is only a cleanup handler, ignore it
     446                        __cfadbg_print_safe(exception, " no action");
     447                } else {
     448                        // In clean-up phase, no destructors here but this could be the handler.
     449
     450                        if ( (callsite_action != 0) && !(actions & _UA_HANDLER_FRAME) ){
     451                                // If this is a potential exception handler
      452                                // but not the one that matched the exception in the search phase,
     453                                // just ignore it
     454                                goto UNWIND;
    396455                        }
     456
     457                        // We need to run some clean-up or a handler
      458                        // These statements do the right thing, but I don't know the specifics at all.
     459                        _Unwind_SetGR( unwind_context, __builtin_eh_return_data_regno(0),
     460                                (_Unwind_Ptr)unwind_exception );
     461                        _Unwind_SetGR( unwind_context, __builtin_eh_return_data_regno(1), 0 );
     462
      463                        // I assume this sets the instruction pointer to the address of the landing pad.
      464                        // It doesn't actually set it, it only states the value that needs to be set once we
     465                        // return _URC_INSTALL_CONTEXT
     466                        _Unwind_SetIP( unwind_context, ((lsd_info.LPStart) + (callsite_landing_pad)) );
     467
     468                        __cfadbg_print_safe(exception, " action\n");
     469
      470                        // Return that we have some action to run
     471                        return _URC_INSTALL_CONTEXT;
    397472                }
    398 
    399                 // Nothing to do, move along
    400                 __cfaabi_dbg_print_safe(" no landing pad");
    401473        }
    402474        // No handling found
    403         __cfaabi_dbg_print_safe(" table end reached\n");
     475        __cfadbg_print_safe(exception, " table end reached");
    404476
    405477        UNWIND:
    406         __cfaabi_dbg_print_safe(" unwind\n");
     478        __cfadbg_print_safe(exception, " unwind\n");
    407479
    408480        // Keep unwinding the stack
    409481        return _URC_CONTINUE_UNWIND;
    410482}
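The call-site table scanned above stores its offsets as ULEB128 values; read_uleb128 is not part of this diff, but decoding a ULEB128 is simply reassembling 7-bit groups, least significant first, until a byte with the high bit clear. A typical implementation (not necessarily the one libcfa uses) looks like:

    // Decode one unsigned LEB128 value; returns the advanced read pointer.
    static const unsigned char * read_uleb128(const unsigned char * p, _uleb128_t * val) {
        _uleb128_t result = 0;
        unsigned int shift = 0;
        unsigned char byte;
        do {
            byte = *p++;
            result |= (_uleb128_t)(byte & 0x7f) << shift; // low 7 bits are payload
            shift += 7;
        } while ( byte & 0x80 );                          // high bit set => more bytes
        *val = result;
        return p;
    }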
     483
     484#pragma GCC push_options
     485#pragma GCC optimize(0)
    411486
     412487// Try statements are hoisted out; see comments for details. While this could probably be unique
     413488// and simply linked from libcfa, there is one problem left; see the exception table for details
    414489__attribute__((noinline))
    415 void __cfaabi_ehm__try_terminate(void (*try_block)(),
     490void __cfaehm_try_terminate(void (*try_block)(),
    416491                void (*catch_block)(int index, exception_t * except),
    417492                __attribute__((unused)) int (*match_block)(exception_t * except)) {
     
    419494        //! printf("%p %p %p %p\n", &try_block, &catch_block, &match_block, &xy);
    420495
    421         // Setup statments: These 2 statments won't actually result in any code, they only setup global tables.
    422         // However, they clobber gcc cancellation support from gcc.  We can replace the personality routine but
    423         // replacing the exception table gcc generates is not really doable, it generates labels based on how the
    424         // assembly works.
    425 
    426496        // Setup the personality routine and exception table.
      497        // Unfortunately, these clobber gcc cancellation support, which means we can't get access to
     498        // the attribute cleanup tables at the same time. We would have to inspect the assembly to
     499        // create a new set ourselves.
    427500#ifdef __PIC__
    428501        asm volatile (".cfi_personality 0x9b,CFA.ref.__gcfa_personality_v0");
     
    449522        // Label which defines the end of the area for which the handler is setup.
    450523        asm volatile (".TRYEND:");
    451         // Label which defines the start of the exception landing pad.  Basically what is called when the exception is
    452         // caught.  Note, if multiple handlers are given, the multiplexing should be done by the generated code, not the
    453         // exception runtime.
     524        // Label which defines the start of the exception landing pad. Basically what is called when
     525        // the exception is caught. Note, if multiple handlers are given, the multiplexing should be
     526        // done by the generated code, not the exception runtime.
    454527        asm volatile (".CATCH:");
    455528
    456529        // Exception handler
    457         catch_block( shared_stack.current_handler_index,
    458                      shared_stack.current_exception );
     530        // Note: Saving the exception context on the stack breaks termination exceptions.
     531        catch_block( EXCEPT_TO_NODE( this_exception_context()->current_exception )->handler_index,
     532                     this_exception_context()->current_exception );
    459533}
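To make the calling convention of __cfaehm_try_terminate concrete: a try statement is hoisted into a try function, a matcher returning the 1-based index of the matching handler (0 for no match), and a catch function that multiplexes on that index. Purely as an illustration of the interface, hand-written rather than actual translator output:

    #include "exception.h"

    // Hypothetical hoisted pieces for: try { ... } catch ( SomeExc ) { ... }
    static void try_block_0(void) { /* body of the try statement */ }

    static int match_block_0(exception_t * except) {
        // Return 0 for no match, or the 1-based index of the matching handler.
        return 1;
    }

    static void catch_block_0(int index, exception_t * except) {
        switch ( index ) {
          case 1: /* handler body */ break;
        }
    }

    void user_function(void) {
        __cfaehm_try_terminate( try_block_0, catch_block_0, match_block_0 );
    }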
    460534
     
    464538
    465539#ifdef __PIC__
    466 #if defined( __i386 ) || defined( __x86_64 )
    467540asm (
    468541        // HEADER
     
    481554        // handler landing pad offset and 1 (action code, gcc seems to use 0).
    482555        ".LLSDACSBCFA2:\n"
    483         "       .uleb128 .TRYSTART-__cfaabi_ehm__try_terminate\n"
     556        "       .uleb128 .TRYSTART-__cfaehm_try_terminate\n"
    484557        "       .uleb128 .TRYEND-.TRYSTART\n"
    485         "       .uleb128 .CATCH-__cfaabi_ehm__try_terminate\n"
     558        "       .uleb128 .CATCH-__cfaehm_try_terminate\n"
    486559        "       .uleb128 1\n"
    487560        ".LLSDACSECFA2:\n"
    488561        // TABLE FOOTER
    489562        "       .text\n"
    490         "       .size   __cfaabi_ehm__try_terminate, .-__cfaabi_ehm__try_terminate\n"
     563        "       .size   __cfaehm_try_terminate, .-__cfaehm_try_terminate\n"
    491564);
    492565
     
    507580        "       .quad __gcfa_personality_v0\n"
    508581#else // then __i386
    509         "   .long __gcfa_personality_v0\n"
     582        "       .long __gcfa_personality_v0\n"
    510583#endif
    511584);
    512 #else
    513 #error Exception Handling: unknown architecture for position independent code.
    514 #endif // __i386 || __x86_64
    515585#else // __PIC__
    516 #if defined( __i386 ) || defined( __x86_64 )
    517586asm (
    518587        // HEADER
     
    529598        ".LLSDACSBCFA2:\n"
    530599        //      Handled area start (relative to start of function)
    531         "       .uleb128 .TRYSTART-__cfaabi_ehm__try_terminate\n"
     600        "       .uleb128 .TRYSTART-__cfaehm_try_terminate\n"
    532601        //      Handled area length
    533602        "       .uleb128 .TRYEND-.TRYSTART\n"
    534603        //      Handler landing pad address (relative to start of function)
    535         "       .uleb128 .CATCH-__cfaabi_ehm__try_terminate\n"
     604        "       .uleb128 .CATCH-__cfaehm_try_terminate\n"
    536605        //      Action code, gcc seems to always use 0.
    537606        "       .uleb128 1\n"
     
    539608        ".LLSDACSECFA2:\n"
    540609        "       .text\n"
    541         "       .size   __cfaabi_ehm__try_terminate, .-__cfaabi_ehm__try_terminate\n"
     610        "       .size   __cfaehm_try_terminate, .-__cfaehm_try_terminate\n"
    542611        "       .ident  \"GCC: (Ubuntu 6.2.0-3ubuntu11~16.04) 6.2.0 20160901\"\n"
    543612        "       .section        .note.GNU-stack,\"x\",@progbits\n"
    544613);
     614#endif // __PIC__
     615
     616#pragma GCC pop_options
     617
     618#elif defined( __ARM_ARCH )
     619_Unwind_Reason_Code __gcfa_personality_v0(
     620                int version,
     621                _Unwind_Action actions,
     622                unsigned long long exception_class,
     623                struct _Unwind_Exception * unwind_exception,
     624                struct _Unwind_Context * unwind_context) {
     625        return _URC_CONTINUE_UNWIND;
     626}
     627
     628__attribute__((noinline))
     629void __cfaehm_try_terminate(void (*try_block)(),
     630                void (*catch_block)(int index, exception_t * except),
     631                __attribute__((unused)) int (*match_block)(exception_t * except)) {
     632}
    545633#else
    546 #error Exception Handling: unknown architecture for position dependent code.
    547 #endif // __i386 || __x86_64
    548 #endif // __PIC__
    549 
    550 #pragma GCC pop_options
     634        #error unsupported hardware architecture
     635#endif // __x86_64 || __i386
  • libcfa/src/exception.h

    r3c64c668 r58fe85a  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // exception.h -- Builtins for exception handling.
     7// exception.h -- Internal exception handling definitions.
    88//
    99// Author           : Andrew Beach
    1010// Created On       : Mon Jun 26 15:11:00 2017
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Feb 22 18:11:15 2018
    13 // Update Count     : 8
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Tue Oct 27 14:45:00 2020
     13// Update Count     : 11
    1414//
    1515
    1616#pragma once
    1717
      18// This could be considered several headers. All are internal to the exception
      19// system but are included conditionally depending on whether they are C/Cforall code
      20// and whether or not they are part of the builtins.
    1821
    1922#ifdef __cforall
     
    2124#endif
    2225
    23 struct __cfaabi_ehm__base_exception_t;
    24 typedef struct __cfaabi_ehm__base_exception_t exception_t;
    25 struct __cfaabi_ehm__base_exception_t_vtable {
    26         const struct __cfaabi_ehm__base_exception_t_vtable * parent;
     26// Included in C code or the built-ins.
     27#if !defined(__cforall) || defined(__cforall_builtins__)
     28
     29struct __cfaehm_base_exception_t;
     30typedef struct __cfaehm_base_exception_t exception_t;
     31struct __cfaehm_base_exception_t_vtable {
     32        const struct __cfaehm_base_exception_t_vtable * parent;
    2733        size_t size;
    28         void (*copy)(struct __cfaabi_ehm__base_exception_t *this,
    29                      struct __cfaabi_ehm__base_exception_t * other);
    30         void (*free)(struct __cfaabi_ehm__base_exception_t *this);
    31         const char * (*msg)(struct __cfaabi_ehm__base_exception_t *this);
     34        void (*copy)(struct __cfaehm_base_exception_t *this,
     35                     struct __cfaehm_base_exception_t * other);
     36        void (*free)(struct __cfaehm_base_exception_t *this);
     37        const char * (*msg)(struct __cfaehm_base_exception_t *this);
    3238};
    33 struct __cfaabi_ehm__base_exception_t {
    34         struct __cfaabi_ehm__base_exception_t_vtable const * virtual_table;
     39struct __cfaehm_base_exception_t {
     40        struct __cfaehm_base_exception_t_vtable const * virtual_table;
    3541};
    36 extern struct __cfaabi_ehm__base_exception_t_vtable
    37         ___cfaabi_ehm__base_exception_t_vtable_instance;
     42extern struct __cfaehm_base_exception_t_vtable
     43        ___cfaehm_base_exception_t_vtable_instance;
    3844
    3945
     46void __cfaehm_cancel_stack(exception_t * except) __attribute__((noreturn));
     47
    4048// Used in throw statement translation.
    41 void __cfaabi_ehm__throw_terminate(exception_t * except) __attribute__((noreturn));
    42 void __cfaabi_ehm__rethrow_terminate() __attribute__((noreturn));
    43 void __cfaabi_ehm__throw_resume(exception_t * except);
     49void __cfaehm_throw_terminate(exception_t * except, void (*)(exception_t *));
     50void __cfaehm_rethrow_terminate() __attribute__((noreturn));
     51void __cfaehm_throw_resume(exception_t * except, void (*)(exception_t *));
    4452
    4553// Function catches termination exceptions.
    46 void __cfaabi_ehm__try_terminate(
    47     void (*try_block)(),
    48     void (*catch_block)(int index, exception_t * except),
    49     int (*match_block)(exception_t * except));
     54void __cfaehm_try_terminate(
     55        void (*try_block)(),
     56        void (*catch_block)(int index, exception_t * except),
     57        int (*match_block)(exception_t * except));
    5058
    5159// Clean-up the exception in catch blocks.
    52 void __cfaabi_ehm__cleanup_terminate(void * except);
     60void __cfaehm_cleanup_terminate(void * except);
    5361
    5462// Data structure creates a list of resume handlers.
    55 struct __cfaabi_ehm__try_resume_node {
    56     struct __cfaabi_ehm__try_resume_node * next;
    57     _Bool (*handler)(exception_t * except);
     63struct __cfaehm_try_resume_node {
     64        struct __cfaehm_try_resume_node * next;
     65        _Bool (*handler)(exception_t * except);
    5866};
    5967
    6068// These act as constructor and destructor for the resume node.
    61 void __cfaabi_ehm__try_resume_setup(
    62     struct __cfaabi_ehm__try_resume_node * node,
    63     _Bool (*handler)(exception_t * except));
    64 void __cfaabi_ehm__try_resume_cleanup(
    65     struct __cfaabi_ehm__try_resume_node * node);
     69void __cfaehm_try_resume_setup(
     70        struct __cfaehm_try_resume_node * node,
     71        _Bool (*handler)(exception_t * except));
     72void __cfaehm_try_resume_cleanup(
     73        struct __cfaehm_try_resume_node * node);
    6674
     6775// Check for a standard way to call fake destructors.
    68 struct __cfaabi_ehm__cleanup_hook {};
     76struct __cfaehm_cleanup_hook {};
     77
     78#endif
     79
     80// Included in C code and the library.
     81#if !defined(__cforall) || !defined(__cforall_builtins__)
     82struct __cfaehm_node {
     83        struct _Unwind_Exception unwind_exception;
     84        struct __cfaehm_node * next;
     85        int handler_index;
     86};
     87
     88static inline exception_t * __cfaehm_cancellation_exception(
     89                struct _Unwind_Exception * unwind_exception ) {
     90        return (exception_t *)(1 + (struct __cfaehm_node *)unwind_exception);
     91}
     92#endif
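The node is laid out with the user-visible exception immediately following it, which is what the pointer arithmetic in __cfaehm_cancellation_exception encodes. The EXCEPT_TO_NODE / NODE_TO_EXCEPT / UNWIND_TO_NODE conversions used in exception.c are not shown in this diff; given that layout they are presumably equivalent to:

    // Layout assumption: [ struct __cfaehm_node | exception data ... ]
    #define NODE_TO_EXCEPT(node)   ((exception_t *)((node) + 1))
    #define EXCEPT_TO_NODE(except) ((struct __cfaehm_node *)(except) - 1)
    // unwind_exception is the first member, so this cast is layout-safe.
    #define UNWIND_TO_NODE(unwind) ((struct __cfaehm_node *)(unwind))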
    6993
    7094#ifdef __cforall
    7195}
     96
     97// Built-ins not visible in C.
     98#if defined(__cforall_builtins__)
     99
     100// Not all the built-ins can be expressed in C. These can't be
     101// implemented in the .c file either so they all have to be inline.
     102
     103trait is_exception(dtype exceptT, dtype virtualT) {
     104        /* The first field must be a pointer to a virtual table.
      105         * That virtual table must be a descendant of the base exception virtual table.
     106         */
     107        virtualT const & get_exception_vtable(exceptT *);
     108        // Always returns the virtual table for this type (associated types hack).
     109};
     110
     111trait is_termination_exception(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT)) {
     112        void defaultTerminationHandler(exceptT &);
     113};
     114
     115trait is_resumption_exception(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT)) {
     116        void defaultResumptionHandler(exceptT &);
     117};
     118
     119forall(dtype exceptT, dtype virtualT | is_termination_exception(exceptT, virtualT))
     120static inline void $throw(exceptT & except) {
     121        __cfaehm_throw_terminate(
     122                (exception_t *)&except,
     123                (void(*)(exception_t *))defaultTerminationHandler
     124        );
     125}
     126
     127forall(dtype exceptT, dtype virtualT | is_resumption_exception(exceptT, virtualT))
     128static inline void $throwResume(exceptT & except) {
     129        __cfaehm_throw_resume(
     130                (exception_t *)&except,
     131                (void(*)(exception_t *))defaultResumptionHandler
     132        );
     133}
     134
     135forall(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT))
     136static inline void cancel_stack(exceptT & except) __attribute__((noreturn)) {
     137        __cfaehm_cancel_stack( (exception_t *)&except );
     138}
     139
     140forall(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT))
     141static inline void defaultTerminationHandler(exceptT & except) {
     142        return cancel_stack( except );
     143}
     144
     145forall(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT))
     146static inline void defaultResumptionHandler(exceptT & except) {
     147        throw except;
     148}
     149
    72150#endif
     151
     152#endif
  • libcfa/src/executor.cfa

    r3c64c668 r58fe85a  
    44// buffer.
    55
    6 #include <bits/containers.hfa>
    76#include <thread.hfa>
    8 #include <stdio.h>
     7#include <containers/list.hfa>
    98
    10 forall( dtype T )
    11 monitor Buffer {                                        // unbounded buffer
    12     __queue_t( T ) queue;                               // unbounded list of work requests
    13     condition delay;
    14 }; // Buffer
    15 forall( dtype T | is_node(T) ) {
    16     void insert( Buffer( T ) & mutex buf, T * elem ) with(buf) {
    17         append( queue, elem );                          // insert element into buffer
    18         signal( delay );                                // restart
    19     } // insert
     9forall( dtype T | $dlistable(T, T) ) {
     10        monitor Buffer {                                                                        // unbounded buffer
     11                dlist( T, T ) queue;                                                    // unbounded list of work requests
     12                condition delay;
     13        }; // Buffer
    2014
    21     T * remove( Buffer( T ) & mutex buf ) with(buf) {
    22         if ( queue.head != 0 ) wait( delay );                   // no request to process ? => wait
    23 //      return pop_head( queue );
    24     } // remove
    25 } // distribution
     15        void insert( Buffer(T) & mutex buf, T * elem ) with(buf) {
     16                dlist( T, T ) * qptr = &queue;                                  // workaround https://cforall.uwaterloo.ca/trac/ticket/166
     17                insert_last( *qptr, *elem );                                    // insert element into buffer
     18                signal( delay );                                                                // restart
     19        } // insert
    2620
    27 struct WRequest {                                       // client request, no return
    28     void (* action)( void );
    29     WRequest * next;                                    // intrusive queue field
     21        T * remove( Buffer(T) & mutex buf ) with(buf) {
     22                dlist( T, T ) * qptr = &queue;                                  // workaround https://cforall.uwaterloo.ca/trac/ticket/166
     23                // if ( (*qptr)`is_empty ) wait( delay );                       // no request to process ? => wait
      24          if ( (*qptr)`is_empty ) return 0p;                            // no request to process ? => return 0p
     25                return &pop_first( *qptr );
     26        } // remove
     27} // forall
     28
     29struct WRequest {                                                                               // client request, no return
     30        void (* action)( void );
     31        DLISTED_MGD_IMPL_IN(WRequest)
    3032}; // WRequest
     33DLISTED_MGD_IMPL_OUT(WRequest)
    3134
    32 WRequest *& get_next( WRequest & this ) { return this.next; }
    33 void ?{}( WRequest & req ) with(req) { action = 0; next = 0; }
    34 void ?{}( WRequest & req, void (* action)( void ) ) with(req) { req.action = action; next = 0; }
     35void ?{}( WRequest & req ) with(req) { action = 0; }
     36void ?{}( WRequest & req, void (* action)( void ) ) with(req) { req.action = action; }
    3537bool stop( WRequest & req ) { return req.action == 0; }
    3638void doit( WRequest & req ) { req.action(); }
    3739
    38 // Each worker has its own work buffer to reduce contention between client and server. Hence, work requests arrive and
    39 // are distributed into buffers in a roughly round-robin order.
      40// Each worker has its own set (when request buffers > workers) of work buffers to reduce contention between client
     41// and server, where work requests arrive and are distributed into buffers in a roughly round-robin order.
    4042
    4143thread Worker {
    42     Buffer( WRequest ) * requests;
    43     unsigned int start, range;
     44        Buffer(WRequest) * requests;
     45        WRequest * request;
     46        unsigned int start, range;
    4447}; // Worker
    4548
    4649void main( Worker & w ) with(w) {
    47     for ( int i = 0;; i = (i + 1) % range ) {
    48         WRequest * request = remove( requests[i + start] );
    49       if ( ! request ) { yield(); continue; }
    50       if ( stop( *request ) ) break;
    51         doit( *request );
    52         delete( request );
    53     } // for
     50        for ( int i = 0;; i = (i + 1) % range ) {
     51                request = remove( requests[i + start] );
     52          if ( ! request ) { yield(); continue; }
     53          if ( stop( *request ) ) break;
     54                doit( *request );
     55                delete( request );
     56        } // for
    5457} // Worker::main
    5558
    56 void ?{}( Worker & worker, cluster * wc, Buffer( WRequest ) * requests, unsigned int start, unsigned int range ) {
    57     (*get_thread(worker)){ *wc };                       // create on given cluster
    58     worker.[requests, start, range] = [requests, start, range];
     59void ?{}( Worker & worker, cluster * wc, Buffer(WRequest) * requests, unsigned int start, unsigned int range ) {
     60        ((thread &)worker){ *wc };
     61        worker.[requests, request, start, range] = [requests, 0p, start, range];
    5962} // ?{}
    6063
     64WRequest * current_request( Worker & worker ) { return worker.request; }
     65
    6166struct Executor {
    62     cluster * cluster;                                  // if workers execute on separate cluster
    63     processor ** processors;                            // array of virtual processors adding parallelism for workers
    64     Buffer( WRequest ) * requests;                      // list of work requests
    65     Worker ** workers;                                  // array of workers executing work requests
    66     unsigned int nprocessors, nworkers, nmailboxes;     // number of mailboxes/workers/processor tasks
    67     bool sepClus;                                       // use same or separate cluster for executor
     67        cluster * cluster;                                                                      // if workers execute on separate cluster
     68        processor ** processors;                                                        // array of virtual processors adding parallelism for workers
     69        Buffer(WRequest) * requests;                                            // list of work requests
     70        Worker ** workers;                                                                      // array of workers executing work requests
     71        unsigned int nprocessors, nworkers, nrqueues;           // number of processors/threads/request queues
     72        bool sepClus;                                                                           // use same or separate cluster for executor
     73        unsigned int next;                                                                      // demultiplexed across worker buffers
    6874}; // Executor
    6975
    70 static thread_local unsigned int next;                  // demultiplexed across worker buffers
    7176unsigned int tickets( Executor & ex ) with(ex) {
    72     //return uFetchAdd( next, 1 ) % nmailboxes;
    73     return next++ % nmailboxes;                         // no locking, interference randomizes
     77        //return uFetchAdd( next, 1 ) % nrqueues;
     78        return next++ % nrqueues;                                                       // no locking, interference randomizes
    7479} // tickets
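The constructor below splits the nrqueues request queues across nworkers workers, handing one extra queue to each of the first nrqueues % nworkers workers. A quick standalone check of that arithmetic, with values picked arbitrarily for the example:

    #include <stdio.h>

    int main(void) {
        unsigned int nrqueues = 10, nworkers = 4;
        unsigned int reqPerWorker = nrqueues / nworkers, extras = nrqueues % nworkers;
        for ( unsigned int i = 0, start = 0; i < nworkers; i += 1 ) {
            unsigned int range = reqPerWorker + ( i < extras ? 1 : 0 );
            printf( "worker %u: queues [%u, %u)\n", i, start, start + range );
            start += range;
        }
        // worker 0: [0,3)  worker 1: [3,6)  worker 2: [6,8)  worker 3: [8,10)
        return 0;
    }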
    7580
    76 void ?{}( Executor & ex, unsigned int np, unsigned int nw, unsigned int nm, bool sc = false ) with(ex) {
    77     [nprocessors, nworkers, nmailboxes, sepClus] = [np, nw, nm, sc];
    78     assert( nmailboxes >= nworkers );
    79     cluster = sepClus ? new( "Executor" ) : active_cluster();
    80     processors = (processor **)anew( nprocessors );
    81     requests = anew( nmailboxes );
    82     workers = (Worker **)anew( nworkers );
     81void ?{}( Executor & ex, unsigned int np, unsigned int nw, unsigned int nr, bool sc = false ) with(ex) {
     82        [nprocessors, nworkers, nrqueues, sepClus] = [np, nw, nr, sc];
     83        assert( nrqueues >= nworkers );
     84        cluster = sepClus ? new( "Executor" ) : active_cluster();
     85        processors = aalloc( nprocessors );
     86        requests = anew( nrqueues );
     87        workers = aalloc( nworkers );
    8388
    84     for ( i; nprocessors ) {
    85         processors[ i ] = new( *cluster );
    86     } // for
     89        for ( i; nprocessors ) {
     90                processors[i] = new( *cluster );
     91        } // for
    8792
    88     unsigned int reqPerWorker = nmailboxes / nworkers, extras = nmailboxes % nworkers;
    89     for ( unsigned int i = 0, step = 0; i < nworkers; i += 1, step += reqPerWorker + ( i < extras ? 1 : 0 ) ) {
    90         workers[ i ] = new( cluster, requests, step, reqPerWorker + ( i < extras ? 1 : 0 ) );
    91     } // for
     93        unsigned int reqPerWorker = nrqueues / nworkers, extras = nrqueues % nworkers;
     94//      for ( unsigned int i = 0, start = 0, range; i < nworkers; i += 1, start += range ) {
     95    for ( i; nworkers : start; 0u ~ @ ~ range : range; ) {
     96            range = reqPerWorker + ( i < extras ? 1 : 0 );
     97                workers[i] = new( cluster, requests, start, range );
     98        } // for
    9299} // ?{}
    93100
    94101void ?{}( Executor & ex, unsigned int nprocessors, unsigned int nworkers, bool sepClus = false ) {
    95     ex{ nprocessors, nworkers, nworkers, sepClus };
     102        ex{ nprocessors, nworkers, nworkers, sepClus };
    96103}
    97104void ?{}( Executor & ex, unsigned int nprocessors, bool sepClus = false ) {
    98     ex{ nprocessors, nprocessors, nprocessors, sepClus };
     105        ex{ nprocessors, nprocessors, nprocessors, sepClus };
    99106}
    100 void ?{}( Executor & ex ) {                             // special for current cluster
    101     ex{ 0, active_cluster()->nprocessors, false };
     107void ?{}( Executor & ex ) {                                                             // special for current cluster, no processors added
     108        ex{ 0, active_cluster()->nprocessors, false };
    102109}
    103110void ^?{}( Executor & ex ) with(ex) {
    104     // Add one sentinel per worker to stop them. Since in destructor, no new work should be queued.  Cannot combine next
    105     // two loops and only have a single sentinel because workers arrive in arbitrary order, so worker1 may take the
    106     // single sentinel while waiting for worker 0 to end.
     111        // Add one sentinel per worker to stop them. Since in destructor, no new external work should be queued.  Cannot
     112        // combine next two loops and only have a single sentinel because workers arrive in arbitrary order, so worker1 may
     113        // take the single sentinel while waiting for worker 0 to end.
    107114
    108     WRequest sentinel[nworkers];
    109     unsigned int reqPerWorker = nmailboxes / nworkers;
    110     for ( unsigned int i = 0, step = 0; i < nworkers; i += 1, step += reqPerWorker ) {
    111         insert( requests[step], &sentinel[i] );         // force eventually termination
    112     } // for
    113     for ( i; nworkers ) {
    114         delete( workers[ i ] );
    115     } // for
    116     for ( i; nprocessors ) {
    117         delete( processors[ i ] );
    118     } // for
     115        WRequest sentinel[nworkers];
     116        unsigned int reqPerWorker = nrqueues / nworkers;
     117        for ( unsigned int i = 0, step = 0; i < nworkers; i += 1, step += reqPerWorker ) {
      118                insert( requests[step], &sentinel[i] );                 // force eventual termination
     119        } // for
     120        for ( i; nworkers ) {
     121                delete( workers[i] );
     122        } // for
     123        for ( i; nprocessors ) {
     124                delete( processors[i] );
     125        } // for
    119126
    120     delete( workers );
    121     delete( requests );
    122     delete( processors );
    123     if ( sepClus ) { delete( cluster ); }
     127        free( workers );
     128//      adelete( nrqueues, requests );
     129        for ( i; nrqueues ) ^?{}( requests[i] );                        // FIX ME: problem with resolver
     130        free( requests );
     131        free( processors );
     132        if ( sepClus ) { delete( cluster ); }
    124133} // ^?{}
    125134
    126135void send( Executor & ex, void (* action)( void ) ) {   // asynchronous call, no return value
    127     WRequest * node = new( action );
    128     insert( ex.requests[tickets( ex )], node );
     136        WRequest * node = new( action );
     137        insert( ex.requests[tickets( ex )], node );
    129138} // send
     139
    130140
    131141int counter = 0;
    132142
    133 void workie( void ) {
    134     __atomic_add_fetch( &counter, 1, __ATOMIC_SEQ_CST );
    135 //    fprintf( stderr, "workie\n" );
     143void work( void ) {
     144        __atomic_add_fetch( &counter, 1, __ATOMIC_SEQ_CST );
     145        // fprintf( stderr, "workie\n" );
    136146}
    137147
    138 int main() {
    139     {
    140         Executor exector;
    141         for ( i; 3000 ) {
    142             send( exector, workie );
    143             if ( i % 100 ) yield();
    144         } // for
    145     }
    146     printf( "%d\n", counter );
     148int main( int argc, char * argv[] ) {
     149        int times = 1_000_000;
     150        if ( argc == 2 ) times = atoi( argv[1] );
     151        processor p[7];
     152        {
     153                Executor exector;
     154                for ( i; times ) {
     155                        send( exector, work );
     156                        if ( i % 100 == 0 ) yield();
     157                } // for
     158        }
     159        printf( "%d\n", counter );
    147160}
    148161
    149162// Local Variables: //
     163// tab-width: 4" //
    150164// compile-command: "cfa executor.cfa" //
    151165// End: //
  • libcfa/src/fstream.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Feb  7 19:01:01 2020
    13 // Update Count     : 363
     12// Last Modified On : Fri Jun 19 16:24:54 2020
     13// Update Count     : 384
    1414//
    1515
     
    2626
    2727
    28 //*********************************** ofstream ***********************************
     28// *********************************** ofstream ***********************************
    2929
    3030
     
    123123        #ifdef __CFA_DEBUG__
    124124        if ( file == 0p ) {
    125                 abort | IO_MSG "open output file \"" | name | "\"" | nl | strerror( errno );
     125                throw (Open_Failure){ os };
     126                // abort | IO_MSG "open output file \"" | name | "\"" | nl | strerror( errno );
    126127        } // if
    127128        #endif // __CFA_DEBUG__
     
    134135
    135136void close( ofstream & os ) {
    136         if ( (FILE *)(os.$file) == stdout || (FILE *)(os.$file) == stderr ) return;
     137  if ( (FILE *)(os.$file) == 0p ) return;
     138  if ( (FILE *)(os.$file) == (FILE *)stdout || (FILE *)(os.$file) == (FILE *)stderr ) return;
    137139
    138140        if ( fclose( (FILE *)(os.$file) ) == EOF ) {
    139141                abort | IO_MSG "close output" | nl | strerror( errno );
    140142        } // if
     143        os.$file = 0p;
    141144} // close
    142145
     
    179182
    180183
    181 //*********************************** ifstream ***********************************
     184// *********************************** ifstream ***********************************
    182185
    183186
     
    219222        #ifdef __CFA_DEBUG__
    220223        if ( file == 0p ) {
    221                 abort | IO_MSG "open input file \"" | name | "\"" | nl | strerror( errno );
     224                throw (Open_Failure){ is };
     225                // abort | IO_MSG "open input file \"" | name | "\"" | nl | strerror( errno );
    222226        } // if
    223227        #endif // __CFA_DEBUG__
     
    230234
    231235void close( ifstream & is ) {
    232         if ( (FILE *)(is.$file) == stdin ) return;
     236  if ( (FILE *)(is.$file) == 0p ) return;
     237  if ( (FILE *)(is.$file) == (FILE *)stdin ) return;
    233238
    234239        if ( fclose( (FILE *)(is.$file) ) == EOF ) {
    235240                abort | IO_MSG "close input" | nl | strerror( errno );
    236241        } // if
     242        is.$file = 0p;
    237243} // close
    238244
     
    276282ifstream & sin = sinFile, & stdin = sinFile;
    277283
     284
     285// *********************************** exceptions ***********************************
     286
     287
     288void ?{}( Open_Failure & this, ofstream & ostream ) {
     289        VTABLE_INIT(this, Open_Failure);
     290        this.ostream = &ostream;
     291        this.tag = 1;
     292}
     293void ?{}( Open_Failure & this, ifstream & istream ) {
     294        VTABLE_INIT(this, Open_Failure);
     295        this.istream = &istream;
     296        this.tag = 0;
     297}
     298const char * Open_Failure_msg(Open_Failure * this) {
     299        return "Open_Failure";
     300}
     301VTABLE_INSTANCE(Open_Failure)(Open_Failure_msg);
     302void throwOpen_Failure( ofstream & ostream ) {
     303        Open_Failure exc = { ostream };
     304}
     305void throwOpen_Failure( ifstream & istream ) {
     306        Open_Failure exc = { istream };
     307}
     308
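A sketch of how the new exception is meant to be used from client code. This assumes the Cforall try/catch syntax of the period, and note that, as committed, open only throws under __CFA_DEBUG__:

    #include <fstream.hfa>

    int main() {
        ifstream in;
        try {
            open( in, "missing-file.txt" );     // throws Open_Failure on failure
        } catch ( Open_Failure * ex ) {
            sout | "could not open input file"; // ex->tag == 0 => istream side
        }
    }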
    278309// Local Variables: //
    279310// tab-width: 4 //
  • libcfa/src/fstream.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Feb 17 08:29:23 2020
    13 // Update Count     : 175
     12// Last Modified On : Fri Jun 19 16:29:17 2020
     13// Update Count     : 189
    1414//
    1515
     
    1717
    1818#include "iostream.hfa"
     19#include <exception.hfa>
    1920
    2021
    21 //*********************************** ofstream ***********************************
     22// *********************************** ofstream ***********************************
    2223
    2324
     
    7879
    7980
    80 //*********************************** ifstream ***********************************
     81// *********************************** ifstream ***********************************
    8182
    8283
     
    106107extern ifstream & sin, & stdin;                                                 // aliases
    107108
     109
     110// *********************************** exceptions ***********************************
     111
     112
     113DATA_EXCEPTION(Open_Failure)(
     114        union {
     115                ofstream * ostream;
     116                ifstream * istream;
     117        };
     118        // TEMPORARY: need polymorphic exceptions
     119        int tag;                                                                                        // 1 => ostream; 0 => istream
     120);
     121
     122void ?{}( Open_Failure & this, ofstream & ostream );
     123void ?{}( Open_Failure & this, ifstream & istream );
     124
    108125// Local Variables: //
    109126// mode: c //
  • libcfa/src/heap.cfa

    r3c64c668 r58fe85a  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // heap.c --
     7// heap.cfa --
    88//
    99// Author           : Peter A. Buhr
    1010// Created On       : Tue Dec 19 21:58:35 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 10:04:51 2020
    13 // Update Count     : 648
     12// Last Modified On : Wed Dec 16 12:28:25 2020
     13// Update Count     : 1023
    1414//
    1515
    1616#include <unistd.h>                                                                             // sbrk, sysconf
     17#include <stdlib.h>                                                                             // EXIT_FAILURE
    1718#include <stdbool.h>                                                                    // true, false
    1819#include <stdio.h>                                                                              // snprintf, fileno
    1920#include <errno.h>                                                                              // errno
    2021#include <string.h>                                                                             // memset, memcpy
    21 extern "C" {
     22#include <limits.h>                                                                             // ULONG_MAX
     23#include <malloc.h>                                                                             // memalign, malloc_usable_size
    2224#include <sys/mman.h>                                                                   // mmap, munmap
    23 } // extern "C"
    24 
    25 // #comment TD : Many of these should be merged into math I believe
    26 #include "bits/align.hfa"                                                               // libPow2
     25
     26#include "bits/align.hfa"                                                               // libAlign
    2727#include "bits/defs.hfa"                                                                // likely, unlikely
    2828#include "bits/locks.hfa"                                                               // __spinlock_t
    2929#include "startup.hfa"                                                                  // STARTUP_PRIORITY_MEMORY
    30 //#include "stdlib.hfa"                                                                 // bsearchl
    31 #include "malloc.h"
    32 
    33 #define MIN(x, y) (y > x ? x : y)
     30#include "math.hfa"                                                                             // ceiling
     31#include "bitmanip.hfa"                                                                 // is_pow2, ceiling2
    3432
    3533static bool traceHeap = false;
     
    7472        // Define the default extension heap amount in units of bytes. When the CFA supplied heap reaches the brk address,
    7573        // the brk address is extended by the extension amount.
    76         __CFA_DEFAULT_HEAP_EXPANSION__ = (1 * 1024 * 1024),
     74        __CFA_DEFAULT_HEAP_EXPANSION__ = (10 * 1024 * 1024),
    7775
    7876        // Define the mmap crossover point during allocation. Allocations less than this amount are allocated from buckets;
     
    9189
    9290#ifdef __CFA_DEBUG__
    93 static unsigned int allocFree;                                                  // running total of allocations minus frees
     91static size_t allocUnfreed;                                                             // running total of allocations minus frees
    9492
    9593static void prtUnfreed() {
    96         if ( allocFree != 0 ) {
     94        if ( allocUnfreed != 0 ) {
    9795                // DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
    9896                char helpText[512];
    99                 int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
     97                int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %zu(0x%zx) bytes of storage allocated but not freed.\n"
    10098                                                        "Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
    101                                                         (long int)getpid(), allocFree, allocFree ); // always print the UNIX pid
     99                                                        (long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
    102100                __cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
    103101        } // if
     
    106104extern "C" {
    107105        void heapAppStart() {                                                           // called by __cfaabi_appready_startup
    108                 allocFree = 0;
     106                allocUnfreed = 0;
    109107        } // heapAppStart
    110108
     
    118116
    119117// statically allocated variables => zero filled.
    120 static size_t pageSize;                                                                 // architecture pagesize
     118size_t __page_size;                                                                             // architecture pagesize
     119int __map_prot;                                                                                 // common mmap/mprotect protection
    121120static size_t heapExpand;                                                               // sbrk advance
    122121static size_t mmapStart;                                                                // cross over point for mmap
     
    127126#define LOCKFREE 1
    128127#define BUCKETLOCK SPINLOCK
    129 #if BUCKETLOCK == LOCKFREE
    130 #include <uStackLF.h>
     128#if BUCKETLOCK == SPINLOCK
     129#elif BUCKETLOCK == LOCKFREE
     130#include <stackLockFree.hfa>
     131#else
     132        #error undefined lock type for bucket lock
    131133#endif // LOCKFREE
    132134
     
    136138
    137139struct HeapManager {
    138 //      struct FreeHeader;                                                                      // forward declaration
    139 
    140140        struct Storage {
    141141                struct Header {                                                                 // header
     
    145145                                                struct {                                                // 4-byte word => 8-byte header, 8-byte word => 16-byte header
    146146                                                        #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
    147                                                         uint32_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
     147                                                        uint64_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
    148148                                                        #endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
    149149
    150150                                                        union {
    151 //                                                              FreeHeader * home;              // allocated block points back to home locations (must overlay alignment)
     151                                                                // FreeHeader * home;           // allocated block points back to home locations (must overlay alignment)
     152                                                                // 2nd low-order bit => zero filled
    152153                                                                void * home;                    // allocated block points back to home locations (must overlay alignment)
    153154                                                                size_t blockSize;               // size for munmap (must overlay alignment)
    154                                                                 #if BUCKLOCK == SPINLOCK
     155                                                                #if BUCKETLOCK == SPINLOCK
    155156                                                                Storage * next;                 // freed block points next freed block of same size
    156157                                                                #endif // SPINLOCK
    157158                                                        };
     159                                                        size_t size;                            // allocation size in bytes
    158160
    159161                                                        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
    160                                                         uint32_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
     162                                                        uint64_t padding;                       // unused, force home/blocksize to overlay alignment in fake header
    161163                                                        #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
    162164                                                };
    163                                                 // future code
    164                                                 #if BUCKLOCK == LOCKFREE
    165                                                 Stack<Storage>::Link next;              // freed block points next freed block of same size (double-wide)
     165                                                #if BUCKETLOCK == LOCKFREE
     166                                                Link(Storage) next;                             // freed block points next freed block of same size (double-wide)
    166167                                                #endif // LOCKFREE
    167168                                        };
    168169                                } real; // RealHeader
     170
    169171                                struct FakeHeader {
    170172                                        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    171                                         uint32_t alignment;                                     // low-order bits of home/blockSize used for tricks
     173                                        uint32_t alignment;                                     // 1st low-order bit => fake header & alignment
    172174                                        #endif // __ORDER_LITTLE_ENDIAN__
    173175
     
    187189
    188190        struct FreeHeader {
    189                 #if BUCKLOCK == SPINLOCK
     191                #if BUCKETLOCK == SPINLOCK
    190192                __spinlock_t lock;                                                              // must be first field for alignment
    191193                Storage * freeList;
    192                 #elif BUCKLOCK == LOCKFREE
    193                 // future code
    194                 StackLF<Storage> freeList;
    195194                #else
    196                         #error undefined lock type for bucket lock
    197                 #endif // SPINLOCK
     195                StackLF(Storage) freeList;
     196                #endif // BUCKETLOCK
    198197                size_t blockSize;                                                               // size of allocations on this list
    199198        }; // FreeHeader
     
    208207}; // HeapManager
    209208
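The header comments above pack flags into low-order bits: bit 0 of kind.fake.alignment marks a fake header, and bit 1 of kind.real.blockSize marks zero-filled storage. A minimal sketch with hypothetical helper names, mirroring the tests used later in fakeHeader() and callocNoStats():

	static inline bool isFakeHeader( HeapManager.Storage.Header * h ) {
		return (h->kind.fake.alignment & 1) == 1;	// 1st low-order bit => fake header
	} // isFakeHeader
	static inline bool isZeroFill( HeapManager.Storage.Header * h ) {
		return (h->kind.real.blockSize & 2) != 0;	// 2nd low-order bit => zero filled
	} // isZeroFill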
     209#if BUCKETLOCK == LOCKFREE
     210static inline {
     211        Link(HeapManager.Storage) * ?`next( HeapManager.Storage * this ) { return &this->header.kind.real.next; }
     212        void ?{}( HeapManager.FreeHeader & ) {}
     213        void ^?{}( HeapManager.FreeHeader & ) {}
     214} // distribution
     215#endif // LOCKFREE
     216
    210217static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
    211218
     
    214221#define __STATISTICS__
    215222
    216 // Bucket size must be multiple of 16.
    217 // Powers of 2 are common allocation sizes, so make powers of 2 generate the minimum required size.
     223// Size of array must harmonize with NoBucketSizes and individual bucket sizes must be multiple of 16.
     224// Smaller multiples of 16 and powers of 2 are common allocation sizes, so make them generate the minimum required bucket size.
     225// malloc(0) returns 0p, so no bucket is necessary for 0 bytes returning an address that can be freed.
    218226static const unsigned int bucketSizes[] @= {                    // different bucket sizes
    219         16, 32, 48, 64 + sizeof(HeapManager.Storage), // 4
    220         96, 112, 128 + sizeof(HeapManager.Storage), // 3
     227        16 + sizeof(HeapManager.Storage), 32 + sizeof(HeapManager.Storage), 48 + sizeof(HeapManager.Storage), 64 + sizeof(HeapManager.Storage), // 4
     228        96 + sizeof(HeapManager.Storage), 112 + sizeof(HeapManager.Storage), 128 + sizeof(HeapManager.Storage), // 3
    221229        160, 192, 224, 256 + sizeof(HeapManager.Storage), // 4
    222230        320, 384, 448, 512 + sizeof(HeapManager.Storage), // 4
     
    236244};
    237245
    238 static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0]), "size of bucket array wrong" );
     246static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0] ), "size of bucket array wrong" );
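A worked example of the new bucket sizing: malloc( 100 ) needs 100 + sizeof(HeapManager.Storage) total bytes, so the binary search selects the 112 + sizeof(HeapManager.Storage) bucket, the smallest one at least that large. The selection, as performed inside doMalloc()'s with( heapManager ) scope using the Bsearchl() helper defined below:

	size_t tsize = 100 + sizeof(HeapManager.Storage);
	size_t posn = Bsearchl( (unsigned int)tsize, bucketSizes, (size_t)maxBucketsUsed );
	HeapManager.FreeHeader * freeElem = &freeLists[posn];	// the 112 + sizeof(HeapManager.Storage) bucket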
    239247
    240248#ifdef FASTLOOKUP
     
    243251#endif // FASTLOOKUP
    244252
    245 static int mmapFd = -1;                                                                 // fake or actual fd for anonymous file
     253static const off_t mmapFd = -1;                                                 // fake or actual fd for anonymous file
    246254#ifdef __CFA_DEBUG__
    247255static bool heapBoot = 0;                                                               // detect recursion during boot
    248256#endif // __CFA_DEBUG__
     257
     258// The constructor for heapManager is called explicitly in memory_startup.
    249259static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
    250260
     
    252262#ifdef __STATISTICS__
    253263// Heap statistics counters.
     264static unsigned int malloc_calls;
     265static unsigned long long int malloc_storage;
     266static unsigned int aalloc_calls;
     267static unsigned long long int aalloc_storage;
     268static unsigned int calloc_calls;
     269static unsigned long long int calloc_storage;
     270static unsigned int memalign_calls;
     271static unsigned long long int memalign_storage;
     272static unsigned int amemalign_calls;
     273static unsigned long long int amemalign_storage;
     274static unsigned int cmemalign_calls;
     275static unsigned long long int cmemalign_storage;
     276static unsigned int resize_calls;
     277static unsigned long long int resize_storage;
     278static unsigned int realloc_calls;
     279static unsigned long long int realloc_storage;
     280static unsigned int free_calls;
     281static unsigned long long int free_storage;
     282static unsigned int mmap_calls;
    254283static unsigned long long int mmap_storage;
    255 static unsigned int mmap_calls;
     284static unsigned int munmap_calls;
    256285static unsigned long long int munmap_storage;
    257 static unsigned int munmap_calls;
     286static unsigned int sbrk_calls;
    258287static unsigned long long int sbrk_storage;
    259 static unsigned int sbrk_calls;
    260 static unsigned long long int malloc_storage;
    261 static unsigned int malloc_calls;
    262 static unsigned long long int free_storage;
    263 static unsigned int free_calls;
    264 static unsigned long long int calloc_storage;
    265 static unsigned int calloc_calls;
    266 static unsigned long long int memalign_storage;
    267 static unsigned int memalign_calls;
    268 static unsigned long long int cmemalign_storage;
    269 static unsigned int cmemalign_calls;
    270 static unsigned long long int realloc_storage;
    271 static unsigned int realloc_calls;
    272288// Statistics file descriptor (changed by malloc_stats_fd).
    273 static int statfd = STDERR_FILENO;                                              // default stderr
     289static int stat_fd = STDERR_FILENO;                                             // default stderr
    274290
    275291// Use "write" because streams may be shutdown when calls are made.
    276292static void printStats() {
    277         char helpText[512];
     293        char helpText[1024];
    278294        __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
    279295                                                                        "\nHeap statistics:\n"
    280296                                                                        "  malloc: calls %u / storage %llu\n"
     297                                                                        "  aalloc: calls %u / storage %llu\n"
    281298                                                                        "  calloc: calls %u / storage %llu\n"
    282299                                                                        "  memalign: calls %u / storage %llu\n"
     300                                                                        "  amemalign: calls %u / storage %llu\n"
    283301                                                                        "  cmemalign: calls %u / storage %llu\n"
     302                                                                        "  resize: calls %u / storage %llu\n"
    284303                                                                        "  realloc: calls %u / storage %llu\n"
    285304                                                                        "  free: calls %u / storage %llu\n"
     
    288307                                                                        "  sbrk: calls %u / storage %llu\n",
    289308                                                                        malloc_calls, malloc_storage,
     309                                                                        aalloc_calls, aalloc_storage,
    290310                                                                        calloc_calls, calloc_storage,
    291311                                                                        memalign_calls, memalign_storage,
     312                                                                        amemalign_calls, amemalign_storage,
    292313                                                                        cmemalign_calls, cmemalign_storage,
     314                                                                        resize_calls, resize_storage,
    293315                                                                        realloc_calls, realloc_storage,
    294316                                                                        free_calls, free_storage,
     
    300322
    301323static int printStatsXML( FILE * stream ) {                             // see malloc_info
    302         char helpText[512];
     324        char helpText[1024];
    303325        int len = snprintf( helpText, sizeof(helpText),
    304326                                                "<malloc version=\"1\">\n"
     
    307329                                                "</sizes>\n"
    308330                                                "<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"
     331                                                "<total type=\"aalloc\" count=\"%u\" size=\"%llu\"/>\n"
    309332                                                "<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"
    310333                                                "<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"
     334                                                "<total type=\"amemalign\" count=\"%u\" size=\"%llu\"/>\n"
    311335                                                "<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"
     336                                                "<total type=\"resize\" count=\"%u\" size=\"%llu\"/>\n"
    312337                                                "<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n"
    313338                                                "<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n"
     
    317342                                                "</malloc>",
    318343                                                malloc_calls, malloc_storage,
     344                                                aalloc_calls, aalloc_storage,
    319345                                                calloc_calls, calloc_storage,
    320346                                                memalign_calls, memalign_storage,
     347                                                amemalign_calls, amemalign_storage,
    321348                                                cmemalign_calls, cmemalign_storage,
     349                                                resize_calls, resize_storage,
    322350                                                realloc_calls, realloc_storage,
    323351                                                free_calls, free_storage,
     
    332360
    333361
    334 // static inline void noMemory() {
    335 //      abort( "Heap memory exhausted at %zu bytes.\n"
    336 //                 "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
    337 //                 ((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
    338 // } // noMemory
    339 
    340 
    341 static inline void checkAlign( size_t alignment ) {
    342         if ( alignment < libAlign() || ! libPow2( alignment ) ) {
    343                 abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
    344         } // if
    345 } // checkAlign
    346 
    347 
    348 static inline bool setHeapExpand( size_t value ) {
    349   if ( heapExpand < pageSize ) return true;
    350         heapExpand = value;
    351         return false;
    352 } // setHeapExpand
    353 
    354 
    355362// thunk problem
    356363size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
     
    369376
    370377static inline bool setMmapStart( size_t value ) {               // true => mmapped, false => sbrk
    371   if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
     378  if ( value < __page_size || bucketSizes[NoBucketSizes - 1] < value ) return false;
    372379        mmapStart = value;                                                                      // set global
    373380
     
    376383        assert( maxBucketsUsed < NoBucketSizes );                       // subscript failure ?
    377384        assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
    378         return false;
     385        return true;
    379386} // setMmapStart
     387
     388
     389// <-------+----------------------------------------------------> bsize (bucket size)
     390// |header |addr
     391//==================================================================================
     392//                   align/offset |
     393// <-----------------<------------+-----------------------------> bsize (bucket size)
     394//                   |fake-header | addr
     395#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
     396#define realHeader( header ) ((HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset))
     397
     398// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
     399// |header |addr
     400//==================================================================================
     401//                   align/offset |
     402// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
     403//                   |fake-header |addr
     404#define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
     405
     406
     407static inline void checkAlign( size_t alignment ) {
     408        if ( alignment < libAlign() || ! is_pow2( alignment ) ) {
     409                abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
     410        } // if
     411} // checkAlign
    380412
    381413
     
    391423static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & alignment ) {
    392424        if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
    393                 size_t offset = header->kind.fake.offset;
    394425                alignment = header->kind.fake.alignment & -2;   // remove flag from value
    395426                #ifdef __CFA_DEBUG__
    396427                checkAlign( alignment );                                                // check alignment
    397428                #endif // __CFA_DEBUG__
    398                 header = (HeapManager.Storage.Header *)((char *)header - offset);
     429                header = realHeader( header );                                  // backup from fake to real header
     430        } else {
     431                alignment = libAlign();                                                 // => no fake header
    399432        } // if
    400433} // fakeHeader
    401434
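To make the diagrams above concrete, the fake-header arithmetic for an aligned request works as follows (a sketch of what memalignNoStats() does later in this file; values are illustrative):

	// memalign( 64, n ): malloc yields addr, but the caller receives
	//   user = ceiling2( addr + sizeof(HeapManager.Storage), 64 )
	// headerAddr( user ) is the fake header immediately below user, and
	// realHeader( fake ) = fake - fake->kind.fake.offset recovers the real header at addr,
	// which free() needs to return the block to its home bucket or munmap it.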
    402435
    403 // <-------+----------------------------------------------------> bsize (bucket size)
    404 // |header |addr
    405 //==================================================================================
    406 //                                | alignment
    407 // <-----------------<------------+-----------------------------> bsize (bucket size)
    408 //                   |fake-header | addr
    409 #define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
    410 
    411 // <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
    412 // |header |addr
    413 //==================================================================================
    414 //                                | alignment
    415 // <------------------------------<<---------- dsize --------->>> bsize (bucket size)
    416 //                   |fake-header |addr
    417 #define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
    418 
    419 
    420 static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
     436static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem,
     437                                                        size_t & size, size_t & alignment ) with( heapManager ) {
    421438        header = headerAddr( addr );
    422439
    423         if ( unlikely( heapEnd < addr ) ) {                                     // mmapped ?
     440  if ( unlikely( addr < heapBegin || heapEnd < addr ) ) { // mmapped ?
    424441                fakeHeader( header, alignment );
    425442                size = header->kind.real.blockSize & -3;                // mmap size
     
    428445
    429446        #ifdef __CFA_DEBUG__
    430         checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
     447        checkHeader( header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
    431448        #endif // __CFA_DEBUG__
    432449
     
    449466} // headers
    450467
    451 
    452 static inline void * extend( size_t size ) with ( heapManager ) {
     468#ifdef __CFA_DEBUG__
     469#if __SIZEOF_POINTER__ == 4
     470#define MASK 0xdeadbeef
     471#else
     472#define MASK 0xdeadbeefdeadbeef
     473#endif
     474#define STRIDE size_t
     475
     476static void * Memset( void * addr, STRIDE size ) {              // debug only
     477        if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) );
     478        if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) );
     479
     480        STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE);
     481        for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK;
     482        return addr;
     483} // Memset
     484#endif // __CFA_DEBUG__
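The 0xde scrub (and the optional Memset() with MASK above) makes reads of uninitialized or freed storage produce a recognizable pattern instead of quietly yielding zeros. A small sketch, assuming a __CFA_DEBUG__ build:

	int * p = (int *)malloc( sizeof(int) );
	// *p reads as 0xdededede here (0xdeadbeef... with Memset) until the program stores into it.
	free( p );
	// The freed bytes are rescrubbed, so a stale read through p fails visibly.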
     485
     486
     487#define NO_MEMORY_MSG "insufficient heap memory available for allocating %zd new bytes."
     488
     489static inline void * extend( size_t size ) with( heapManager ) {
    453490        lock( extlock __cfaabi_dbg_ctx2 );
    454491        ptrdiff_t rem = heapRemaining - size;
     
    456493                // If the size requested is bigger than the current remaining storage, increase the size of the heap.
    457494
    458                 size_t increase = libCeiling( size > heapExpand ? size : heapExpand, libAlign() );
    459                 if ( sbrk( increase ) == (void *)-1 ) {
     495                size_t increase = ceiling2( size > heapExpand ? size : heapExpand, __page_size );
     496                // Do not call abort or strerror( errno ) as they may call malloc.
     497                if ( sbrk( increase ) == (void *)-1 ) {                 // failed, no memory ?
    460498                        unlock( extlock );
    461                         errno = ENOMEM;
    462                         return 0p;
     499                        __cfaabi_bits_print_nolock( STDERR_FILENO, NO_MEMORY_MSG, size );
     500                        _exit( EXIT_FAILURE );
     501                } // if
     502                if ( mprotect( (char *)heapEnd + heapRemaining, increase, __map_prot ) ) {
     503                        unlock( extlock );
     504                        __cfaabi_bits_print_nolock( STDERR_FILENO, "extend() : internal error, mprotect failure, heapEnd:%p size:%zd, errno:%d.\n", heapEnd, increase, errno );
     505                        _exit( EXIT_FAILURE );
    463506                } // if
    464507                #ifdef __STATISTICS__
     
    468511                #ifdef __CFA_DEBUG__
    469512                // Set new memory to garbage so subsequent uninitialized usages might fail.
    470                 memset( (char *)heapEnd + heapRemaining, '\377', increase );
     513                memset( (char *)heapEnd + heapRemaining, '\xde', increase );
     514                //Memset( (char *)heapEnd + heapRemaining, increase );
    471515                #endif // __CFA_DEBUG__
    472516                rem = heapRemaining + increase - size;
     
    481525
    482526
    483 static inline void * doMalloc( size_t size ) with ( heapManager ) {
     527static inline void * doMalloc( size_t size ) with( heapManager ) {
    484528        HeapManager.Storage * block;                                            // pointer to new block of storage
    485529
     
    487531        // along with the block and is a multiple of the alignment size.
    488532
    489   if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0p;
     533  if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p;
    490534        size_t tsize = size + sizeof(HeapManager.Storage);
    491535        if ( likely( tsize < mmapStart ) ) {                            // small size => sbrk
     
    497541                        posn = Bsearchl( (unsigned int)tsize, bucketSizes, (size_t)maxBucketsUsed );
    498542                HeapManager.FreeHeader * freeElem = &freeLists[posn];
    499                 // #ifdef FASTLOOKUP
    500                 // if ( tsize < LookupSizes )
    501                 //      freeElem = &freeLists[lookup[tsize]];
    502                 // else
    503                 // #endif // FASTLOOKUP
    504                 //      freeElem = bsearchl( tsize, freeLists, (size_t)maxBucketsUsed ); // binary search
    505                 // HeapManager.FreeHeader * freeElem =
    506                 //      #ifdef FASTLOOKUP
    507                 //      tsize < LookupSizes ? &freeLists[lookup[tsize]] :
    508                 //      #endif // FASTLOOKUP
    509                 //      bsearchl( tsize, freeLists, (size_t)maxBucketsUsed ); // binary search
    510                 assert( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
    511                 assert( tsize <= freeElem->blockSize );                 // search failure ?
     543                verify( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
     544                verify( tsize <= freeElem->blockSize );                 // search failure ?
    512545                tsize = freeElem->blockSize;                                    // total space needed for request
    513546
    514547                // Spin until the lock is acquired for this particular size of block.
    515548
    516                 #if defined( SPINLOCK )
     549                #if BUCKETLOCK == SPINLOCK
    517550                lock( freeElem->lock __cfaabi_dbg_ctx2 );
    518551                block = freeElem->freeList;                                             // remove node from stack
    519552                #else
    520                 block = freeElem->freeList.pop();
    521                 #endif // SPINLOCK
     553                block = pop( freeElem->freeList );
     554                #endif // BUCKETLOCK
    522555                if ( unlikely( block == 0p ) ) {                                // no free block ?
    523                         #if defined( SPINLOCK )
     556                        #if BUCKETLOCK == SPINLOCK
    524557                        unlock( freeElem->lock );
    525                         #endif // SPINLOCK
     558                        #endif // BUCKETLOCK
    526559
    527560                        // Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
     
    529562
    530563                        block = (HeapManager.Storage *)extend( tsize ); // mutual exclusion on call
    531   if ( unlikely( block == 0p ) ) return 0p;
    532                 #if defined( SPINLOCK )
     564                #if BUCKETLOCK == SPINLOCK
    533565                } else {
    534566                        freeElem->freeList = block->header.kind.real.next;
    535567                        unlock( freeElem->lock );
    536                 #endif // SPINLOCK
     568                #endif // BUCKETLOCK
    537569                } // if
    538570
    539571                block->header.kind.real.home = freeElem;                // pointer back to free list of appropriate size
    540572        } else {                                                                                        // large size => mmap
    541   if ( unlikely( size > ~0ul - pageSize ) ) return 0p;
    542                 tsize = libCeiling( tsize, pageSize );                  // must be multiple of page size
     573  if ( unlikely( size > ULONG_MAX - __page_size ) ) return 0p;
     574                tsize = ceiling2( tsize, __page_size );                 // must be multiple of page size
    543575                #ifdef __STATISTICS__
    544576                __atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
    545577                __atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST );
    546578                #endif // __STATISTICS__
    547                 block = (HeapManager.Storage *)mmap( 0, tsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
    548                 if ( block == (HeapManager.Storage *)MAP_FAILED ) {
     579
     580                block = (HeapManager.Storage *)mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
     581                if ( block == (HeapManager.Storage *)MAP_FAILED ) { // failed ?
     582                        if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory
    549583                        // Do not call strerror( errno ) as it may call malloc.
    550                         abort( "(HeapManager &)0x%p.doMalloc() : internal error, mmap failure, size:%zu error:%d.", &heapManager, tsize, errno );
    551                 } // if
     584                        abort( "(HeapManager &)0x%p.doMalloc() : internal error, mmap failure, size:%zu errno:%d.", &heapManager, tsize, errno );
     585                } // if
    552586                #ifdef __CFA_DEBUG__
    553587                // Set new memory to garbage so subsequent uninitialized usages might fail.
    554                 memset( block, '\377', tsize );
     588                memset( block, '\xde', tsize );
     589                //Memset( block, tsize );
    555590                #endif // __CFA_DEBUG__
    556591                block->header.kind.real.blockSize = tsize;              // storage size for munmap
    557592        } // if
    558593
     594        block->header.kind.real.size = size;                            // store allocation size
    559595        void * addr = &(block->data);                                           // adjust off header to user bytes
     596        verify( ((uintptr_t)addr & (libAlign() - 1)) == 0 ); // minimum alignment ?
    560597
    561598        #ifdef __CFA_DEBUG__
    562         assert( ((uintptr_t)addr & (libAlign() - 1)) == 0 ); // minimum alignment ?
    563         __atomic_add_fetch( &allocFree, tsize, __ATOMIC_SEQ_CST );
     599        __atomic_add_fetch( &allocUnfreed, tsize, __ATOMIC_SEQ_CST );
    564600        if ( traceHeap() ) {
    565601                enum { BufferSize = 64 };
    566602                char helpText[BufferSize];
    567603                int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", addr, size, tsize );
    568                 // int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", addr, size );
    569604                __cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
    570605        } // if
     
    575610
    576611
    577 static inline void doFree( void * addr ) with ( heapManager ) {
     612static inline void doFree( void * addr ) with( heapManager ) {
    578613        #ifdef __CFA_DEBUG__
    579614        if ( unlikely( heapManager.heapBegin == 0p ) ) {
     
    592627                #endif // __STATISTICS__
    593628                if ( munmap( header, size ) == -1 ) {
    594                         #ifdef __CFA_DEBUG__
    595629                        abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
    596630                                   "Possible cause is invalid pointer.",
    597631                                   addr );
    598                         #endif // __CFA_DEBUG__
    599632                } // if
    600633        } else {
    601634                #ifdef __CFA_DEBUG__
    602635                // Set free memory to garbage so subsequent usages might fail.
    603                 memset( ((HeapManager.Storage *)header)->data, '\377', freeElem->blockSize - sizeof( HeapManager.Storage ) );
     636                memset( ((HeapManager.Storage *)header)->data, '\xde', freeElem->blockSize - sizeof( HeapManager.Storage ) );
     637                //Memset( ((HeapManager.Storage *)header)->data, freeElem->blockSize - sizeof( HeapManager.Storage ) );
    604638                #endif // __CFA_DEBUG__
    605639
     
    607641                free_storage += size;
    608642                #endif // __STATISTICS__
    609                 #if defined( SPINLOCK )
     643                #if BUCKETLOCK == SPINLOCK
    610644                lock( freeElem->lock __cfaabi_dbg_ctx2 );               // acquire spin lock
    611645                header->kind.real.next = freeElem->freeList;    // push on stack
     
    613647                unlock( freeElem->lock );                                               // release spin lock
    614648                #else
    615                 freeElem->freeList.push( *(HeapManager.Storage *)header );
    616                 #endif // SPINLOCK
     649                push( freeElem->freeList, *(HeapManager.Storage *)header );
     650                #endif // BUCKETLOCK
    617651        } // if
    618652
    619653        #ifdef __CFA_DEBUG__
    620         __atomic_add_fetch( &allocFree, -size, __ATOMIC_SEQ_CST );
     654        __atomic_add_fetch( &allocUnfreed, -size, __ATOMIC_SEQ_CST );
    621655        if ( traceHeap() ) {
    622                 enum { BufferSize = 64 };
    623                 char helpText[BufferSize];
     656                char helpText[64];
    624657                int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
    625658                __cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
     
    629662
    630663
    631 size_t prtFree( HeapManager & manager ) with ( manager ) {
     664size_t prtFree( HeapManager & manager ) with( manager ) {
    632665        size_t total = 0;
    633666        #ifdef __STATISTICS__
     
    641674                #endif // __STATISTICS__
    642675
    643                 #if defined( SPINLOCK )
     676                #if BUCKETLOCK == SPINLOCK
    644677                for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0p; p = p->header.kind.real.next ) {
    645678                #else
    646                 for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0p; p = p->header.kind.real.next.top ) {
    647                 #endif // SPINLOCK
     679                        for(;;) {
     680//              for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; p = (p)`next->top ) {
     681//              for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; /* p = getNext( p )->top */) {
     682//                      HeapManager.Storage * temp = p->header.kind.real.next.top; // FIX ME: direct assignment fails, initialization works
     683//                      typeof(p) temp = (( p )`next)->top;                     // FIX ME: direct assignment fails, initialization works
     684//                      p = temp;
     685                #endif // BUCKETLOCK
    648686                        total += size;
    649687                        #ifdef __STATISTICS__
     
    665703
    666704
    667 static void ?{}( HeapManager & manager ) with ( manager ) {
    668         pageSize = sysconf( _SC_PAGESIZE );
     705static void ?{}( HeapManager & manager ) with( manager ) {
     706        __page_size = sysconf( _SC_PAGESIZE );
     707        __map_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
    669708
    670709        for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
     
    680719        #endif // FASTLOOKUP
    681720
    682         if ( setMmapStart( default_mmap_start() ) ) {
     721        if ( ! setMmapStart( default_mmap_start() ) ) {
    683722                abort( "HeapManager : internal error, mmap start initialization failure." );
    684723        } // if
     
    686725
    687726        char * end = (char *)sbrk( 0 );
    688         sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
    689         heapBegin = heapEnd = sbrk( 0 );                                        // get new start point
     727        heapBegin = heapEnd = sbrk( (char *)ceiling2( (long unsigned int)end, __page_size ) - end ); // move start of heap to multiple of page size
    690728} // HeapManager
    691729
     
    695733        if ( traceHeapTerm() ) {
    696734                printStats();
    697                 // if ( prtfree() ) prtFree( heapManager, true );
     735                // prtUnfreed() called in heapAppStop()
    698736        } // if
    699737        #endif // __STATISTICS__
     
    704742void memory_startup( void ) {
    705743        #ifdef __CFA_DEBUG__
    706         if ( unlikely( heapBoot ) ) {                                           // check for recursion during system boot
    707                 // DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
     744        if ( heapBoot ) {                                                                       // check for recursion during system boot
    708745                abort( "boot() : internal error, recursively invoked during system boot." );
    709746        } // if
     
    711748        #endif // __CFA_DEBUG__
    712749
    713         //assert( heapManager.heapBegin != 0 );
     750        //verify( heapManager.heapBegin != 0 );
    714751        //heapManager{};
    715         if ( heapManager.heapBegin == 0p ) heapManager{};
     752        if ( heapManager.heapBegin == 0p ) heapManager{};       // sanity check
    716753} // memory_startup
    717754
     
    723760
    724761static inline void * mallocNoStats( size_t size ) {             // necessary for malloc statistics
    725         //assert( heapManager.heapBegin != 0 );
    726         if ( unlikely( heapManager.heapBegin == 0p ) ) heapManager{}; // called before memory_startup ?
    727         void * addr = doMalloc( size );
    728         if ( unlikely( addr == 0p ) ) errno = ENOMEM;           // POSIX
    729         return addr;
     762        verify( heapManager.heapBegin != 0p );                          // called before memory_startup ?
     763  if ( unlikely( size == 0 ) ) return 0p;                               // 0 BYTE ALLOCATION RETURNS NULL POINTER
     764
     765#if __SIZEOF_POINTER__ == 8
     766        verify( size < ((typeof(size_t))1 << 48) );
     767#endif // __SIZEOF_POINTER__ == 8
     768        return doMalloc( size );
    730769} // mallocNoStats
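The zero-size path above is worth an example: a 0-byte request now returns the null pointer rather than a unique address, and the pairing with free() remains safe because free( 0p ) is a no-op:

	void * p = malloc( 0 );		// returns 0p, no storage allocated
	free( p );			// harmless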
    731770
    732771
    733 static inline void * callocNoStats( size_t noOfElems, size_t elemSize ) {
    734         size_t size = noOfElems * elemSize;
     772static inline void * callocNoStats( size_t dim, size_t elemSize ) {
     773        size_t size = dim * elemSize;
     774  if ( unlikely( size == 0 ) ) return 0p;                               // 0 BYTE ALLOCATION RETURNS NULL POINTER
    735775        char * addr = (char *)mallocNoStats( size );
    736   if ( unlikely( addr == 0p ) ) return 0p;
    737776
    738777        HeapManager.Storage.Header * header;
    739778        HeapManager.FreeHeader * freeElem;
    740779        size_t bsize, alignment;
    741         bool mapped __attribute__(( unused )) = headers( "calloc", addr, header, freeElem, bsize, alignment );
    742780        #ifndef __CFA_DEBUG__
     781        bool mapped =
     782        #endif // __CFA_DEBUG__
     783                headers( "calloc", addr, header, freeElem, bsize, alignment );
     784        #ifndef __CFA_DEBUG__
     785
    743786        // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
    744787        if ( ! mapped )
    745788        #endif // __CFA_DEBUG__
    746                 // Zero entire data space even when > than size => realloc without a new allocation and zero fill works.
    747                 // <-------00000000000000000000000000000000000000000000000000000> bsize (bucket size)
     789                // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
    748790                // `-header`-addr                      `-size
    749                 memset( addr, '\0', bsize - sizeof(HeapManager.Storage) ); // set to zeros
     791                memset( addr, '\0', size );                                             // set to zeros
    750792
    751793        header->kind.real.blockSize |= 2;                                       // mark as zero filled
     
    754796
    755797
    756 static inline void * memalignNoStats( size_t alignment, size_t size ) { // necessary for malloc statistics
     798static inline void * memalignNoStats( size_t alignment, size_t size ) {
     799  if ( unlikely( size == 0 ) ) return 0p;                               // 0 BYTE ALLOCATION RETURNS NULL POINTER
     800
    757801        #ifdef __CFA_DEBUG__
    758802        checkAlign( alignment );                                                        // check alignment
     
    772816        // add sizeof(Storage) for fake header
    773817        char * addr = (char *)mallocNoStats( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
    774   if ( unlikely( addr == 0p ) ) return addr;
    775818
    776819        // address in the block of the "next" alignment address
    777         char * user = (char *)libCeiling( (uintptr_t)(addr + sizeof(HeapManager.Storage)), alignment );
     820        char * user = (char *)ceiling2( (uintptr_t)(addr + sizeof(HeapManager.Storage)), alignment );
    778821
    779822        // address of header from malloc
    780823        HeapManager.Storage.Header * realHeader = headerAddr( addr );
     824        realHeader->kind.real.size = size;                                      // correct size to eliminate above alignment offset
    781825        // address of fake header * before* the alignment location
    782826        HeapManager.Storage.Header * fakeHeader = headerAddr( user );
     
    790834
    791835
    792 static inline void * cmemalignNoStats( size_t alignment, size_t noOfElems, size_t elemSize ) {
    793         size_t size = noOfElems * elemSize;
     836static inline void * cmemalignNoStats( size_t alignment, size_t dim, size_t elemSize ) {
     837        size_t size = dim * elemSize;
     838  if ( unlikely( size == 0 ) ) return 0p;                               // 0 BYTE ALLOCATION RETURNS NULL POINTER
    794839        char * addr = (char *)memalignNoStats( alignment, size );
    795   if ( unlikely( addr == 0p ) ) return 0p;
     840
    796841        HeapManager.Storage.Header * header;
    797842        HeapManager.FreeHeader * freeElem;
    798843        size_t bsize;
    799         bool mapped __attribute__(( unused )) = headers( "cmemalign", addr, header, freeElem, bsize, alignment );
    800844        #ifndef __CFA_DEBUG__
     845        bool mapped =
     846        #endif // __CFA_DEBUG__
     847                headers( "cmemalign", addr, header, freeElem, bsize, alignment );
     848
    801849        // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
     850        #ifndef __CFA_DEBUG__
    802851        if ( ! mapped )
    803852        #endif // __CFA_DEBUG__
    804                 memset( addr, '\0', dataStorage( bsize, addr, header ) ); // set to zeros
    805         header->kind.real.blockSize |= 2;                               // mark as zero filled
    806 
     853                // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
     854                // `-header`-addr                      `-size
     855                memset( addr, '\0', size );                                             // set to zeros
     856
     857        header->kind.real.blockSize |= 2;                                       // mark as zero filled
    807858        return addr;
    808859} // cmemalignNoStats
    809860
    810861
    811 // supported mallopt options
    812 #ifndef M_MMAP_THRESHOLD
    813 #define M_MMAP_THRESHOLD (-1)
    814 #endif // M_TOP_PAD
    815 #ifndef M_TOP_PAD
    816 #define M_TOP_PAD (-2)
    817 #endif // M_TOP_PAD
    818 
    819 
    820862extern "C" {
    821         // The malloc() function allocates size bytes and returns a pointer to the allocated memory. The memory is not
    822         // initialized. If size is 0, then malloc() returns either 0p, or a unique pointer value that can later be
    823         // successfully passed to free().
     863        // Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
     864        // then malloc() returns 0p, which can safely be passed to free().
    824865        void * malloc( size_t size ) {
    825866                #ifdef __STATISTICS__
     
    831872        } // malloc
    832873
    833         // The calloc() function allocates memory for an array of nmemb elements of size bytes each and returns a pointer to
    834         // the allocated memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either 0p, or a
    835         // unique pointer value that can later be successfully passed to free().
    836         void * calloc( size_t noOfElems, size_t elemSize ) {
     874
     875        // Same as malloc() except the allocated size is an array of dim elements, each of elemSize bytes.
     876        void * aalloc( size_t dim, size_t elemSize ) {
     877                size_t size = dim * elemSize;
     878                #ifdef __STATISTICS__
     879                __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST );
     880                __atomic_add_fetch( &aalloc_storage, size, __ATOMIC_SEQ_CST );
     881                #endif // __STATISTICS__
     882
     883                return mallocNoStats( size );
     884        } // aalloc
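A sketch contrasting the new aalloc() with calloc(); aalloc() is the unzeroed array allocator:

	int * a = (int *)aalloc( 10, sizeof(int) );	// 10 ints, contents undefined
	int * z = (int *)calloc( 10, sizeof(int) );	// 10 ints, zero filled
	free( a ); free( z );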
     885
     886
     887        // Same as aalloc() with memory set to zero.
     888        void * calloc( size_t dim, size_t elemSize ) {
    837889                #ifdef __STATISTICS__
    838890                __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
    839                 __atomic_add_fetch( &calloc_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
    840                 #endif // __STATISTICS__
    841 
    842                 return callocNoStats( noOfElems, elemSize );
     891                __atomic_add_fetch( &calloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     892                #endif // __STATISTICS__
     893
     894                return callocNoStats( dim, elemSize );
    843895        } // calloc
    844896
    845         // The realloc() function changes the size of the memory block pointed to by ptr to size bytes. The contents will be
    846         // unchanged in the range from the start of the region up to the minimum of the old and new sizes. If the new size
    847         // is larger than the old size, the added memory will not be initialized.  If ptr is 0p, then the call is
    848         // equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not 0p, then the call
    849         // is equivalent to free(ptr). Unless ptr is 0p, it must have been returned by an earlier call to malloc(),
    850         // calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done.
    851         void * realloc( void * oaddr, size_t size ) {
    852                 #ifdef __STATISTICS__
    853                 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
     897
     898        // Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
     899        // 0p, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
     900        // not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier
     901        // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
     902        void * resize( void * oaddr, size_t size ) {
     903                #ifdef __STATISTICS__
     904                __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
    854905                #endif // __STATISTICS__
    855906
    856907                // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
    857           if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
    858           if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
     908          if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
     909          if ( unlikely( oaddr == 0p ) ) {
     910                        #ifdef __STATISTICS__
     911                        __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
     912                        #endif // __STATISTICS__
     913                        return mallocNoStats( size );
     914                } // if
    859915
    860916                HeapManager.Storage.Header * header;
    861917                HeapManager.FreeHeader * freeElem;
    862                 size_t bsize, oalign = 0;
     918                size_t bsize, oalign;
     919                headers( "resize", oaddr, header, freeElem, bsize, oalign );
     920                size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
     921
     922                // same size, DO NOT preserve STICKY PROPERTIES.
     923                if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
     924                        header->kind.real.blockSize &= -2;                      // no alignment and turn off 0 fill
     925                        header->kind.real.size = size;                          // reset allocation size
     926                        return oaddr;
     927                } // if
     928
     929                #ifdef __STATISTICS__
     930                __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
     931                #endif // __STATISTICS__
     932
     933                // change size, DO NOT preserve STICKY PROPERTIES.
     934                free( oaddr );
     935                return mallocNoStats( size );                                   // create new area
     936        } // resize
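A minimal sketch of the resize() semantics documented above, assuming the declaration in this file: contents are not preserved and sticky properties are dropped, so the result is fresh storage (memset from <string.h>):

	char * buf = malloc( 64 );
	buf = resize( buf, 4096 );		// may move; old bytes need not be copied
	memset( buf, 0, 4096 );			// zero-fill/alignment properties were reset, so initialize before reading
	free( buf );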
     937
     938
     939        // Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
     940        // the old and new sizes.
     941        void * realloc( void * oaddr, size_t size ) {
     942                #ifdef __STATISTICS__
     943                __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
     944                #endif // __STATISTICS__
     945
     946                // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
     947          if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
     948          if ( unlikely( oaddr == 0p ) ) {
     949                        #ifdef __STATISTICS__
     950                        __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
     951                        #endif // __STATISTICS__
     952                        return mallocNoStats( size );
     953                } // if
     954
     955                HeapManager.Storage.Header * header;
     956                HeapManager.FreeHeader * freeElem;
     957                size_t bsize, oalign;
    863958                headers( "realloc", oaddr, header, freeElem, bsize, oalign );
    864959
    865960                size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    866           if ( size <= odsize && odsize <= size * 2 ) { // allow up to 50% wasted storage in smaller size
    867                         // Do not know size of original allocation => cannot do 0 fill for any additional space because do not know
    868                         // where to start filling, i.e., do not overwrite existing values in space.
    869                         //
    870                         // This case does not result in a new profiler entry because the previous one still exists and it must match with
    871                         // the free for this memory.  Hence, this realloc does not appear in the profiler output.
     961                size_t osize = header->kind.real.size;                  // old allocation size
     962                bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled
     963          if ( unlikely( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage
     964                        header->kind.real.size = size;                          // reset allocation size
     965                        if ( unlikely( ozfill ) && size > osize ) {     // previous request zero fill and larger ?
     966                                memset( (char *)oaddr + osize, '\0', size - osize ); // initialize added storage
     967                        } // if
    872968                        return oaddr;
    873969                } // if
    874970
    875971                #ifdef __STATISTICS__
    876                 __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
     972                __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
    877973                #endif // __STATISTICS__
    878974
     879975                // change size and copy old content to new storage
    880976
    881977                void * naddr;
    882                 if ( unlikely( oalign != 0 ) ) {                                // previous request memalign?
    883                         if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
    884                                 naddr = cmemalignNoStats( oalign, 1, size ); // create new aligned area
    885                         } else {
    886                                 naddr = memalignNoStats( oalign, size ); // create new aligned area
     978                if ( likely( oalign == libAlign() ) ) {                 // previous request not aligned ?
     979                        naddr = mallocNoStats( size );                          // create new area
     980                } else {
     981                        naddr = memalignNoStats( oalign, size );        // create new aligned area
     982                } // if
     983
     984                headers( "realloc", naddr, header, freeElem, bsize, oalign );
     985                memcpy( naddr, oaddr, min( osize, size ) );             // copy bytes
     986                free( oaddr );
     987
     988                if ( unlikely( ozfill ) ) {                                             // previous request zero fill ?
     989                        header->kind.real.blockSize |= 2;                       // mark new request as zero filled
     990                        if ( size > osize ) {                                           // previous request larger ?
     991                                memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
    887992                        } // if
    888                 } else {
    889                         if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
    890                                 naddr = callocNoStats( 1, size );               // create new area
    891                         } else {
    892                                 naddr = mallocNoStats( size );                  // create new area
    893                         } // if
    894                 } // if
    895           if ( unlikely( naddr == 0p ) ) return 0p;
    896 
    897                 headers( "realloc", naddr, header, freeElem, bsize, oalign );
     898                 size_t ndsize = dataStorage( bsize, naddr, header ); // data storage available in bucket
    899                 // To preserve prior fill, the entire bucket must be copied versus the size.
    900                 memcpy( naddr, oaddr, MIN( odsize, ndsize ) );  // copy bytes
    901                 free( oaddr );
     993                } // if
    902994                return naddr;
    903995        } // realloc
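A minimal sketch of the zero-fill stickiness implemented above: an allocation that starts zero filled keeps the property across realloc(), so only the added tail [osize, size) needs initialization (assert from <assert.h>):

	int * p = calloc( 4, sizeof(int) );	// zero filled, sticky property set
	p = realloc( p, 8 * sizeof(int) );	// growth: bytes past the old size are zeroed
	assert( p[7] == 0 );			// added storage inherits zero fill
	free( p );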
    904996
    905         // The obsolete function memalign() allocates size bytes and returns a pointer to the allocated memory. The memory
    906         // address will be a multiple of alignment, which must be a power of two.
     997
     998        // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
    907999        void * memalign( size_t alignment, size_t size ) {
    9081000                #ifdef __STATISTICS__
…
    9151007
    9161008
    917         // The cmemalign() function is the same as calloc() with memory alignment.
    918         void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
     1009        // Same as aalloc() with memory alignment.
     1010        void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
     1011                size_t size = dim * elemSize;
    9191012                #ifdef __STATISTICS__
    9201013                __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
    921                 __atomic_add_fetch( &cmemalign_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
    922                 #endif // __STATISTICS__
    923 
    924                 return cmemalignNoStats( alignment, noOfElems, elemSize );
     1014                __atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
     1015                #endif // __STATISTICS__
     1016
     1017                return memalignNoStats( alignment, size );
     1018        } // amemalign
     1019
     1020
     1021        // Same as calloc() with memory alignment.
     1022        void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
     1023                #ifdef __STATISTICS__
     1024                __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
     1025                __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     1026                #endif // __STATISTICS__
     1027
     1028                return cmemalignNoStats( alignment, dim, elemSize );
    9251029        } // cmemalign
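A minimal sketch pairing the aligned variants with their unaligned counterparts (amemalign is to aalloc as cmemalign is to calloc), assuming the declarations above are in scope:

	double * v = amemalign( 64, 100, sizeof(double) );	// 100 doubles on a 64-byte boundary, uninitialized
	double * w = cmemalign( 64, 100, sizeof(double) );	// same shape, zero filled
	free( v );  free( w );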
    9261030
    927         // The function aligned_alloc() is the same as memalign(), except for the added restriction that size should be a
    928         // multiple of alignment.
     1031
     1032        // Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
      1033        // of alignment. This requirement is universally ignored.
    9291034        void * aligned_alloc( size_t alignment, size_t size ) {
    9301035                return memalign( alignment, size );
     9311036        } // aligned_alloc
    9321037
    9331038
    934         // The function posix_memalign() allocates size bytes and places the address of the allocated memory in *memptr. The
    935         // address of the allocated memory will be a multiple of alignment, which must be a power of two and a multiple of
    936         // sizeof(void *). If size is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later
    937         // be successfully passed to free(3).
     1039        // Allocates size bytes and places the address of the allocated memory in *memptr. The address of the allocated
     1040        // memory shall be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *). If size
     1041        // is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later be successfully passed to
     1042        // free(3).
    9381043        int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
    939           if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) return EINVAL; // check alignment
     1044          if ( alignment < libAlign() || ! is_pow2( alignment ) ) return EINVAL; // check alignment
    9401045                * memptr = memalign( alignment, size );
    941           if ( unlikely( * memptr == 0p ) ) return ENOMEM;
    9421046                return 0;
    9431047        } // posix_memalign
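A minimal sketch of the argument check above: the alignment must be at least libAlign() and a power of 2, otherwise EINVAL is returned without touching *memptr:

	void * p;
	int rc = posix_memalign( &p, 48, 128 );	// 48 is not a power of 2 => EINVAL
	if ( rc == 0 ) free( p );		// rc == 0 => p holds the aligned allocation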
    9441048
    945         // The obsolete function valloc() allocates size bytes and returns a pointer to the allocated memory. The memory
    946         // address will be a multiple of the page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
     1049
     1050        // Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
     1051        // page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
    9471052        void * valloc( size_t size ) {
    948                 return memalign( pageSize, size );
     1053                return memalign( __page_size, size );
    9491054        } // valloc
    9501055
    9511056
    952         // The free() function frees the memory space pointed to by ptr, which must have been returned by a previous call to
    953         // malloc(), calloc() or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behavior
    954         // occurs. If ptr is 0p, no operation is performed.
     1057        // Same as valloc but rounds size to multiple of page size.
     1058        void * pvalloc( size_t size ) {
     1059                return memalign( __page_size, ceiling2( size, __page_size ) );
     1060        } // pvalloc
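A minimal sketch of the valloc()/pvalloc() distinction: both return page-aligned storage, but pvalloc() also rounds the request up to a whole number of pages via ceiling2:

	void * p = valloc( 100 );		// page aligned, 100-byte request
	void * q = pvalloc( 100 );		// page aligned, request rounded up to __page_size
	free( p );  free( q );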
     1061
     1062
     1063        // Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
     1064        // or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
     1065        // 0p, no operation is performed.
    9551066        void free( void * addr ) {
    9561067                #ifdef __STATISTICS__
…
    9731084
    9741085
    975         // The malloc_alignment() function returns the alignment of the allocation.
     1086        // Returns the alignment of an allocation.
    9761087        size_t malloc_alignment( void * addr ) {
    9771088          if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
…
    9801091                        return header->kind.fake.alignment & -2;        // remove flag from value
    9811092                } else {
    982                         return libAlign ();                                                     // minimum alignment
     1093                        return libAlign();                                                      // minimum alignment
    9831094                } // if
    9841095        } // malloc_alignment
    9851096
    9861097
    987         // The malloc_zero_fill() function returns true if the allocation is zero filled, i.e., initially allocated by calloc().
      1098        // Set the alignment for the allocation and return the previous alignment, or 0 if no alignment.
     1099        size_t $malloc_alignment_set( void * addr, size_t alignment ) {
     1100          if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
     1101                size_t ret;
     1102                HeapManager.Storage.Header * header = headerAddr( addr );
     1103                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1104                        ret = header->kind.fake.alignment & -2;         // remove flag from old value
     1105                        header->kind.fake.alignment = alignment | 1; // add flag to new value
     1106                } else {
     1107                        ret = 0;                                                                        // => no alignment to change
     1108                } // if
     1109                return ret;
     1110        } // $malloc_alignment_set
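A minimal sketch of the alignment query above: memalign-style allocations carry a fake header recording the alignment, while plain malloc() storage has none, so malloc_alignment() falls back to libAlign():

	void * p = memalign( 256, 1 );
	size_t pa = malloc_alignment( p );	// 256, read from the fake header
	void * q = malloc( 1 );
	size_t qa = malloc_alignment( q );	// libAlign(), the minimum alignment
	free( p );  free( q );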
     1111
     1112
     1113        // Returns true if the allocation is zero filled, e.g., allocated by calloc().
    9881114        bool malloc_zero_fill( void * addr ) {
    9891115          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
    9901116                HeapManager.Storage.Header * header = headerAddr( addr );
    9911117                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
    992                         header = (HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset);
    993                 } // if
    994                 return (header->kind.real.blockSize & 2) != 0;  // zero filled (calloc/cmemalign) ?
     1118                        header = realHeader( header );                          // backup from fake to real header
     1119                } // if
     1120                return (header->kind.real.blockSize & 2) != 0;  // zero filled ?
    9951121        } // malloc_zero_fill
    9961122
    997 
    998         // The malloc_usable_size() function returns the number of usable bytes in the block pointed to by ptr, a pointer to
    999         // a block of memory allocated by malloc(3) or a related function.
      1123        // Mark the allocation as zero filled and return the previous zero-fill state.
     1124        bool $malloc_zero_fill_set( void * addr ) {
     1125          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
     1126                HeapManager.Storage.Header * header = headerAddr( addr );
     1127                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1128                        header = realHeader( header );                          // backup from fake to real header
     1129                } // if
     1130                bool ret = (header->kind.real.blockSize & 2) != 0; // zero filled ?
     1131                header->kind.real.blockSize |= 2;                               // mark as zero filled
     1132                return ret;
     1133        } // $malloc_zero_fill_set
     1134
     1135
      1136        // Returns original total allocation size (not bucket size) => array size is dimension * sizeof(T).
     1137        size_t malloc_size( void * addr ) {
     1138          if ( unlikely( addr == 0p ) ) return 0;                       // null allocation has zero size
     1139                HeapManager.Storage.Header * header = headerAddr( addr );
     1140                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1141                        header = realHeader( header );                          // backup from fake to real header
     1142                } // if
     1143                return header->kind.real.size;
     1144        } // malloc_size
     1145
     1146        // Set allocation size and return previous size.
     1147        size_t $malloc_size_set( void * addr, size_t size ) {
     1148          if ( unlikely( addr == 0p ) ) return 0;                       // null allocation has 0 size
     1149                HeapManager.Storage.Header * header = headerAddr( addr );
     1150                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1151                        header = realHeader( header );                          // backup from fake to real header
     1152                } // if
     1153                size_t ret = header->kind.real.size;
     1154                header->kind.real.size = size;
     1155                return ret;
     1156        } // $malloc_size_set
     1157
     1158
     1159        // Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
     1160        // malloc or a related function.
    10001161        size_t malloc_usable_size( void * addr ) {
    10011162          if ( unlikely( addr == 0p ) ) return 0;                       // null allocation has 0 size
…
    10051166
    10061167                headers( "malloc_usable_size", addr, header, freeElem, bsize, alignment );
    1007                 return dataStorage( bsize, addr, header );      // data storage in bucket
     1168                return dataStorage( bsize, addr, header );              // data storage in bucket
    10081169        } // malloc_usable_size
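A minimal sketch contrasting the two size queries: malloc_size() reports the original request kept in the header, while malloc_usable_size() reports the (greater or equal) data storage of the bucket actually used:

	char * p = malloc( 100 );
	size_t rsize = malloc_size( p );		// 100, the request size
	size_t usize = malloc_usable_size( p );		// bucket data storage, >= 100
	free( p );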
    10091170
    10101171
    1011         // The malloc_stats() function prints (on default standard error) statistics about memory allocated by malloc(3) and
    1012         // related functions.
     1172        // Prints (on default standard error) statistics about memory allocated by malloc and related functions.
    10131173        void malloc_stats( void ) {
    10141174                #ifdef __STATISTICS__
…
    10181178        } // malloc_stats
    10191179
     1020         // The malloc_stats_fd() function changes the file descriptor where malloc_stats() writes the statistics.
     1180
      1181        // Changes the file descriptor where malloc_stats() writes statistics.
    10211182        int malloc_stats_fd( int fd __attribute__(( unused )) ) {
    10221183                #ifdef __STATISTICS__
    1023                 int temp = statfd;
    1024                 statfd = fd;
     1184                int temp = stat_fd;
     1185                stat_fd = fd;
    10251186                return temp;
    10261187                #else
…
    10301191
    10311192
    1032         // The mallopt() function adjusts parameters that control the behavior of the memory-allocation functions (see
    1033         // malloc(3)). The param argument specifies the parameter to be modified, and value specifies the new value for that
    1034         // parameter.
     1193        // Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
     1194        // specifies the parameter to be modified, and value specifies the new value for that parameter.
    10351195        int mallopt( int option, int value ) {
    10361196                choose( option ) {
    10371197                  case M_TOP_PAD:
    1038                         if ( setHeapExpand( value ) ) return 1;
     1198                        heapExpand = ceiling2( value, __page_size ); return 1;
    10391199                  case M_MMAP_THRESHOLD:
    10401200                        if ( setMmapStart( value ) ) return 1;
     1201                        break;
    10411202                } // switch
    10421203                return 0;                                                                               // error, unsupported
    10431204        } // mallopt
    10441205
    1045         // The malloc_trim() function attempts to release free memory at the top of the heap (by calling sbrk(2) with a
    1046         // suitable argument).
     1206
     1207        // Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument).
    10471208        int malloc_trim( size_t ) {
    10481209                return 0;                                                                               // => impossible to release memory
     10491210        } // malloc_trim
    10501211
    10511212
    1052         // The malloc_info() function exports an XML string that describes the current state of the memory-allocation
    1053         // implementation in the caller.  The string is printed on the file stream stream.  The exported string includes
    1054         // information about all arenas (see malloc(3)).
     1213        // Exports an XML string that describes the current state of the memory-allocation implementation in the caller.
     1214        // The string is printed on the file stream stream.  The exported string includes information about all arenas (see
     1215        // malloc).
    10551216        int malloc_info( int options, FILE * stream ) {
    1056                 if ( options != 0 ) { errno = EINVAL; return -1; }
     1217          if ( options != 0 ) { errno = EINVAL; return -1; }
     1218                #ifdef __STATISTICS__
    10571219                return printStatsXML( stream );
     1220                #else
     1221                return 0;                                                                               // unsupported
     1222                #endif // __STATISTICS__
    10581223        } // malloc_info
    10591224
    10601225
    1061         // The malloc_get_state() function records the current state of all malloc(3) internal bookkeeping variables (but
     1062         // not the actual contents of the heap or the state of malloc_hook(3) function pointers).  The state is recorded in
    1063         // a system-dependent opaque data structure dynamically allocated via malloc(3), and a pointer to that data
    1064         // structure is returned as the function result.  (It is the caller's responsibility to free(3) this memory.)
     1226        // Records the current state of all malloc internal bookkeeping variables (but not the actual contents of the heap
      1227        // or the state of malloc_hook function pointers).  The state is recorded in a system-dependent opaque data
     1228        // structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
     1229        // result.  (The caller must free this memory.)
    10651230        void * malloc_get_state( void ) {
    10661231                return 0p;                                                                              // unsupported
     10671232        } // malloc_get_state
    10681233
    10691234
    1070         // The malloc_set_state() function restores the state of all malloc(3) internal bookkeeping variables to the values
    1071         // recorded in the opaque data structure pointed to by state.
    1072         int malloc_set_state( void * ptr ) {
     1235        // Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
     1236        // structure pointed to by state.
     1237        int malloc_set_state( void * ) {
    10731238                return 0;                                                                               // unsupported
    10741239        } // malloc_set_state
…
    10771242
    10781243// Must have CFA linkage to overload with C linkage realloc.
    1079 void * realloc( void * oaddr, size_t nalign, size_t size ) {
     1244void * resize( void * oaddr, size_t nalign, size_t size ) {
    10801245        #ifdef __STATISTICS__
    1081         __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
     1246        __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
    10821247        #endif // __STATISTICS__
    10831248
    1084         // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
    1085   if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
    1086   if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
    1087 
    1088         if ( unlikely( nalign == 0 ) ) nalign = libAlign();     // reset alignment to minimum
     1249        if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
    10891250        #ifdef __CFA_DEBUG__
    10901251        else
     10911252                checkAlign( nalign );                                                   // check alignment
    10921253        #endif // __CFA_DEBUG__
    10931254
    1094         HeapManager.Storage.Header * header;
    1095         HeapManager.FreeHeader * freeElem;
    1096         size_t bsize, oalign = 0;
    1097         headers( "realloc", oaddr, header, freeElem, bsize, oalign );
    1098         size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    1099 
    1100   if ( oalign != 0 && (uintptr_t)oaddr % nalign == 0 ) { // has alignment and just happens to work out
    1101                 headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
    1102                 return realloc( oaddr, size );
     1255        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
     1256  if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
     1257  if ( unlikely( oaddr == 0p ) ) {
     1258                #ifdef __STATISTICS__
     1259                __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
     1260                #endif // __STATISTICS__
     1261                return memalignNoStats( nalign, size );
     1262        } // if
     1263
     1264        // Attempt to reuse existing alignment.
     1265        HeapManager.Storage.Header * header = headerAddr( oaddr );
     1266        bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
     1267        size_t oalign;
     1268        if ( isFakeHeader ) {
     1269                oalign = header->kind.fake.alignment & -2;              // old alignment
     1270                if ( (uintptr_t)oaddr % nalign == 0                             // lucky match ?
     1271                         && ( oalign <= nalign                                          // going down
     1272                                  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
     1273                        ) {
     1274                        headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1275                        HeapManager.FreeHeader * freeElem;
     1276                        size_t bsize, oalign;
     1277                        headers( "resize", oaddr, header, freeElem, bsize, oalign );
     1278                        size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
     1279
     1280                        if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
     1281                                headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1282
     1283                                header->kind.real.blockSize &= -2;              // turn off 0 fill
     1284                                header->kind.real.size = size;                  // reset allocation size
     1285                                return oaddr;
     1286                        } // if
     1287                } // if
     1288        } else if ( ! isFakeHeader                                                      // old real header (aligned on libAlign) ?
     1289                                && nalign == libAlign() ) {                             // new alignment also on libAlign => no fake header needed
     1290                return resize( oaddr, size );                                   // duplicate special case checks
    11031291        } // if
    11041292
    11051293        #ifdef __STATISTICS__
     1294        __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
     1295        #endif // __STATISTICS__
     1296
     1297        // change size, DO NOT preserve STICKY PROPERTIES.
     1298        free( oaddr );
     1299        return memalignNoStats( nalign, size );                         // create new aligned area
     1300} // resize
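A minimal sketch of the alignment-aware resize() above: when the old address already satisfies the new alignment (and little alignment storage is wasted) the block may be reused; otherwise a new aligned area replaces it with the contents discarded:

	void * p = memalign( 128, 1000 );
	p = resize( p, 64, 2000 );		// a 128-byte-aligned address also satisfies 64
	free( p );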
     1301
     1302
     1303void * realloc( void * oaddr, size_t nalign, size_t size ) {
     1304        if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
     1305        #ifdef __CFA_DEBUG__
     1306        else
     1307                checkAlign( nalign );                                                   // check alignment
     1308        #endif // __CFA_DEBUG__
     1309
     1310        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
     1311  if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
     1312  if ( unlikely( oaddr == 0p ) ) {
     1313                #ifdef __STATISTICS__
     1314                __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
     1315                __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
     1316                #endif // __STATISTICS__
     1317                return memalignNoStats( nalign, size );
     1318        } // if
     1319
     1320        // Attempt to reuse existing alignment.
     1321        HeapManager.Storage.Header * header = headerAddr( oaddr );
     1322        bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
     1323        size_t oalign;
     1324        if ( isFakeHeader ) {
     1325                oalign = header->kind.fake.alignment & -2;              // old alignment
     1326                if ( (uintptr_t)oaddr % nalign == 0                             // lucky match ?
     1327                         && ( oalign <= nalign                                          // going down
     1328                                  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
     1329                        ) {
     1330                        headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1331                        return realloc( oaddr, size );                          // duplicate alignment and special case checks
     1332                } // if
     1333        } else if ( ! isFakeHeader                                                      // old real header (aligned on libAlign) ?
     1334                                && nalign == libAlign() )                               // new alignment also on libAlign => no fake header needed
     1335                return realloc( oaddr, size );                                  // duplicate alignment and special case checks
     1336
     1337        #ifdef __STATISTICS__
     1338        __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
    11061339        __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
    11071340        #endif // __STATISTICS__
    11081341
     1342        HeapManager.FreeHeader * freeElem;
     1343        size_t bsize;
     1344        headers( "realloc", oaddr, header, freeElem, bsize, oalign );
     1345
    11091346        // change size and copy old content to new storage
    11101347
    1111         void * naddr;
    1112         if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
    1113                 naddr = cmemalignNoStats( nalign, 1, size );    // create new aligned area
    1114         } else {
    1115                 naddr = memalignNoStats( nalign, size );                // create new aligned area
    1116         } // if
     1348        size_t osize = header->kind.real.size;                          // old allocation size
     1349        bool ozfill = (header->kind.real.blockSize & 2);        // old allocation zero filled
     1350
     1351        void * naddr = memalignNoStats( nalign, size );         // create new aligned area
    11171352
    11181353        headers( "realloc", naddr, header, freeElem, bsize, oalign );
     1119         size_t ndsize = dataStorage( bsize, naddr, header ); // data storage available in bucket
    1120         // To preserve prior fill, the entire bucket must be copied versus the size.
    1121         memcpy( naddr, oaddr, MIN( odsize, ndsize ) );          // copy bytes
     1354        memcpy( naddr, oaddr, min( osize, size ) );                     // copy bytes
    11221355        free( oaddr );
     1356
     1357        if ( unlikely( ozfill ) ) {                                                     // previous request zero fill ?
     1358                header->kind.real.blockSize |= 2;                               // mark new request as zero filled
     1359                if ( size > osize ) {                                                   // previous request larger ?
     1360                        memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
     1361                } // if
     1362        } // if
    11231363        return naddr;
    11241364} // realloc
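A minimal sketch of the alignment-aware realloc(): unlike resize() it copies the old contents and carries the zero-fill property forward; raising the alignment here forces a new aligned area plus a copy. CFA linkage is assumed, since this overload is not visible to plain C:

	struct S { double d[4]; };
	struct S * s = malloc( sizeof(struct S) );
	s = realloc( s, 32, 2 * sizeof(struct S) );	// new 32-byte alignment, contents preserved
	free( s );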
  • libcfa/src/interpose.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Wed Mar 29 16:10:31 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Feb 17 10:18:53 2020
    13 // Update Count     : 166
     12// Last Modified On : Fri Mar 13 17:35:37 2020
     13// Update Count     : 178
    1414//
    1515
    1616#include <stdarg.h>                                                                             // va_start, va_end
     17#include <stdio.h>
    1718#include <string.h>                                                                             // strlen
    1819#include <unistd.h>                                                                             // _exit, getpid
…
    143144void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
    144145void abort( bool signalAbort, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
     146void __abort( bool signalAbort, const char fmt[], va_list args ) __attribute__(( __nothrow__, __leaf__, __noreturn__ ));
    145147
    146148extern "C" {
…
    152154                va_list argp;
    153155                va_start( argp, fmt );
    154                 abort( false, fmt, argp );
     156                __abort( false, fmt, argp );
    155157                va_end( argp );
    156158        }
…
    218220}
    219221
    220 void abort( bool signalAbort, const char fmt[], ... ) {
    221         void * kernel_data = kernel_abort();                            // must be done here to lock down kernel
    222         int len;
    223 
    224         signal( SIGABRT, SIG_DFL );                                                     // prevent final "real" abort from recursing to handler
    225 
    226         len = snprintf( abort_text, abort_text_size, "Cforall Runtime error (UNIX pid:%ld) ", (long int)getpid() ); // use UNIX pid (versus getPid)
    227         __cfaabi_bits_write( STDERR_FILENO, abort_text, len );
    228 
    229         assert( fmt );
    230         va_list args;
    231         va_start( args, fmt );
    232 
    233         len = vsnprintf( abort_text, abort_text_size, fmt, args );
    234         va_end( args );
    235         __cfaabi_bits_write( STDERR_FILENO, abort_text, len );
    236 
    237         if ( fmt[strlen( fmt ) - 1] != '\n' ) {                         // add optional newline if missing at the end of the format text
    238                 __cfaabi_dbg_write( "\n", 1 );
    239         } // if
    240         kernel_abort_msg( kernel_data, abort_text, abort_text_size );
    241 
    242         __cfaabi_backtrace( signalAbort ? 4 : 2 );
    243 
    244         __cabi_libc.abort();                                                            // print stack trace in handler
     222static volatile int __abort_stage = 0;
     223
      224// Cannot forward a va_list through another variadic call, so both abort wrappers funnel into __abort.
     225void __abort( bool signalAbort, const char fmt[], va_list args ) {
     226        int stage = __atomic_add_fetch( &__abort_stage, 1, __ATOMIC_SEQ_CST );
     227
     228        // First stage: stop the cforall kernel and print
     229        if(stage == 1) {
     230                // increment stage
     231                stage = __atomic_add_fetch( &__abort_stage, 1, __ATOMIC_SEQ_CST );
     232
     233                // must be done here to lock down kernel
     234                void * kernel_data = kernel_abort();
     235                int len;
     236
     237                signal( SIGABRT, SIG_DFL );                                                     // prevent final "real" abort from recursing to handler
     238
     239                len = snprintf( abort_text, abort_text_size, "Cforall Runtime error (UNIX pid:%ld) ", (long int)getpid() ); // use UNIX pid (versus getPid)
     240                __cfaabi_bits_write( STDERR_FILENO, abort_text, len );
     241
     242                assert( fmt );
     243                len = vsnprintf( abort_text, abort_text_size, fmt, args );
     244                __cfaabi_bits_write( STDERR_FILENO, abort_text, len );
     245
     246                // add optional newline if missing at the end of the format text
     247                if ( fmt[strlen( fmt ) - 1] != '\n' ) {
     248                        __cfaabi_bits_write( STDERR_FILENO, "\n", 1 );
     249                } // if
     250                kernel_abort_msg( kernel_data, abort_text, abort_text_size );
     251        }
     252
     253        // Second stage: print the backtrace
     254        if(stage == 2) {
     255                // increment stage
     256                stage = __atomic_add_fetch( &__abort_stage, 1, __ATOMIC_SEQ_CST );
     257
     258                // print stack trace in handler
     259                __cfaabi_backtrace( signalAbort ? 4 : 2 );
     260        }
     261
     262        do {
     263                // Finally call abort
     264                __cabi_libc.abort();
     265
     266                // Loop so that we never return
     267        } while(true);
    245268}
    246269
     247270void abort( const char fmt[], ... ) {
    248271        va_list args;
    249272        va_start( args, fmt );
    250         abort( false, fmt, args );
     273        __abort( false, fmt, args );
     274    // CONTROL NEVER REACHES HERE!
    251275        va_end( args );
     276}
     277
     278void abort( bool signalAbort, const char fmt[], ... ) {
     279    va_list args;
     280    va_start( args, fmt );
     281    __abort( signalAbort, fmt, args );
     282    // CONTROL NEVER REACHES HERE!
     283    va_end( args );
    252284}
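The shape of the two wrappers above follows the standard C idiom: a va_list cannot be re-forwarded through another variadic call, so both variadic abort()s funnel into the single va_list worker __abort(). A generic sketch of the pattern, with hypothetical names vfail/fail:

	#include <stdarg.h>
	#include <stdio.h>
	static void vfail( const char fmt[], va_list args ) {
		vfprintf( stderr, fmt, args );		// the single va_list consumer
	}
	void fail( const char fmt[], ... ) {
		va_list args;
		va_start( args, fmt );
		vfail( fmt, args );			// forward the va_list, not the ellipsis
		va_end( args );
	}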
    253285
  • libcfa/src/iostream.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Feb 20 15:53:23 2020
    13 // Update Count     : 829
     12// Last Modified On : Mon Aug 24 08:31:35 2020
     13// Update Count     : 1130
    1414//
    1515
    1616#include "iostream.hfa"
    1717
    18 extern "C" {
    1918#include <stdio.h>
    2019#include <stdbool.h>                                                                    // true/false
    2120#include <stdint.h>                                                                             // UINT64_MAX
    22 //#include <string.h>                                                                   // strlen, strcmp
     21#include <float.h>                                                                              // DBL_DIG, LDBL_DIG
     22#include <complex.h>                                                                    // creal, cimag
     23//#include <string.h>                                                                   // strlen, strcmp, memcpy
     24extern "C" {
    2325extern size_t strlen (const char *__s) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
    2426extern int strcmp (const char *__s1, const char *__s2) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
    2527extern char *strcpy (char *__restrict __dest, const char *__restrict __src) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
    2628extern void *memcpy (void *__restrict __dest, const void *__restrict __src, size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
    27 #include <float.h>                                                                              // DBL_DIG, LDBL_DIG
    28 #include <math.h>                                                                               // isfinite
    29 #include <complex.h>                                                                    // creal, cimag
    3029} // extern "C"
    3130
    32 
    33 //*********************************** ostream ***********************************
     31#include "math.hfa"                                                                             // isfinite, floor, ceiling_div
     32#include "bitmanip.hfa"                                                                 // high1
     33
     34
     35// *********************************** ostream ***********************************
    3436
    3537
    3638forall( dtype ostype | ostream( ostype ) ) {
    37         ostype & ?|?( ostype & os, zero_t ) {
    38                 if ( $sepPrt( os ) ) fmt( os, "%s", $sepGetCur( os ) );
    39                 fmt( os, "%d", 0n );
    40                 return os;
    41         } // ?|?
    42         void ?|?( ostype & os, zero_t z ) {
    43                 (ostype &)(os | z); ends( os );
    44         } // ?|?
    45 
    46         ostype & ?|?( ostype & os, one_t ) {
    47                 if ( $sepPrt( os ) ) fmt( os, "%s", $sepGetCur( os ) );
    48                 fmt( os, "%d", 1n );
    49                 return os;
    50         } // ?|?
    51         void ?|?( ostype & os, one_t o ) {
    52                 (ostype &)(os | o); ends( os );
    53         } // ?|?
    54 
    5539        ostype & ?|?( ostype & os, bool b ) {
    5640                if ( $sepPrt( os ) ) fmt( os, "%s", $sepGetCur( os ) );
…
    165149        #define P10_UINT64 10_000_000_000_000_000_000_ULL       // 19 zeroes
    166150
    167         static void base10_128( ostype & os, unsigned int128 val ) {
    168                 if ( val > UINT64_MAX ) {
     151        static inline void base10_128( ostype & os, unsigned int128 val ) {
     152#if defined(__GNUC__) && __GNUC_PREREQ(7,0)                             // gcc version >= 7
     153                if ( val > P10_UINT64 ) {
     154#else
     155                if ( (uint64_t)(val >> 64) != 0 || (uint64_t)val > P10_UINT64 ) { // patch gcc 5 & 6 -O3 bug
     156#endif // __GNUC_PREREQ(7,0)
    169157                        base10_128( os, val / P10_UINT64 );                     // recursive
    170158                        fmt( os, "%.19lu", (uint64_t)(val % P10_UINT64) );
…
    174162        } // base10_128
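The chunking idea behind base10_128: 10^19 (P10_UINT64) is the largest power of ten representable in a uint64_t, so the value is printed as a recursive head followed by zero-padded 19-digit groups; the #if above only works around a gcc 5/6 -O3 comparison bug. A plain-C sketch of the same technique for gcc/clang __int128:

	#include <stdio.h>
	#define P10 10000000000000000000ULL		// 10^19
	static void print_u128( unsigned __int128 val ) {
		if ( val > P10 ) {
			print_u128( val / P10 );	// most significant groups first
			printf( "%.19llu", (unsigned long long)(val % P10) );	// zero-padded 19-digit group
		} else {
			printf( "%llu", (unsigned long long)val );	// leading group, no padding
		}
	}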
    175163
    176         static void base10_128( ostype & os, int128 val ) {
     164        static inline void base10_128( ostype & os, int128 val ) {
    177165                if ( val < 0 ) {
    178166                        fmt( os, "-" );                                                         // leading negative sign
…
    445433} // distribution
    446434
    447 //*********************************** manipulators ***********************************
    448 
    449 //*********************************** integral ***********************************
     435// *********************************** manipulators ***********************************
     436
     437// *********************************** integral ***********************************
    450438
    451439static const char * shortbin[] = { "0", "1", "10", "11", "100", "101", "110", "111", "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111" };
     
    453441
    454442// Default prefix for non-decimal prints is 0b, 0, 0x.
    455 #define IntegralFMTImpl( T, CODE, IFMTNP, IFMTP ) \
     443#define IntegralFMTImpl( T, IFMTNP, IFMTP ) \
    456444forall( dtype ostype | ostream( ostype ) ) { \
    457445        ostype & ?|?( ostype & os, _Ostream_Manip(T) f ) { \
     
    459447\
    460448                if ( f.base == 'b' || f.base == 'B' ) {                 /* bespoke binary format */ \
    461                         int bits;                                                                                                       \
    462                         if ( f.val == (T){0} ) bits = 1;                        /* force at least one bit to print */ \
    463                         else bits = sizeof(long long int) * 8 - __builtin_clzll( f.val ); /* position of most significant bit */ \
    464                         bits = bits > sizeof(f.val) * 8 ? sizeof(f.val) * 8 : bits; \
    465                         int spaces = f.wd - bits;                                       /* can be negative */ \
    466                         if ( ! f.flags.nobsdp ) { spaces -= 2; }        /* base prefix takes space */ \
    467                         /* printf( "%d %d\n", bits, spaces ); */ \
     449                        int bits = high1( f.val );                                      /* position of most significant bit */ \
     450                        if ( bits == 0 ) bits = 1;                                      /* 0 value => force one bit to print */ \
     451                        int spaces; \
    468452                        if ( ! f.flags.left ) {                                         /* right justified ? */ \
    469453                                /* Note, base prefix then zero padding or spacing then prefix. */ \
    470                                 if ( f.flags.pad0 || f.flags.pc ) { \
     454                                if ( f.flags.pc ) { \
     455                                        spaces = f.wd - f.pc; \
     456                                        if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     457                                        if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    471458                                        if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
    472                                         if ( f.flags.pc ) spaces = f.pc - bits; \
     459                                        spaces = f.pc - bits; \
    473460                                        if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
    474461                                } else { \
    475                                         if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    476                                         if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     462                                        spaces = f.wd - bits; \
     463                                        if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     464                                        if ( f.flags.pad0 ) { \
     465                                                if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     466                                                if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
     467                                        } else { \
     468                                                if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
     469                                                if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     470                                        } /* if */ \
    477471                                } /* if */ \
    478                         } else if ( ! f.flags.nobsdp ) { \
    479                                 fmt( os, "0%c", f.base ); \
     472                        } else { \
     473                                if ( ! f.flags.nobsdp ) fmt( os, "0%c", f.base ); \
     474                                if ( f.flags.pc ) { \
     475                                        spaces = f.pc - bits; \
     476                                        if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
     477                                        spaces = f.wd - f.pc; \
     478                                } else { /* pad0 flag ignored with left flag */ \
     479                                        spaces = f.wd - bits; \
     480                                } /* if */ \
     481                                if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
    480482                        } /* if */ \
    481                         int shift = (bits - 1) / 4 * 4; /* floor( bits - 1, 4 ) */ \
     483                        int shift = floor( bits - 1, 4 ); \
    482484                        typeof( f.val ) temp = f.val; \
    483485                        fmt( os, "%s", shortbin[(temp >> shift) & 0xf] ); \
…
    490492                        if ( f.flags.left && spaces > 0 ) fmt( os, "%*s", spaces, " " ); \
    491493                        return os; \
    492                 } /* if  */ \
     494                } /* if */ \
    493495\
    494496                char fmtstr[sizeof(IFMTP)];                                             /* sizeof includes '\0' */ \
     
    500502                if ( ! f.flags.nobsdp ) { fmtstr[star] = '#'; star -= 1; } \
    501503                if ( f.flags.left ) { fmtstr[star] = '-'; star -= 1; } \
    502                 if ( f.flags.sign && f.base == CODE ) { fmtstr[star] = '+'; star -= 1; } \
     504                if ( f.flags.sign ) { fmtstr[star] = '+'; star -= 1; } \
    503505                if ( f.flags.pad0 && ! f.flags.pc ) { fmtstr[star] = '0'; star -= 1; } \
    504506                fmtstr[star] = '%'; \
     
    506508                if ( ! f.flags.pc ) {                                                   /* no precision */ \
    507509                        fmtstr[sizeof(IFMTNP)-2] = f.base;                      /* sizeof includes '\0' */ \
    508                         /* printf( "%s %c %c\n", &fmtstr[star], f.base, CODE ); */ \
     510                        /* printf( "%s %c\n", &fmtstr[star], f.base ); */ \
    509511                        fmt( os, &fmtstr[star], f.wd, f.val ); \
    510512                } else {                                                                                /* precision */ \
    511513                        fmtstr[sizeof(IFMTP)-2] = f.base;                       /* sizeof includes '\0' */ \
    512                         /* printf( "%s %c %c\n", &fmtstr[star], f.base, CODE ); */ \
     514                        /* printf( "%s %c\n", &fmtstr[star], f.base ); */ \
    513515                        fmt( os, &fmtstr[star], f.wd, f.pc, f.val ); \
    514516                } /* if */ \
     
    518520} // distribution
    519521
    520 IntegralFMTImpl( signed char, 'd', "%    *hh ", "%    *.*hh " )
    521 IntegralFMTImpl( unsigned char, 'u', "%    *hh ", "%    *.*hh " )
    522 IntegralFMTImpl( signed short int, 'd', "%    *h ", "%    *.*h " )
    523 IntegralFMTImpl( unsigned short int, 'u', "%    *h ", "%    *.*h " )
    524 IntegralFMTImpl( signed int, 'd', "%    * ", "%    *.* " )
    525 IntegralFMTImpl( unsigned int, 'u', "%    * ", "%    *.* " )
    526 IntegralFMTImpl( signed long int, 'd', "%    *l ", "%    *.*l " )
    527 IntegralFMTImpl( unsigned long int, 'u', "%    *l ", "%    *.*l " )
    528 IntegralFMTImpl( signed long long int, 'd', "%    *ll ", "%    *.*ll " )
    529 IntegralFMTImpl( unsigned long long int, 'u', "%    *ll ", "%    *.*ll " )
    530 
    531 
     522IntegralFMTImpl( signed char, "%    *hh ", "%    *.*hh " )
     523IntegralFMTImpl( unsigned char, "%    *hh ", "%    *.*hh " )
     524IntegralFMTImpl( signed short int, "%    *h ", "%    *.*h " )
     525IntegralFMTImpl( unsigned short int, "%    *h ", "%    *.*h " )
     526IntegralFMTImpl( signed int, "%    * ", "%    *.* " )
     527IntegralFMTImpl( unsigned int, "%    * ", "%    *.* " )
     528IntegralFMTImpl( signed long int, "%    *l ", "%    *.*l " )
     529IntegralFMTImpl( unsigned long int, "%    *l ", "%    *.*l " )
     530IntegralFMTImpl( signed long long int, "%    *ll ", "%    *.*ll " )
     531IntegralFMTImpl( unsigned long long int, "%    *ll ", "%    *.*ll " )
     532
     533#if 0
    532534#if defined( __SIZEOF_INT128__ )
    533535// Default prefix for non-decimal prints is 0b, 0, 0x.
    534536#define IntegralFMTImpl128( T, SIGNED, CODE, IFMTNP, IFMTP ) \
    535537forall( dtype ostype | ostream( ostype ) ) \
    536 static void base10_128( ostype & os, _Ostream_Manip(T) fmt ) { \
    537         if ( fmt.val > UINT64_MAX ) { \
    538                 fmt.val /= P10_UINT64; \
    539                 base10_128( os, fmt ); /* recursive */ \
    540                 _Ostream_Manip(unsigned long long int) fmt2 @= { (uint64_t)(fmt.val % P10_UINT64), 0, 19, 'u', { .all : 0 } }; \
    541                 fmt2.flags.nobsdp = true; \
    542                 printf( "fmt2 %c %lld %d\n", fmt2.base, fmt2.val, fmt2.all );   \
     538static void base10_128( ostype & os, _Ostream_Manip(T) f ) { \
     539        if ( f.val > UINT64_MAX ) { \
     540                unsigned long long int lsig = f.val % P10_UINT64; \
     541                f.val /= P10_UINT64; /* msig */ \
     542                base10_128( os, f ); /* recursion */ \
     543                _Ostream_Manip(unsigned long long int) fmt @= { lsig, 0, 19, 'u', { .all : 0 } }; \
     544                fmt.flags.nobsdp = true; \
     545                /* printf( "fmt1 %c %lld %d\n", fmt.base, fmt.val, fmt.all ); */ \
    543546                sepOff( os ); \
    544                 (ostype &)(os | fmt2); \
     547                (ostype &)(os | fmt); \
    545548        } else { \
    546                 printf( "fmt %c %lld %d\n", fmt.base, fmt.val, fmt.all ); \
     549                /* printf( "fmt2 %c %lld %d\n", f.base, (unsigned long long int)f.val, f.all ); */ \
     550                _Ostream_Manip(SIGNED long long int) fmt @= { (SIGNED long long int)f.val, f.wd, f.pc, f.base, { .all : f.all } }; \
    547551                (ostype &)(os | fmt); \
    548552        } /* if */ \
    549 } /* base10_128 */                                                \
     553} /* base10_128 */ \
    550554forall( dtype ostype | ostream( ostype ) ) { \
    551555        ostype & ?|?( ostype & os, _Ostream_Manip(T) f ) { \
    552556                if ( $sepPrt( os ) ) fmt( os, "%s", $sepGetCur( os ) ); \
    553557\
    554                 if ( f.base == 'b' | f.base == 'o' | f.base == 'x' | f.base == 'X' ) { \
     558                if ( f.base == 'b' | f.base == 'B' | f.base == 'o' | f.base == 'x' | f.base == 'X' ) { \
    555559                        unsigned long long int msig = (unsigned long long int)(f.val >> 64); \
    556560                        unsigned long long int lsig = (unsigned long long int)(f.val); \
     
    562566                        } else { \
    563567                                fmt2.flags.pad0 = fmt2.flags.nobsdp = true;     \
    564                                 if ( f.base == 'b' ) { \
    565                                         if ( f.wd > 64 ) fmt.wd = f.wd - 64; \
    566                                         fmt2.wd = 64; \
     568                                if ( f.base == 'b' | f.base == 'B' ) { \
     569                                        if ( fmt.flags.pc && fmt.pc > 64 ) fmt.pc -= 64; else { fmt.flags.pc = false; fmt.pc = 0; } \
     570                                        if ( fmt.flags.left ) { \
     571                                                fmt.flags.left = false; \
     572                                                fmt.wd = 0; \
     573                                                /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     574                                                fmt2.flags.left = true; \
     575                                                int msigd = high1( msig ); \
     576                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     577                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0b base specifier */ \
     578                                                if ( (int)fmt2.wd < 64 ) fmt2.wd = 64; /* cast deals with negative value */ \
     579                                                fmt2.flags.pc = true; fmt2.pc = 64; \
     580                                        } else { \
     581                                                if ( fmt.wd > 64 ) fmt.wd -= 64; \
     582                                                else fmt.wd = 1; \
     583                                                /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     584                                                fmt2.wd = 64; \
     585                                        } /* if */ \
     586                                        /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    567587                                        (ostype &)(os | fmt | "" | fmt2); \
    568588                                } else if ( f.base == 'o' ) { \
     589                                        if ( fmt.flags.pc && fmt.pc > 22 ) fmt.pc -= 22; else { fmt.flags.pc = false; fmt.pc = 0; } \
    569590                                        fmt.val = (unsigned long long int)fmt.val >> 2; \
    570                                         if ( f.wd > 21 ) fmt.wd = f.wd - 21; \
    571                                         fmt2.wd = 1; \
    572                                         fmt2.val = ((msig & 0x3) << 1) + 1; \
    573                                         (ostype &)(os | fmt | "" | fmt2); \
    574                                         sepOff( os ); \
    575                                         fmt2.wd = 21; \
    576                                         fmt2.val = lsig & 0x7fffffffffffffff; \
     591                                        fmt2.val = ((msig & 0x3) << 1) + ((lsig & 0x8000000000000000U) != 0); \
     592                                        if ( fmt.flags.left ) { \
     593                                                fmt.flags.left = false; \
     594                                                fmt.wd = 0; \
     595                                                /* printf( "L %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
     596                                                (ostype &)(os | fmt | "" | fmt2); \
     597                                                sepOff( os ); \
     598                                                fmt2.flags.left = true; \
     599                                                int msigd = ceiling_div( high1( fmt.val ), 3 ); \
     600                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     601                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 1; /* compensate for 0 base specifier */ \
     602                                                if ( (int)fmt2.wd < 21 ) fmt2.wd = 21; /* cast deals with negative value */ \
     603                                                fmt2.flags.pc = true; fmt2.pc = 21; \
     604                                        } else { \
     605                                                if ( fmt.wd > 22 ) fmt.wd -= 22; \
     606                                                else fmt.wd = 1; \
     607                                                /* printf( "R %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
     608                                                (ostype &)(os | fmt | "" | fmt2); \
     609                                                sepOff( os ); \
     610                                                fmt2.wd = 21; \
     611                                        } /* if */ \
     612                                        fmt2.val = lsig & 0x7fffffffffffffffU; \
     613                                        /* printf( "\nC %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    577614                                        (ostype &)(os | fmt2); \
    578                                 } else { \
    579                                         if ( f.flags.left ) { \
    580                                                 if ( f.wd > 16 ) fmt2.wd = f.wd - 16;   \
    581                                                 fmt.wd = 16;                                                    \
     615                                } else { /* f.base == 'x'  | f.base == 'X' */ \
     616                                        if ( fmt.flags.pc && fmt.pc > 16 ) fmt.pc -= 16; else { fmt.flags.pc = false; fmt.pc = 0; } \
     617                                        if ( fmt.flags.left ) { \
     618                                                fmt.flags.left = false; \
     619                                                fmt.wd = 0; \
     620                                                /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     621                                                fmt2.flags.left = true; \
     622                                                int msigd = high1( msig ); \
     623                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     624                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0x base specifier */ \
     625                                                if ( (int)fmt2.wd < 16 ) fmt2.wd = 16; /* cast deals with negative value */ \
     626                                                fmt2.flags.pc = true; fmt2.pc = 16; \
    582627                                        } else { \
    583                                                 if ( f.wd > 16 ) fmt.wd = f.wd - 16;    \
    584                                                 fmt2.wd = 16;                                                   \
     628                                                if ( fmt.wd > 16 ) fmt.wd -= 16; \
     629                                                else fmt.wd = 1; \
     630                                                /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     631                                                fmt2.wd = 16; \
    585632                                        } /* if */ \
     633                                        /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    586634                                        (ostype &)(os | fmt | "" | fmt2); \
    587635                                } /* if */ \
    588636                        } /* if */ \
    589637                } else { \
     638                        if ( CODE == 'd' ) { \
     639                                if ( f.val < 0 )  { fmt( os, "-" ); sepOff( os ); f.val = -f.val; f.flags.sign = false; } \
     640                        } /* if */ \
    590641                        base10_128( os, f ); \
    591642                } /* if */ \
     
    598649IntegralFMTImpl128( unsigned int128, unsigned, 'u', "%    *ll ", "%    *.*ll " )
    599650#endif // __SIZEOF_INT128__
    600 
    601 //*********************************** floating point ***********************************
     651#endif // 0
     652
     653#if 1
     654#if defined( __SIZEOF_INT128__ )
     655// Default prefix for non-decimal prints is 0b, 0, 0x.
     656forall( dtype ostype | ostream( ostype ) )
     657static inline void base_128( ostype & os, unsigned int128 val, unsigned int128 power, _Ostream_Manip(uint64_t) & f, unsigned int maxdig, unsigned int bits, unsigned int cnt = 0 ) {
     658        int wd = 1;                                                                                     // f.wd is never 0 because 0 implies left-pad
     659        if ( val > power ) {                                                            // subdivide value into printable 64-bit values
     660                base_128( os, val / power, power, f, maxdig, bits, cnt + 1 ); // recursive
     661                f.val = val % power;
     662                if ( cnt == 1 && f.flags.left ) { wd = f.wd; f.wd = maxdig; } // copy f.wd and reset for printing middle chunk
     663                // printf( "R val:%#lx(%lu) wd:%u pc:%u base:%c neg:%d pc:%d left:%d nobsdp:%d sign:%d pad0:%d\n",
     664                //              f.val, f.val, f.wd, f.pc, f.base, f.flags.neg, f.flags.pc, f.flags.left, f.flags.nobsdp, f.flags.sign, f.flags.pad0 );
     665                (ostype &)(os | f);
     666                if ( cnt == 1 ) {
     667                        if ( f.flags.left ) { wd -= maxdig; f.wd = wd < 0 ? 1 : wd; } // update and restore f.wd for printing end chunk
     668                        sepOff( os );                                                           // no separator between chunks
     669                } // if
     670        } else {                                                                                        // print start chunk
     671                f.val = val;
     672                // f.pc is unsigned, so do the possibly-negative arithmetic in (signed) wd
     673                if ( f.flags.pc && f.pc > maxdig * cnt ) { wd = f.pc - maxdig * cnt; f.pc = wd < 0 ? 0 : wd; }
     674                else { f.flags.pc = false; f.pc = 0; }
     675
     676                if ( ! f.flags.left ) {                                                 // right justify
     677                        wd = f.wd - maxdig * cnt;
     678                        f.wd = wd < 0 ? 1 : wd;
     679                        wd = maxdig;
     680                } else {                                                                                // left justify
     681                        if ( cnt != 0 ) {                                                       // value >= 2^64 ?
     682                                unsigned int dig, bs = 0;
     683                                // compute size of prefix digits and base
     684                                if ( f.base == 'd' || f.base == 'u' ) { // no base prefix
     685                                        dig = ceil( log10( f.val ) );           // use floating-point
     686                                        if ( f.base == 'd' && (f.flags.neg || f.flags.sign) ) bs = 1; // sign ?
     687                                } else {
     688                                        dig = ceiling_div( high1( f.val ), bits );
     689                                        if ( ! f.flags.nobsdp ) {                       // base prefix ?
     690                                                if ( f.base == 'o' ) {
     691                                                        // the octal 0 prefix is omitted when the precision already produces a leading zero
     692                                                        if ( f.pc <= dig ) bs = 1;      // 1 character prefix
     693                                                } else bs = 2;                                  // 2 character prefix
     694                                        } // if
     695                                } // if
     696                                wd = f.wd - (f.pc > dig ? f.pc : dig) - bs; // precision > leading digits ?
     697                                if ( wd < 0 ) wd = 1;
     698                                f.wd = 1;
     699                        } // if
     700                        // all manipulators handled implicitly for value < 2^64
     701                } // if
     702                // prior checks ensure wd not negative
     703
     704                if ( f.flags.neg ) f.val = -f.val;
     705                // printf( "L val:%#lx(%lu) wd:%u pc:%u base:%c neg:%d pc:%d left:%d nobsdp:%d sign:%d pad0:%d\n",
     706                //              f.val, f.val, f.wd, f.pc, f.base, f.flags.neg, f.flags.pc, f.flags.left, f.flags.nobsdp, f.flags.sign, f.flags.pad0 );
     707                (ostype &)(os | f);
     708
     709                // remaining middle and end chunks are padded with 0s on the left
     710                if ( ! f.flags.left ) { f.flags.pad0 = true; f.flags.pc = false; } // left pad with 0s
     711                else { f.pc = maxdig; f.flags.pc = true; }              // left pad with precision
     712
     713                if ( cnt != 0 ) sepOff( os );                                   // no separator between chunks
     714                f.wd = wd;                                                                              // reset f.wd for next chunk
     715                f.flags.sign = false;                                                   // no leading +/- sign
     716                f.flags.nobsdp = true;                                                  // no leading base prefix
     717        } // if
     718} // base_128
     719
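The decimal case of base_128 above relies on 10^19 being the largest power of ten that fits in 64 bits: the value is subdivided recursively, the leading chunk prints unpadded, and every later chunk is zero-padded to a full 19 digits. A minimal C sketch of the same chunking trick (illustrative names, not the library code):

#include <inttypes.h>
#include <stdio.h>

#define P10_UINT64 10000000000000000000ULL             /* 10^19, largest power of 10 < 2^64 */

/* sketch only: print an unsigned __int128 in decimal via 64-bit chunks */
static void print_u128( unsigned __int128 val ) {
	if ( val > UINT64_MAX ) {
		print_u128( val / P10_UINT64 );                 /* most-significant chunks first */
		printf( "%019" PRIu64, (uint64_t)(val % P10_UINT64) ); /* zero-pad middle/end chunks */
	} else {
		printf( "%" PRIu64, (uint64_t)val );            /* leading chunk: no padding */
	} // if
} // print_u128

int main( void ) {
	print_u128( (unsigned __int128)UINT64_MAX * UINT64_MAX ); /* 340282366920938463426481119284349108225 */
	printf( "\n" );
} // main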
     720#define IntegralFMTImpl128( T ) \
     721forall( dtype ostype | ostream( ostype ) ) { \
     722        ostype & ?|?( ostype & os, _Ostream_Manip(T) f ) { \
     723                _Ostream_Manip(uint64_t) fmt; \
     724                fmt.[wd, pc, base, all] = f.[wd, pc, base, all]; \
     725                if ( f.base == 'b' | f.base == 'B' ) { \
     726                        base_128( os, f.val, (unsigned int128)1 << 64, fmt, 64, 1 ); \
     727                } else if ( f.base == 'o' ) { \
     728                        base_128( os, f.val, (unsigned int128)1 << 63, fmt, 21, 3 ); \
     729                } else if ( f.base == 'd' || f.base == 'u' ) { \
     730                        if ( f.base == 'd' && f.val < 0 ) { f.val = -f.val; fmt.flags.neg = true; } \
     731                        base_128( os, f.val, (unsigned int128)10_000_000_000_000_000_000UL, fmt, 19, 0 ); \
     732                } else { \
     733                        base_128( os, f.val, (unsigned int128)1 << 64, fmt, 16, 4 ); \
     734                } /* if */ \
     735                return os; \
     736        } /* ?|? */ \
     737        void ?|?( ostype & os, _Ostream_Manip(T) f ) { (ostype &)(os | f); ends( os ); } \
     738} // distribution
     739
     740IntegralFMTImpl128( int128 )
     741IntegralFMTImpl128( unsigned int128 )
     742#endif // __SIZEOF_INT128__
     743#endif // 0
     744
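The chunk parameters in IntegralFMTImpl128 follow from the digit widths: a binary digit covers 1 bit (chunk 2^64, 64 digits), a hexadecimal digit 4 bits (chunk 2^64, 16 digits), and a decimal chunk is 10^19 (19 digits). An octal digit covers 3 bits, and 64 is not a multiple of 3, so octal chunks at 63-bit boundaries, making every chunk exactly 21 octal digits. A hedged C sketch of the octal case only (illustrative, not the library code):

#include <stdio.h>

/* sketch only: print an unsigned __int128 in octal via 63-bit chunks,
   so each non-leading chunk is exactly 21 zero-padded octal digits */
static void print_o128( unsigned __int128 val ) {
	if ( val >> 63 ) {                                  /* more than one chunk ? */
		print_o128( val >> 63 );                        /* high chunks first */
		printf( "%021llo", (unsigned long long)(val & ((1ULL << 63) - 1)) );
	} else {
		printf( "%llo", (unsigned long long)val );      /* leading chunk: no padding */
	} // if
} // print_o128

int main( void ) {
	print_o128( (unsigned __int128)1 << 100 );          /* 2 followed by 33 octal zeros */
	printf( "\n" );
} // main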
     745// *********************************** floating point ***********************************
    602746
    603747#define PrintWithDP2( os, format, val, ... ) \
     
    655799FloatingPointFMTImpl( long double, "%    *L ", "%    *.*L " )
    656800
    657 //*********************************** character ***********************************
     801// *********************************** character ***********************************
    658802
    659803forall( dtype ostype | ostream( ostype ) ) {
     
    688832} // distribution
    689833
    690 //*********************************** C string ***********************************
     834// *********************************** C string ***********************************
    691835
    692836forall( dtype ostype | ostream( ostype ) ) {
     
    735879
    736880
    737 //*********************************** istream ***********************************
     881// *********************************** istream ***********************************
    738882
    739883
     
    812956        } // ?|?
    813957
     958#if defined( __SIZEOF_INT128__ )
     959        istype & ?|?( istype & is, int128 & i128 ) {
     960                return (istype &)(is | (unsigned int128 &)i128);
     961        } // ?|?
     962
     963        istype & ?|?( istype & is, unsigned int128 & ui128 ) {
     964                char s[40];
     965                bool sign = false;
     966
     967                if ( fmt( is, " %[-]", s ) == 1 ) sign = true;  // skip whitespace, negative sign ?
     968                // If the input is too large, the value returned is undefined; if there is no input, no value is returned.
     969                if ( fmt( is, "%39[0-9]%*[0-9]", s ) == 1 ) {   // take first 39 characters, ignore remaining
     970                        ui128 = 0;
     971                        for ( unsigned int i = 0; s[i] != '\0'; i += 1 ) {
     972                                ui128 = ui128 * 10 + s[i] - '0';
     973                        } // for
     974                        if ( sign ) ui128 = -ui128;
     975                } else if ( sign ) ungetc( is, '-' );                   // return minus when no digits
     976                return is;
     977        } // ?|?
     978#endif // __SIZEOF_INT128__
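Because scanf has no 128-bit conversion, the extractor above collects at most 39 digit characters (2^128 - 1 has 39 decimal digits) and accumulates the value one digit at a time. A minimal C sketch of that loop, assuming compiler __int128 support (illustrative, not the library code):

#include <stdio.h>

int main( void ) {
	char s[40];
	unsigned __int128 v = 0;
	/* take at most 39 digits; the library also appends "%*[0-9]" to discard any excess */
	if ( scanf( "%39[0-9]", s ) == 1 ) {
		for ( int i = 0; s[i] != '\0'; i += 1 )
			v = v * 10 + (s[i] - '0');                  /* accumulate base 10 */
	} // if
	printf( "low 64 bits: %llu\n", (unsigned long long)v ); /* the full value needs chunked printing */
} // main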
    814979
    815980        istype & ?|?( istype & is, float & f ) {
     
    8811046} // distribution
    8821047
    883 //*********************************** manipulators ***********************************
     1048// *********************************** manipulators ***********************************
    8841049
    8851050forall( dtype istype | istream( istype ) )
  • libcfa/src/iostream.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Feb 20 15:30:56 2020
    13 // Update Count     : 337
     12// Last Modified On : Tue Aug 11 22:16:14 2020
     13// Update Count     : 350
    1414//
    1515
     
    1919
    2020
    21 //*********************************** ostream ***********************************
     21// *********************************** ostream ***********************************
    2222
    2323
     
    6767
    6868forall( dtype ostype | ostream( ostype ) ) {
    69         ostype & ?|?( ostype &, zero_t );
    70         void ?|?( ostype &, zero_t );
    71         ostype & ?|?( ostype &, one_t );
    72         void ?|?( ostype &, one_t );
    73 
    7469        ostype & ?|?( ostype &, bool );
    7570        void ?|?( ostype &, bool );
     
    156151} // distribution
    157152
    158 //*********************************** manipulators ***********************************
     153// *********************************** manipulators ***********************************
    159154
    160155forall( otype T )
     
    166161                unsigned char all;
    167162                struct {
     163                        unsigned char neg:1;                                            // val is negative
    168164                        unsigned char pc:1;                                                     // precision specified
    169165                        unsigned char left:1;                                           // left justify
     
    175171}; // _Ostream_Manip
    176172
    177 //*********************************** integral ***********************************
     173// *********************************** integral ***********************************
    178174
    179175// See 6.7.9. 19) The initialization shall occur in initializer list order, each initializer provided for a particular
     
    215211IntegralFMTDecl( int128, 'd' )
    216212IntegralFMTDecl( unsigned int128, 'u' )
    217 #endif
    218 
    219 //*********************************** floating point ***********************************
     213#endif // __SIZEOF_INT128__
     214
     215// *********************************** floating point ***********************************
    220216
    221217// Default suffix for values with no fraction is "."
     
    246242FloatingPointFMTDecl( long double )
    247243
    248 //*********************************** character ***********************************
     244// *********************************** character ***********************************
    249245
    250246static inline {
     
    263259} // ?|?
    264260
    265 //*********************************** C string ***********************************
     261// *********************************** C string ***********************************
    266262
    267263static inline {
     
    282278
    283279
    284 //*********************************** istream ***********************************
     280// *********************************** istream ***********************************
    285281
    286282
     
    314310        istype & ?|?( istype &, unsigned int & );
    315311        istype & ?|?( istype &, long int & );
     312        istype & ?|?( istype &, unsigned long int & );
    316313        istype & ?|?( istype &, long long int & );
    317         istype & ?|?( istype &, unsigned long int & );
    318314        istype & ?|?( istype &, unsigned long long int & );
     315#if defined( __SIZEOF_INT128__ )
     316        istype & ?|?( istype &, int128 & );
     317        istype & ?|?( istype &, unsigned int128 & );
     318#endif // __SIZEOF_INT128__
    319319
    320320        istype & ?|?( istype &, float & );
     
    336336} // distribution
    337337
    338 //*********************************** manipulators ***********************************
     338// *********************************** manipulators ***********************************
    339339
    340340struct _Istream_Cstr {
     
    358358        _Istream_Cstr excl( const char scanset[], char * s ) { return (_Istream_Cstr){ s, scanset, -1, { .flags.inex : true } }; }
    359359        _Istream_Cstr & excl( const char scanset[], _Istream_Cstr & fmt ) { fmt.scanset = scanset; fmt.flags.inex = true; return fmt; }
    360         _Istream_Cstr ignore( const char s[] ) { return (_Istream_Cstr)@{ s, 0p, -1, { .flags.ignore : true } }; }
     360        _Istream_Cstr ignore( char s[] ) { return (_Istream_Cstr)@{ s, 0p, -1, { .flags.ignore : true } }; }
    361361        _Istream_Cstr & ignore( _Istream_Cstr & fmt ) { fmt.flags.ignore = true; return fmt; }
    362362        _Istream_Cstr wdi( unsigned int w, char s[] ) { return (_Istream_Cstr)@{ s, 0p, w, { .all : 0 } }; }
     
    370370
    371371static inline {
    372         _Istream_Char ignore( const char c ) { return (_Istream_Char)@{ true }; }
     372        _Istream_Char ignore( const char ) { return (_Istream_Char)@{ true }; }
    373373        _Istream_Char & ignore( _Istream_Char & fmt ) { fmt.ignore = true; return fmt; }
    374374} // distribution
    375375forall( dtype istype | istream( istype ) ) istype & ?|?( istype & is, _Istream_Char f );
    376376
    377 forall( otype T )
     377forall( dtype T | sized( T ) )
    378378struct _Istream_Manip {
    379379        T & val;                                                                                        // polymorphic base-type
     
    413413
    414414
    415 //*********************************** time ***********************************
     415// *********************************** time ***********************************
    416416
    417417
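The C-string input manipulators above (wdi, incl, excl, ignore) pair the target buffer with a width or scanset, so a read cannot overrun the buffer. The analogous guard in plain C is a width in the conversion specifier; a hedged sketch (the 15/16 sizing is illustrative):

#include <stdio.h>

int main( void ) {
	char name[16];
	if ( scanf( "%15s", name ) == 1 )                   /* at most 15 characters + '\0' */
		printf( "read: %s\n", name );
} // main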
  • libcfa/src/math.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Mon Apr 18 23:37:04 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 10:27:11 2020
    13 // Update Count     : 117
     12// Last Modified On : Mon Aug 24 08:56:20 2020
     13// Update Count     : 126
    1414//
    1515
     
    1919#include <complex.h>
    2020
     21//---------------------------------------
     22
     23#include "common.hfa"
     24
    2125//---------------------- General ----------------------
    2226
    23 static inline float ?%?( float x, float y ) { return fmodf( x, y ); }
    24 static inline float fmod( float x, float y ) { return fmodf( x, y ); }
    25 static inline double ?%?( double x, double y ) { return fmod( x, y ); }
    26 // extern "C" { double fmod( double, double ); }
    27 static inline long double ?%?( long double x, long double y ) { return fmodl( x, y ); }
    28 static inline long double fmod( long double x, long double y ) { return fmodl( x, y ); }
    29 
    30 static inline float remainder( float x, float y ) { return remainderf( x, y ); }
    31 // extern "C" { double remainder( double, double ); }
    32 static inline long double remainder( long double x, long double y ) { return remainderl( x, y ); }
    33 
    34 static inline float remquo( float x, float y, int * quo ) { return remquof( x, y, quo ); }
    35 // extern "C" { double remquo( double x, double y, int * quo ); }
    36 static inline long double remquo( long double x, long double y, int * quo ) { return remquol( x, y, quo ); }
    37 static inline [ int, float ] remquo( float x, float y ) { int quo; x = remquof( x, y, &quo ); return [ quo, x ]; }
    38 static inline [ int, double ] remquo( double x, double y ) { int quo; x = remquo( x, y, &quo ); return [ quo, x ]; }
    39 static inline [ int, long double ] remquo( long double x, long double y ) { int quo; x = remquol( x, y, &quo ); return [ quo, x ]; }
    40 
    41 static inline [ float, float ] div( float x, float y ) { y = modff( x / y, &x ); return [ x, y ]; }
    42 static inline [ double, double ] div( double x, double y ) { y = modf( x / y, &x ); return [ x, y ]; }
    43 static inline [ long double, long double ] div( long double x, long double y ) { y = modfl( x / y, &x ); return [ x, y ]; }
    44 
    45 static inline float fma( float x, float y, float z ) { return fmaf( x, y, z ); }
    46 // extern "C" { double fma( double, double, double ); }
    47 static inline long double fma( long double x, long double y, long double z ) { return fmal( x, y, z ); }
    48 
    49 static inline float fdim( float x, float y ) { return fdimf( x, y ); }
    50 // extern "C" { double fdim( double, double ); }
    51 static inline long double fdim( long double x, long double y ) { return fdiml( x, y ); }
    52 
    53 static inline float nan( const char tag[] ) { return nanf( tag ); }
    54 // extern "C" { double nan( const char [] ); }
    55 static inline long double nan( const char tag[] ) { return nanl( tag ); }
     27static inline {
     28        float ?%?( float x, float y ) { return fmodf( x, y ); }
     29        float fmod( float x, float y ) { return fmodf( x, y ); }
     30        double ?%?( double x, double y ) { return fmod( x, y ); }
     31        // extern "C" { double fmod( double, double ); }
     32        long double ?%?( long double x, long double y ) { return fmodl( x, y ); }
     33        long double fmod( long double x, long double y ) { return fmodl( x, y ); }
     34
     35        float remainder( float x, float y ) { return remainderf( x, y ); }
     36        // extern "C" { double remainder( double, double ); }
     37        long double remainder( long double x, long double y ) { return remainderl( x, y ); }
     38
     39        float remquo( float x, float y, int * quo ) { return remquof( x, y, quo ); }
     40        // extern "C" { double remquo( double x, double y, int * quo ); }
     41        long double remquo( long double x, long double y, int * quo ) { return remquol( x, y, quo ); }
     42        [ int, float ] remquo( float x, float y ) { int quo; x = remquof( x, y, &quo ); return [ quo, x ]; }
     43        [ int, double ] remquo( double x, double y ) { int quo; x = remquo( x, y, &quo ); return [ quo, x ]; }
     44        [ int, long double ] remquo( long double x, long double y ) { int quo; x = remquol( x, y, &quo ); return [ quo, x ]; }
     45
     46        [ float, float ] div( float x, float y ) { y = modff( x / y, &x ); return [ x, y ]; }
     47        [ double, double ] div( double x, double y ) { y = modf( x / y, &x ); return [ x, y ]; }
     48        [ long double, long double ] div( long double x, long double y ) { y = modfl( x / y, &x ); return [ x, y ]; }
     49
     50        float fma( float x, float y, float z ) { return fmaf( x, y, z ); }
     51        // extern "C" { double fma( double, double, double ); }
     52        long double fma( long double x, long double y, long double z ) { return fmal( x, y, z ); }
     53
     54        float fdim( float x, float y ) { return fdimf( x, y ); }
     55        // extern "C" { double fdim( double, double ); }
     56        long double fdim( long double x, long double y ) { return fdiml( x, y ); }
     57
     58        float nan( const char tag[] ) { return nanf( tag ); }
     59        // extern "C" { double nan( const char [] ); }
     60        long double nan( const char tag[] ) { return nanl( tag ); }
     61} // distribution
    5662
    5763//---------------------- Exponential ----------------------
    5864
    59 static inline float exp( float x ) { return expf( x ); }
    60 // extern "C" { double exp( double ); }
    61 static inline long double exp( long double x ) { return expl( x ); }
    62 static inline float _Complex exp( float _Complex x ) { return cexpf( x ); }
    63 static inline double _Complex exp( double _Complex x ) { return cexp( x ); }
    64 static inline long double _Complex exp( long double _Complex x ) { return cexpl( x ); }
    65 
    66 static inline float exp2( float x ) { return exp2f( x ); }
    67 // extern "C" { double exp2( double ); }
    68 static inline long double exp2( long double x ) { return exp2l( x ); }
    69 //static inline float _Complex exp2( float _Complex x ) { return cexp2f( x ); }
    70 //static inline double _Complex exp2( double _Complex x ) { return cexp2( x ); }
    71 //static inline long double _Complex exp2( long double _Complex x ) { return cexp2l( x ); }
    72 
    73 static inline float expm1( float x ) { return expm1f( x ); }
    74 // extern "C" { double expm1( double ); }
    75 static inline long double expm1( long double x ) { return expm1l( x ); }
    76 
    77 static inline float pow( float x, float y ) { return powf( x, y ); }
    78 // extern "C" { double pow( double, double ); }
    79 static inline long double pow( long double x, long double y ) { return powl( x, y ); }
    80 static inline float _Complex pow( float _Complex x, float _Complex y ) { return cpowf( x, y ); }
    81 static inline double _Complex pow( double _Complex x, double _Complex y ) { return cpow( x, y ); }
    82 static inline long double _Complex pow( long double _Complex x, long double _Complex y ) { return cpowl( x, y ); }
     65static inline {
     66        float exp( float x ) { return expf( x ); }
     67        // extern "C" { double exp( double ); }
     68        long double exp( long double x ) { return expl( x ); }
     69        float _Complex exp( float _Complex x ) { return cexpf( x ); }
     70        double _Complex exp( double _Complex x ) { return cexp( x ); }
     71        long double _Complex exp( long double _Complex x ) { return cexpl( x ); }
     72
     73        float exp2( float x ) { return exp2f( x ); }
     74        // extern "C" { double exp2( double ); }
     75        long double exp2( long double x ) { return exp2l( x ); }
     76        //float _Complex exp2( float _Complex x ) { return cexp2f( x ); }
     77        //double _Complex exp2( double _Complex x ) { return cexp2( x ); }
     78        //long double _Complex exp2( long double _Complex x ) { return cexp2l( x ); }
     79
     80        float expm1( float x ) { return expm1f( x ); }
     81        // extern "C" { double expm1( double ); }
     82        long double expm1( long double x ) { return expm1l( x ); }
     83
     84        float pow( float x, float y ) { return powf( x, y ); }
     85        // extern "C" { double pow( double, double ); }
     86        long double pow( long double x, long double y ) { return powl( x, y ); }
     87        float _Complex pow( float _Complex x, float _Complex y ) { return cpowf( x, y ); }
     88        double _Complex pow( double _Complex x, double _Complex y ) { return cpow( x, y ); }
     89        long double _Complex pow( long double _Complex x, long double _Complex y ) { return cpowl( x, y ); }
     90} // distribution
    8391
    8492//---------------------- Logarithm ----------------------
    8593
    86 static inline float log( float x ) { return logf( x ); }
    87 // extern "C" { double log( double ); }
    88 static inline long double log( long double x ) { return logl( x ); }
    89 static inline float _Complex log( float _Complex x ) { return clogf( x ); }
    90 static inline double _Complex log( double _Complex x ) { return clog( x ); }
    91 static inline long double _Complex log( long double _Complex x ) { return clogl( x ); }
    92 
    93 static inline float log2( float x ) { return log2f( x ); }
    94 // extern "C" { double log2( double ); }
    95 static inline long double log2( long double x ) { return log2l( x ); }
    96 // static inline float _Complex log2( float _Complex x ) { return clog2f( x ); }
    97 // static inline double _Complex log2( double _Complex x ) { return clog2( x ); }
    98 // static inline long double _Complex log2( long double _Complex x ) { return clog2l( x ); }
    99 
    100 static inline float log10( float x ) { return log10f( x ); }
    101 // extern "C" { double log10( double ); }
    102 static inline long double log10( long double x ) { return log10l( x ); }
    103 // static inline float _Complex log10( float _Complex x ) { return clog10f( x ); }
    104 // static inline double _Complex log10( double _Complex x ) { return clog10( x ); }
    105 // static inline long double _Complex log10( long double _Complex x ) { return clog10l( x ); }
    106 
    107 static inline float log1p( float x ) { return log1pf( x ); }
    108 // extern "C" { double log1p( double ); }
    109 static inline long double log1p( long double x ) { return log1pl( x ); }
    110 
    111 static inline int ilogb( float x ) { return ilogbf( x ); }
    112 // extern "C" { int ilogb( double ); }
    113 static inline int ilogb( long double x ) { return ilogbl( x ); }
    114 
    115 static inline float logb( float x ) { return logbf( x ); }
    116 // extern "C" { double logb( double ); }
    117 static inline long double logb( long double x ) { return logbl( x ); }
    118 
    119 static inline float sqrt( float x ) { return sqrtf( x ); }
    120 // extern "C" { double sqrt( double ); }
    121 static inline long double sqrt( long double x ) { return sqrtl( x ); }
    122 static inline float _Complex sqrt( float _Complex x ) { return csqrtf( x ); }
    123 static inline double _Complex sqrt( double _Complex x ) { return csqrt( x ); }
    124 static inline long double _Complex sqrt( long double _Complex x ) { return csqrtl( x ); }
    125 
    126 static inline float cbrt( float x ) { return cbrtf( x ); }
    127 // extern "C" { double cbrt( double ); }
    128 static inline long double cbrt( long double x ) { return cbrtl( x ); }
    129 
    130 static inline float hypot( float x, float y ) { return hypotf( x, y ); }
    131 // extern "C" { double hypot( double, double ); }
    132 static inline long double hypot( long double x, long double y ) { return hypotl( x, y ); }
     94static inline {
     95        float log( float x ) { return logf( x ); }
     96        // extern "C" { double log( double ); }
     97        long double log( long double x ) { return logl( x ); }
     98        float _Complex log( float _Complex x ) { return clogf( x ); }
     99        double _Complex log( double _Complex x ) { return clog( x ); }
     100        long double _Complex log( long double _Complex x ) { return clogl( x ); }
     101
     102        float log2( float x ) { return log2f( x ); }
     103        // extern "C" { double log2( double ); }
     104        long double log2( long double x ) { return log2l( x ); }
     105        // float _Complex log2( float _Complex x ) { return clog2f( x ); }
     106        // double _Complex log2( double _Complex x ) { return clog2( x ); }
     107        // long double _Complex log2( long double _Complex x ) { return clog2l( x ); }
     108
     109        float log10( float x ) { return log10f( x ); }
     110        // extern "C" { double log10( double ); }
     111        long double log10( long double x ) { return log10l( x ); }
     112        // float _Complex log10( float _Complex x ) { return clog10f( x ); }
     113        // double _Complex log10( double _Complex x ) { return clog10( x ); }
     114        // long double _Complex log10( long double _Complex x ) { return clog10l( x ); }
     115
     116        float log1p( float x ) { return log1pf( x ); }
     117        // extern "C" { double log1p( double ); }
     118        long double log1p( long double x ) { return log1pl( x ); }
     119
     120        int ilogb( float x ) { return ilogbf( x ); }
     121        // extern "C" { int ilogb( double ); }
     122        int ilogb( long double x ) { return ilogbl( x ); }
     123
     124        float logb( float x ) { return logbf( x ); }
     125        // extern "C" { double logb( double ); }
     126        long double logb( long double x ) { return logbl( x ); }
     127
     128        float sqrt( float x ) { return sqrtf( x ); }
     129        // extern "C" { double sqrt( double ); }
     130        long double sqrt( long double x ) { return sqrtl( x ); }
     131        float _Complex sqrt( float _Complex x ) { return csqrtf( x ); }
     132        double _Complex sqrt( double _Complex x ) { return csqrt( x ); }
     133        long double _Complex sqrt( long double _Complex x ) { return csqrtl( x ); }
     134
     135        float cbrt( float x ) { return cbrtf( x ); }
     136        // extern "C" { double cbrt( double ); }
     137        long double cbrt( long double x ) { return cbrtl( x ); }
     138
     139        float hypot( float x, float y ) { return hypotf( x, y ); }
     140        // extern "C" { double hypot( double, double ); }
     141        long double hypot( long double x, long double y ) { return hypotl( x, y ); }
     142} // distribution
    133143
    134144//---------------------- Trigonometric ----------------------
    135145
    136 static inline float sin( float x ) { return sinf( x ); }
    137 // extern "C" { double sin( double ); }
    138 static inline long double sin( long double x ) { return sinl( x ); }
    139 static inline float _Complex sin( float _Complex x ) { return csinf( x ); }
    140 static inline double _Complex sin( double _Complex x ) { return csin( x ); }
    141 static inline long double _Complex sin( long double _Complex x ) { return csinl( x ); }
    142 
    143 static inline float cos( float x ) { return cosf( x ); }
    144 // extern "C" { double cos( double ); }
    145 static inline long double cos( long double x ) { return cosl( x ); }
    146 static inline float _Complex cos( float _Complex x ) { return ccosf( x ); }
    147 static inline double _Complex cos( double _Complex x ) { return ccos( x ); }
    148 static inline long double _Complex cos( long double _Complex x ) { return ccosl( x ); }
    149 
    150 static inline float tan( float x ) { return tanf( x ); }
    151 // extern "C" { double tan( double ); }
    152 static inline long double tan( long double x ) { return tanl( x ); }
    153 static inline float _Complex tan( float _Complex x ) { return ctanf( x ); }
    154 static inline double _Complex tan( double _Complex x ) { return ctan( x ); }
    155 static inline long double _Complex tan( long double _Complex x ) { return ctanl( x ); }
    156 
    157 static inline float asin( float x ) { return asinf( x ); }
    158 // extern "C" { double asin( double ); }
    159 static inline long double asin( long double x ) { return asinl( x ); }
    160 static inline float _Complex asin( float _Complex x ) { return casinf( x ); }
    161 static inline double _Complex asin( double _Complex x ) { return casin( x ); }
    162 static inline long double _Complex asin( long double _Complex x ) { return casinl( x ); }
    163 
    164 static inline float acos( float x ) { return acosf( x ); }
    165 // extern "C" { double acos( double ); }
    166 static inline long double acos( long double x ) { return acosl( x ); }
    167 static inline float _Complex acos( float _Complex x ) { return cacosf( x ); }
    168 static inline double _Complex acos( double _Complex x ) { return cacos( x ); }
    169 static inline long double _Complex acos( long double _Complex x ) { return cacosl( x ); }
    170 
    171 static inline float atan( float x ) { return atanf( x ); }
    172 // extern "C" { double atan( double ); }
    173 static inline long double atan( long double x ) { return atanl( x ); }
    174 static inline float _Complex atan( float _Complex x ) { return catanf( x ); }
    175 static inline double _Complex atan( double _Complex x ) { return catan( x ); }
    176 static inline long double _Complex atan( long double _Complex x ) { return catanl( x ); }
    177 
    178 static inline float atan2( float x, float y ) { return atan2f( x, y ); }
    179 // extern "C" { double atan2( double, double ); }
    180 static inline long double atan2( long double x, long double y ) { return atan2l( x, y ); }
    181 
    182 // alternative name for atan2
    183 static inline float atan( float x, float y ) { return atan2f( x, y ); }
    184 static inline double atan( double x, double y ) { return atan2( x, y ); }
    185 static inline long double atan( long double x, long double y ) { return atan2l( x, y ); }
     146static inline {
     147        float sin( float x ) { return sinf( x ); }
     148        // extern "C" { double sin( double ); }
     149        long double sin( long double x ) { return sinl( x ); }
     150        float _Complex sin( float _Complex x ) { return csinf( x ); }
     151        double _Complex sin( double _Complex x ) { return csin( x ); }
     152        long double _Complex sin( long double _Complex x ) { return csinl( x ); }
     153
     154        float cos( float x ) { return cosf( x ); }
     155        // extern "C" { double cos( double ); }
     156        long double cos( long double x ) { return cosl( x ); }
     157        float _Complex cos( float _Complex x ) { return ccosf( x ); }
     158        double _Complex cos( double _Complex x ) { return ccos( x ); }
     159        long double _Complex cos( long double _Complex x ) { return ccosl( x ); }
     160
     161        float tan( float x ) { return tanf( x ); }
     162        // extern "C" { double tan( double ); }
     163        long double tan( long double x ) { return tanl( x ); }
     164        float _Complex tan( float _Complex x ) { return ctanf( x ); }
     165        double _Complex tan( double _Complex x ) { return ctan( x ); }
     166        long double _Complex tan( long double _Complex x ) { return ctanl( x ); }
     167
     168        float asin( float x ) { return asinf( x ); }
     169        // extern "C" { double asin( double ); }
     170        long double asin( long double x ) { return asinl( x ); }
     171        float _Complex asin( float _Complex x ) { return casinf( x ); }
     172        double _Complex asin( double _Complex x ) { return casin( x ); }
     173        long double _Complex asin( long double _Complex x ) { return casinl( x ); }
     174
     175        float acos( float x ) { return acosf( x ); }
     176        // extern "C" { double acos( double ); }
     177        long double acos( long double x ) { return acosl( x ); }
     178        float _Complex acos( float _Complex x ) { return cacosf( x ); }
     179        double _Complex acos( double _Complex x ) { return cacos( x ); }
     180        long double _Complex acos( long double _Complex x ) { return cacosl( x ); }
     181
     182        float atan( float x ) { return atanf( x ); }
     183        // extern "C" { double atan( double ); }
     184        long double atan( long double x ) { return atanl( x ); }
     185        float _Complex atan( float _Complex x ) { return catanf( x ); }
     186        double _Complex atan( double _Complex x ) { return catan( x ); }
     187        long double _Complex atan( long double _Complex x ) { return catanl( x ); }
     188
     189        float atan2( float x, float y ) { return atan2f( x, y ); }
     190        // extern "C" { double atan2( double, double ); }
     191        long double atan2( long double x, long double y ) { return atan2l( x, y ); }
     192
     193        // alternative name for atan2
     194        float atan( float x, float y ) { return atan2f( x, y ); }
     195        double atan( double x, double y ) { return atan2( x, y ); }
     196        long double atan( long double x, long double y ) { return atan2l( x, y ); }
     197} // distribution
    186198
    187199//---------------------- Hyperbolic ----------------------
    188200
    189 static inline float sinh( float x ) { return sinhf( x ); }
    190 // extern "C" { double sinh( double ); }
    191 static inline long double sinh( long double x ) { return sinhl( x ); }
    192 static inline float _Complex sinh( float _Complex x ) { return csinhf( x ); }
    193 static inline double _Complex sinh( double _Complex x ) { return csinh( x ); }
    194 static inline long double _Complex sinh( long double _Complex x ) { return csinhl( x ); }
    195 
    196 static inline float cosh( float x ) { return coshf( x ); }
    197 // extern "C" { double cosh( double ); }
    198 static inline long double cosh( long double x ) { return coshl( x ); }
    199 static inline float _Complex cosh( float _Complex x ) { return ccoshf( x ); }
    200 static inline double _Complex cosh( double _Complex x ) { return ccosh( x ); }
    201 static inline long double _Complex cosh( long double _Complex x ) { return ccoshl( x ); }
    202 
    203 static inline float tanh( float x ) { return tanhf( x ); }
    204 // extern "C" { double tanh( double ); }
    205 static inline long double tanh( long double x ) { return tanhl( x ); }
    206 static inline float _Complex tanh( float _Complex x ) { return ctanhf( x ); }
    207 static inline double _Complex tanh( double _Complex x ) { return ctanh( x ); }
    208 static inline long double _Complex tanh( long double _Complex x ) { return ctanhl( x ); }
    209 
    210 static inline float asinh( float x ) { return asinhf( x ); }
    211 // extern "C" { double asinh( double ); }
    212 static inline long double asinh( long double x ) { return asinhl( x ); }
    213 static inline float _Complex asinh( float _Complex x ) { return casinhf( x ); }
    214 static inline double _Complex asinh( double _Complex x ) { return casinh( x ); }
    215 static inline long double _Complex asinh( long double _Complex x ) { return casinhl( x ); }
    216 
    217 static inline float acosh( float x ) { return acoshf( x ); }
    218 // extern "C" { double acosh( double ); }
    219 static inline long double acosh( long double x ) { return acoshl( x ); }
    220 static inline float _Complex acosh( float _Complex x ) { return cacoshf( x ); }
    221 static inline double _Complex acosh( double _Complex x ) { return cacosh( x ); }
    222 static inline long double _Complex acosh( long double _Complex x ) { return cacoshl( x ); }
    223 
    224 static inline float atanh( float x ) { return atanhf( x ); }
    225 // extern "C" { double atanh( double ); }
    226 static inline long double atanh( long double x ) { return atanhl( x ); }
    227 static inline float _Complex atanh( float _Complex x ) { return catanhf( x ); }
    228 static inline double _Complex atanh( double _Complex x ) { return catanh( x ); }
    229 static inline long double _Complex atanh( long double _Complex x ) { return catanhl( x ); }
     201static inline {
     202        float sinh( float x ) { return sinhf( x ); }
     203        // extern "C" { double sinh( double ); }
     204        long double sinh( long double x ) { return sinhl( x ); }
     205        float _Complex sinh( float _Complex x ) { return csinhf( x ); }
     206        double _Complex sinh( double _Complex x ) { return csinh( x ); }
     207        long double _Complex sinh( long double _Complex x ) { return csinhl( x ); }
     208
     209        float cosh( float x ) { return coshf( x ); }
     210        // extern "C" { double cosh( double ); }
     211        long double cosh( long double x ) { return coshl( x ); }
     212        float _Complex cosh( float _Complex x ) { return ccoshf( x ); }
     213        double _Complex cosh( double _Complex x ) { return ccosh( x ); }
     214        long double _Complex cosh( long double _Complex x ) { return ccoshl( x ); }
     215
     216        float tanh( float x ) { return tanhf( x ); }
     217        // extern "C" { double tanh( double ); }
     218        long double tanh( long double x ) { return tanhl( x ); }
     219        float _Complex tanh( float _Complex x ) { return ctanhf( x ); }
     220        double _Complex tanh( double _Complex x ) { return ctanh( x ); }
     221        long double _Complex tanh( long double _Complex x ) { return ctanhl( x ); }
     222
     223        float asinh( float x ) { return asinhf( x ); }
     224        // extern "C" { double asinh( double ); }
     225        long double asinh( long double x ) { return asinhl( x ); }
     226        float _Complex asinh( float _Complex x ) { return casinhf( x ); }
     227        double _Complex asinh( double _Complex x ) { return casinh( x ); }
     228        long double _Complex asinh( long double _Complex x ) { return casinhl( x ); }
     229
     230        float acosh( float x ) { return acoshf( x ); }
     231        // extern "C" { double acosh( double ); }
     232        long double acosh( long double x ) { return acoshl( x ); }
     233        float _Complex acosh( float _Complex x ) { return cacoshf( x ); }
     234        double _Complex acosh( double _Complex x ) { return cacosh( x ); }
     235        long double _Complex acosh( long double _Complex x ) { return cacoshl( x ); }
     236
     237        float atanh( float x ) { return atanhf( x ); }
     238        // extern "C" { double atanh( double ); }
     239        long double atanh( long double x ) { return atanhl( x ); }
     240        float _Complex atanh( float _Complex x ) { return catanhf( x ); }
     241        double _Complex atanh( double _Complex x ) { return catanh( x ); }
     242        long double _Complex atanh( long double _Complex x ) { return catanhl( x ); }
     243} // distribution
    230244
    231245//---------------------- Error / Gamma ----------------------
    232246
    233 static inline float erf( float x ) { return erff( x ); }
    234 // extern "C" { double erf( double ); }
    235 static inline long double erf( long double x ) { return erfl( x ); }
    236 // float _Complex erf( float _Complex );
    237 // double _Complex erf( double _Complex );
    238 // long double _Complex erf( long double _Complex );
    239 
    240 static inline float erfc( float x ) { return erfcf( x ); }
    241 // extern "C" { double erfc( double ); }
    242 static inline long double erfc( long double x ) { return erfcl( x ); }
    243 // float _Complex erfc( float _Complex );
    244 // double _Complex erfc( double _Complex );
    245 // long double _Complex erfc( long double _Complex );
    246 
    247 static inline float lgamma( float x ) { return lgammaf( x ); }
    248 // extern "C" { double lgamma( double ); }
    249 static inline long double lgamma( long double x ) { return lgammal( x ); }
    250 static inline float lgamma( float x, int * sign ) { return lgammaf_r( x, sign ); }
    251 static inline double lgamma( double x, int * sign ) { return lgamma_r( x, sign ); }
    252 static inline long double lgamma( long double x, int * sign ) { return lgammal_r( x, sign ); }
    253 
    254 static inline float tgamma( float x ) { return tgammaf( x ); }
    255 // extern "C" { double tgamma( double ); }
    256 static inline long double tgamma( long double x ) { return tgammal( x ); }
     247static inline {
     248        float erf( float x ) { return erff( x ); }
     249        // extern "C" { double erf( double ); }
     250        long double erf( long double x ) { return erfl( x ); }
     251        // float _Complex erf( float _Complex );
     252        // double _Complex erf( double _Complex );
     253        // long double _Complex erf( long double _Complex );
     254
     255        float erfc( float x ) { return erfcf( x ); }
     256        // extern "C" { double erfc( double ); }
     257        long double erfc( long double x ) { return erfcl( x ); }
     258        // float _Complex erfc( float _Complex );
     259        // double _Complex erfc( double _Complex );
     260        // long double _Complex erfc( long double _Complex );
     261
     262        float lgamma( float x ) { return lgammaf( x ); }
     263        // extern "C" { double lgamma( double ); }
     264        long double lgamma( long double x ) { return lgammal( x ); }
     265        float lgamma( float x, int * sign ) { return lgammaf_r( x, sign ); }
     266        double lgamma( double x, int * sign ) { return lgamma_r( x, sign ); }
     267        long double lgamma( long double x, int * sign ) { return lgammal_r( x, sign ); }
     268
     269        float tgamma( float x ) { return tgammaf( x ); }
     270        // extern "C" { double tgamma( double ); }
     271        long double tgamma( long double x ) { return tgammal( x ); }
     272} // distribution
    257273
    258274//---------------------- Nearest Integer ----------------------
    259275
    260 static inline float floor( float x ) { return floorf( x ); }
    261 // extern "C" { double floor( double ); }
    262 static inline long double floor( long double x ) { return floorl( x ); }
    263 
    264 static inline float ceil( float x ) { return ceilf( x ); }
    265 // extern "C" { double ceil( double ); }
    266 static inline long double ceil( long double x ) { return ceill( x ); }
    267 
    268 static inline float trunc( float x ) { return truncf( x ); }
    269 // extern "C" { double trunc( double ); }
    270 static inline long double trunc( long double x ) { return truncl( x ); }
    271 
    272 static inline float rint( float x ) { return rintf( x ); }
    273 // extern "C" { double rint( double x ); }
    274 static inline long double rint( long double x ) { return rintl( x ); }
    275 static inline long int rint( float x ) { return lrintf( x ); }
    276 static inline long int rint( double x ) { return lrint( x ); }
    277 static inline long int rint( long double x ) { return lrintl( x ); }
    278 static inline long long int rint( float x ) { return llrintf( x ); }
    279 static inline long long int rint( double x ) { return llrint( x ); }
    280 static inline long long int rint( long double x ) { return llrintl( x ); }
    281 
    282 static inline long int lrint( float x ) { return lrintf( x ); }
    283 // extern "C" { long int lrint( double ); }
    284 static inline long int lrint( long double x ) { return lrintl( x ); }
    285 static inline long long int llrint( float x ) { return llrintf( x ); }
    286 // extern "C" { long long int llrint( double ); }
    287 static inline long long int llrint( long double x ) { return llrintl( x ); }
    288 
    289 static inline float nearbyint( float x ) { return nearbyintf( x ); }
    290 // extern "C" { double nearbyint( double ); }
    291 static inline long double nearbyint( long double x ) { return nearbyintl( x ); }
    292 
    293 static inline float round( float x ) { return roundf( x ); }
    294 // extern "C" { double round( double x ); }
    295 static inline long double round( long double x ) { return roundl( x ); }
    296 static inline long int round( float x ) { return lroundf( x ); }
    297 static inline long int round( double x ) { return lround( x ); }
    298 static inline long int round( long double x ) { return lroundl( x ); }
    299 static inline long long int round( float x ) { return llroundf( x ); }
    300 static inline long long int round( double x ) { return llround( x ); }
    301 static inline long long int round( long double x ) { return llroundl( x ); }
    302 
    303 static inline long int lround( float x ) { return lroundf( x ); }
    304 // extern "C" { long int lround( double ); }
    305 static inline long int lround( long double x ) { return lroundl( x ); }
    306 static inline long long int llround( float x ) { return llroundf( x ); }
    307 // extern "C" { long long int llround( double ); }
    308 static inline long long int llround( long double x ) { return llroundl( x ); }
     276static inline {
     277        signed char floor( signed char n, signed char align ) { return n / align * align; }
     278        unsigned char floor( unsigned char n, unsigned char align ) { return n / align * align; }
     279        short int floor( short int n, short int align ) { return n / align * align; }
     280        unsigned short int floor( unsigned short int n, unsigned short int align ) { return n / align * align; }
     281        int floor( int n, int align ) { return n / align * align; }
     282        unsigned int floor( unsigned int n, unsigned int align ) { return n / align * align; }
     283        long int floor( long int n, long int align ) { return n / align * align; }
     284        unsigned long int floor( unsigned long int n, unsigned long int align ) { return n / align * align; }
     285        long long int floor( long long int n, long long int align ) { return n / align * align; }
     286        unsigned long long int floor( unsigned long long int n, unsigned long long int align ) { return n / align * align; }
     287
     288        // forall( otype T | { T ?/?( T, T ); T ?*?( T, T ); } )
     289        // T floor( T n, T align ) { return n / align * align; }
     290
      291        signed char ceiling_div( signed char n, signed char align ) { return (n + (align - 1)) / align; }
     292        unsigned char ceiling_div( unsigned char n, unsigned char align ) { return (n + (align - 1)) / align; }
     293        short int ceiling_div( short int n, short int align ) { return (n + (align - 1)) / align; }
     294        unsigned short int ceiling_div( unsigned short int n, unsigned short int align ) { return (n + (align - 1)) / align; }
     295        int ceiling_div( int n, int align ) { return (n + (align - 1)) / align; }
     296        unsigned int ceiling_div( unsigned int n, unsigned int align ) { return (n + (align - 1)) / align; }
     297        long int ceiling_div( long int n, long int align ) { return (n + (align - 1)) / align; }
     298        unsigned long int ceiling_div( unsigned long int n, unsigned long int align ) { return (n + (align - 1)) / align; }
     299        long long int ceiling_div( long long int n, long long int align ) { return (n + (align - 1)) / align; }
     300        unsigned long long int ceiling_div( unsigned long long int n, unsigned long long int align ) { return (n + (align - 1)) / align; }
     301
     302        // forall( otype T | { T ?+?( T, T ); T ?-?( T, T ); T ?%?( T, T ); } )
      303        // T ceiling_div( T n, T align ) { verify( is_pow2( align ) ); return (n + (align - 1)) / align; }
     304       
      305        // gcc notices the div/mod pair and keeps both results, so only one division is emitted.
     306        signed char ceiling( signed char n, signed char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     307        unsigned char ceiling( unsigned char n, unsigned char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     308        short int ceiling( short int n, short int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     309        unsigned short int ceiling( unsigned short int n, unsigned short int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     310        int ceiling( int n, int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     311        unsigned int ceiling( unsigned int n, unsigned int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     312        long int ceiling( long int n, long int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
      313        unsigned long int ceiling( unsigned long int n, unsigned long int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     314        long long int ceiling( long long int n, long long int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     315        unsigned long long int ceiling( unsigned long long int n, unsigned long long int align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     316
     317        // forall( otype T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T ); T ?/?( T, T ); } )
      318        // T ceiling( T n, T align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); }
     319
     320        float floor( float x ) { return floorf( x ); }
     321        // extern "C" { double floor( double ); }
     322        long double floor( long double x ) { return floorl( x ); }
     323
     324        float ceil( float x ) { return ceilf( x ); }
     325        // extern "C" { double ceil( double ); }
     326        long double ceil( long double x ) { return ceill( x ); }
     327
     328        float trunc( float x ) { return truncf( x ); }
     329        // extern "C" { double trunc( double ); }
     330        long double trunc( long double x ) { return truncl( x ); }
     331
     332        float rint( float x ) { return rintf( x ); }
     333        // extern "C" { double rint( double x ); }
     334        long double rint( long double x ) { return rintl( x ); }
     335        long int rint( float x ) { return lrintf( x ); }
     336        long int rint( double x ) { return lrint( x ); }
     337        long int rint( long double x ) { return lrintl( x ); }
     338        long long int rint( float x ) { return llrintf( x ); }
     339        long long int rint( double x ) { return llrint( x ); }
     340        long long int rint( long double x ) { return llrintl( x ); }
     341
     342        long int lrint( float x ) { return lrintf( x ); }
     343        // extern "C" { long int lrint( double ); }
     344        long int lrint( long double x ) { return lrintl( x ); }
     345        long long int llrint( float x ) { return llrintf( x ); }
     346        // extern "C" { long long int llrint( double ); }
     347        long long int llrint( long double x ) { return llrintl( x ); }
     348
     349        float nearbyint( float x ) { return nearbyintf( x ); }
     350        // extern "C" { double nearbyint( double ); }
     351        long double nearbyint( long double x ) { return nearbyintl( x ); }
     352
     353        float round( float x ) { return roundf( x ); }
     354        // extern "C" { double round( double x ); }
     355        long double round( long double x ) { return roundl( x ); }
     356        long int round( float x ) { return lroundf( x ); }
     357        long int round( double x ) { return lround( x ); }
     358        long int round( long double x ) { return lroundl( x ); }
     359        long long int round( float x ) { return llroundf( x ); }
     360        long long int round( double x ) { return llround( x ); }
     361        long long int round( long double x ) { return llroundl( x ); }
     362
     363        long int lround( float x ) { return lroundf( x ); }
     364        // extern "C" { long int lround( double ); }
     365        long int lround( long double x ) { return lroundl( x ); }
     366        long long int llround( float x ) { return llroundf( x ); }
     367        // extern "C" { long long int llround( double ); }
     368        long long int llround( long double x ) { return llroundl( x ); }
     369} // distribution
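
For illustration, a sketch of the integral alignment helpers defined above (hypothetical values): floor rounds down to a multiple of align, ceiling_div counts align-sized chunks, and ceiling rounds up by biasing the operand and reusing floor.

        int a = floor( 13, 4 );                 // 12 : largest multiple of 4 <= 13
        int b = ceiling_div( 13, 4 );           //  4 : chunks of size 4 needed to cover 13
        int c = ceiling( 13, 4 );               // 16 : smallest multiple of 4 >= 13
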
    309370
    310371//---------------------- Manipulation ----------------------
    311372
    312 static inline float copysign( float x, float y ) { return copysignf( x, y ); }
    313 // extern "C" { double copysign( double, double ); }
    314 static inline long double copysign( long double x, long double y ) { return copysignl( x, y ); }
    315 
    316 static inline float frexp( float x, int * ip ) { return frexpf( x, ip ); }
    317 // extern "C" { double frexp( double, int * ); }
    318 static inline long double frexp( long double x, int * ip ) { return frexpl( x, ip ); }
    319 
    320 static inline float ldexp( float x, int exp2 ) { return ldexpf( x, exp2 ); }
    321 // extern "C" { double ldexp( double, int ); }
    322 static inline long double ldexp( long double x, int exp2 ) { return ldexpl( x, exp2 ); }
    323 
    324 static inline [ float, float ] modf( float x ) { float i; x = modff( x, &i ); return [ i, x ]; }
    325 static inline float modf( float x, float * i ) { return modff( x, i ); }
    326 static inline [ double, double ] modf( double x ) { double i; x = modf( x, &i ); return [ i, x ]; }
    327 // extern "C" { double modf( double, double * ); }
    328 static inline [ long double, long double ] modf( long double x ) { long double i; x = modfl( x, &i ); return [ i, x ]; }
    329 static inline long double modf( long double x, long double * i ) { return modfl( x, i ); }
    330 
    331 static inline float nextafter( float x, float y ) { return nextafterf( x, y ); }
    332 // extern "C" { double nextafter( double, double ); }
    333 static inline long double nextafter( long double x, long double y ) { return nextafterl( x, y ); }
    334 
    335 static inline float nexttoward( float x, long double y ) { return nexttowardf( x, y ); }
    336 // extern "C" { double nexttoward( double, long double ); }
    337 static inline long double nexttoward( long double x, long double y ) { return nexttowardl( x, y ); }
    338 
    339 static inline float scalbn( float x, int exp ) { return scalbnf( x, exp ); }
    340 // extern "C" { double scalbn( double, int ); }
    341 static inline long double scalbn( long double x, int exp ) { return scalbnl( x, exp ); }
    342 static inline float scalbn( float x, long int exp ) { return scalblnf( x, exp ); }
    343 static inline double scalbn( double x, long int exp ) { return scalbln( x, exp ); }
    344 static inline long double scalbn( long double x, long int exp ) { return scalblnl( x, exp ); }
    345 
    346 static inline float scalbln( float x, long int exp ) { return scalblnf( x, exp ); }
    347 // extern "C" { double scalbln( double, long int ); }
    348 static inline long double scalbln( long double x, long int exp ) { return scalblnl( x, exp ); }
     373static inline {
     374        float copysign( float x, float y ) { return copysignf( x, y ); }
     375        // extern "C" { double copysign( double, double ); }
     376        long double copysign( long double x, long double y ) { return copysignl( x, y ); }
     377
     378        float frexp( float x, int * ip ) { return frexpf( x, ip ); }
     379        // extern "C" { double frexp( double, int * ); }
     380        long double frexp( long double x, int * ip ) { return frexpl( x, ip ); }
     381
     382        float ldexp( float x, int exp2 ) { return ldexpf( x, exp2 ); }
     383        // extern "C" { double ldexp( double, int ); }
     384        long double ldexp( long double x, int exp2 ) { return ldexpl( x, exp2 ); }
     385
     386        [ float, float ] modf( float x ) { float i; x = modff( x, &i ); return [ i, x ]; }
     387        float modf( float x, float * i ) { return modff( x, i ); }
     388        [ double, double ] modf( double x ) { double i; x = modf( x, &i ); return [ i, x ]; }
     389        // extern "C" { double modf( double, double * ); }
     390        [ long double, long double ] modf( long double x ) { long double i; x = modfl( x, &i ); return [ i, x ]; }
     391        long double modf( long double x, long double * i ) { return modfl( x, i ); }
     392
     393        float nextafter( float x, float y ) { return nextafterf( x, y ); }
     394        // extern "C" { double nextafter( double, double ); }
     395        long double nextafter( long double x, long double y ) { return nextafterl( x, y ); }
     396
     397        float nexttoward( float x, long double y ) { return nexttowardf( x, y ); }
     398        // extern "C" { double nexttoward( double, long double ); }
     399        long double nexttoward( long double x, long double y ) { return nexttowardl( x, y ); }
     400
     401        float scalbn( float x, int exp ) { return scalbnf( x, exp ); }
     402        // extern "C" { double scalbn( double, int ); }
     403        long double scalbn( long double x, int exp ) { return scalbnl( x, exp ); }
     404        float scalbn( float x, long int exp ) { return scalblnf( x, exp ); }
     405        double scalbn( double x, long int exp ) { return scalbln( x, exp ); }
     406        long double scalbn( long double x, long int exp ) { return scalblnl( x, exp ); }
     407
     408        float scalbln( float x, long int exp ) { return scalblnf( x, exp ); }
     409        // extern "C" { double scalbln( double, long int ); }
     410        long double scalbln( long double x, long int exp ) { return scalblnl( x, exp ); }
     411} // distribution
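
For illustration, a sketch contrasting the tuple-returning modf with the C pointer form (caller code assumed, not part of this changeset):

        double ip, fp;
        [ ip, fp ] = modf( 3.75 );              // CFA multiple return: ip == 3.0, fp == 0.75
        double i2;
        double f2 = modf( 3.75, &i2 );          // C form: same results through the pointer
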
    349412
    350413//---------------------------------------
    351414
    352 #include "common.hfa"
    353 
    354 //---------------------------------------
    355 
    356 forall( otype T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T );T ?*?( T, T ); } )
    357 T lerp( T x, T y, T a ) { return x * ((T){1} - a) + y * a; }
    358 
    359 forall( otype T | { void ?{}( T &, zero_t ); void ?{}( T &, one_t ); int ?<?( T, T ); } )
    360 T step( T edge, T x ) { return x < edge ? (T){0} : (T){1}; }
    361 
    362 forall( otype T | { void ?{}( T &, int ); T clamp( T, T, T ); T ?-?( T, T ); T ?*?( T, T ); T ?/?( T, T ); } )
    363 T smoothstep( T edge0, T edge1, T x ) { T t = clamp( (x - edge0) / (edge1 - edge0), (T){0}, (T){1} ); return t * t * ((T){3} - (T){2} * t); }
     415static inline {
     416        forall( otype T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T );T ?*?( T, T ); } )
     417        T lerp( T x, T y, T a ) { return x * ((T){1} - a) + y * a; }
     418
     419        forall( otype T | { void ?{}( T &, zero_t ); void ?{}( T &, one_t ); int ?<?( T, T ); } )
     420        T step( T edge, T x ) { return x < edge ? (T){0} : (T){1}; }
     421
     422        forall( otype T | { void ?{}( T &, int ); T clamp( T, T, T ); T ?-?( T, T ); T ?*?( T, T ); T ?/?( T, T ); } )
     423        T smoothstep( T edge0, T edge1, T x ) { T t = clamp( (x - edge0) / (edge1 - edge0), (T){0}, (T){1} ); return t * t * ((T){3} - (T){2} * t); }
     424} // distribution
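
For illustration, a sketch of the generic interpolation routines above instantiated at float (values hypothetical); the semantics match the usual GLSL definitions:

        float m = lerp( 0.0f, 10.0f, 0.25f );     // 2.5 : linear blend x*(1 - a) + y*a
        float s = step( 5.0f, 3.0f );             // 0.0 : x below edge
        float t = smoothstep( 0.0f, 1.0f, 0.5f ); // 0.5 : Hermite easing t*t*(3 - 2*t)
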
    364425
    365426// Local Variables: //
  • libcfa/src/startup.cfa

    r3c64c668 r58fe85a  
    1414//
    1515
    16 #include <time.h>                                                                               // tzset
     16#include <time.h>                // tzset
     17#include <locale.h>        // setlocale
    1718#include "startup.hfa"
    1819
     
    2122    void __cfaabi_appready_startup( void ) {
    2223                tzset();                                                                                // initialize time global variables
     24                setlocale(LC_NUMERIC, "");
    2325                #ifdef __CFA_DEBUG__
    2426                extern void heapAppStart();
     
    4143struct __spinlock_t;
    4244extern "C" {
    43         void __cfaabi_dbg_record(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {}
     45        void __cfaabi_dbg_record_lock(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {}
    4446}
    4547
  • libcfa/src/stdhdr/assert.h

    r3c64c668 r58fe85a  
    3333        #define verify(x) assert(x)
    3434        #define verifyf(x, ...) assertf(x, __VA_ARGS__)
     35        #define verifyfail(...)
    3536        #define __CFA_WITH_VERIFY__
    3637#else
    3738        #define verify(x)
    3839        #define verifyf(x, ...)
     40        #define verifyfail(...)
    3941#endif
    4042
  • libcfa/src/stdhdr/malloc.h

    r3c64c668 r58fe85a  
    1010// Created On       : Thu Jul 20 15:58:16 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Aug 11 09:06:31 2018
    13 // Update Count     : 10
     12// Last Modified On : Wed May 27 14:13:14 2020
     13// Update Count     : 18
    1414//
    15 
    16 
    17 size_t default_mmap_start();                                                    // CFA extras
    18 size_t default_heap_expansion();
    19 
    20 bool traceHeap();
    21 bool traceHeapOn();
    22 bool traceHeapOff();
    23 
    24 bool traceHeapTerm();
    25 bool traceHeapTermOn();
    26 bool traceHeapTermOff();
    27 
    28 bool checkFree();
    29 bool checkFreeOn();
    30 bool checkFreeOff();
    31 
    32 extern "C" {
    33 size_t malloc_alignment( void * );
    34 bool malloc_zero_fill( void * );
    35 int malloc_stats_fd( int fd );
    36 void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    37 } // extern "C"
    3815
    3916extern "C" {
    4017#include_next <malloc.h>                                                                // has internal check for multiple expansion
    4118} // extern "C"
     19
     20#include <heap.hfa>
    4221
    4322// Local Variables: //
  • libcfa/src/stdlib.cfa

    r3c64c668 r58fe85a  
    1010// Created On       : Thu Jan 28 17:10:29 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 08:27:08 2020
    13 // Update Count     : 486
     12// Last Modified On : Thu Nov 12 07:46:09 2020
     13// Update Count     : 503
    1414//
    1515
     
    2020#define _XOPEN_SOURCE 600                                                               // posix_memalign, *rand48
    2121#include <string.h>                                                                             // memcpy, memset
    22 #include <malloc.h>                                                                             // malloc_usable_size
    2322//#include <math.h>                                                                             // fabsf, fabs, fabsl
    2423#include <complex.h>                                                                    // _Complex_I
     
    2726//---------------------------------------
    2827
    29 forall( dtype T | sized(T) ) {
    30         T * alloc_set( T ptr[], size_t dim, char fill ) {       // realloc array with fill
    31                 size_t olen = malloc_usable_size( ptr );                // current allocation
    32                 void * nptr = (void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
    33                 size_t nlen = malloc_usable_size( nptr );               // new allocation
    34                 if ( nlen > olen ) {                                                    // larger ?
    35                         memset( (char *)nptr + olen, (int)fill, nlen - olen ); // initialize added storage
    36                 } // if
    37                 return (T *)nptr;
    38         } // alloc_set
    39 
    40         T * alloc_align_set( T ptr[], size_t align, char fill ) { // aligned realloc with fill
    41                 size_t olen = malloc_usable_size( ptr );                // current allocation
    42                 void * nptr = (void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
    43                 // char * nptr = alloc_align( ptr, align );
    44                 size_t nlen = malloc_usable_size( nptr );               // new allocation
    45                 if ( nlen > olen ) {                                                    // larger ?
    46                         memset( (char *)nptr + olen, (int)fill, nlen - olen ); // initialize added storage
    47                 } // if
    48                 return (T *)nptr;
    49         } // alloc_align_set
    50 } // distribution
    51 
    52 // allocation/deallocation and constructor/destructor, non-array types
    53 forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } )
    54 T * new( Params p ) {
    55         return &(*malloc()){ p };                                                       // run constructor
    56 } // new
    57 
    58 forall( dtype T | sized(T) | { void ^?{}( T & ); } )
    59 void delete( T * ptr ) {
    60         if ( ptr ) {                                                                            // ignore null
    61                 ^(*ptr){};                                                                              // run destructor
    62                 free( ptr );
    63         } // if
    64 } // delete
    65 
    66 forall( dtype T, ttype Params | sized(T) | { void ^?{}( T & ); void delete( Params ); } )
    67 void delete( T * ptr, Params rest ) {
    68         if ( ptr ) {                                                                            // ignore null
    69                 ^(*ptr){};                                                                              // run destructor
    70                 free( ptr );
    71         } // if
    72         delete( rest );
    73 } // delete
    74 
    75 
    76 // allocation/deallocation and constructor/destructor, array types
    77 forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } )
    78 T * anew( size_t dim, Params p ) {
     28// Cforall allocation/deallocation and constructor/destructor, array types
     29
     30forall( dtype T | sized(T), ttype TT | { void ?{}( T &, TT ); } )
     31T * anew( size_t dim, TT p ) {
    7932        T * arr = alloc( dim );
    8033        for ( unsigned int i = 0; i < dim; i += 1 ) {
     
    8538
    8639forall( dtype T | sized(T) | { void ^?{}( T & ); } )
    87 void adelete( size_t dim, T arr[] ) {
     40void adelete( T arr[] ) {
    8841        if ( arr ) {                                                                            // ignore null
     42                size_t dim = malloc_size( arr ) / sizeof( T );
    8943                for ( int i = dim - 1; i >= 0; i -= 1 ) {               // reverse allocation order, must be unsigned
    9044                        ^(arr[i]){};                                                            // run destructor
     
    9448} // adelete
    9549
    96 forall( dtype T | sized(T) | { void ^?{}( T & ); }, ttype Params | { void adelete( Params ); } )
    97 void adelete( size_t dim, T arr[], Params rest ) {
     50forall( dtype T | sized(T) | { void ^?{}( T & ); }, ttype TT | { void adelete( TT ); } )
     51void adelete( T arr[], TT rest ) {
    9852        if ( arr ) {                                                                            // ignore null
     53                size_t dim = malloc_size( arr ) / sizeof( T );
     9954                for ( int i = dim - 1; i >= 0; i -= 1 ) {               // reverse allocation order; index must be signed
    10055                        ^(arr[i]){};                                                            // run destructor
     
    255210extern "C" {                                                                                    // override C version
    256211        void srandom( unsigned int seed ) { srand48( (long int)seed ); }
    257         long int random( void ) { return mrand48(); }
     212        long int random( void ) { return mrand48(); }           // GENERATES POSITIVE AND NEGATIVE VALUES
    258213} // extern "C"
    259214
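
For illustration, a sketch of the revised array interface (struct S and its members are hypothetical): adelete now recovers the dimension with malloc_size, so callers no longer pass dim.

        struct S { int x; };
        void ?{}( S & this, int x ) { this.x = x; }     // constructor
        void ^?{}( S & this ) {}                        // destructor
        S * arr = anew( 10, 42 );               // allocate and construct 10 objects
        adelete( arr );                         // destruct all 10, then free; dimension inferred
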
  • libcfa/src/stdlib.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Thu Jan 28 17:12:35 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 08:27:01 2020
    13 // Update Count     : 401
     12// Last Modified On : Sat Dec 12 13:52:34 2020
     13// Update Count     : 536
    1414//
    1515
    1616#pragma once
    1717
    18 #include "bits/defs.hfa"
    19 #include "bits/align.hfa"
     18#include "bits/defs.hfa"                                                                // OPTIONAL_THREAD
     19#include "bits/align.hfa"                                                               // libAlign
    2020
    2121#include <stdlib.h>                                                                             // *alloc, strto*, ato*
    22 
     22#include <heap.hfa>
     23
      24// Reduce includes by explicitly declaring these routines.
    2325extern "C" {
    24         void * memalign( size_t align, size_t size );           // malloc.h
     26        void * memalign( size_t alignment, size_t size );       // malloc.h
     27        void * pvalloc( size_t size );                                          // malloc.h
    2528        void * memset( void * dest, int fill, size_t size ); // string.h
    2629        void * memcpy( void * dest, const void * src, size_t size ); // string.h
    27     void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap
    2830} // extern "C"
    29 
    30 void * realloc( void * oaddr, size_t nalign, size_t size ); // CFA heap
    3131
    3232//---------------------------------------
     
    3939//---------------------------------------
    4040
     41#include "common.hfa"
     42
     43//---------------------------------------
     44
     45// Macro because of returns
     46#define $ARRAY_ALLOC( allocation, alignment, dim ) \
     47        if ( _Alignof(T) <= libAlign() ) return (T *)(void *)allocation( dim, (size_t)sizeof(T) ); /* C allocation */ \
     48        else return (T *)alignment( _Alignof(T), dim, sizeof(T) )
     49
    4150static inline forall( dtype T | sized(T) ) {
    42         // C dynamic allocation
     51        // CFA safe equivalents, i.e., implicit size specification
    4352
    4453        T * malloc( void ) {
    45                 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc
     54                if ( _Alignof(T) <= libAlign() ) return (T *)(void *)malloc( (size_t)sizeof(T) ); // C allocation
    4655                else return (T *)memalign( _Alignof(T), sizeof(T) );
    4756        } // malloc
    4857
     58        T * aalloc( size_t dim ) {
     59                $ARRAY_ALLOC( aalloc, amemalign, dim );
     60        } // aalloc
     61
    4962        T * calloc( size_t dim ) {
    50                 if ( _Alignof(T) <= libAlign() )return (T *)(void *)calloc( dim, sizeof(T) ); // C calloc
    51                 else return (T *)cmemalign( _Alignof(T), dim, sizeof(T) );
     63                $ARRAY_ALLOC( calloc, cmemalign, dim );
    5264        } // calloc
    5365
     66        T * resize( T * ptr, size_t size ) {                            // CFA resize, eliminate return-type cast
     67                if ( _Alignof(T) <= libAlign() ) return (T *)(void *)resize( (void *)ptr, size ); // CFA resize
     68                else return (T *)(void *)resize( (void *)ptr, _Alignof(T), size ); // CFA resize
     69        } // resize
     70
    5471        T * realloc( T * ptr, size_t size ) {                           // CFA realloc, eliminate return-type cast
    55                 return (T *)(void *)realloc( (void *)ptr, size ); // C realloc
     72                if ( _Alignof(T) <= libAlign() ) return (T *)(void *)realloc( (void *)ptr, size ); // C realloc
     73                else return (T *)(void *)realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc
    5674        } // realloc
    5775
     
    6078        } // memalign
    6179
     80        T * amemalign( size_t align, size_t dim ) {
     81                return (T *)amemalign( align, dim, sizeof(T) ); // CFA amemalign
     82        } // amemalign
     83
    6284        T * cmemalign( size_t align, size_t dim  ) {
    6385                return (T *)cmemalign( align, dim, sizeof(T) ); // CFA cmemalign
     
    7294        } // posix_memalign
    7395
    74         // Cforall dynamic allocation
    75 
    76         T * alloc( void ) {
    77                 return malloc();
    78         } // alloc
    79 
    80         T * alloc( size_t dim ) {
    81                 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)malloc( dim * (size_t)sizeof(T) );
    82                 else return (T *)memalign( _Alignof(T), dim * sizeof(T) );
    83         } // alloc
    84 
    85         T * alloc( T ptr[], size_t dim ) {                                      // realloc
    86                 return (T *)(void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
    87         } // alloc
    88 
    89         T * alloc_set( char fill ) {
    90                 return (T *)memset( (T *)alloc(), (int)fill, sizeof(T) ); // initialize with fill value
    91         } // alloc
    92 
    93         T * alloc_set( T fill ) {
    94                 return (T *)memcpy( (T *)alloc(), &fill, sizeof(T) ); // initialize with fill value
    95         } // alloc
    96 
    97         T * alloc_set( size_t dim, char fill ) {
    98                 return (T *)memset( (T *)alloc( dim ), (int)fill, dim * sizeof(T) ); // initialize with fill value
    99         } // alloc
    100 
    101         T * alloc_set( size_t dim, T fill ) {
    102                 T * r = (T *)alloc( dim );
    103                 for ( i; dim ) { memcpy( &r[i], &fill, sizeof(T) ); } // initialize with fill value
    104                 return r;
    105         } // alloc
    106 
    107         T * alloc_set( size_t dim, const T fill[] ) {
    108                 return (T *)memcpy( (T *)alloc( dim ), fill, dim * sizeof(T) ); // initialize with fill value
    109         } // alloc
    110 } // distribution
    111 
    112 forall( dtype T | sized(T) ) {
    113         T * alloc_set( T ptr[], size_t dim, char fill );        // realloc array with fill
    114 } // distribution
     96        T * valloc( void ) {
     97                return (T *)valloc( sizeof(T) );                                // C valloc
     98        } // valloc
     99
     100        T * pvalloc( void ) {
     101                return (T *)pvalloc( sizeof(T) );                               // C pvalloc
     102        } // pvalloc
     103} // distribution
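
For illustration, a sketch of the type-safe allocators above (caller code assumed): the element size and any over-alignment are taken from the target pointer type, so no sizeof arguments appear.

        int * ip = malloc();                    // one int; size and alignment inferred
        int * ap = aalloc( 100 );               // 100 ints, uninitialized
        int * cp = calloc( 100 );               // 100 ints, zero-filled
        ip = realloc( ip, 2 * sizeof(int) );    // alignment-aware reallocation, size in bytes
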
     104
     105/*
      106        FIX ME : fix alloc interface after Ticket Number 214 is resolved, define and add union to S_fill. Then, modify postfix-fill functions to support T * with nmemb, char, and T object of any size. Finally, change alloc_internal.
     107        Or, just follow the instructions below for that.
     108
      109        1. Replace the current forall-block that contains definitions of S_fill and S_realloc with the following:
     110                forall( dtype T | sized(T) ) {
     111                        union  U_fill           { char c; T * a; T t; };
     112                        struct S_fill           { char tag; U_fill(T) fill; };
     113                        struct S_realloc        { inline T *; };
     114                }
     115
      116        2. Replace all current postfix-fill functions with the following for the updated S_fill:
     117                S_fill(T) ?`fill( char a )                                      { S_fill(T) ret = {'c'}; ret.fill.c = a; return ret; }
     118                S_fill(T) ?`fill( T    a )                                      { S_fill(T) ret = {'t'}; memcpy(&ret.fill.t, &a, sizeof(T)); return ret; }
     119                S_fill(T) ?`fill( T    a[], size_t nmemb )      { S_fill(T) ret = {'a', nmemb}; ret.fill.a = a; return ret; }
     120
      121        3. Replace the $alloc_internal function which is outside the ttype forall-block with the following function:
     122                T * $alloc_internal( void * Resize, T * Realloc, size_t Align, size_t Dim, S_fill(T) Fill) {
     123                        T * ptr = NULL;
     124                        size_t size = sizeof(T);
     125                        size_t copy_end = 0;
     126
     127                        if(Resize) {
      128                                ptr = (T*) (void *) resize( (void *)Resize, Align, Dim * size );
     129                        } else if (Realloc) {
     130                                if (Fill.tag != '0') copy_end = min(malloc_size( Realloc ), Dim * size);
      131                                ptr = (T*) (void *) realloc( (void *)Realloc, Align, Dim * size );
     132                        } else {
     133                                ptr = (T*) (void *) memalign( Align, Dim * size );
     134                        }
     135
     136                        if(Fill.tag == 'c') {
     137                                memset( (char *)ptr + copy_end, (int)Fill.fill.c, Dim * size - copy_end );
     138                        } else if(Fill.tag == 't') {
     139                                for ( int i = copy_end; i <= Dim * size - size ; i += size ) {
     140                                        memcpy( (char *)ptr + i, &Fill.fill.t, size );
     141                                }
     142                        } else if(Fill.tag == 'a') {
     143                                memcpy( (char *)ptr + copy_end, Fill.fill.a, min(Dim * size - copy_end, size * Fill.nmemb) );
     144                        }
     145
     146                        return ptr;
     147                } // $alloc_internal
     148*/
     149
     150typedef struct S_align                  { inline size_t;  } T_align;
     151typedef struct S_resize                 { inline void *;  }     T_resize;
     152
     153forall( dtype T ) {
     154        struct S_fill           { char tag; char c; size_t size; T * at; char t[50]; };
     155        struct S_realloc        { inline T *; };
     156}
     157
     158static inline T_align   ?`align   ( size_t a )  { return (T_align){a}; }
     159static inline T_resize  ?`resize  ( void * a )  { return (T_resize){a}; }
    115160
    116161static inline forall( dtype T | sized(T) ) {
    117         T * alloc_align( size_t align ) {
    118                 return (T *)memalign( align, sizeof(T) );
    119         } // alloc_align
    120 
    121         T * alloc_align( size_t align, size_t dim ) {
    122                 return (T *)memalign( align, dim * sizeof(T) );
    123         } // alloc_align
    124 
    125         T * alloc_align( T ptr[], size_t align ) {                      // aligned realloc array
    126                 return (T *)(void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
    127         } // alloc_align
    128 
    129         T * alloc_align( T ptr[], size_t align, size_t dim ) { // aligned realloc array
    130                 return (T *)(void *)realloc( (void *)ptr, align, dim * sizeof(T) ); // CFA realloc
    131         } // alloc_align
    132 
    133         T * alloc_align_set( size_t align, char fill ) {
    134                 return (T *)memset( (T *)alloc_align( align ), (int)fill, sizeof(T) ); // initialize with fill value
    135         } // alloc_align
    136 
    137         T * alloc_align_set( size_t align, T fill ) {
    138                 return (T *)memcpy( (T *)alloc_align( align ), &fill, sizeof(T) ); // initialize with fill value
    139         } // alloc_align
    140 
    141         T * alloc_align_set( size_t align, size_t dim, char fill ) {
    142                 return (T *)memset( (T *)alloc_align( align, dim ), (int)fill, dim * sizeof(T) ); // initialize with fill value
    143         } // alloc_align
    144 
    145         T * alloc_align_set( size_t align, size_t dim, T fill ) {
    146                 T * r = (T *)alloc_align( align, dim );
    147                 for ( i; dim ) { memcpy( &r[i], &fill, sizeof(T) ); } // initialize with fill value
    148                 return r;
    149         } // alloc_align
    150 
    151         T * alloc_align_set( size_t align, size_t dim, const T fill[] ) {
    152                 return (T *)memcpy( (T *)alloc_align( align, dim ), fill, dim * sizeof(T) );
    153         } // alloc_align
    154 } // distribution
    155 
    156 forall( dtype T | sized(T) ) {
    157         T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); // aligned realloc array with fill
    158 } // distribution
     162        S_fill(T) ?`fill ( T t ) {
     163                S_fill(T) ret = { 't' };
     164                size_t size = sizeof(T);
     165                if(size > sizeof(ret.t)) { printf("ERROR: const object of size greater than 50 bytes given for dynamic memory fill\n"); exit(1); }
     166                memcpy( &ret.t, &t, size );
     167                return ret;
     168        }
     169        S_fill(T)               ?`fill ( char c )                               { return (S_fill(T)){ 'c', c }; }
     170        S_fill(T)               ?`fill ( T * a )                                { return (S_fill(T)){ 'T', '0', 0, a }; }
     171        S_fill(T)               ?`fill ( T a[], size_t nmemb )  { return (S_fill(T)){ 'a', '0', nmemb * sizeof(T), a }; }
     172
     173        S_realloc(T)    ?`realloc ( T * a )                             { return (S_realloc(T)){a}; }
     174
     175        T * $alloc_internal( void * Resize, T * Realloc, size_t Align, size_t Dim, S_fill(T) Fill) {
     176                T * ptr = NULL;
     177                size_t size = sizeof(T);
     178                size_t copy_end = 0;
     179
     180                if ( Resize ) {
     181                        ptr = (T*) (void *) resize( (void *)Resize, Align, Dim * size );
     182                } else if ( Realloc ) {
     183                        if (Fill.tag != '0') copy_end = min(malloc_size( Realloc ), Dim * size);
     184                        ptr = (T*) (void *) realloc( (void *)Realloc, Align, Dim * size );
     185                } else {
     186                        ptr = (T*) (void *) memalign( Align, Dim * size );
     187                }
     188
     189                if(Fill.tag == 'c') {
     190                        memset( (char *)ptr + copy_end, (int)Fill.c, Dim * size - copy_end );
     191                } else if(Fill.tag == 't') {
     192                        for ( int i = copy_end; i < Dim * size; i += size ) {
     193                                memcpy( (char *)ptr + i, &Fill.t, size );
     194                        }
     195                } else if(Fill.tag == 'a') {
     196                        memcpy( (char *)ptr + copy_end, Fill.at, min(Dim * size - copy_end, Fill.size) );
     197                } else if(Fill.tag == 'T') {
     198                        for ( int i = copy_end; i < Dim * size; i += size ) {
     199                                memcpy( (char *)ptr + i, Fill.at, size );
     200                        }
     201                }
     202
     203                return ptr;
     204        } // $alloc_internal
     205
     206        forall( ttype TT | { T * $alloc_internal( void *, T *, size_t, size_t, S_fill(T), TT ); } ) {
     207
     208                T * $alloc_internal( void *       , T * Realloc, size_t Align, size_t Dim, S_fill(T) Fill, T_resize Resize, TT rest) {
     209                return $alloc_internal( Resize, (T*)0p, Align, Dim, Fill, rest);
     210                }
     211
     212                T * $alloc_internal( void * Resize, T *        , size_t Align, size_t Dim, S_fill(T) Fill, S_realloc(T) Realloc, TT rest) {
     213                return $alloc_internal( (void*)0p, Realloc, Align, Dim, Fill, rest);
     214                }
     215
     216                T * $alloc_internal( void * Resize, T * Realloc, size_t      , size_t Dim, S_fill(T) Fill, T_align Align, TT rest) {
     217                return $alloc_internal( Resize, Realloc, Align, Dim, Fill, rest);
     218                }
     219
     220                T * $alloc_internal( void * Resize, T * Realloc, size_t Align, size_t Dim, S_fill(T)     , S_fill(T) Fill, TT rest) {
     221                return $alloc_internal( Resize, Realloc, Align, Dim, Fill, rest);
     222                }
     223
     224            T * alloc( TT all ) {
     225                return $alloc_internal( (void*)0p, (T*)0p, (_Alignof(T) > libAlign() ? _Alignof(T) : libAlign()), (size_t)1, (S_fill(T)){'0'}, all);
     226            }
     227
     228            T * alloc( size_t dim, TT all ) {
     229                return $alloc_internal( (void*)0p, (T*)0p, (_Alignof(T) > libAlign() ? _Alignof(T) : libAlign()), dim, (S_fill(T)){'0'}, all);
     230            }
     231
     232        } // distribution TT
     233} // distribution T
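
For illustration, one reading of the tagged-argument alloc interface above (usage assumed, not part of this changeset): each postfix-tagged argument is peeled off by the matching $alloc_internal overload, so the tags may be given in any order.

        int * p = alloc( 10, 7`fill );                  // 10 ints, each initialized to 7
        p = alloc( 20, p`realloc, 7`fill );             // grow to 20; only the added storage is filled
        int * q = alloc( 5, 64`align, '\0'`fill );      // 64-byte aligned, zero-filled
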
    159234
    160235static inline forall( dtype T | sized(T) ) {
    161         // data, non-array types
     236        // CFA safe initialization/copy, i.e., implicit size specification, non-array types
    162237        T * memset( T * dest, char fill ) {
    163238                return (T *)memset( dest, fill, sizeof(T) );
     
    167242                return (T *)memcpy( dest, src, sizeof(T) );
    168243        } // memcpy
    169 } // distribution
    170 
    171 static inline forall( dtype T | sized(T) ) {
    172         // data, array types
     244
     245        // CFA safe initialization/copy, i.e., implicit size specification, array types
    173246        T * amemset( T dest[], char fill, size_t dim ) {
    174247                return (T *)(void *)memset( dest, fill, dim * sizeof(T) ); // C memset
     
    180253} // distribution
    181254
    182 // allocation/deallocation and constructor/destructor, non-array types
    183 forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * new( Params p );
    184 forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void delete( T * ptr );
    185 forall( dtype T, ttype Params | sized(T) | { void ^?{}( T & ); void delete( Params ); } ) void delete( T * ptr, Params rest );
    186 
    187 // allocation/deallocation and constructor/destructor, array types
    188 forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * anew( size_t dim, Params p );
    189 forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( size_t dim, T arr[] );
    190 forall( dtype T | sized(T) | { void ^?{}( T & ); }, ttype Params | { void adelete( Params ); } ) void adelete( size_t dim, T arr[], Params rest );
     255// CFA deallocation for multiple objects
     256static inline forall( dtype T )                                                 // FIX ME, problems with 0p in list
     257void free( T * ptr ) {
     258        free( (void *)ptr );                                                            // C free
     259} // free
     260static inline forall( dtype T, ttype TT | { void free( TT ); } )
     261void free( T * ptr, TT rest ) {
     262        free( ptr );
     263        free( rest );
     264} // free
     265
     266// CFA allocation/deallocation and constructor/destructor, non-array types
     267static inline forall( dtype T | sized(T), ttype TT | { void ?{}( T &, TT ); } )
     268T * new( TT p ) {
     269        return &(*(T *)malloc()){ p };                                                  // run constructor
     270} // new
     271
     272static inline forall( dtype T | { void ^?{}( T & ); } )
     273void delete( T * ptr ) {
     274        // special case for 0-sized object => always call destructor
     275        if ( ptr || sizeof(ptr) == 0 ) {                                        // ignore null but not 0-sized objects
     276                ^(*ptr){};                                                                              // run destructor
     277        } // if
     278        free( ptr );                                                                            // always call free
     279} // delete
     280static inline forall( dtype T, ttype TT | { void ^?{}( T & ); void delete( TT ); } )
     281void delete( T * ptr, TT rest ) {
     282        delete( ptr );
     283        delete( rest );
     284} // delete
     285
     286// CFA allocation/deallocation and constructor/destructor, array types
     287forall( dtype T | sized(T), ttype TT | { void ?{}( T &, TT ); } ) T * anew( size_t dim, TT p );
     288forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( T arr[] );
     289forall( dtype T | sized(T) | { void ^?{}( T & ); }, ttype TT | { void adelete( TT ); } ) void adelete( T arr[], TT rest );
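
For illustration, a sketch of the grouped deallocation and construction-aware delete declared above (S is a hypothetical type with a one-argument constructor and a destructor):

        int * a = malloc(), * b = malloc(), * c = malloc();
        free( a, b, c );                        // variadic free: one call frees several pointers
        S * s = new( 42 );                      // malloc plus ?{}( S &, int ) constructor
        delete( s );                            // ^?{} destructor, then free
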
    191290
    192291//---------------------------------------
     
    254353extern "C" {                                                                                    // override C version
    255354        void srandom( unsigned int seed );
    256         long int random( void );
     355        long int random( void );                                                        // GENERATES POSITIVE AND NEGATIVE VALUES
     356        // For positive values, use unsigned int, e.g., unsigned int r = random() % 100U;
    257357} // extern "C"
    258358
     
    261361        long int random( long int u ) { if ( u < 0 ) return random( u, 0 ); else return random( 0, u ); } // [0,u)
    262362        unsigned long int random( void ) { return lrand48(); }
     363        unsigned long int random( unsigned long int u ) { return lrand48() % u; } // [0,u)
    263364        unsigned long int random( unsigned long int l, unsigned long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l) + l; } // [l,u)
    264         unsigned long int random( unsigned long int u ) { return lrand48() % u; } // [0,u)
    265365
    266366        char random( void ) { return (unsigned long int)random(); }
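
For illustration, a sketch of the random overloads above (values hypothetical): the C-compatible signed version may return negatives, while the CFA range overloads yield half-open intervals.

        long int r0 = random();                 // mrand48: may be negative
        unsigned long int r1 = random( 100 );   // in [0,100)
        unsigned long int r2 = random( 5, 10 ); // in [5,10)
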
     
    283383//---------------------------------------
    284384
    285 #include "common.hfa"
    286 
    287 //---------------------------------------
    288 
    289 extern bool threading_enabled(void) OPTIONAL_THREAD;
     385extern bool threading_enabled( void ) OPTIONAL_THREAD;
    290386
    291387// Local Variables: //
  • libcfa/src/time.hfa

    r3c64c668 r58fe85a  
    1010// Created On       : Wed Mar 14 23:18:57 2018
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 08:24:32 2020
    13 // Update Count     : 654
     12// Last Modified On : Wed Jun 17 16:13:00 2020
     13// Update Count     : 663
    1414//
    1515
     
    2020
    2121#include <time.h>                                                                               // timespec
    22 extern "C" {
    2322#include <sys/time.h>                                                                   // timeval
    24 }
    2523#include <time_t.hfa>                                                                   // Duration/Time types
    2624
     
    9189        int64_t ?`w( Duration dur ) { return dur.tn / (7LL * 24LL * 60LL * 60LL * TIMEGRAN); }
    9290
     91        double ?`dns( Duration dur ) { return dur.tn; }
     92        double ?`dus( Duration dur ) { return dur.tn / ((double)TIMEGRAN / 1_000_000.); }
     93        double ?`dms( Duration dur ) { return dur.tn / ((double)TIMEGRAN / 1_000.); }
     94        double ?`ds( Duration dur ) { return dur.tn / (double)TIMEGRAN; }
     95        double ?`dm( Duration dur ) { return dur.tn / (60. * TIMEGRAN); }
     96        double ?`dh( Duration dur ) { return dur.tn / (60. * 60. * (double)TIMEGRAN); }
     97        double ?`dd( Duration dur ) { return dur.tn / (24. * 60. * 60. * (double)TIMEGRAN); }
     98        double ?`dw( Duration dur ) { return dur.tn / (7. * 24. * 60. * 60. * (double)TIMEGRAN); }
     99
    93100        Duration max( Duration lhs, Duration rhs ) { return  (lhs.tn < rhs.tn) ? rhs : lhs;}
    94101        Duration min( Duration lhs, Duration rhs ) { return !(rhs.tn < lhs.tn) ? lhs : rhs;}
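
For illustration, a sketch of the new floating-point Duration observers added above (the `s and `ms Duration constructors are assumed from elsewhere in time.hfa):

        Duration d = 2`s + 500`ms;
        double ms = d`dms;                      // 2500.0 : milliseconds as double
        double sec = d`ds;                      //    2.5 : seconds as double
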