Changes in / [33e1c91:929d925]
Files:
- 6 added
- 60 edited
- Jenkins/FullBuild (modified) (1 diff)
- Jenkinsfile (modified) (2 diffs)
- benchmark/io/io_uring.h (modified) (1 diff)
- doc/theses/mubeen_zulfiqar_MMath/.gitignore (modified) (1 diff)
- doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex (modified) (2 diffs)
- doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex (modified) (1 diff)
- libcfa/prelude/defines.hfa.in (modified) (2 diffs)
- libcfa/src/bits/signal.hfa (modified) (1 diff)
- libcfa/src/concurrency/coroutine.cfa (modified) (1 diff)
- libcfa/src/concurrency/io.cfa (modified) (2 diffs)
- libcfa/src/concurrency/io/setup.cfa (modified) (1 diff)
- libcfa/src/concurrency/kernel.cfa (modified) (3 diffs)
- libcfa/src/concurrency/kernel.hfa (modified) (1 diff)
- libcfa/src/concurrency/kernel/startup.cfa (modified) (4 diffs)
- libcfa/src/concurrency/kernel_private.hfa (modified) (4 diffs)
- libcfa/src/concurrency/locks.cfa (modified) (1 diff)
- libcfa/src/concurrency/monitor.cfa (modified) (1 diff)
- libcfa/src/concurrency/mutex.cfa (modified) (1 diff)
- libcfa/src/concurrency/preemption.cfa (modified) (1 diff)
- libcfa/src/concurrency/ready_queue.cfa (modified) (10 diffs)
- libcfa/src/concurrency/ready_subqueue.hfa (modified) (9 diffs)
- libcfa/src/concurrency/thread.cfa (modified) (2 diffs)
- libcfa/src/containers/array.hfa (modified) (3 diffs)
- libcfa/src/device/cpu.cfa (modified) (6 diffs)
- libcfa/src/device/cpu.hfa (modified) (2 diffs)
- libcfa/src/exception.c (modified) (1 diff)
- libcfa/src/stdhdr/pthread.h (added)
- src/AST/Convert.cpp (modified) (1 diff)
- src/AST/Decl.cpp (modified) (1 diff)
- src/AST/Decl.hpp (modified) (1 diff)
- src/AST/Pass.impl.hpp (modified) (5 diffs)
- src/CodeGen/CodeGenerator.cc (modified) (1 diff)
- src/CodeGen/CodeGenerator.h (modified) (1 diff)
- src/Common/PassVisitor.h (modified) (3 diffs)
- src/Common/PassVisitor.impl.h (modified) (17 diffs)
- src/InitTweak/InitTweak.cc (modified) (2 diffs)
- src/Parser/DeclarationNode.cc (modified) (1 diff)
- src/Parser/ExpressionNode.cc (modified) (1 diff)
- src/Parser/ParseNode.h (modified) (1 diff)
- src/Parser/TypedefTable.cc (modified) (2 diffs)
- src/Parser/lex.ll (modified) (3 diffs)
- src/Parser/parser.yy (modified) (7 diffs)
- src/SymTab/Indexer.cc (modified) (3 diffs)
- src/SymTab/Indexer.h (modified) (2 diffs)
- src/SymTab/Validate.cc (modified) (15 diffs)
- src/SynTree/Declaration.h (modified) (1 diff)
- src/SynTree/Expression.h (modified) (1 diff)
- src/SynTree/Mutator.h (modified) (1 diff)
- src/SynTree/SynTree.h (modified) (1 diff)
- src/SynTree/TypeDecl.cc (modified) (1 diff)
- src/SynTree/TypeExpr.cc (modified) (1 diff)
- src/SynTree/Visitor.h (modified) (1 diff)
- tests/.expect/gmp.arm64.txt (added)
- tests/.expect/typedefRedef-ERR1.txt (modified) (1 diff)
- tests/.expect/typedefRedef.txt (modified) (1 diff)
- tests/array-container/.expect/array-md-sbscr-cases.arm64.txt (added)
- tests/array-container/.expect/array-md-sbscr-cases.x86.txt (added)
- tests/array-container/.expect/language-dim-mismatch.txt (added)
- tests/array-container/array-basic.cfa (modified) (3 diffs)
- tests/array-container/array-md-sbscr-cases.cfa (modified) (2 diffs)
- tests/array-container/language-dim-mismatch.cfa (added)
- tests/device/cpu.cfa (modified) (2 diffs)
- tests/literals.cfa (modified) (2 diffs)
- tests/math.cfa (modified) (6 diffs)
- tests/test.py (modified) (1 diff)
- tests/typedefRedef.cfa (modified) (1 diff)
Jenkins/FullBuild
r33e1c91 r929d925 18 18 19 19 parallel ( 20 gcc_8_x86_new: { trigger_build( 'gcc-8', 'x86' ) }, 21 gcc_7_x86_new: { trigger_build( 'gcc-7', 'x86' ) }, 22 gcc_6_x86_new: { trigger_build( 'gcc-6', 'x86' ) }, 23 gcc_9_x64_new: { trigger_build( 'gcc-9', 'x64' ) }, 24 gcc_8_x64_new: { trigger_build( 'gcc-8', 'x64' ) }, 25 gcc_7_x64_new: { trigger_build( 'gcc-7', 'x64' ) }, 26 gcc_6_x64_new: { trigger_build( 'gcc-6', 'x64' ) }, 27 gcc_5_x64_new: { trigger_build( 'gcc-5', 'x64' ) }, 28 clang_x64_new: { trigger_build( 'clang', 'x64' ) }, 20 gcc_08_x86_new: { trigger_build( 'gcc-8', 'x86' ) }, 21 gcc_07_x86_new: { trigger_build( 'gcc-7', 'x86' ) }, 22 gcc_06_x86_new: { trigger_build( 'gcc-6', 'x86' ) }, 23 gcc_10_x64_new: { trigger_build( 'gcc-10', 'x64' ) }, 24 gcc_09_x64_new: { trigger_build( 'gcc-9', 'x64' ) }, 25 gcc_08_x64_new: { trigger_build( 'gcc-8', 'x64' ) }, 26 gcc_07_x64_new: { trigger_build( 'gcc-7', 'x64' ) }, 27 gcc_06_x64_new: { trigger_build( 'gcc-6', 'x64' ) }, 28 clang_x64_new: { trigger_build( 'clang', 'x64' ) }, 29 29 ) 30 30 } -
Jenkinsfile
r33e1c91 r929d925 305 305 BuildSettings(java.util.Collections$UnmodifiableMap param, String branch) { 306 306 switch( param.Compiler ) { 307 case 'gcc-11': 308 this.Compiler = new CC_Desc('gcc-11', 'g++-11', 'gcc-11', '-flto=auto') 309 break 310 case 'gcc-10': 311 this.Compiler = new CC_Desc('gcc-10', 'g++-10', 'gcc-10', '-flto=auto') 312 break 307 313 case 'gcc-9': 308 314 this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto') … … 324 330 break 325 331 case 'clang': 326 this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-9', '-flto=thin -flto-jobs=0') 332 this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-10', '-flto=thin -flto-jobs=0') 327 333 break 328 334 default : -
benchmark/io/io_uring.h
r33e1c91 r929d925 1 1 extern "C" { 2 #ifndef _GNU_SOURCE /* See feature_test_macros(7) */ 3 #define _GNU_SOURCE /* See feature_test_macros(7) */ 4 #endif 5 2 #include <errno.h> 6 3 #include <stdio.h> -
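
The guard removed above reappears as a bare #define _GNU_SOURCE at the top of each translation unit in the diffs that follow. The ordering is what matters: glibc latches its feature set at the first libc include, so a feature-test macro defined afterwards has no effect. A minimal C sketch of the rule this changeset enforces (the sched_getcpu() example is illustrative, not from the changeset):

/* _GNU_SOURCE must be visible before the first libc header; if any libc
 * header sneaks in first, GNU-only prototypes such as sched_getcpu()
 * stay hidden for the rest of the translation unit. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void) {
	printf("running on cpu %d\n", sched_getcpu());
	return 0;
}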
doc/theses/mubeen_zulfiqar_MMath/.gitignore
r33e1c91 r929d925 1 1 # Intermediate Results: 2 out/ 2 build/ 3 3 4 4 # Final Files: -
doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex
r33e1c91 r929d925 35 35 ==================== 36 36 37 \section Performance Metrics of Memory Allocators 37 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 39 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Performance Metrics 40 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 43 \section{Performance Metrics of Memory Allocators} 38 44 39 45 When it comes to memory allocators, there are no set standards of performance. Performance of a memory allocator depends highly on the usage pattern of the application. A memory allocator that is the best performer for a certain application X might be the worst for some other application which has a completely different memory usage pattern compared to application X. It is extremely difficult to make one universally best memory allocator which will outperform every other memory allocator for every usage pattern. So, there is a lack of a set of standard benchmarks that are used to evaluate a memory allocator's performance. 40 46 41 47 If we break down the goals of a memory allocator, there are two basic metrics on which a memory allocator's performance is evaluated. 42 43 1. Memory Overhead 44 2. Speed 45 46 /subsection Memory Overhead 47 Memory overhead is the extra memory that a memory allocator takes from the OS which is not requested by the application. Ideally, an allocator should get just enough memory from the OS to fulfill the application's request and should return this memory to the OS as soon as the application frees it. But allocators retain more memory than what the application has asked for, which causes memory overhead. Memory overhead can happen for various reasons. 48 49 /subsubsection Fragmentation 50 Fragmentation is one of the major reasons behind memory overhead. Fragmentation happens because of situations that are either necessary for the proper functioning of the allocator, such as internal memory management and book-keeping, or are out of the allocator's control, such as the application's usage pattern. 51 52 /subsubsubsection Internal Fragmentation 53 For internal book-keeping, allocators divide raw memory given by the OS into chunks, blocks, or lists that can fulfill the application's requested size. Allocators use memory given by the OS for creating headers, footers etc. to store information about these chunks, blocks, or lists. This increases usage of memory in addition to the memory requested by the application, as the allocators need to store their book-keeping information. This extra usage of memory for the allocator's own book-keeping is called internal fragmentation. Although it causes memory overhead, this overhead is necessary for an allocator's proper functioning. 54 48 \begin{enumerate} 49 \item 50 Memory Overhead 51 \item 52 Speed 53 \end{enumerate} 54 55 \subsection{Memory Overhead} 56 Memory overhead is the extra memory that a memory allocator takes from the OS which is not requested by the application. Ideally, an allocator should get just enough memory from the OS to fulfill the application's request and should return this memory to the OS as soon as the application frees it. But allocators retain more memory than what the application has asked for, which causes memory overhead. Memory overhead can happen for various reasons. 57 58 \subsubsection{Fragmentation} 59 Fragmentation is one of the major reasons behind memory overhead. Fragmentation happens because of situations that are either necessary for the proper functioning of the allocator, such as internal memory management and book-keeping, or are out of the allocator's control, such as the application's usage pattern. 60 61 \paragraph{Internal Fragmentation} 62 For internal book-keeping, allocators divide raw memory given by the OS into chunks, blocks, or lists that can fulfill the application's requested size. Allocators use memory given by the OS for creating headers, footers etc. to store information about these chunks, blocks, or lists. This increases usage of memory in addition to the memory requested by the application, as the allocators need to store their book-keeping information. This extra usage of memory for the allocator's own book-keeping is called internal fragmentation. Although it causes memory overhead, this overhead is necessary for an allocator's proper functioning. 55 63 56 64 *** FIX ME: Insert a figure of internal fragmentation with explanation 57 65 58 /subsubsubsection External Fragmentation 59 External fragmentation is the free bits of memory between or around chunks of memory that are currently in use by the application. Segmentation in memory due to the application's usage pattern causes external fragmentation. The memory which is part of external fragmentation is completely free, as it is neither used by the allocator's internal book-keeping nor by the application. Ideally, an allocator should return a segment of memory back to the OS as soon as the application frees it. But this is not always the case. Allocators get memory from the OS in one of two ways. 60 61 \begin{itemize} 62 \item 63 MMap: an allocator can ask the OS for whole pages in the mmap area. Then, the allocator segments the page internally and fulfills the application's request. 64 \item 65 Heap: an allocator can ask the OS for memory in the heap area using system calls such as sbrk. The heap area grows downwards and shrinks upwards. 66 \begin{itemize} 67 68 If an allocator uses the mmap area, it can only return extra memory back to the OS if the whole page is free, i.e. no chunk on the page is in use by the application. Even if one chunk on the whole page is currently in use by the application, the allocator has to retain the whole page. 69 70 If an allocator uses the heap area, it can only return the contiguous free memory at the end of the heap area that is currently in the allocator's possession, as the heap area shrinks upwards. If there are free bits of memory in between chunks of memory that are currently in use by the application, the allocator cannot return these free bits. 71 72 *** FIX ME: Insert a figure of above scenario with explanation 73 74 Even if the entire heap area is free except one small chunk at the end of the heap area that is being used by the application, the allocator cannot return the free heap area back to the OS, as it is not a contiguous region at the end of the heap area. 75 76 *** FIX ME: Insert a figure of above scenario with explanation 77 78 Such scenarios cause external fragmentation, but it is out of the allocator's control and depends on the application's usage pattern.
79 80 /subsubsection Internal Memory Management 81 Allocators such as je-malloc (FIX ME: insert reference) pro-actively get some memory from the OS and divide it into chunks of certain sizes that can be used in the future to fulfill the application's request. This causes memory overhead, as these chunks are made before the application's request. There is also the possibility that an application may not even request memory of these sizes during its whole life-time. 82 83 *** FIX ME: Insert a figure of above scenario with explanation 84 85 Allocators such as rp-malloc (FIX ME: insert reference) maintain lists or blocks of sized memory segments that are freed by the application for future use. These lists are maintained without any guarantee that the application will ever request these sizes again. 86 87 Such tactics are usually used to gain speed, as the allocator will not have to get raw memory from the OS and manage it at the time of the application's request, but they do cause memory overhead. 88 89 Fragmentation and managed sized chunks of free memory can lead to heap blowup, as the allocator may not be able to use the fragments or sized free chunks of free memory to fulfill the application's requests of other sizes. 90 91 /subsection Speed 92 When it comes to performance evaluation of any piece of software, its runtime is usually the first thing that is evaluated. The same is true for memory allocators but, in the case of memory allocators, speed does not only mean the runtime of the memory allocator's routines; there are other factors too. 93 94 /subsubsection Runtime Speed 95 Low runtime is the main goal of a memory allocator when it comes to proving its speed. Runtime is the time that it takes for a routine of the memory allocator to complete its execution. As mentioned in (FIX ME: reference to routines' list), there are four basic routines that are used in memory allocation. Ideally, each routine of a memory allocator should be fast. Some memory allocator designs use pro-active measures (FIX ME: local reference) to gain speed when allocating some memory to the application. Some memory allocators do memory allocation faster than memory freeing (FIX ME: graph reference) while others show similar speed whether memory is allocated or freed. 96 97 /subsubsection Memory Access Speed 98 Runtime speed is not the only speed metric in memory allocators. The memory that a memory allocator has allocated to the application also needs to be accessible as quickly as possible. The application should be able to read/write allocated memory quickly. The allocation method of a memory allocator may introduce some delays when it comes to memory access speed, which is especially important in concurrent applications. Ideally, a memory allocator should allocate all memory on a cache-line to only one thread and no cache-line should be shared among multiple threads. If a memory allocator allocates memory to multiple threads on the same cache line, then the cache may get invalidated more frequently when two different threads running on two different processors try to read/write the same memory region. On the other hand, if one cache-line is used by only one thread then the cache may get invalidated less frequently. This sharing of one cache-line among multiple threads is called false sharing (FIX ME: cite wasik). 99 100 /subsubsubsection Active False Sharing 101 Active false sharing is the sharing of one cache-line among multiple threads that is caused by the memory allocator. It happens when two threads request memory from the memory allocator and the allocator allocates memory to both of them on the same cache-line. After that, if the threads are running on different processors, which have their own caches, and both threads start reading/writing the allocated memory simultaneously, their caches will start getting invalidated every time the other thread writes something to the memory. This will cause the application to slow down, as the processor has to reload the cache much more frequently. 102 103 *** FIX ME: Insert a figure of above scenario with explanation 104 105 /subsubsubsection Passive False Sharing 106 Passive false sharing is the kind of false sharing which is caused by the application and not the memory allocator. The memory allocator may preserve passive false sharing in the future instead of eradicating it. But passive false sharing is initiated by the application. 107 108 /subsubsubsubsection Program Induced Passive False Sharing 109 Program induced false sharing is completely out of the memory allocator's control and is purely caused by the application. When a thread in the application creates multiple objects in the dynamic area, the allocator may allocate memory for these objects on the same cache-line, as the objects are created by the same thread. Passive false sharing will occur if this thread passes one of these objects to another thread but retains the rest of these objects, or passes some/all of the remaining objects to some third thread(s). Now, one cache-line is shared among multiple threads, but it is caused by the application and not the allocator. It is out of the allocator's control and has a similar performance impact to active false sharing (FIX ME: cite local) if these threads, who are sharing the same cache-line, start reading/writing the given objects simultaneously. 66 \paragraph{External Fragmentation} 67 External fragmentation is the free bits of memory between or around chunks of memory that are currently in use by the application. Segmentation in memory due to the application's usage pattern causes external fragmentation. The memory which is part of external fragmentation is completely free, as it is neither used by the allocator's internal book-keeping nor by the application. Ideally, an allocator should return a segment of memory back to the OS as soon as the application frees it. But this is not always the case. Allocators get memory from the OS in one of two ways. 68 69 \begin{itemize} 70 \item 71 MMap: an allocator can ask the OS for whole pages in the mmap area. Then, the allocator segments the page internally and fulfills the application's request. 72 \item 73 Heap: an allocator can ask the OS for memory in the heap area using system calls such as sbrk. The heap area grows downwards and shrinks upwards. 74 \begin{itemize} 75 \item 76 If an allocator uses the mmap area, it can only return extra memory back to the OS if the whole page is free, i.e. no chunk on the page is in use by the application. Even if one chunk on the whole page is currently in use by the application, the allocator has to retain the whole page. 77 \item 78 If an allocator uses the heap area, it can only return the contiguous free memory at the end of the heap area that is currently in the allocator's possession, as the heap area shrinks upwards. If there are free bits of memory in between chunks of memory that are currently in use by the application, the allocator cannot return these free bits.
79 80 *** FIX ME: Insert a figure of above scenario with explanation 81 \item 82 Even if the entire heap area is free except one small chunk at the end of the heap area that is being used by the application, the allocator cannot return the free heap area back to the OS, as it is not a contiguous region at the end of the heap area. 83 84 *** FIX ME: Insert a figure of above scenario with explanation 85 86 \item 87 Such scenarios cause external fragmentation, but it is out of the allocator's control and depends on the application's usage pattern. 88 \end{itemize} 89 \end{itemize} 90 91 \subsubsection{Internal Memory Management} 92 Allocators such as je-malloc (FIX ME: insert reference) pro-actively get some memory from the OS and divide it into chunks of certain sizes that can be used in the future to fulfill the application's request. This causes memory overhead, as these chunks are made before the application's request. There is also the possibility that an application may not even request memory of these sizes during its whole life-time. 93 94 *** FIX ME: Insert a figure of above scenario with explanation 95 96 Allocators such as rp-malloc (FIX ME: insert reference) maintain lists or blocks of sized memory segments that are freed by the application for future use. These lists are maintained without any guarantee that the application will ever request these sizes again. 97 98 Such tactics are usually used to gain speed, as the allocator will not have to get raw memory from the OS and manage it at the time of the application's request, but they do cause memory overhead. 99 100 Fragmentation and managed sized chunks of free memory can lead to heap blowup, as the allocator may not be able to use the fragments or sized free chunks of free memory to fulfill the application's requests of other sizes. 101 102 \subsection{Speed} 103 When it comes to performance evaluation of any piece of software, its runtime is usually the first thing that is evaluated. The same is true for memory allocators but, in the case of memory allocators, speed does not only mean the runtime of the memory allocator's routines; there are other factors too. 104 105 \subsubsection{Runtime Speed} 106 Low runtime is the main goal of a memory allocator when it comes to proving its speed. Runtime is the time that it takes for a routine of the memory allocator to complete its execution. As mentioned in (FIX ME: reference to routines' list), there are four basic routines that are used in memory allocation. Ideally, each routine of a memory allocator should be fast. Some memory allocator designs use pro-active measures (FIX ME: local reference) to gain speed when allocating some memory to the application. Some memory allocators do memory allocation faster than memory freeing (FIX ME: graph reference) while others show similar speed whether memory is allocated or freed. 107 108 \subsubsection{Memory Access Speed} 109 Runtime speed is not the only speed metric in memory allocators. The memory that a memory allocator has allocated to the application also needs to be accessible as quickly as possible. The application should be able to read/write allocated memory quickly. The allocation method of a memory allocator may introduce some delays when it comes to memory access speed, which is especially important in concurrent applications. Ideally, a memory allocator should allocate all memory on a cache-line to only one thread and no cache-line should be shared among multiple threads. If a memory allocator allocates memory to multiple threads on the same cache line, then the cache may get invalidated more frequently when two different threads running on two different processors try to read/write the same memory region. On the other hand, if one cache-line is used by only one thread then the cache may get invalidated less frequently. This sharing of one cache-line among multiple threads is called false sharing (FIX ME: cite wasik). 110 111 \paragraph{Active False Sharing} 112 Active false sharing is the sharing of one cache-line among multiple threads that is caused by the memory allocator. It happens when two threads request memory from the memory allocator and the allocator allocates memory to both of them on the same cache-line. After that, if the threads are running on different processors, which have their own caches, and both threads start reading/writing the allocated memory simultaneously, their caches will start getting invalidated every time the other thread writes something to the memory. This will cause the application to slow down, as the processor has to reload the cache much more frequently. 113 114 *** FIX ME: Insert a figure of above scenario with explanation 115 116 \paragraph{Passive False Sharing} 117 Passive false sharing is the kind of false sharing which is caused by the application and not the memory allocator. The memory allocator may preserve passive false sharing in the future instead of eradicating it. But passive false sharing is initiated by the application. 118 119 \subparagraph{Program Induced Passive False Sharing} 120 Program induced false sharing is completely out of the memory allocator's control and is purely caused by the application. When a thread in the application creates multiple objects in the dynamic area, the allocator may allocate memory for these objects on the same cache-line, as the objects are created by the same thread. Passive false sharing will occur if this thread passes one of these objects to another thread but retains the rest of these objects, or passes some/all of the remaining objects to some third thread(s). Now, one cache-line is shared among multiple threads, but it is caused by the application and not the allocator. It is out of the allocator's control and has a similar performance impact to active false sharing (FIX ME: cite local) if these threads, who are sharing the same cache-line, start reading/writing the given objects simultaneously. 110 121 111 122 *** FIX ME: Insert a figure of above scenario 1 with explanation … … 113 124 *** FIX ME: Insert a figure of above scenario 2 with explanation 114 125 115 /subsubsubsubsection Program Induced Allocator Preserved Passive False Sharing 116 Program induced allocator preserved passive false sharing is another interesting case of passive false sharing. Both the application and the allocator are partially responsible for it. It starts the same as program induced false sharing (FIX ME: cite local). First, an application thread creates multiple dynamic objects on the same cache-line and distributes these objects among multiple threads, causing sharing of one cache-line among multiple threads (program induced passive false sharing). This kind of false sharing occurs when one of these threads, which got an object on the shared cache-line, frees the passed object and then allocates another object, but the allocator returns the same object (on the shared cache-line) that this thread just freed. Although the application caused the false sharing in the first place, to prevent further false sharing the allocator should have returned the new object on some other cache-line which is only shared by the allocating thread. When it comes to performance impact, this passive false sharing will slow down the application just like any other kind of false sharing if the threads sharing the cache-line start reading/writing the objects simultaneously. 117 118 *** FIX ME: Insert a figure of above scenario with explanation 126 \subparagraph{Program Induced Allocator Preserved Passive False Sharing} 127 Program induced allocator preserved passive false sharing is another interesting case of passive false sharing. Both the application and the allocator are partially responsible for it. It starts the same as program induced false sharing (FIX ME: cite local). First, an application thread creates multiple dynamic objects on the same cache-line and distributes these objects among multiple threads, causing sharing of one cache-line among multiple threads (program induced passive false sharing). This kind of false sharing occurs when one of these threads, which got an object on the shared cache-line, frees the passed object and then allocates another object, but the allocator returns the same object (on the shared cache-line) that this thread just freed. Although the application caused the false sharing in the first place, to prevent further false sharing the allocator should have returned the new object on some other cache-line which is only shared by the allocating thread. When it comes to performance impact, this passive false sharing will slow down the application just like any other kind of false sharing if the threads sharing the cache-line start reading/writing the objects simultaneously. 128 129 130 *** FIX ME: Insert a figure of above scenario with explanation 131 132 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 133 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 134 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Micro Benchmark Suite 135 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 136 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 137 138 \section{Micro Benchmark Suite} 139 The aim of the micro benchmark suite is to create a set of programs that can evaluate a memory allocator based on the performance metrics described in (FIX ME: local cite). These programs can be taken as a standard to benchmark an allocator's basic goals. These programs give details of an allocator's memory overhead and speed under a certain allocation pattern. The speed of the allocator is benchmarked in different ways. Similarly, false sharing happening in an allocator is also measured in multiple ways. These benchmarks evaluate the allocator under a certain allocation pattern which is configurable and can be changed using a few knobs to observe an allocator's performance under a desired allocation pattern. 140 141 The micro benchmark suite benchmarks an allocator's performance by allocating dynamic objects and, then, measuring specific metrics. The benchmark suite evaluates an allocator with a certain allocation pattern. Benchmarks have different knobs that can be used to change the allocation pattern and evaluate an allocator under desired conditions. These can be set by giving command-line arguments to the benchmark on execution. 142 143 Following is the list of available knobs. 144 145 *** FIX ME: Add knobs items after finalize 146 147 \subsection{Memory Benchmark} 148 The memory benchmark measures the memory overhead of an allocator. It allocates a number of dynamic objects. Then, by reading /proc/self/maps, it gets the total memory that the allocator has requested from the OS. Finally, it calculates the memory overhead by taking the difference between the memory the allocator has requested from the OS and the memory that the program has allocated. 149 *** FIX ME: Insert a figure of above benchmark with description 150 151 \subsubsection{Relevant Knobs} 152 *** FIX ME: Insert Relevant Knobs 153 154 \subsection{Speed Benchmark} 155 The speed benchmark calculates the runtime speed of an allocator's functions (FIX ME: cite allocator routines). It does so by measuring the runtime of allocator routines in two different ways. 156 157 \subsubsection{Speed Time} 158 The time method does a certain amount of work by calling each routine of the allocator (FIX ME: cite allocator routines) a specific number of times. It calculates the total time it took to perform this workload. Then, it divides the time it took by the workload and calculates the average time taken by the allocator's routine. 159 *** FIX ME: Insert a figure of above benchmark with description 160 161 \paragraph{Relevant Knobs} 162 *** FIX ME: Insert Relevant Knobs 163 164 \subsubsection{Speed Workload} 165 The workload method uses the opposite approach. It calls the allocator's routines for a specific amount of time and measures how much work was done during that time. Then, similar to the time method, it divides the time by the workload done during that time and calculates the average time taken by the allocator's routine. 166 *** FIX ME: Insert a figure of above benchmark with description 167 168 \paragraph{Relevant Knobs} 169 *** FIX ME: Insert Relevant Knobs 170 171 \subsection{Cache Scratch} 172 The Cache Scratch benchmark measures program induced allocator preserved passive false sharing (FIX ME CITE) in an allocator. It does so in two ways. 173 174 \subsubsection{Cache Scratch Time} 175 Cache Scratch Time allocates dynamic objects. Then, it benchmarks program induced allocator preserved passive false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects. 176 *** FIX ME: Insert a figure of above benchmark with description 177 178 \paragraph{Relevant Knobs} 179 *** FIX ME: Insert Relevant Knobs 180 181 \subsubsection{Cache Scratch Layout} 182 Cache Scratch Layout also allocates dynamic objects. Then, it benchmarks program induced allocator preserved passive false sharing (FIX ME CITE) by using heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line. 183 *** FIX ME: Insert a figure of above benchmark with description 184 185 \paragraph{Relevant Knobs} 186 *** FIX ME: Insert Relevant Knobs 187 188 \subsection{Cache Thrash} 189 The Cache Thrash benchmark measures allocator induced passive false sharing (FIX ME CITE) in an allocator. It also does so in two ways. 190 191 \subsubsection{Cache Thrash Time} 192 Cache Thrash Time allocates dynamic objects. Then, it benchmarks allocator induced false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects. 193 *** FIX ME: Insert a figure of above benchmark with description 194 195 \paragraph{Relevant Knobs} 196 *** FIX ME: Insert Relevant Knobs 197 198 \subsubsection{Cache Thrash Layout} 199 Cache Thrash Layout also allocates dynamic objects. Then, it benchmarks allocator induced false sharing (FIX ME CITE) by using heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line. 200 *** FIX ME: Insert a figure of above benchmark with description 201 202 \paragraph{Relevant Knobs} 203 *** FIX ME: Insert Relevant Knobs 204 205 \section{Results} 206 *** FIX ME: add configuration details of memory allocators 207 208 \subsection{Memory Benchmark} 209 210 \subsubsection{Relevant Knobs} 211 212 \subsection{Speed Benchmark} 213 214 \subsubsection{Speed Time} 215 216 \paragraph{Relevant Knobs} 217 218 \subsubsection{Speed Workload} 219 220 \paragraph{Relevant Knobs} 221 222 \subsection{Cache Scratch} 223 224 \subsubsection{Cache Scratch Time} 225 226 \paragraph{Relevant Knobs} 227 228 \subsubsection{Cache Scratch Layout} 229 230 \paragraph{Relevant Knobs} 231 232 \subsection{Cache Thrash} 233 234 \subsubsection{Cache Thrash Time} 235 236 \paragraph{Relevant Knobs} 237 238 \subsubsection{Cache Thrash Layout} 239 240 \paragraph{Relevant Knobs} -
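
The false-sharing cases described in this thesis chapter are easy to reproduce outside the suite. The following sketch is not part of the changeset; it assumes a 64-byte cache line and a POSIX system (compile with -pthread), and shows the effect the Cache Thrash and Cache Scratch benchmarks quantify: two threads bumping counters that share one cache line run measurably slower than the same threads bumping padded counters.

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>

#define N 100000000UL

/* both counters on one cache line: a write by either thread invalidates
 * the line in the other thread's cache (false sharing) */
static struct { volatile unsigned long a, b; } shared;

/* same counters, padded onto separate (assumed 64-byte) cache lines */
static struct {
	volatile unsigned long a;
	char pad[64];
	volatile unsigned long b;
} padded;

static void * bump(void * p) {
	volatile unsigned long * c = p;
	for (unsigned long i = 0; i < N; i++) (*c)++;
	return 0;
}

int main(void) {
	pthread_t t1, t2;
	/* time this run, then swap in &padded.a / &padded.b and compare */
	pthread_create(&t1, 0, bump, (void *)&shared.a);
	pthread_create(&t2, 0, bump, (void *)&shared.b);
	pthread_join(t1, 0);
	pthread_join(t2, 0);
	printf("%lu %lu\n", shared.a, shared.b);
	return 0;
}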
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex
r33e1c91 r929d925 165 165 % cfa macros used in the document 166 166 \input{common} 167 %\usepackageinput{common} 167 168 \CFAStyle % CFA code-style for all languages 168 \lstset{ language=CFA,basicstyle=\linespread{0.9}\tt} % CFA default language 169 \lstset{basicstyle=\linespread{0.9}\tt} % CFA typewriter font 169 170 \newcommand{\PAB}[1]{{\color{red}PAB: #1}} 170 171 -
libcfa/prelude/defines.hfa.in
r33e1c91 r929d925 171 171 #undef CFA_HAVE_LINUX_IO_URING_H 172 172 173 /* Defined if librseq support is present when compiling libcfathread. */ 174 #undef CFA_HAVE_LINUX_LIBRSEQ 175 176 /* Defined if rseq support is present when compiling libcfathread. */ 177 #undef CFA_HAVE_LINUX_RSEQ_H 178 173 179 /* Defined if openat2 support is present when compiling libcfathread. */ 174 180 #undef CFA_HAVE_OPENAT2 … … 205 211 #undef HAVE_LINUX_IO_URING_H 206 212 213 /* Define to 1 if you have the <linux/rseq.h> header file. */ 214 #undef HAVE_LINUX_RSEQ_H 215 207 216 /* Define to 1 if you have the <memory.h> header file. */ 208 217 #undef HAVE_MEMORY_H -
libcfa/src/bits/signal.hfa
r33e1c91 r929d925 20 20 21 21 #include <errno.h> 22 #define __USE_GNU 23 22 #include <signal.h> 24 #undef __USE_GNU 25 23 #include <stdlib.h> 26 24 #include <string.h> -
libcfa/src/concurrency/coroutine.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 17 18 18 19 #include "coroutine.hfa" -
libcfa/src/concurrency/io.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 17 18 18 19 #if defined(__CFA_DEBUG__) … … 23 24 24 25 #if defined(CFA_HAVE_LINUX_IO_URING_H) 25 #define _GNU_SOURCE /* See feature_test_macros(7) */ 26 26 #include <errno.h> 27 27 #include <signal.h> -
libcfa/src/concurrency/io/setup.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE /* See feature_test_macros(7) */ 17 #define _GNU_SOURCE 18 18 19 19 #if defined(__CFA_DEBUG__) -
libcfa/src/concurrency/kernel.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 18 17 19 // #define __CFA_DEBUG_PRINT_RUNTIME_CORE__ 18 20 … … 278 280 279 281 // Spin a little on I/O, just in case 280 for(5) { 282 for(5) { 281 283 __maybe_io_drain( this ); 282 284 readyThread = pop_fast( this->cltr ); … … 285 287 286 288 // no luck, try stealing a few times 287 for(5) { 289 for(5) { 288 290 if( __maybe_io_drain( this ) ) { 289 291 readyThread = pop_fast( this->cltr ); -
libcfa/src/concurrency/kernel.hfa
r33e1c91 r929d925 66 66 unsigned id; 67 67 unsigned target; 68 unsigned last; 68 69 unsigned long long int cutoff; 69 70 } rdq; -
libcfa/src/concurrency/kernel/startup.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 17 18 18 19 // C Includes 19 20 #include <errno.h> // errno 21 #include <signal.h> 20 22 #include <string.h> // strerror 21 23 #include <unistd.h> // sysconf 24 22 25 extern "C" { 23 26 #include <limits.h> // PTHREAD_STACK_MIN 27 #include <unistd.h> // syscall 24 28 #include <sys/eventfd.h> // eventfd 25 29 #include <sys/mman.h> // mprotect … … 136 140 }; 137 141 142 #if defined(CFA_HAVE_LINUX_LIBRSEQ) 143 // No data needed 144 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 145 extern "Cforall" { 146 __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq @= { 147 .cpu_id : RSEQ_CPU_ID_UNINITIALIZED, 148 }; 149 } 150 #else 151 // No data needed 152 #endif 153 138 154 //----------------------------------------------------------------------------- 139 155 // Struct to steal stack … … 468 484 self_mon_p = &self_mon; 469 485 link.next = 0p; 470 link.ts = 0; 486 link.ts = -1llu; 471 487 preferred = -1u; 472 488 last_proc = 0p; … … 497 513 this.rdq.id = -1u; 498 514 this.rdq.target = -1u; 515 this.rdq.last = -1u; 499 516 this.rdq.cutoff = 0ull; 500 517 do_terminate = false; -
libcfa/src/concurrency/kernel_private.hfa
r33e1c91 r929d925 16 16 #pragma once 17 17 18 #if !defined(__cforall_thread__) 19 #error kernel_private.hfa should only be included in libcfathread source 20 #endif 21 18 22 #include "kernel.hfa" 19 23 #include "thread.hfa" … … 22 26 #include "stats.hfa" 23 27 28 extern "C" { 29 #if defined(CFA_HAVE_LINUX_LIBRSEQ) 30 #include <rseq/rseq.h> 31 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 32 #include <linux/rseq.h> 33 #else 34 #ifndef _GNU_SOURCE 35 #error kernel_private requires gnu_source 36 #endif 37 #include <sched.h> 38 #endif 39 } 40 24 41 //----------------------------------------------------------------------------- 25 42 // Scheduler 26 27 28 43 extern "C" { 29 44 void disable_interrupts() OPTIONAL_THREAD; … … 39 54 40 55 //----------------------------------------------------------------------------- 56 // Hardware 57 58 #if defined(CFA_HAVE_LINUX_LIBRSEQ) 59 // No data needed 60 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 61 extern "Cforall" { 62 extern __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq; 63 } 64 #else 65 // No data needed 66 #endif 67 68 static inline int __kernel_getcpu() { 69 /* paranoid */ verify( ! __preemption_enabled() ); 70 #if defined(CFA_HAVE_LINUX_LIBRSEQ) 71 return rseq_current_cpu(); 72 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 73 int r = __cfaabi_rseq.cpu_id; 74 /* paranoid */ verify( r >= 0 ); 75 return r; 76 #else 77 return sched_getcpu(); 78 #endif 79 } 80 81 //----------------------------------------------------------------------------- 41 82 // Processor 42 83 void main(processorCtx_t *); … … 44 85 void * __create_pthread( pthread_t *, void * (*)(void *), void * ); 45 86 void __destroy_pthread( pthread_t pthread, void * stack, void ** retval ); 46 47 48 87 49 88 extern cluster * mainCluster; -
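
For context on the fallback chain in __kernel_getcpu() above: the librseq and raw linux/rseq.h paths reduce the current-CPU query to a thread-local load that the kernel keeps up to date, while sched_getcpu() is the portable floor. A hedged plain-C rendering of the same dispatch — HAVE_LIBRSEQ and HAVE_RSEQ_H are stand-ins for the CFA_HAVE_* configure flags, and my_rseq for the __cfaabi_rseq thread-local:

#define _GNU_SOURCE
#include <sched.h>

#if defined(HAVE_LIBRSEQ)
	#include <rseq/rseq.h>            /* librseq registers threads itself */
	static inline int current_cpu(void) { return rseq_current_cpu(); }
#elif defined(HAVE_RSEQ_H)
	#include <linux/rseq.h>
	/* assumes the thread was registered with the rseq syscall at startup */
	extern __thread volatile struct rseq my_rseq;
	static inline int current_cpu(void) { return my_rseq.cpu_id; }
#else
	/* portable fallback: a vDSO/syscall rather than a TLS load */
	static inline int current_cpu(void) { return sched_getcpu(); }
#endif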
libcfa/src/concurrency/locks.cfa
r33e1c91 r929d925 16 16 17 17 #define __cforall_thread__ 18 #define _GNU_SOURCE 18 19 19 20 #include "locks.hfa" -
libcfa/src/concurrency/monitor.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 17 18 18 19 #include "monitor.hfa" -
libcfa/src/concurrency/mutex.cfa
r33e1c91 r929d925 17 17 18 18 #define __cforall_thread__ 19 #define _GNU_SOURCE 19 20 20 21 #include "mutex.hfa" -
libcfa/src/concurrency/preemption.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 18 17 19 // #define __CFA_DEBUG_PRINT_PREEMPTION__ 18 20 -
libcfa/src/concurrency/ready_queue.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 18 17 19 // #define __CFA_DEBUG_PRINT_READY_QUEUE__ 18 20 19 21 20 #define USE_RELAXED_FIFO22 // #define USE_RELAXED_FIFO 21 23 // #define USE_WORK_STEALING 24 #define USE_CPU_WORK_STEALING 22 25 23 26 #include "bits/defs.hfa" 27 #include "device/cpu.hfa" 24 28 #include "kernel_private.hfa" 25 29 26 #define _GNU_SOURCE27 30 #include "stdlib.hfa" 28 31 #include "math.hfa" 29 32 33 #include <errno.h> 30 34 #include <unistd.h> 35 36 extern "C" { 37 #include <sys/syscall.h> // __NR_xxx 38 } 31 39 32 40 #include "ready_subqueue.hfa" … … 46 54 #endif 47 55 48 #if defined(USE_RELAXED_FIFO) 56 #if defined(USE_CPU_WORK_STEALING) 57 #define READYQ_SHARD_FACTOR 2 58 #elif defined(USE_RELAXED_FIFO) 49 59 #define BIAS 4 50 60 #define READYQ_SHARD_FACTOR 4 … … 85 95 } 86 96 97 #if defined(CFA_HAVE_LINUX_LIBRSEQ) 98 // No forward declaration needed 99 #define __kernel_rseq_register rseq_register_current_thread 100 #define __kernel_rseq_unregister rseq_unregister_current_thread 101 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 102 void __kernel_raw_rseq_register (void); 103 void __kernel_raw_rseq_unregister(void); 104 105 #define __kernel_rseq_register __kernel_raw_rseq_register 106 #define __kernel_rseq_unregister __kernel_raw_rseq_unregister 107 #else 108 // No forward declaration needed 109 // No initialization needed 110 static inline void noop(void) {} 111 112 #define __kernel_rseq_register noop 113 #define __kernel_rseq_unregister noop 114 #endif 115 87 116 //======================================================================= 88 117 // Cluster wide reader-writer lock … … 107 136 // Lock-Free registering/unregistering of threads 108 137 unsigned register_proc_id( void ) with(*__scheduler_lock) { 138 __kernel_rseq_register(); 139 109 140 __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc); 110 141 bool * handle = (bool *)&kernelTLS().sched_lock; … … 161 192 162 193 __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc); 194 195 __kernel_rseq_unregister(); 163 196 } 164 197 … … 214 247 //======================================================================= 215 248 void ?{}(__ready_queue_t & this) with (this) { 216 lanes.data = 0p; 217 lanes.tscs = 0p; 218 lanes.count = 0; 249 #if defined(USE_CPU_WORK_STEALING) 250 lanes.count = cpu_info.hthrd_count * READYQ_SHARD_FACTOR; 251 lanes.data = alloc( lanes.count ); 252 lanes.tscs = alloc( lanes.count ); 253 254 for( idx; (size_t)lanes.count ) { 255 (lanes.data[idx]){}; 256 lanes.tscs[idx].tv = rdtscl(); 257 } 258 #else 259 lanes.data = 0p; 260 lanes.tscs = 0p; 261 lanes.count = 0; 262 #endif 219 263 } 220 264 221 265 void ^?{}(__ready_queue_t & this) with (this) { 222 verify( SEQUENTIAL_SHARD == lanes.count ); 266 #if !defined(USE_CPU_WORK_STEALING) 267 verify( SEQUENTIAL_SHARD == lanes.count ); 268 #endif 269 223 270 free(lanes.data); 224 271 free(lanes.tscs); … … 226 273 227 274 //----------------------------------------------------------------------- 275 #if defined(USE_CPU_WORK_STEALING) 276 __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) { 277 __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr); 278 279 processor * const proc = kernelTLS().this_processor; 280 const bool external = !push_local || (!proc) || (cltr != proc->cltr); 281 282 const int cpu = __kernel_getcpu(); 283 /* paranoid */ verify(cpu >= 0); 284 /* paranoid */ 
verify(cpu < cpu_info.hthrd_count); 285 /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count); 286 287 const cpu_map_entry_t & map = cpu_info.llc_map[cpu]; 288 /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count); 289 /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count); 290 /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR); 291 292 const int start = map.self * READYQ_SHARD_FACTOR; 293 unsigned i; 294 do { 295 unsigned r; 296 if(unlikely(external)) { r = __tls_rand(); } 297 else { r = proc->rdq.its++; } 298 i = start + (r % READYQ_SHARD_FACTOR); 299 // If we can't lock it retry 300 } while( !__atomic_try_acquire( &lanes.data[i].lock ) ); 301 302 // Actually push it 303 push(lanes.data[i], thrd); 304 305 // Unlock and return 306 __atomic_unlock( &lanes.data[i].lock ); 307 308 #if !defined(__CFA_NO_STATISTICS__) 309 if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED); 310 else __tls_stats()->ready.push.local.success++; 311 #endif 312 313 __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first); 314 315 } 316 317 // Pop from the ready queue from a given cluster 318 __attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) { 319 /* paranoid */ verify( lanes.count > 0 ); 320 /* paranoid */ verify( kernelTLS().this_processor ); 321 322 const int cpu = __kernel_getcpu(); 323 /* paranoid */ verify(cpu >= 0); 324 /* paranoid */ verify(cpu < cpu_info.hthrd_count); 325 /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count); 326 327 const cpu_map_entry_t & map = cpu_info.llc_map[cpu]; 328 /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count); 329 /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count); 330 /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR); 331 332 processor * const proc = kernelTLS().this_processor; 333 const int start = map.self * READYQ_SHARD_FACTOR; 334 335 // Did we already have a help target 336 if(proc->rdq.target == -1u) { 337 // if We don't have a 338 unsigned long long min = ts(lanes.data[start]); 339 for(i; READYQ_SHARD_FACTOR) { 340 unsigned long long tsc = ts(lanes.data[start + i]); 341 if(tsc < min) min = tsc; 342 } 343 proc->rdq.cutoff = min; 344 345 /* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores. 346 /* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores. 
347 uint64_t chaos = __tls_rand(); 348 uint64_t high_chaos = (chaos >> 32); 349 uint64_t mid_chaos = (chaos >> 16) & 0xffff; 350 uint64_t low_chaos = chaos & 0xffff; 351 352 unsigned me = map.self; 353 unsigned cpu_chaos = map.start + (mid_chaos % map.count); 354 bool global = cpu_chaos == me; 355 356 if(global) { 357 proc->rdq.target = high_chaos % lanes.count; 358 } else { 359 proc->rdq.target = (cpu_chaos * READYQ_SHARD_FACTOR) + (low_chaos % READYQ_SHARD_FACTOR); 360 /* paranoid */ verify(proc->rdq.target >= (map.start * READYQ_SHARD_FACTOR)); 361 /* paranoid */ verify(proc->rdq.target < ((map.start + map.count) * READYQ_SHARD_FACTOR)); 362 } 363 364 /* paranoid */ verify(proc->rdq.target != -1u); 365 } 366 else { 367 const unsigned long long bias = 0; //2_500_000_000; 368 const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff; 369 { 370 unsigned target = proc->rdq.target; 371 proc->rdq.target = -1u; 372 if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) { 373 $thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help)); 374 proc->rdq.last = target; 375 if(t) return t; 376 } 377 } 378 379 unsigned last = proc->rdq.last; 380 if(last != -1u && lanes.tscs[last].tv < cutoff && ts(lanes.data[last]) < cutoff) { 381 $thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help)); 382 if(t) return t; 383 } 384 else { 385 proc->rdq.last = -1u; 386 } 387 } 388 389 for(READYQ_SHARD_FACTOR) { 390 unsigned i = start + (proc->rdq.itr++ % READYQ_SHARD_FACTOR); 391 if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t; 392 } 393 394 // All lanes where empty return 0p 395 return 0p; 396 } 397 398 __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { 399 processor * const proc = kernelTLS().this_processor; 400 unsigned last = proc->rdq.last; 401 if(last != -1u) { 402 struct $thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal)); 403 if(t) return t; 404 proc->rdq.last = -1u; 405 } 406 407 unsigned i = __tls_rand() % lanes.count; 408 return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal)); 409 } 410 __attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) { 411 return search(cltr); 412 } 413 #endif 228 414 #if defined(USE_RELAXED_FIFO) 229 415 //----------------------------------------------------------------------- … … 519 705 if(is_empty(sl)) { 520 706 assert( sl.anchor.next == 0p ); 521 assert( sl.anchor.ts == 0);707 assert( sl.anchor.ts == -1llu ); 522 708 assert( mock_head(sl) == sl.prev ); 523 709 } else { 524 710 assert( sl.anchor.next != 0p ); 525 assert( sl.anchor.ts != 0);711 assert( sl.anchor.ts != -1llu ); 526 712 assert( mock_head(sl) != sl.prev ); 527 713 } … … 573 759 lanes.tscs = alloc(lanes.count, lanes.tscs`realloc); 574 760 for(i; lanes.count) { 575 unsigned long long tsc = ts(lanes.data[i]); 576 lanes.tscs[i].tv = tsc != 0 ? 
tsc : rdtscl(); 761 unsigned long long tsc1 = ts(lanes.data[i]); 762 unsigned long long tsc2 = rdtscl() 763 lanes.tscs[i].tv = min(tsc1, tsc2); 577 764 } 578 765 #endif 579 766 } 580 767 581 // Grow the ready queue 582 void ready_queue_grow(struct cluster * cltr) { 583 size_t ncount; 584 int target = cltr->procs.total; 585 586 /* paranoid */ verify( ready_mutate_islocked() ); 587 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n"); 588 589 // Make sure that everything is consistent 590 /* paranoid */ check( cltr->ready_queue ); 591 592 // grow the ready queue 593 with( cltr->ready_queue ) { 594 // Find new count 595 // Make sure we always have atleast 1 list 596 if(target >= 2) { 597 ncount = target * READYQ_SHARD_FACTOR; 598 } else { 599 ncount = SEQUENTIAL_SHARD; 600 } 601 602 // Allocate new array (uses realloc and memcpies the data) 603 lanes.data = alloc( ncount, lanes.data`realloc ); 604 605 // Fix the moved data 606 for( idx; (size_t)lanes.count ) { 607 fix(lanes.data[idx]); 608 } 609 610 // Construct new data 611 for( idx; (size_t)lanes.count ~ ncount) { 612 (lanes.data[idx]){}; 613 } 614 615 // Update original 616 lanes.count = ncount; 617 } 618 619 fix_times(cltr); 620 621 reassign_cltr_id(cltr); 622 623 // Make sure that everything is consistent 624 /* paranoid */ check( cltr->ready_queue ); 625 626 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n"); 627 628 /* paranoid */ verify( ready_mutate_islocked() ); 629 } 630 631 // Shrink the ready queue 632 void ready_queue_shrink(struct cluster * cltr) { 633 /* paranoid */ verify( ready_mutate_islocked() ); 634 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n"); 635 636 // Make sure that everything is consistent 637 /* paranoid */ check( cltr->ready_queue ); 638 639 int target = cltr->procs.total; 640 641 with( cltr->ready_queue ) { 642 // Remember old count 643 size_t ocount = lanes.count; 644 645 // Find new count 646 // Make sure we always have atleast 1 list 647 lanes.count = target >= 2 ? 
target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD; 648 /* paranoid */ verify( ocount >= lanes.count ); 649 /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 ); 650 651 // for printing count the number of displaced threads 652 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__) 653 __attribute__((unused)) size_t displaced = 0; 654 #endif 655 656 // redistribute old data 657 for( idx; (size_t)lanes.count ~ ocount) { 658 // Lock is not strictly needed but makes checking invariants much easier 659 __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock); 660 verify(locked); 661 662 // As long as we can pop from this lane to push the threads somewhere else in the queue 663 while(!is_empty(lanes.data[idx])) { 664 struct $thread * thrd; 665 unsigned long long _; 666 [thrd, _] = pop(lanes.data[idx]); 667 668 push(cltr, thrd, true); 669 670 // for printing count the number of displaced threads 671 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__) 672 displaced++; 673 #endif 674 } 675 676 // Unlock the lane 677 __atomic_unlock(&lanes.data[idx].lock); 678 679 // TODO print the queue statistics here 680 681 ^(lanes.data[idx]){}; 682 } 683 684 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced); 685 686 // Allocate new array (uses realloc and memcpies the data) 687 lanes.data = alloc( lanes.count, lanes.data`realloc ); 688 689 // Fix the moved data 690 for( idx; (size_t)lanes.count ) { 691 fix(lanes.data[idx]); 692 } 693 } 694 695 fix_times(cltr); 696 697 reassign_cltr_id(cltr); 698 699 // Make sure that everything is consistent 700 /* paranoid */ check( cltr->ready_queue ); 701 702 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n"); 703 /* paranoid */ verify( ready_mutate_islocked() ); 704 } 768 #if defined(USE_CPU_WORK_STEALING) 769 // ready_queue size is fixed in this case 770 void ready_queue_grow(struct cluster * cltr) {} 771 void ready_queue_shrink(struct cluster * cltr) {} 772 #else 773 // Grow the ready queue 774 void ready_queue_grow(struct cluster * cltr) { 775 size_t ncount; 776 int target = cltr->procs.total; 777 778 /* paranoid */ verify( ready_mutate_islocked() ); 779 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n"); 780 781 // Make sure that everything is consistent 782 /* paranoid */ check( cltr->ready_queue ); 783 784 // grow the ready queue 785 with( cltr->ready_queue ) { 786 // Find new count 787 // Make sure we always have atleast 1 list 788 if(target >= 2) { 789 ncount = target * READYQ_SHARD_FACTOR; 790 } else { 791 ncount = SEQUENTIAL_SHARD; 792 } 793 794 // Allocate new array (uses realloc and memcpies the data) 795 lanes.data = alloc( ncount, lanes.data`realloc ); 796 797 // Fix the moved data 798 for( idx; (size_t)lanes.count ) { 799 fix(lanes.data[idx]); 800 } 801 802 // Construct new data 803 for( idx; (size_t)lanes.count ~ ncount) { 804 (lanes.data[idx]){}; 805 } 806 807 // Update original 808 lanes.count = ncount; 809 } 810 811 fix_times(cltr); 812 813 reassign_cltr_id(cltr); 814 815 // Make sure that everything is consistent 816 /* paranoid */ check( cltr->ready_queue ); 817 818 __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n"); 819 820 /* paranoid */ verify( ready_mutate_islocked() ); 821 } 822 823 // Shrink the ready queue 824 void ready_queue_shrink(struct cluster * cltr) { 825 /* paranoid */ verify( ready_mutate_islocked() ); 826 
__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n"); 827 828 // Make sure that everything is consistent 829 /* paranoid */ check( cltr->ready_queue ); 830 831 int target = cltr->procs.total; 832 833 with( cltr->ready_queue ) { 834 // Remember old count 835 size_t ocount = lanes.count; 836 837 // Find new count 838 // Make sure we always have atleast 1 list 839 lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD; 840 /* paranoid */ verify( ocount >= lanes.count ); 841 /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 ); 842 843 // for printing count the number of displaced threads 844 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__) 845 __attribute__((unused)) size_t displaced = 0; 846 #endif 847 848 // redistribute old data 849 for( idx; (size_t)lanes.count ~ ocount) { 850 // Lock is not strictly needed but makes checking invariants much easier 851 __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock); 852 verify(locked); 853 854 // As long as we can pop from this lane to push the threads somewhere else in the queue 855 while(!is_empty(lanes.data[idx])) { 856 struct $thread * thrd; 857 unsigned long long _; 858 [thrd, _] = pop(lanes.data[idx]); 859 860 push(cltr, thrd, true); 861 862 // for printing count the number of displaced threads 863 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__) 864 displaced++; 865 #endif 866 } 867 868 // Unlock the lane 869 __atomic_unlock(&lanes.data[idx].lock); 870 871 // TODO print the queue statistics here 872 873 ^(lanes.data[idx]){}; 874 } 875 876 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced); 877 878 // Allocate new array (uses realloc and memcpies the data) 879 lanes.data = alloc( lanes.count, lanes.data`realloc ); 880 881 // Fix the moved data 882 for( idx; (size_t)lanes.count ) { 883 fix(lanes.data[idx]); 884 } 885 } 886 887 fix_times(cltr); 888 889 reassign_cltr_id(cltr); 890 891 // Make sure that everything is consistent 892 /* paranoid */ check( cltr->ready_queue ); 893 894 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n"); 895 /* paranoid */ verify( ready_mutate_islocked() ); 896 } 897 #endif 705 898 706 899 #if !defined(__CFA_NO_STATISTICS__) … … 710 903 } 711 904 #endif 905 906 907 #if defined(CFA_HAVE_LINUX_LIBRSEQ) 908 // No definition needed 909 #elif defined(CFA_HAVE_LINUX_RSEQ_H) 910 911 #if defined( __x86_64 ) || defined( __i386 ) 912 #define RSEQ_SIG 0x53053053 913 #elif defined( __ARM_ARCH ) 914 #ifdef __ARMEB__ 915 #define RSEQ_SIG 0xf3def5e7 /* udf #24035 ; 0x5de3 (ARMv6+) */ 916 #else 917 #define RSEQ_SIG 0xe7f5def3 /* udf #24035 ; 0x5de3 */ 918 #endif 919 #endif 920 921 extern void __disable_interrupts_hard(); 922 extern void __enable_interrupts_hard(); 923 924 void __kernel_raw_rseq_register (void) { 925 /* paranoid */ verify( __cfaabi_rseq.cpu_id == RSEQ_CPU_ID_UNINITIALIZED ); 926 927 // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, (sigset_t *)0p, _NSIG / 8); 928 int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, RSEQ_SIG); 929 if(ret != 0) { 930 int e = errno; 931 switch(e) { 932 case EINVAL: abort("KERNEL ERROR: rseq register invalid argument"); 933 case ENOSYS: abort("KERNEL ERROR: rseq register no supported"); 934 case EFAULT: abort("KERNEL ERROR: rseq register with invalid argument"); 935 case EBUSY : abort("KERNEL ERROR: rseq register already 
registered"); 936 case EPERM : abort("KERNEL ERROR: rseq register sig argument on unregistration does not match the signature received on registration"); 937 default: abort("KERNEL ERROR: rseq register unexpected return %d", e); 938 } 939 } 940 } 941 942 void __kernel_raw_rseq_unregister(void) { 943 /* paranoid */ verify( __cfaabi_rseq.cpu_id >= 0 ); 944 945 // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, (sigset_t *)0p, _NSIG / 8); 946 int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG); 947 if(ret != 0) { 948 int e = errno; 949 switch(e) { 950 case EINVAL: abort("KERNEL ERROR: rseq unregister invalid argument"); 951 case ENOSYS: abort("KERNEL ERROR: rseq unregister no supported"); 952 case EFAULT: abort("KERNEL ERROR: rseq unregister with invalid argument"); 953 case EBUSY : abort("KERNEL ERROR: rseq unregister already registered"); 954 case EPERM : abort("KERNEL ERROR: rseq unregister sig argument on unregistration does not match the signature received on registration"); 955 default: abort("KERNEL ERROR: rseq unregisteunexpected return %d", e); 956 } 957 } 958 } 959 #else 960 // No definition needed 961 #endif -
libcfa/src/concurrency/ready_subqueue.hfa
r33e1c91 r929d925 32 32 this.prev = mock_head(this); 33 33 this.anchor.next = 0p; 34 this.anchor.ts = 0;34 this.anchor.ts = -1llu; 35 35 #if !defined(__CFA_NO_STATISTICS__) 36 36 this.cnt = 0; … … 44 44 /* paranoid */ verify( &mock_head(this)->link.ts == &this.anchor.ts ); 45 45 /* paranoid */ verify( mock_head(this)->link.next == 0p ); 46 /* paranoid */ verify( mock_head(this)->link.ts == 0);46 /* paranoid */ verify( mock_head(this)->link.ts == -1llu ); 47 47 /* paranoid */ verify( mock_head(this) == this.prev ); 48 48 /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 ); … … 55 55 // Make sure the list is empty 56 56 /* paranoid */ verify( this.anchor.next == 0p ); 57 /* paranoid */ verify( this.anchor.ts == 0);57 /* paranoid */ verify( this.anchor.ts == -1llu ); 58 58 /* paranoid */ verify( mock_head(this) == this.prev ); 59 59 } … … 64 64 /* paranoid */ verify( this.lock ); 65 65 /* paranoid */ verify( node->link.next == 0p ); 66 /* paranoid */ verify( node->link.ts == 0);66 /* paranoid */ verify( node->link.ts == -1llu ); 67 67 /* paranoid */ verify( this.prev->link.next == 0p ); 68 /* paranoid */ verify( this.prev->link.ts == 0);68 /* paranoid */ verify( this.prev->link.ts == -1llu ); 69 69 if( this.anchor.next == 0p ) { 70 70 /* paranoid */ verify( this.anchor.next == 0p ); 71 /* paranoid */ verify( this.anchor.ts == 0 ); 71 /* paranoid */ verify( this.anchor.ts == -1llu ); 72 /* paranoid */ verify( this.anchor.ts != 0 ); 72 73 /* paranoid */ verify( this.prev == mock_head( this ) ); 73 74 } else { 74 75 /* paranoid */ verify( this.anchor.next != 0p ); 76 /* paranoid */ verify( this.anchor.ts != -1llu ); 75 77 /* paranoid */ verify( this.anchor.ts != 0 ); 76 78 /* paranoid */ verify( this.prev != mock_head( this ) ); … … 92 94 /* paranoid */ verify( this.lock ); 93 95 /* paranoid */ verify( this.anchor.next != 0p ); 96 /* paranoid */ verify( this.anchor.ts != -1llu ); 94 97 /* paranoid */ verify( this.anchor.ts != 0 ); 95 98 … … 99 102 this.anchor.next = node->link.next; 100 103 this.anchor.ts = node->link.ts; 101 bool is_empty = this.anchor. ts == 0;104 bool is_empty = this.anchor.next == 0p; 102 105 node->link.next = 0p; 103 node->link.ts = 0;106 node->link.ts = -1llu; 104 107 #if !defined(__CFA_NO_STATISTICS__) 105 108 this.cnt--; … … 110 113 111 114 /* paranoid */ verify( node->link.next == 0p ); 112 /* paranoid */ verify( node->link.ts == 0 ); 115 /* paranoid */ verify( node->link.ts == -1llu ); 116 /* paranoid */ verify( node->link.ts != 0 ); 117 /* paranoid */ verify( this.anchor.ts != 0 ); 113 118 return [node, ts]; 114 119 } … … 116 121 // Check whether or not list is empty 117 122 static inline bool is_empty(__intrusive_lane_t & this) { 118 return this.anchor. ts == 0;123 return this.anchor.next == 0p; 119 124 } 120 125 … … 122 127 static inline unsigned long long ts(__intrusive_lane_t & this) { 123 128 // Cannot verify here since it may not be locked 129 /* paranoid */ verify(this.anchor.ts != 0); 124 130 return this.anchor.ts; 125 131 } -
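
The ts changes above retire the old convention that a timestamp of 0 marks an empty or unlinked node: unlinked nodes and the empty anchor now carry the sentinel -1llu, and emptiness is decided by the next pointer alone, so ts == 0 can be asserted never to occur. A simplified sketch of that discipline (plain C; no locking and no mock-head aliasing, so not the exact lane code):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct node { struct node * next; uint64_t ts; };
struct lane { struct node anchor; struct node * prev; };

static void lane_ctor( struct lane * l ) {
	l->anchor.next = NULL;
	l->anchor.ts   = UINT64_MAX;    // -1llu: "no timestamp", never 0
	l->prev        = &l->anchor;    // stands in for mock_head(this)
}

static int lane_is_empty( struct lane * l ) {
	return l->anchor.next == NULL;  // ts no longer encodes emptiness
}

static void lane_push( struct lane * l, struct node * n, uint64_t now ) {
	assert( now != 0 && now != UINT64_MAX );          // real timestamps only
	assert( n->next == NULL && n->ts == UINT64_MAX ); // n must be unlinked
	n->ts = now;
	int was_empty = lane_is_empty( l );
	l->prev->next = n;              // writes anchor.next when empty
	l->prev = n;
	if ( was_empty ) l->anchor.ts = now;  // anchor.ts mirrors the head
}

static struct node * lane_pop( struct lane * l ) {
	assert( !lane_is_empty( l ) && l->anchor.ts != 0 );
	struct node * n = l->anchor.next;
	l->anchor.next = n->next;
	l->anchor.ts   = n->next ? n->next->ts : UINT64_MAX;
	if ( !n->next ) l->prev = &l->anchor;
	n->next = NULL;
	n->ts   = UINT64_MAX;           // unlinked nodes carry the sentinel
	return n;
}
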
libcfa/src/concurrency/thread.cfa
r33e1c91 r929d925 15 15 16 16 #define __cforall_thread__ 17 #define _GNU_SOURCE 17 18 18 19 #include "thread.hfa" … … 39 40 curr_cluster = &cl; 40 41 link.next = 0p; 41 link.ts = 0;42 link.ts = -1llu; 42 43 preferred = -1u; 43 44 last_proc = 0p; -
libcfa/src/containers/array.hfa
r33e1c91 r929d925 1 1 2 2 3 // a type whose size is n 4 #define Z(n) char[n] 5 6 // the inverse of Z(-) 7 #define z(N) sizeof(N) 8 9 forall( T & ) struct tag {}; 3 forall( __CFA_tysys_id_only_X & ) struct tag {}; 10 4 #define ttag(T) ((tag(T)){}) 11 #define ztag(n) ttag( Z(n))5 #define ztag(n) ttag(n) 12 6 13 7 … … 18 12 forall( [N], S & | sized(S), Timmed &, Tbase & ) { 19 13 struct arpk { 20 S strides[ z(N)];14 S strides[N]; 21 15 }; 22 16 … … 56 50 57 51 static inline size_t ?`len( arpk(N, S, Timmed, Tbase) & a ) { 58 return z(N);52 return N; 59 53 } 60 54 61 55 // workaround #226 (and array relevance thereof demonstrated in mike102/otype-slow-ndims.cfa) 62 56 static inline void ?{}( arpk(N, S, Timmed, Tbase) & this ) { 63 void ?{}( S (&inner)[ z(N)] ) {}57 void ?{}( S (&inner)[N] ) {} 64 58 ?{}(this.strides); 65 59 } 66 60 static inline void ^?{}( arpk(N, S, Timmed, Tbase) & this ) { 67 void ^?{}( S (&inner)[ z(N)] ) {}61 void ^?{}( S (&inner)[N] ) {} 68 62 ^?{}(this.strides); 69 63 } -
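
With the Z(n)/z(N) macros gone, a dimension parameter [N] now appears directly both as a type argument and as a value. A small usage sketch in Cforall, mirroring total1d_low and the loops in tests/array-container further down (the include path for the container header is assumed here):

#include <array.hfa>     // libcfa/src/containers/array.hfa; path assumed
#include <fstream.hfa>

forall( [N] )
float total1d( array( float, N ) & a ) {
	float total = 0.0f;
	for ( i; N )                 // N used directly as a value
		total += a[i];
	return total;
}

int main() {
	array( float, 8 ) a;         // 8 used directly as a dimension
	for ( i; 8 ) a[i] = 1.0f;
	sout | total1d( a );         // sums to 8
}
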
libcfa/src/device/cpu.cfa
r33e1c91 r929d925 253 253 } 254 254 255 #if defined(__CFA_WITH_VERIFY__)256 // Check widths are consistent257 for(i; 1~cpus) {258 for(j; cache_levels) {259 verifyf(raw[0][j].width == raw[i][j].width, "Unexpected width %u for cpu %u, index %u. Expected %u.", raw[i][j].width, i, j, raw[0][j].width);260 }261 }262 #endif263 264 255 return raw; 265 256 } 266 257 258 struct llc_map_t { 259 raw_cache_instance * raw; 260 unsigned count; 261 unsigned start; 262 }; 263 267 264 // returns an allocated list of all the distinct last-level caches 268 static [* idx_range_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {265 static [*llc_map_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) { 269 266 // Allocate at least one element 270 idx_range_t* ranges = alloc();267 llc_map_t* ranges = alloc(); 271 268 size_t range_cnt = 1; 272 269 273 270 // Initialize with element 0 274 *ranges = raw[0][llc_idx].range; 271 ranges->raw = &raw[0][llc_idx]; 272 ranges->count = 0; 273 ranges->start = -1u; 275 274 276 275 // Go over all other cpus 277 276 CPU_LOOP: for(i; 1~cpus) { 278 277 // Check if the range is already there 279 idx_range_t candidate = raw[i][llc_idx].range;278 raw_cache_instance * candidate = &raw[i][llc_idx]; 280 279 for(j; range_cnt) { 281 idx_range_texist = ranges[j];280 llc_map_t & exist = ranges[j]; 282 281 // If the range is already there just jump to the next cpu 283 if(0 == strcmp(candidate , exist)) continue CPU_LOOP;282 if(0 == strcmp(candidate->range, exist.raw->range)) continue CPU_LOOP; 284 283 } 285 284 286 285 // The range wasn't there; add it to the list 287 286 ranges = alloc(range_cnt + 1, ranges`realloc); 288 ranges[range_cnt] = candidate; 287 ranges[range_cnt].raw = candidate; 288 ranges[range_cnt].count = 0; 289 ranges[range_cnt].start = -1u; 289 290 range_cnt++; 290 291 } … 296 297 struct cpu_pairing_t { 297 298 unsigned cpu; 298 unsigned llc_id;299 unsigned id; 299 300 }; 300 301 301 302 int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) { 302 return lhs. llc_id < rhs.llc_id;303 } 304 305 return lhs.id < rhs.id; 304 305 static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, idx_range_t * maps, size_t map_cnt) {303 static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) { 306 307 cpu_pairing_t * pairings = alloc(cpus); 307 308 … 310 311 idx_range_t want = raw[i][0].range; 311 312 MAP_LOOP: for(j; map_cnt) { 312 if(0 != strcmp(want, maps[j] )) continue MAP_LOOP;313 314 pairings[i]. 
llc_id = j;313 if(0 != strcmp(want, maps[j].raw->range)) continue MAP_LOOP; 314 315 pairings[i].id = j; 315 316 continue CPU_LOOP; … 321 322 return pairings; 322 323 } 324 325 #include <fstream.hfa> 323 326 324 327 extern "C" { … 345 348 346 349 // Find number of distinct cache instances 347 idx_range_t * maps;350 llc_map_t * maps; 348 351 size_t map_cnt; 349 352 [maps, map_cnt] = distinct_llcs(cpus, cache_levels - llc, raw); 350 353 351 /* paranoid */ verify((map_cnt * raw[0][cache_levels - llc].width) == cpus); 354 #if defined(__CFA_WITH_VERIFY__) 355 // Verify that the caches cover all the cpus 356 { 357 unsigned width1 = 0; 358 unsigned width2 = 0; 359 for(i; map_cnt) { 360 const char * _; 361 width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_); 362 width2 += maps[i].raw->width; 363 } 364 verify(width1 == cpus); 365 verify(width2 == cpus); 366 } 367 #endif 352 368 353 369 // Get mappings from cpu to cache instance … 357 373 qsort(pairings, cpus); 358 374 359 unsigned llc_width = raw[0][cache_levels - llc].width; 360 361 // From the mappins build the actual cpu map we want 375 { 376 unsigned it = 0; 377 for(i; cpus) { 378 unsigned llc_id = pairings[i].id; 379 if(maps[llc_id].start == -1u) { 380 maps[llc_id].start = it; 381 it += maps[llc_id].raw->width; 382 /* paranoid */ verify(maps[llc_id].start < it); 383 /* paranoid */ verify(it != -1u); 384 } 385 } 386 /* paranoid */ verify(it == cpus); 387 } 388 389 // From the mappings build the actual cpu map we want 362 390 struct cpu_map_entry_t * entries = alloc(cpus); 363 391 for(i; cpus) { entries[i].count = 0; } 364 392 for(i; cpus) { 393 /* paranoid */ verify(pairings[i].id < map_cnt); 365 394 unsigned c = pairings[i].cpu; 366 entries[c].start = pairings[i].llc_id * llc_width; 367 entries[c].count = llc_width; 395 unsigned llc_id = pairings[i].id; 396 unsigned width = maps[llc_id].raw->width; 397 unsigned start = maps[llc_id].start; 398 unsigned self = start + (maps[llc_id].count++); 399 entries[c].count = width; 400 entries[c].start = start; 401 entries[c].self = self; 368 402 369 403 } -
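
The new llc_map_t bookkeeping above gives every distinct last-level cache a contiguous block of slots and every cpu a unique self slot inside its cache's block. A condensed sketch of just that placement logic (hypothetical plain-C types; the real code interleaves this with the sysfs parsing and the pairing sort):

#include <assert.h>

struct llc_map { unsigned width, count, start; };

// cpu_llc[i] is the llc index cpu i was paired with.
static void place( unsigned ncpus, const unsigned * cpu_llc,
                   unsigned nllc, struct llc_map * maps,
                   unsigned * self /* out: one unique slot per cpu */ ) {
	unsigned it = 0;
	for ( unsigned i = 0; i < nllc; i++ ) maps[i].start = (unsigned)-1;
	// First pass: hand each llc a contiguous [start, start+width) block.
	for ( unsigned i = 0; i < ncpus; i++ ) {
		struct llc_map * m = &maps[ cpu_llc[i] ];
		if ( m->start == (unsigned)-1 ) { m->start = it; it += m->width; }
	}
	assert( it == ncpus );  // the llc blocks tile the whole cpu range
	// Second pass: give each cpu the next free slot in its llc's block.
	for ( unsigned i = 0; i < ncpus; i++ ) {
		struct llc_map * m = &maps[ cpu_llc[i] ];
		self[i] = m->start + m->count++;
		assert( self[i] < m->start + m->width );
	}
}
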
libcfa/src/device/cpu.hfa
r33e1c91 r929d925 17 17 18 18 struct cpu_map_entry_t { 19 unsigned self; 19 20 unsigned start; 20 21 unsigned count; … … 22 23 23 24 struct cpu_info_t { 25 // array of size [hthrd_count] 24 26 const cpu_map_entry_t * llc_map; 27 28 // Number of _hardware_ threads present in the system 25 29 size_t hthrd_count; 26 30 }; -
libcfa/src/exception.c
r33e1c91 r929d925 256 256 // the whole stack. 257 257 258 #if defined( __x86_64 ) || defined( __i386 ) 258 259 // We did not simply reach the end of the stack without finding a handler. This is an error. 259 260 if ( ret != _URC_END_OF_STACK ) { 261 #else // defined( __ARM_ARCH ) 262 // The return code from _Unwind_RaiseException seems to be corrupt on ARM at end of stack. 263 // This workaround tries to keep default exception handling working. 264 if ( ret == _URC_FATAL_PHASE1_ERROR || ret == _URC_FATAL_PHASE2_ERROR ) { 265 #endif 260 266 printf("UNWIND ERROR %d after raise exception\n", ret); 261 267 abort(); -
src/AST/Convert.cpp
r33e1c91 r929d925 2415 2415 } 2416 2416 2417 virtual void visit( const DimensionExpr * old ) override final { 2418 // DimensionExpr gets desugared away in Validate. 2419 // As long as new-AST passes don't use it, this cheap-and-cheerful error 2420 // detection helps ensure that these occurrences have been compiled 2421 // away, as expected. To move the DimensionExpr boundary downstream 2422 // or move the new-AST translation boundary upstream, implement 2423 // DimensionExpr in the new AST and implement a conversion. 2424 (void) old; 2425 assert(false && "DimensionExpr should not be present at new-AST boundary"); 2426 } 2427 2417 2428 virtual void visit( const AsmExpr * old ) override final { 2418 2429 this->node = visitBaseExpr( old, -
src/AST/Decl.cpp
r33e1c91 r929d925 78 78 79 79 const char * TypeDecl::typeString() const { 80 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" };80 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized length value" }; 81 81 static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." ); 82 82 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); -
src/AST/Decl.hpp
r33e1c91 r929d925 175 175 class TypeDecl final : public NamedTypeDecl { 176 176 public: 177 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };177 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, Dimension, NUMBER_OF_KINDS }; 178 178 179 179 Kind kind; -
src/AST/Pass.impl.hpp
r33e1c91 r929d925 479 479 guard_symtab guard { *this }; 480 480 // implicit add __func__ identifier as specified in the C manual 6.4.2.2 481 static ast::ptr< ast::ObjectDecl > func{ new ast::ObjectDecl{ 481 static ast::ptr< ast::ObjectDecl > func{ new ast::ObjectDecl{ 482 482 CodeLocation{}, "__func__", 483 483 new ast::ArrayType{ … … 522 522 VISIT({ 523 523 guard_symtab guard { * this }; 524 maybe_accept( node, &StructDecl::params ); 525 maybe_accept( node, &StructDecl::members ); 524 maybe_accept( node, &StructDecl::params ); 525 maybe_accept( node, &StructDecl::members ); 526 maybe_accept( node, &StructDecl::attributes ); 526 527 }) 527 528 … … 543 544 VISIT({ 544 545 guard_symtab guard { * this }; 545 maybe_accept( node, &UnionDecl::params ); 546 maybe_accept( node, &UnionDecl::members ); 546 maybe_accept( node, &UnionDecl::params ); 547 maybe_accept( node, &UnionDecl::members ); 548 maybe_accept( node, &UnionDecl::attributes ); 547 549 }) 548 550 … … 562 564 VISIT( 563 565 // unlike structs, traits, and unions, enums inject their members into the global scope 564 maybe_accept( node, &EnumDecl::params ); 565 maybe_accept( node, &EnumDecl::members ); 566 maybe_accept( node, &EnumDecl::params ); 567 maybe_accept( node, &EnumDecl::members ); 568 maybe_accept( node, &EnumDecl::attributes ); 566 569 ) 567 570 … … 577 580 VISIT({ 578 581 guard_symtab guard { *this }; 579 maybe_accept( node, &TraitDecl::params ); 580 maybe_accept( node, &TraitDecl::members ); 582 maybe_accept( node, &TraitDecl::params ); 583 maybe_accept( node, &TraitDecl::members ); 584 maybe_accept( node, &TraitDecl::attributes ); 581 585 }) 582 586 -
src/CodeGen/CodeGenerator.cc
r33e1c91 r929d925 589 589 output << nameExpr->get_name(); 590 590 } // if 591 } 592 593 void CodeGenerator::postvisit( DimensionExpr * dimensionExpr ) { 594 extension( dimensionExpr ); 595 output << "/*non-type*/" << dimensionExpr->get_name(); 591 596 } 592 597 -
src/CodeGen/CodeGenerator.h
r33e1c91 r929d925 92 92 void postvisit( TupleIndexExpr * tupleExpr ); 93 93 void postvisit( TypeExpr *typeExpr ); 94 void postvisit( DimensionExpr *dimensionExpr ); 94 95 void postvisit( AsmExpr * ); 95 96 void postvisit( StmtExpr * ); -
src/Common/PassVisitor.h
r33e1c91 r929d925 167 167 virtual void visit( TypeExpr * typeExpr ) override final; 168 168 virtual void visit( const TypeExpr * typeExpr ) override final; 169 virtual void visit( DimensionExpr * dimensionExpr ) override final; 170 virtual void visit( const DimensionExpr * dimensionExpr ) override final; 169 171 virtual void visit( AsmExpr * asmExpr ) override final; 170 172 virtual void visit( const AsmExpr * asmExpr ) override final; … … 309 311 virtual Expression * mutate( CommaExpr * commaExpr ) override final; 310 312 virtual Expression * mutate( TypeExpr * typeExpr ) override final; 313 virtual Expression * mutate( DimensionExpr * dimensionExpr ) override final; 311 314 virtual Expression * mutate( AsmExpr * asmExpr ) override final; 312 315 virtual Expression * mutate( ImplicitCopyCtorExpr * impCpCtorExpr ) override final; … … 542 545 class WithIndexer { 543 546 protected: 544 WithIndexer( ) {}547 WithIndexer( bool trackIdentifiers = true ) : indexer(trackIdentifiers) {} 545 548 ~WithIndexer() {} 546 549 -
src/Common/PassVisitor.impl.h
r33e1c91 r929d925 636 636 maybeAccept_impl( node->parameters, *this ); 637 637 maybeAccept_impl( node->members , *this ); 638 maybeAccept_impl( node->attributes, *this ); 638 639 } 639 640 … … 656 657 maybeAccept_impl( node->parameters, *this ); 657 658 maybeAccept_impl( node->members , *this ); 659 maybeAccept_impl( node->attributes, *this ); 658 660 } 659 661 … … 676 678 maybeMutate_impl( node->parameters, *this ); 677 679 maybeMutate_impl( node->members , *this ); 680 maybeMutate_impl( node->attributes, *this ); 678 681 } 679 682 … … 697 700 maybeAccept_impl( node->parameters, *this ); 698 701 maybeAccept_impl( node->members , *this ); 702 maybeAccept_impl( node->attributes, *this ); 699 703 } 700 704 … … 714 718 maybeAccept_impl( node->parameters, *this ); 715 719 maybeAccept_impl( node->members , *this ); 720 maybeAccept_impl( node->attributes, *this ); 716 721 } 717 722 … … 732 737 maybeMutate_impl( node->parameters, *this ); 733 738 maybeMutate_impl( node->members , *this ); 739 maybeMutate_impl( node->attributes, *this ); 734 740 } 735 741 … … 750 756 maybeAccept_impl( node->parameters, *this ); 751 757 maybeAccept_impl( node->members , *this ); 758 maybeAccept_impl( node->attributes, *this ); 752 759 753 760 VISIT_END( node ); … … 763 770 maybeAccept_impl( node->parameters, *this ); 764 771 maybeAccept_impl( node->members , *this ); 772 maybeAccept_impl( node->attributes, *this ); 765 773 766 774 VISIT_END( node ); … … 776 784 maybeMutate_impl( node->parameters, *this ); 777 785 maybeMutate_impl( node->members , *this ); 786 maybeMutate_impl( node->attributes, *this ); 778 787 779 788 MUTATE_END( Declaration, node ); … … 790 799 maybeAccept_impl( node->parameters, *this ); 791 800 maybeAccept_impl( node->members , *this ); 801 maybeAccept_impl( node->attributes, *this ); 792 802 } 793 803 … … 805 815 maybeAccept_impl( node->parameters, *this ); 806 816 maybeAccept_impl( node->members , *this ); 817 maybeAccept_impl( node->attributes, *this ); 807 818 } 808 819 … … 820 831 maybeMutate_impl( node->parameters, *this ); 821 832 maybeMutate_impl( node->members , *this ); 833 maybeMutate_impl( node->attributes, *this ); 822 834 } 823 835 … … 2507 2519 2508 2520 //-------------------------------------------------------------------------- 2521 // DimensionExpr 2522 template< typename pass_type > 2523 void PassVisitor< pass_type >::visit( DimensionExpr * node ) { 2524 VISIT_START( node ); 2525 2526 indexerScopedAccept( node->result, *this ); 2527 2528 VISIT_END( node ); 2529 } 2530 2531 template< typename pass_type > 2532 void PassVisitor< pass_type >::visit( const DimensionExpr * node ) { 2533 VISIT_START( node ); 2534 2535 indexerScopedAccept( node->result, *this ); 2536 2537 VISIT_END( node ); 2538 } 2539 2540 template< typename pass_type > 2541 Expression * PassVisitor< pass_type >::mutate( DimensionExpr * node ) { 2542 MUTATE_START( node ); 2543 2544 indexerScopedMutate( node->env , *this ); 2545 indexerScopedMutate( node->result, *this ); 2546 2547 MUTATE_END( Expression, node ); 2548 } 2549 2550 //-------------------------------------------------------------------------- 2509 2551 // AsmExpr 2510 2552 template< typename pass_type > … … 3145 3187 3146 3188 maybeAccept_impl( node->forall, *this ); 3147 // xxx - should PointerType visit/mutate dimension?3189 maybeAccept_impl( node->dimension, *this ); 3148 3190 maybeAccept_impl( node->base, *this ); 3149 3191 … … 3156 3198 3157 3199 maybeAccept_impl( node->forall, *this ); 3158 // xxx - should PointerType visit/mutate dimension?3200 
maybeAccept_impl( node->dimension, *this ); 3159 3201 maybeAccept_impl( node->base, *this ); 3160 3202 … … 3167 3209 3168 3210 maybeMutate_impl( node->forall, *this ); 3169 // xxx - should PointerType visit/mutate dimension?3211 maybeMutate_impl( node->dimension, *this ); 3170 3212 maybeMutate_impl( node->base, *this ); 3171 3213 … … 3856 3898 3857 3899 //-------------------------------------------------------------------------- 3858 // Attribute3900 // Constant 3859 3901 template< typename pass_type > 3860 3902 void PassVisitor< pass_type >::visit( Constant * node ) { -
src/InitTweak/InitTweak.cc
r33e1c91 r929d925 10 10 // Created On : Fri May 13 11:26:36 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Dec 13 23:15:52 201913 // Update Count : 812 // Last Modified On : Wed Jun 16 20:57:22 2021 13 // Update Count : 18 14 14 // 15 15 … … 1217 1217 void addDataSectonAttribute( ObjectDecl * objDecl ) { 1218 1218 objDecl->attributes.push_back(new Attribute("section", { 1219 new ConstantExpr( Constant::from_string(".data#") ), 1220 })); 1219 new ConstantExpr( Constant::from_string(".data" 1220 #if defined( __x86_64 ) || defined( __i386 ) // assembler comment to prevent assembler warning message 1221 "#" 1222 #else // defined( __ARM_ARCH ) 1223 "//" 1224 #endif 1225 ))})); 1221 1226 } 1222 1227 1223 1228 void addDataSectionAttribute( ast::ObjectDecl * objDecl ) { 1224 1229 objDecl->attributes.push_back(new ast::Attribute("section", { 1225 ast::ConstantExpr::from_string(objDecl->location, ".data#"), 1226 })); 1230 ast::ConstantExpr::from_string(objDecl->location, ".data" 1231 #if defined( __x86_64 ) || defined( __i386 ) // assembler comment to prevent assembler warning message 1232 "#" 1233 #else // defined( __ARM_ARCH ) 1234 "//" 1235 #endif 1236 )})); 1227 1237 } 1228 1238 -
src/Parser/DeclarationNode.cc
r33e1c91 r929d925 1076 1076 if ( variable.tyClass != TypeDecl::NUMBER_OF_KINDS ) { 1077 1077 // otype is internally converted to dtype + otype parameters 1078 static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::DStype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::D type};1078 static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::DStype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::Dimension }; 1079 1079 static_assert( sizeof(kindMap) / sizeof(kindMap[0]) == TypeDecl::NUMBER_OF_KINDS, "DeclarationNode::build: kindMap is out of sync." ); 1080 1080 assertf( variable.tyClass < sizeof(kindMap)/sizeof(kindMap[0]), "Variable's tyClass is out of bounds." ); 1081 TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype || variable.tyClass == TypeDecl::ALtype, variable.initializer ? variable.initializer->buildType() : nullptr );1081 TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype, variable.initializer ? variable.initializer->buildType() : nullptr ); 1082 1082 buildList( variable.assertions, ret->get_assertions() ); 1083 1083 return ret; -
src/Parser/ExpressionNode.cc
r33e1c91 r929d925 509 509 } // build_varref 510 510 511 DimensionExpr * build_dimensionref( const string * name ) { 512 DimensionExpr * expr = new DimensionExpr( *name ); 513 delete name; 514 return expr; 515 } // build_dimensionref 511 516 // TODO: get rid of this and OperKinds and reuse code from OperatorTable 512 517 static const char * OperName[] = { // must harmonize with OperKinds -
src/Parser/ParseNode.h
r33e1c91 r929d925 183 183 184 184 NameExpr * build_varref( const std::string * name ); 185 DimensionExpr * build_dimensionref( const std::string * name ); 185 186 186 187 Expression * build_cast( DeclarationNode * decl_node, ExpressionNode * expr_node ); -
src/Parser/TypedefTable.cc
r33e1c91 r929d925 10 10 // Created On : Sat May 16 15:20:13 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Mar 15 20:56:47202113 // Update Count : 26 012 // Last Modified On : Wed May 19 08:30:14 2021 13 // Update Count : 262 14 14 // 15 15 … … 31 31 switch ( kind ) { 32 32 case IDENTIFIER: return "identifier"; 33 case TYPEDIMname: return "typedim"; 33 34 case TYPEDEFname: return "typedef"; 34 35 case TYPEGENname: return "typegen"; -
src/Parser/lex.ll
r33e1c91 r929d925 10 10 * Created On : Sat Sep 22 08:58:10 2001 11 11 * Last Modified By : Peter A. Buhr 12 * Last Modified On : Thu Apr 1 13:22:31202113 * Update Count : 75 412 * Last Modified On : Sun Jun 20 18:41:09 2021 13 * Update Count : 759 14 14 */ 15 15 … … 117 117 hex_constant {hex_prefix}{hex_digits}{integer_suffix_opt} 118 118 119 // GCC: D (double) and iI (imaginary) suffixes, and DL (long double)119 // GCC: floating D (double), imaginary iI, and decimal floating DF, DD, DL 120 120 exponent "_"?[eE]"_"?[+-]?{decimal_digits} 121 121 floating_size 16|32|32x|64|64x|80|128|128x 122 122 floating_length ([fFdDlLwWqQ]|[fF]{floating_size}) 123 123 floating_suffix ({floating_length}?[iI]?)|([iI]{floating_length}) 124 floating_suffix_opt ("_"?({floating_suffix}|"DL"))? 124 decimal_floating_suffix [dD][fFdDlL] 125 floating_suffix_opt ("_"?({floating_suffix}|{decimal_floating_suffix}))? 125 126 decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal}) 126 127 floating_decimal {decimal_digits}"."{exponent}?{floating_suffix_opt} … … 234 235 continue { KEYWORD_RETURN(CONTINUE); } 235 236 coroutine { KEYWORD_RETURN(COROUTINE); } // CFA 237 _Decimal32 { KEYWORD_RETURN(DECIMAL32); } // GCC 238 _Decimal64 { KEYWORD_RETURN(DECIMAL64); } // GCC 239 _Decimal128 { KEYWORD_RETURN(DECIMAL128); } // GCC 236 240 default { KEYWORD_RETURN(DEFAULT); } 237 241 disable { KEYWORD_RETURN(DISABLE); } // CFA -
src/Parser/parser.yy
r33e1c91 r929d925 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Apr 26 18:41:54202113 // Update Count : 499012 // Last Modified On : Sun Jun 20 18:46:51 2021 13 // Update Count : 5023 14 14 // 15 15 … … 269 269 %token INT128 UINT128 uuFLOAT80 uuFLOAT128 // GCC 270 270 %token uFLOAT16 uFLOAT32 uFLOAT32X uFLOAT64 uFLOAT64X uFLOAT128 // GCC 271 %token DECIMAL32 DECIMAL64 DECIMAL128 // GCC 271 272 %token ZERO_T ONE_T // CFA 272 273 %token SIZEOF TYPEOF VALIST AUTO_TYPE // GCC … … 287 288 288 289 // names and constants: lexer differentiates between identifier and typedef names 289 %token<tok> IDENTIFIER QUOTED_IDENTIFIER TYPED EFname TYPEGENname290 %token<tok> IDENTIFIER QUOTED_IDENTIFIER TYPEDIMname TYPEDEFname TYPEGENname 290 291 %token<tok> TIMEOUT WOR CATCH RECOVER CATCHRESUME FIXUP FINALLY // CFA 291 292 %token<tok> INTEGERconstant CHARACTERconstant STRINGliteral … … 586 587 | quasi_keyword 587 588 { $$ = new ExpressionNode( build_varref( $1 ) ); } 589 | TYPEDIMname // CFA, generic length argument 590 // { $$ = new ExpressionNode( new TypeExpr( maybeMoveBuildType( DeclarationNode::newFromTypedef( $1 ) ) ) ); } 591 // { $$ = new ExpressionNode( build_varref( $1 ) ); } 592 { $$ = new ExpressionNode( build_dimensionref( $1 ) ); } 588 593 | tuple 589 594 | '(' comma_expression ')' … … 1887 1892 | uFLOAT128 1888 1893 { $$ = DeclarationNode::newBasicType( DeclarationNode::uFloat128 ); } 1894 | DECIMAL32 1895 { SemanticError( yylloc, "_Decimal32 is currently unimplemented." ); $$ = nullptr; } 1896 | DECIMAL64 1897 { SemanticError( yylloc, "_Decimal64 is currently unimplemented." ); $$ = nullptr; } 1898 | DECIMAL128 1899 { SemanticError( yylloc, "_Decimal128 is currently unimplemented." ); $$ = nullptr; } 1889 1900 | COMPLEX // C99 1890 1901 { $$ = DeclarationNode::newComplexType( DeclarationNode::Complex ); } … … 2535 2546 | '[' identifier_or_type_name ']' 2536 2547 { 2537 typedefTable.addToScope( *$2, TYPED EFname, "9" );2538 $$ = DeclarationNode::newTypeParam( TypeDecl:: ALtype, $2 );2548 typedefTable.addToScope( *$2, TYPEDIMname, "9" ); 2549 $$ = DeclarationNode::newTypeParam( TypeDecl::Dimension, $2 ); 2539 2550 } 2540 2551 // | type_specifier identifier_parameter_declarator … … 2590 2601 { $$ = new ExpressionNode( new TypeExpr( maybeMoveBuildType( $1 ) ) ); } 2591 2602 | assignment_expression 2592 { SemanticError( yylloc, toString("Expression generic parameters are currently unimplemented: ", $1->build()) ); $$ = nullptr; }2593 2603 | type_list ',' type 2594 2604 { $$ = (ExpressionNode *)($1->set_last( new ExpressionNode( new TypeExpr( maybeMoveBuildType( $3 ) ) ) )); } 2595 2605 | type_list ',' assignment_expression 2596 { SemanticError( yylloc, toString("Expression generic parameters are currently unimplemented: ", $3->build()) ); $$ = nullptr; } 2597 // { $$ = (ExpressionNode *)( $1->set_last( $3 )); } 2606 { $$ = (ExpressionNode *)( $1->set_last( $3 )); } 2598 2607 ; 2599 2608 -
src/SymTab/Indexer.cc
r33e1c91 r929d925 74 74 } 75 75 76 Indexer::Indexer( )76 Indexer::Indexer( bool trackIdentifiers ) 77 77 : idTable(), typeTable(), structTable(), enumTable(), unionTable(), traitTable(), 78 prevScope(), scope( 0 ), repScope( 0 ) { ++* stats().count; }78 prevScope(), scope( 0 ), repScope( 0 ), trackIdentifiers( trackIdentifiers ) { ++* stats().count; } 79 79 80 80 Indexer::~Indexer() { … … 110 110 111 111 void Indexer::lookupId( const std::string & id, std::list< IdData > &out ) const { 112 assert( trackIdentifiers ); 113 112 114 ++* stats().lookup_calls; 113 115 if ( ! idTable ) return; … … 434 436 const Declaration * deleteStmt ) { 435 437 ++* stats().add_calls; 438 if ( ! trackIdentifiers ) return; 436 439 const std::string &name = decl->name; 437 440 if ( name == "" ) return; -
src/SymTab/Indexer.h
r33e1c91 r929d925 31 31 class Indexer : public std::enable_shared_from_this<SymTab::Indexer> { 32 32 public: 33 explicit Indexer( );33 explicit Indexer( bool trackIdentifiers = true ); 34 34 virtual ~Indexer(); 35 35 … … 180 180 /// returns true if there exists a declaration with C linkage and the given name with a different mangled name 181 181 bool hasIncompatibleCDecl( const std::string & id, const std::string & mangleName ) const; 182 183 bool trackIdentifiers; 182 184 }; 183 185 } // namespace SymTab -
src/SymTab/Validate.cc
r33e1c91 r929d925 105 105 106 106 struct FixQualifiedTypes final : public WithIndexer { 107 FixQualifiedTypes() : WithIndexer(false) {} 107 108 Type * postmutate( QualifiedType * ); 108 109 }; … 174 175 }; 175 176 177 /// Does early resolution on the expressions that give enumeration constants their values 178 struct ResolveEnumInitializers final : public WithIndexer, public WithGuards, public WithVisitorRef<ResolveEnumInitializers>, public WithShortCircuiting { 179 ResolveEnumInitializers( const Indexer * indexer ); 180 void postvisit( EnumDecl * enumDecl ); 181 182 private: 183 const Indexer * local_indexer; 184 185 }; 186 176 187 /// Replaces array and function types in forall lists by appropriate pointer type and assigns each Object and Function declaration a unique ID. 177 188 struct ForallPointerDecay_old final { … 260 271 void previsit( StructInstType * inst ); 261 272 void previsit( UnionInstType * inst ); 273 }; 274 275 /// desugar declarations and uses of dimension parameters like [N], 276 /// from type-system managed values, to tunneling via ordinary types, 277 /// as char[-] in and sizeof(-) out 278 struct TranslateDimensionGenericParameters : public WithIndexer, public WithGuards { 279 static void translateDimensions( std::list< Declaration * > &translationUnit ); 280 TranslateDimensionGenericParameters(); 281 282 bool nextVisitedNodeIsChildOfSUIT = false; // SUIT = Struct- or Union-Inst Type 283 bool visitingChildOfSUIT = false; 284 void changeState_ChildOfSUIT( bool newVal ); 285 void premutate( StructInstType * sit ); 286 void premutate( UnionInstType * uit ); 287 void premutate( BaseSyntaxNode * node ); 288 289 TypeDecl * postmutate( TypeDecl * td ); 290 Expression * postmutate( DimensionExpr * de ); 291 Expression * postmutate( Expression * e ); 262 292 }; 263 293 … 307 337 PassVisitor<EnumAndPointerDecay_old> epc; 308 338 PassVisitor<LinkReferenceToTypes_old> lrt( nullptr ); 339 PassVisitor<ResolveEnumInitializers> rei( nullptr ); 309 340 PassVisitor<ForallPointerDecay_old> fpd; 310 341 PassVisitor<CompoundLiteral> compoundliteral; … 326 357 Stats::Heap::newPass("validate-B"); 327 358 Stats::Time::BlockGuard guard("validate-B"); 328 Stats::Time::TimeBlock("Link Reference To Types", [&]() { 329 acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions 330 }); 331 Stats::Time::TimeBlock("Fix Qualified Types", [&]() { 332 mutateAll( translationUnit, fixQual ); // must happen after LinkReferenceToTypes_old, because aggregate members are accessed 333 }); 334 Stats::Time::TimeBlock("Hoist Structs", [&]() { 335 HoistStruct::hoistStruct( translationUnit ); // must happen after EliminateTypedef, so that aggregate typedefs occur in the correct order 336 }); 337 Stats::Time::TimeBlock("Eliminate Typedefs", [&]() { 338 EliminateTypedef::eliminateTypedef( translationUnit ); // 339 }); 359 acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions 360 mutateAll( translationUnit, fixQual ); // must happen after LinkReferenceToTypes_old, because aggregate members are accessed 361 HoistStruct::hoistStruct( translationUnit ); 362 EliminateTypedef::eliminateTypedef( translationUnit ); 340 363 } 341 364 { 342 365 Stats::Heap::newPass("validate-C"); 343 366 Stats::Time::BlockGuard guard("validate-C"); 344 acceptAll( translationUnit, genericParams ); // check as early as possible - can't happen before LinkReferenceToTypes_old 345 
ReturnChecker::checkFunctionReturns( translationUnit ); 346 InitTweak::fixReturnStatements( translationUnit ); // must happen before autogen 367 Stats::Time::TimeBlock("Validate Generic Parameters", [&]() { 368 acceptAll( translationUnit, genericParams ); // check as early as possible - can't happen before LinkReferenceToTypes_old; observed failing when attempted before eliminateTypedef 369 }); 370 Stats::Time::TimeBlock("Translate Dimensions", [&]() { 371 TranslateDimensionGenericParameters::translateDimensions( translationUnit ); 372 }); 373 Stats::Time::TimeBlock("Resolve Enum Initializers", [&]() { 374 acceptAll( translationUnit, rei ); // must happen after translateDimensions because rei needs identifier lookup, which needs name mangling 375 }); 376 Stats::Time::TimeBlock("Check Function Returns", [&]() { 377 ReturnChecker::checkFunctionReturns( translationUnit ); 378 }); 379 Stats::Time::TimeBlock("Fix Return Statements", [&]() { 380 InitTweak::fixReturnStatements( translationUnit ); // must happen before autogen 381 }); 347 382 } 348 383 { … … 644 679 } 645 680 646 LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) {681 LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) : WithIndexer( false ) { 647 682 if ( other_indexer ) { 648 683 local_indexer = other_indexer; … … 664 699 } 665 700 666 void checkGenericParameters( ReferenceToType * inst ) {667 for ( Expression * param : inst->parameters ) {668 if ( ! dynamic_cast< TypeExpr * >( param ) ) {669 SemanticError( inst, "Expression parameters for generic types are currently unsupported: " );670 }671 }672 }673 674 701 void LinkReferenceToTypes_old::postvisit( StructInstType * structInst ) { 675 702 const StructDecl * st = local_indexer->lookupStruct( structInst->name ); … … 682 709 forwardStructs[ structInst->name ].push_back( structInst ); 683 710 } // if 684 checkGenericParameters( structInst );685 711 } 686 712 … … 695 721 forwardUnions[ unionInst->name ].push_back( unionInst ); 696 722 } // if 697 checkGenericParameters( unionInst );698 723 } 699 724 … … 807 832 forwardEnums.erase( fwds ); 808 833 } // if 809 810 for ( Declaration * member : enumDecl->members ) {811 ObjectDecl * field = strict_dynamic_cast<ObjectDecl *>( member );812 if ( field->init ) {813 // need to resolve enumerator initializers early so that other passes that determine if an expression is constexpr have the appropriate information.814 SingleInit * init = strict_dynamic_cast<SingleInit *>( field->init );815 ResolvExpr::findSingleExpression( init->value, new BasicType( Type::Qualifiers(), BasicType::SignedInt ), indexer );816 }817 }818 834 } // if 819 835 } … … 878 894 typeInst->set_isFtype( typeDecl->kind == TypeDecl::Ftype ); 879 895 } // if 896 } // if 897 } 898 899 ResolveEnumInitializers::ResolveEnumInitializers( const Indexer * other_indexer ) : WithIndexer( true ) { 900 if ( other_indexer ) { 901 local_indexer = other_indexer; 902 } else { 903 local_indexer = &indexer; 904 } // if 905 } 906 907 void ResolveEnumInitializers::postvisit( EnumDecl * enumDecl ) { 908 if ( enumDecl->body ) { 909 for ( Declaration * member : enumDecl->members ) { 910 ObjectDecl * field = strict_dynamic_cast<ObjectDecl *>( member ); 911 if ( field->init ) { 912 // need to resolve enumerator initializers early so that other passes that determine if an expression is constexpr have the appropriate information. 
913 SingleInit * init = strict_dynamic_cast<SingleInit *>( field->init ); 914 ResolvExpr::findSingleExpression( init->value, new BasicType( Type::Qualifiers(), BasicType::SignedInt ), indexer ); 915 } 916 } 880 917 } // if 881 918 } … 1152 1189 GuardScope( typedeclNames ); 1153 1191 mutateAll( aggr->parameters, * visitor ); 1191 mutateAll( aggr->attributes, * visitor ); 1154 1192 1155 1193 // unroll mutateAll for aggr->members so that implicit typedefs for nested types are added to the aggregate body. … 1220 1258 } 1221 1259 } 1222 1260 } 1261 1262 // Test for special name on a generic parameter. Special treatment for the 1263 // special name is a bootstrapping hack. In most cases, the worlds of T's 1264 // and of N's don't overlap (normal treatment). The foundations in 1265 // array.hfa use tagging for both types and dimensions. Tagging treats 1266 // its subject parameter even more opaquely than T&, which assumes it is 1267 // possible to have a pointer/reference to such an object. Tagging only 1268 // seeks to identify the type-system resident at compile time. Both N's 1269 // and T's can make tags. The tag definition uses the special name, which 1270 // is treated as "an N or a T." This feature is not intended to be used 1271 // outside of the definition and immediate uses of a tag. 1272 static inline bool isReservedTysysIdOnlyName( const std::string & name ) { 1273 // name's prefix was __CFA_tysys_id_only, before it got wrapped in __..._generic 1274 int foundAt = name.find("__CFA_tysys_id_only"); 1275 if (foundAt == 0) return true; 1276 if (foundAt == 2 && name[0] == '_' && name[1] == '_') return true; 1277 return false; 1222 1278 } 1223 1279 … 1238 1294 TypeSubstitution sub; 1239 1295 auto paramIter = params->begin(); 1240 for ( size_t i = 0; paramIter != params->end(); ++paramIter, ++i ) { 1241 if ( i < args.size() ) { 1242 TypeExpr * expr = strict_dynamic_cast< TypeExpr * >( * std::next( args.begin(), i ) ); 1243 sub.add( (* paramIter)->get_name(), expr->get_type()->clone() ); 1244 } else if ( i == args.size() ) { 1296 auto argIter = args.begin(); 1297 for ( ; paramIter != params->end(); ++paramIter, ++argIter ) { 1298 if ( argIter != args.end() ) { 1299 TypeExpr * expr = dynamic_cast< TypeExpr * >( * argIter ); 1300 if ( expr ) { 1301 sub.add( (* paramIter)->get_name(), expr->get_type()->clone() ); 1302 } 1303 } else { 1245 1304 Type * defaultType = (* paramIter)->get_init(); 1246 1305 if ( defaultType ) { 1247 1306 args.push_back( new TypeExpr( defaultType->clone() ) ); 1248 1307 sub.add( (* paramIter)->get_name(), defaultType->clone() ); 1308 argIter = std::prev(args.end()); 1309 } else { 1310 SemanticError( inst, "Too few type arguments in generic type " ); 1249 1311 } 1250 1312 } 1313 assert( argIter != args.end() ); 1314 bool typeParamDeclared = (*paramIter)->kind != TypeDecl::Kind::Dimension; 1315 bool typeArgGiven; 1316 if ( isReservedTysysIdOnlyName( (*paramIter)->name ) ) { 1317 // coerce a match when declaration is reserved name, which means "either" 1318 typeArgGiven = typeParamDeclared; 1319 } else { 1320 typeArgGiven = dynamic_cast< TypeExpr * >( * argIter ); 1321 } 1322 if ( ! typeParamDeclared && typeArgGiven ) SemanticError( inst, "Type argument given for value parameter: " ); 1323 if ( typeParamDeclared && !
typeArgGiven ) SemanticError( inst, "Expression argument given for type parameter: " ); 1251 1324 } 1252 1325 1253 1326 sub.apply( inst ); 1254 if ( args.size() < params->size() ) SemanticError( inst, "Too few type arguments in generic type " );1255 1327 if ( args.size() > params->size() ) SemanticError( inst, "Too many type arguments in generic type " ); 1256 1328 } … 1263 1335 void ValidateGenericParameters::previsit( UnionInstType * inst ) { 1264 1336 validateGeneric( inst ); 1337 } 1338 1339 void TranslateDimensionGenericParameters::translateDimensions( std::list< Declaration * > &translationUnit ) { 1340 PassVisitor<TranslateDimensionGenericParameters> translator; 1341 mutateAll( translationUnit, translator ); 1342 } 1343 1344 TranslateDimensionGenericParameters::TranslateDimensionGenericParameters() : WithIndexer( false ) {} 1345 1346 // Declaration of type variable: forall( [N] ) -> forall( N & | sized( N ) ) 1347 TypeDecl * TranslateDimensionGenericParameters::postmutate( TypeDecl * td ) { 1348 if ( td->kind == TypeDecl::Dimension ) { 1349 td->kind = TypeDecl::Dtype; 1350 if ( ! isReservedTysysIdOnlyName( td->name ) ) { 1351 td->sized = true; 1352 } 1353 } 1354 return td; 1355 } 1356 1357 // Situational awareness: 1358 // array( float, [[currentExpr]] ) has visitingChildOfSUIT == true 1359 // array( float, [[currentExpr]] - 1 ) has visitingChildOfSUIT == false 1360 // size_t x = [[currentExpr]] has visitingChildOfSUIT == false 1361 void TranslateDimensionGenericParameters::changeState_ChildOfSUIT( bool newVal ) { 1362 GuardValue( nextVisitedNodeIsChildOfSUIT ); 1363 GuardValue( visitingChildOfSUIT ); 1364 visitingChildOfSUIT = nextVisitedNodeIsChildOfSUIT; 1365 nextVisitedNodeIsChildOfSUIT = newVal; 1366 } 1367 void TranslateDimensionGenericParameters::premutate( StructInstType * sit ) { 1368 (void) sit; 1369 changeState_ChildOfSUIT(true); 1370 } 1371 void TranslateDimensionGenericParameters::premutate( UnionInstType * uit ) { 1372 (void) uit; 1373 changeState_ChildOfSUIT(true); 1374 } 1375 void TranslateDimensionGenericParameters::premutate( BaseSyntaxNode * node ) { 1376 (void) node; 1377 changeState_ChildOfSUIT(false); 1378 } 1379 1380 // Passing values as dimension arguments: array( float, 7 ) -> array( float, char[ 7 ] ) 1381 // Consuming dimension parameters: size_t x = N - 1 ; -> size_t x = sizeof(N) - 1 ; 1382 // Intertwined reality: array( float, N ) -> array( float, N ) 1383 // array( float, N - 1 ) -> array( float, char[ sizeof(N) - 1 ] ) 1384 // Intertwined case 1 is not just an optimization. 1385 // Avoiding char[sizeof(-)] is necessary to enable the call of f to bind the value of N, in: 1386 // forall([N]) void f( array(float, N) & ); 1387 // array(float, 7) a; 1388 // f(a); 1389 1390 Expression * TranslateDimensionGenericParameters::postmutate( DimensionExpr * de ) { 1391 // Expression de is an occurrence of N in LHS of above examples. 1392 // Look up the name that de references. 1393 // If we are in a struct body, then this reference can be to an entry of the struct's forall list. 1394 // Whether or not we are in a struct body, this reference can be to an entry of a containing function's forall list. 1395 // If we are in a struct body, then the struct's forall declarations are innermost (functions don't occur in structs). 1396 // Thus, a potential struct's declaration is highest priority. 1397 // A struct's forall declarations are already renamed with _generic_ suffix. Try that name variant first. 
1398 1399 std::string useName = "__" + de->name + "_generic_"; 1400 TypeDecl * namedParamDecl = const_cast<TypeDecl *>( strict_dynamic_cast<const TypeDecl *, nullptr >( indexer.lookupType( useName ) ) ); 1401 1402 if ( ! namedParamDecl ) { 1403 useName = de->name; 1404 namedParamDecl = const_cast<TypeDecl *>( strict_dynamic_cast<const TypeDecl *, nullptr >( indexer.lookupType( useName ) ) ); 1405 } 1406 1407 // Expect to find it always. A misspelled name would have been parsed as an identifier. 1408 assert( namedParamDecl && "Type-system-managed value name not found in symbol table" ); 1409 1410 delete de; 1411 1412 TypeInstType * refToDecl = new TypeInstType( 0, useName, namedParamDecl ); 1413 1414 if ( visitingChildOfSUIT ) { 1415 // As in postmutate( Expression * ), topmost expression needs a TypeExpr wrapper 1416 // But avoid ArrayType-Sizeof 1417 return new TypeExpr( refToDecl ); 1418 } else { 1419 // the N occurrence is being used directly as a runtime value, 1420 // if we are in a type instantiation, then the N is within a bigger value computation 1421 return new SizeofExpr( refToDecl ); 1422 } 1423 } 1424 1425 Expression * TranslateDimensionGenericParameters::postmutate( Expression * e ) { 1426 if ( visitingChildOfSUIT ) { 1427 // e is an expression used as an argument to instantiate a type 1428 if (! dynamic_cast< TypeExpr * >( e ) ) { 1429 // e is a value expression 1430 // but not a DimensionExpr, which has a distinct postmutate 1431 Type * typeExprContent = new ArrayType( 0, new BasicType( 0, BasicType::Char ), e, true, false ); 1432 TypeExpr * result = new TypeExpr( typeExprContent ); 1433 return result; 1434 } 1435 } 1436 return e; 1265 1437 } 1266 1438 -
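
Assembled from the comments in the hunk above, the whole translation can be seen on one small example (a conceptual rewrite, not literal compiler output):

// Source, using the new dimension parameters:
forall( [N] ) void f( array( float, N ) & );
array( float, 7 ) a;
// f( a );   // must bind N to the dimension 7

// What TranslateDimensionGenericParameters produces:
// forall( N & | sized( N ) ) void f( array( float, N ) & );  // [N] rewritten
// the argument 7 in a type position becomes the type char[ 7 ];
// a value use such as size_t x = N - 1; becomes size_t x = sizeof( N ) - 1;
// a bare N as a type argument stays N (not char[ sizeof( N ) ]),
// which is exactly what lets the call f( a ) bind N.
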
src/SynTree/Declaration.h
r33e1c91 r929d925 201 201 typedef NamedTypeDecl Parent; 202 202 public: 203 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };203 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, Dimension, NUMBER_OF_KINDS }; 204 204 205 205 Kind kind; -
src/SynTree/Expression.h
r33e1c91 r929d925 587 587 }; 588 588 589 /// DimensionExpr represents a type-system provided value used in an expression ( forall([N]) ... N + 1 ) 590 class DimensionExpr : public Expression { 591 public: 592 std::string name; 593 594 DimensionExpr( std::string name ); 595 DimensionExpr( const DimensionExpr & other ); 596 virtual ~DimensionExpr(); 597 598 const std::string & get_name() const { return name; } 599 void set_name( std::string newValue ) { name = newValue; } 600 601 virtual DimensionExpr * clone() const override { return new DimensionExpr( * this ); } 602 virtual void accept( Visitor & v ) override { v.visit( this ); } 603 virtual void accept( Visitor & v ) const override { v.visit( this ); } 604 virtual Expression * acceptMutator( Mutator & m ) override { return m.mutate( this ); } 605 virtual void print( std::ostream & os, Indenter indent = {} ) const override; 606 }; 607 589 608 /// AsmExpr represents a GCC 'asm constraint operand' used in an asm statement: [output] "=f" (result) 590 609 class AsmExpr : public Expression { -
src/SynTree/Mutator.h
r33e1c91 r929d925 80 80 virtual Expression * mutate( CommaExpr * commaExpr ) = 0; 81 81 virtual Expression * mutate( TypeExpr * typeExpr ) = 0; 82 virtual Expression * mutate( DimensionExpr * dimensionExpr ) = 0; 82 83 virtual Expression * mutate( AsmExpr * asmExpr ) = 0; 83 84 virtual Expression * mutate( ImplicitCopyCtorExpr * impCpCtorExpr ) = 0; -
src/SynTree/SynTree.h
r33e1c91 r929d925 85 85 class CommaExpr; 86 86 class TypeExpr; 87 class DimensionExpr; 87 88 class AsmExpr; 88 89 class ImplicitCopyCtorExpr; -
src/SynTree/TypeDecl.cc
r33e1c91 r929d925 33 33 34 34 const char * TypeDecl::typeString() const { 35 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" };35 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized length value" }; 36 36 static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." ); 37 37 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); -
src/SynTree/TypeExpr.cc
r33e1c91 r929d925 35 35 } 36 36 37 DimensionExpr::DimensionExpr( std::string name ) : Expression(), name(name) { 38 assertf(name != "0", "Zero is not a valid name"); 39 assertf(name != "1", "One is not a valid name"); 40 } 41 42 DimensionExpr::DimensionExpr( const DimensionExpr & other ) : Expression( other ), name( other.name ) { 43 } 44 45 DimensionExpr::~DimensionExpr() {} 46 47 void DimensionExpr::print( std::ostream & os, Indenter indent ) const { 48 os << "Type-Sys Value: " << get_name(); 49 Expression::print( os, indent ); 50 } 37 51 // Local Variables: // 38 52 // tab-width: 4 // -
src/SynTree/Visitor.h
r33e1c91 r929d925 135 135 virtual void visit( TypeExpr * node ) { visit( const_cast<const TypeExpr *>(node) ); } 136 136 virtual void visit( const TypeExpr * typeExpr ) = 0; 137 virtual void visit( DimensionExpr * node ) { visit( const_cast<const DimensionExpr *>(node) ); } 138 virtual void visit( const DimensionExpr * typeExpr ) = 0; 137 139 virtual void visit( AsmExpr * node ) { visit( const_cast<const AsmExpr *>(node) ); } 138 140 virtual void visit( const AsmExpr * asmExpr ) = 0; -
tests/.expect/typedefRedef-ERR1.txt
r33e1c91 r929d925 1 typedefRedef.cfa: 69:25: warning: Compiled1 typedefRedef.cfa:75:25: warning: Compiled 2 2 typedefRedef.cfa:4:1 error: Cannot redefine typedef: Foo 3 typedefRedef.cfa: 59:1 error: Cannot redefine typedef: ARR3 typedefRedef.cfa:65:1 error: Cannot redefine typedef: ARR -
tests/.expect/typedefRedef.txt
r33e1c91 r929d925 1 typedefRedef.cfa: 69:25: warning: Compiled1 typedefRedef.cfa:75:25: warning: Compiled -
tests/array-container/array-basic.cfa
r33e1c91 r929d925 61 61 forall( [Nw], [Nx], [Ny], [Nz] ) 62 62 void fillHelloData( array( float, Nw, Nx, Ny, Nz ) & wxyz ) { 63 for (w; z(Nw))64 for (x; z(Nx))65 for (y; z(Ny))66 for (z; z(Nz))63 for (w; Nw) 64 for (x; Nx) 65 for (y; Ny) 66 for (z; Nz) 67 67 wxyz[w][x][y][z] = getMagicNumber(w, x, y, z); 68 68 } 69 69 70 forall( [ Zn]70 forall( [N] 71 71 , S & | sized(S) 72 72 ) 73 float total1d_low( arpk( Zn, S, float, float ) & a ) {73 float total1d_low( arpk(N, S, float, float ) & a ) { 74 74 float total = 0.0f; 75 for (i; z(Zn))75 for (i; N) 76 76 total += a[i]; 77 77 return total; … … 98 98 99 99 expect = 0; 100 for (i; z(Nw))100 for (i; Nw) 101 101 expect += getMagicNumber( i, slice_ix, slice_ix, slice_ix ); 102 102 printf("expect Ws = %f\n", expect); … … 117 117 118 118 expect = 0; 119 for (i; z(Nx))119 for (i; Nx) 120 120 expect += getMagicNumber( slice_ix, i, slice_ix, slice_ix ); 121 121 printf("expect Xs = %f\n", expect); -
tests/array-container/array-md-sbscr-cases.cfa
r33e1c91 r929d925 20 20 forall( [Nw], [Nx], [Ny], [Nz] ) 21 21 void fillHelloData( array( float, Nw, Nx, Ny, Nz ) & wxyz ) { 22 for (w; z(Nw))23 for (x; z(Nx))24 for (y; z(Ny))25 for (z; z(Nz))22 for (w; Nw) 23 for (x; Nx) 24 for (y; Ny) 25 for (z; Nz) 26 26 wxyz[w][x][y][z] = getMagicNumber(w, x, y, z); 27 27 } … … 246 246 assert(( wxyz[[2, 3, 4, 5]] == valExpected )); 247 247 248 for ( i; z(Nw)) {248 for ( i; Nw ) { 249 249 assert(( wxyz[[ i, 3, 4, 5 ]] == getMagicNumber(i, 3, 4, 5) )); 250 250 } 251 251 252 for ( i; z(Nx)) {252 for ( i; Nx ) { 253 253 assert(( wxyz[[ 2, i, 4, 5 ]] == getMagicNumber(2, i, 4, 5) )); 254 254 } 255 255 256 for ( i; z(Ny)) {256 for ( i; Ny ) { 257 257 assert(( wxyz[[ 2, 3, i, 5 ]] == getMagicNumber(2, 3, i, 5) )); 258 258 } 259 259 260 for ( i; z(Nz)) {260 for ( i; Nz ) { 261 261 assert(( wxyz[[ 2, 3, 4, i ]] == getMagicNumber(2, 3, 4, i) )); 262 262 } 263 263 264 for ( i; z(Nw)) {264 for ( i; Nw ) { 265 265 assert(( wxyz[[ i, all, 4, 5 ]][3] == getMagicNumber(i, 3, 4, 5) )); 266 266 } 267 267 268 for ( i; z(Nw)) {268 for ( i; Nw ) { 269 269 assert(( wxyz[[ all, 3, 4, 5 ]][i] == getMagicNumber(i, 3, 4, 5) )); 270 270 } -
tests/device/cpu.cfa
r33e1c91 r929d925 17 17 #include <fstream.hfa> 18 18 #include <device/cpu.hfa> 19 #include <stdlib.hfa> 20 21 #include <errno.h> 22 #include <stdio.h> 23 #include <string.h> 24 #include <unistd.h> 25 19 26 extern "C" { 27 #include <dirent.h> 28 #include <sys/types.h> 29 #include <sys/stat.h> 20 30 #include <sys/sysinfo.h> 31 #include <fcntl.h> 32 } 33 34 // go through a directory calling fn on each file 35 static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) { 36 // open the directory 37 DIR *dir = opendir(path); 38 if(dir == 0p) { return ENOTDIR; } 39 40 // call fn for each 41 struct dirent * ent; 42 while ((ent = readdir(dir)) != 0p) { 43 fn( ent ); 44 } 45 46 // no longer need this 47 closedir(dir); 48 return 0; 49 } 50 51 // count the number of directories with the specified prefix 52 // the directories counted have the form '[prefix]N' where prefix is the parameter 53 // and N is a base-10 integer. 54 static int count_prefix_dirs(const char * path, const char * prefix) { 55 // read the directory and find the cpu count 56 // and make sure everything is as expected 57 int max = -1; 58 int count = 0; 59 void lambda(struct dirent * ent) { 60 // we are looking for prefixX, where X is a number 61 // check that it starts with the prefix 62 char * s = strstr(ent->d_name, prefix); 63 if(s == 0p) { return; } 64 if(s != ent->d_name) { return; } 65 66 // check that the next part is a number 67 s += strlen(prefix); 68 char * end; 69 long int val = strtol(s, &end, 10); 70 if(*end != '\0' || val < 0) { return; } 71 72 // check that it's a directory 73 if(ent->d_type != DT_DIR) { return; } 74 75 // it's a match! 76 max = max(val, max); 77 count++; 78 } 79 iterate_dir(path, lambda); 80 81 /* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max); 82 83 return count; 84 } 85 86 // Count number of cache *indexes* in the system 87 // cache indexes are distinct from cache level as Data or Instruction cache 88 // can share a level but not an index 89 // PITFALL: assumes all cpus have the same indexes as cpu0 90 static int count_cache_indexes(void) { 91 return count_prefix_dirs("/sys/devices/system/cpu/cpu0/cache", "index"); 92 } 93 94 // read information about a specific cache index/cpu file into the output buffer 95 static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) { 96 // Pick the file we want and read it 97 char buf[128]; 98 /* paranoid */ __attribute__((unused)) int len = 99 snprintf(buf, 128, "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file); 100 /* paranoid */ verifyf(len > 0, "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx); 101 102 int fd = open(buf, O_RDONLY); 103 /* paranoid */ verifyf(fd > 0, "Could not open file '%s'", buf); 104 105 ssize_t r = read(fd, out, out_len); 106 /* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf); 107 108 /* paranoid */ __attribute__((unused)) int ret = 109 close(fd); 110 /* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf); 111 112 out[r-1] = '\0'; 113 return r-1; 114 } 115 116 unsigned find_idx() { 117 int idxs = count_cache_indexes(); 118 119 unsigned found_level = 0; 120 unsigned found = -1u; 121 for(i; idxs) { 122 unsigned idx = idxs - 1 - i; 123 char buf[32]; 124 125 // Level is the cache level: higher means bigger and slower 126 read_cpuidxinfo_into(0, idx, "level", buf, 32); 127 char * end; 128 unsigned long level = strtoul(buf, &end, 10); 129 
/* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, that doesn't sound right", 0); 130 /* paranoid */ verify(*end == '\0'); 131 132 if(found_level < level) { 133 found_level = level; 134 found = idx; 135 } 136 } 137 138 /* paranoid */ verify(found != -1u); 139 return found; 21 140 } 22 141 23 142 int main() { 143 //----------------------------------------------------------------------- 24 144 int ret1 = get_nprocs(); 25 145 int ret2 = cpu_info.hthrd_count; … 31 151 } 32 152 153 //----------------------------------------------------------------------- 154 // Make sure no one has the same self 155 for(ime; cpu_info.hthrd_count) { 156 unsigned me = cpu_info.llc_map[ime].self; 157 { 158 unsigned s = cpu_info.llc_map[ime].start; 159 unsigned e = s + cpu_info.llc_map[ime].count; 160 if(me < s || me >= e) { 161 sout | "CPU" | ime | "outside of its own map: " | s | "<=" | me | "<" | e; 162 } 163 } 164 165 166 for(ithem; cpu_info.hthrd_count) { 167 if(ime == ithem) continue; 168 169 unsigned them = cpu_info.llc_map[ithem].self; 170 if(me == them) { 171 sout | "CPU" | ime | "has conflicting self id with" | ithem | "(" | me | ")"; 172 } 173 } 174 } 175 176 177 //----------------------------------------------------------------------- 178 unsigned idx = find_idx(); 179 // For all procs check mapping is consistent 180 for(cpu_me; cpu_info.hthrd_count) { 181 char buf_me[32]; 182 size_t len_me = read_cpuidxinfo_into(cpu_me, idx, "shared_cpu_list", buf_me, 32); 183 for(cpu_them; cpu_info.hthrd_count) { 184 if(cpu_me == cpu_them) continue; 185 char buf_them[32]; 186 size_t len_them = read_cpuidxinfo_into(cpu_them, idx, "shared_cpu_list", buf_them, 32); 187 188 bool match_file = len_them == len_me && 0 == strncmp(buf_them, buf_me, len_me); 189 bool match_info = cpu_info.llc_map[cpu_me].start == cpu_info.llc_map[cpu_them].start && cpu_info.llc_map[cpu_me].count == cpu_info.llc_map[cpu_them].count; 190 191 if(match_file != match_info) { 192 sout | "CPU" | cpu_me | "and" | cpu_them | "have inconsistent file and cpu_info"; 193 sout | cpu_me | ": <" | cpu_info.llc_map[cpu_me ].start | "," | cpu_info.llc_map[cpu_me ].count | "> '" | buf_me | "'"; 194 sout | cpu_them | ": <" | cpu_info.llc_map[cpu_them].start | "," | cpu_info.llc_map[cpu_them].count | "> '" | buf_them | "'"; 195 } 196 } 197 } 33 198 } -
tests/literals.cfa
r33e1c91 r929d925 
10 10 // Created On : Sat Sep 9 16:34:38 2017
11 11 // Last Modified By : Peter A. Buhr
12 // Last Modified On : Sat Aug 29 10:57:56 2020
13 // Update Count : 226
12 // Last Modified On : Sat Jun 19 15:47:49 2021
13 // Update Count : 237
14 14 //
15 15 
… … 
63 63 -0X0123456789ABCDEF; -0X0123456789ABCDEFu; -0X0123456789ABCDEFl; -0X0123456789ABCDEFll; -0X0123456789ABCDEFul; -0X0123456789ABCDEFlu; -0X0123456789ABCDEFull; -0X0123456789ABCDEFllu;
64 64 
65 // floating literals
66 
67 0123456789.; 0123456789.f; 0123456789.d; 0123456789.l; 0123456789.F; 0123456789.D; 0123456789.L;
68 +0123456789.; +0123456789.f; +0123456789.d; +0123456789.l; +0123456789.F; +0123456789.D; +0123456789.L;
69 -0123456789.; -0123456789.f; -0123456789.d; -0123456789.l; -0123456789.F; -0123456789.D; -0123456789.L;
70 
71 0123456789.e09; 0123456789.e09f; 0123456789.e09d; 0123456789.e09l; 0123456789.e09F; 0123456789.e09D; 0123456789.e09L;
72 +0123456789.e09; +0123456789.e09f; +0123456789.e09d; +0123456789.e09l; +0123456789.e09F; +0123456789.e09D; +0123456789.e09L;
73 -0123456789.e09; -0123456789.e09f; -0123456789.e09d; -0123456789.e09l; -0123456789.e09F; -0123456789.e09D; -0123456789.e09L;
74 
75 0123456789.e+09; 0123456789.e+09f; 0123456789.e+09d; 0123456789.e+09l; 0123456789.e+09F; 0123456789.e+09D; 0123456789.e+09L;
76 +0123456789.e+09; +0123456789.e+09f; +0123456789.e+09d; +0123456789.e+09l; +0123456789.e+09F; +0123456789.e+09D; +0123456789.e+09L;
77 -0123456789.e+09; -0123456789.e+09f; -0123456789.e+09d; -0123456789.e+09l; -0123456789.e+09F; -0123456789.e+09D; -0123456789.e+09L;
78 
79 0123456789.e-09; 0123456789.e-09f; 0123456789.e-09d; 0123456789.e-09l; 0123456789.e-09F; 0123456789.e-09D; 0123456789.e-09L;
80 +0123456789.e-09; +0123456789.e-09f; +0123456789.e-09d; +0123456789.e-09l; +0123456789.e-09F; +0123456789.e-09D; +0123456789.e-09L;
81 -0123456789.e-09; -0123456789.e-09f; -0123456789.e-09d; -0123456789.e-09l; -0123456789.e-09F; -0123456789.e-09D; -0123456789.e-09L;
82 
83 .0123456789; .0123456789f; .0123456789d; .0123456789l; .0123456789F; .0123456789D; .0123456789L;
84 +.0123456789; +.0123456789f; +.0123456789d; +.0123456789l; +.0123456789F; +.0123456789D; +.0123456789L;
85 -.0123456789; -.0123456789f; -.0123456789d; -.0123456789l; -.0123456789F; -.0123456789D; -.0123456789L;
86 
87 .0123456789e09; .0123456789e09f; .0123456789e09d; .0123456789e09l; .0123456789e09F; .0123456789e09D; .0123456789e09L;
88 +.0123456789e09; +.0123456789e09f; +.0123456789e09d; +.0123456789e09l; +.0123456789e09F; +.0123456789e09D; +.0123456789e09L;
89 -.0123456789e09; -.0123456789e09f; -.0123456789e09d; -.0123456789e09l; -.0123456789e09F; -.0123456789e09D; -.0123456789e09L;
90 
91 .0123456789E+09; .0123456789E+09f; .0123456789E+09d; .0123456789E+09l; .0123456789E+09F; .0123456789E+09D; .0123456789E+09L;
92 +.0123456789E+09; +.0123456789E+09f; +.0123456789E+09d; +.0123456789E+09l; +.0123456789E+09F; +.0123456789E+09D; +.0123456789E+09L;
93 -.0123456789E+09; -.0123456789E+09f; -.0123456789E+09d; -.0123456789E+09l; -.0123456789E+09F; -.0123456789E+09D; -.0123456789E+09L;
94 
95 .0123456789E-09; .0123456789E-09f; .0123456789E-09d; .0123456789E-09l; .0123456789E-09F; .0123456789E-09D; .0123456789E-09L;
96 -.0123456789E-09; -.0123456789E-09f; -.0123456789E-09d; -.0123456789E-09l; -.0123456789E-09F; -.0123456789E-09D; -.0123456789E-09L;
97 -.0123456789E-09; -.0123456789E-09f; -.0123456789E-09d; -.0123456789E-09l; -.0123456789E-09F; -.0123456789E-09D; -.0123456789E-09L;
98 
99 0123456789.0123456789; 0123456789.0123456789f; 
0123456789.0123456789d; 0123456789.0123456789l; 0123456789.0123456789F; 0123456789.0123456789D; 0123456789.0123456789L; 100 +0123456789.0123456789; +0123456789.0123456789f; +0123456789.0123456789d; +0123456789.0123456789l; +0123456789.0123456789F; +0123456789.0123456789D; +0123456789.0123456789L; 101 -0123456789.0123456789; -0123456789.0123456789f; -0123456789.0123456789d; -0123456789.0123456789l; -0123456789.0123456789F; -0123456789.0123456789D; -0123456789.0123456789L; 102 103 0123456789.0123456789E09; 0123456789.0123456789E09f; 0123456789.0123456789E09d; 0123456789.0123456789E09l; 0123456789.0123456789E09F; 0123456789.0123456789E09D; 0123456789.0123456789E09L; 104 +0123456789.0123456789E09; +0123456789.0123456789E09f; +0123456789.0123456789E09d; +0123456789.0123456789E09l; +0123456789.0123456789E09F; +0123456789.0123456789E09D; +0123456789.0123456789E09L; 105 -0123456789.0123456789E09; -0123456789.0123456789E09f; -0123456789.0123456789E09d; -0123456789.0123456789E09l; -0123456789.0123456789E09F; -0123456789.0123456789E09D; -0123456789.0123456789E09L; 106 107 0123456789.0123456789E+09; 0123456789.0123456789E+09f; 0123456789.0123456789E+09d; 0123456789.0123456789E+09l; 0123456789.0123456789E+09F; 0123456789.0123456789E+09D; 0123456789.0123456789E+09L; 108 +0123456789.0123456789E+09; +0123456789.0123456789E+09f; +0123456789.0123456789E+09d; +0123456789.0123456789E+09l; +0123456789.0123456789E+09F; +0123456789.0123456789E+09D; +0123456789.0123456789E+09L; 109 -0123456789.0123456789E+09; -0123456789.0123456789E+09f; -0123456789.0123456789E+09d; -0123456789.0123456789E+09l; -0123456789.0123456789E+09F; -0123456789.0123456789E+09D; -0123456789.0123456789E+09L; 110 111 0123456789.0123456789E-09; 0123456789.0123456789E-09f; 0123456789.0123456789E-09d; 0123456789.0123456789E-09l; 0123456789.0123456789E-09F; 0123456789.0123456789E-09D; 0123456789.0123456789E-09L; 112 +0123456789.0123456789E-09; +0123456789.0123456789E-09f; +0123456789.0123456789E-09d; +0123456789.0123456789E-09l; +0123456789.0123456789E-09F; +0123456789.0123456789E-09D; +0123456789.0123456789E-09L; 113 -0123456789.0123456789E-09; -0123456789.0123456789E-09f; -0123456789.0123456789E-09d; -0123456789.0123456789E-09l; -0123456789.0123456789E-09F; -0123456789.0123456789E-09D; -0123456789.0123456789E-09L; 114 65 115 // decimal floating literals 66 116 67 0123456789.; 0123456789.f; 0123456789.l; 0123456789.F; 0123456789.L; 0123456789.DL; 68 +0123456789.; +0123456789.f; +0123456789.l; +0123456789.F; +0123456789.L; +0123456789.DL; 69 -0123456789.; -0123456789.f; -0123456789.l; -0123456789.F; -0123456789.L; -0123456789.DL; 70 71 0123456789.e09; 0123456789.e09f; 0123456789.e09l; 0123456789.e09F; 0123456789.e09L; 0123456789.e09DL; 72 +0123456789.e09; +0123456789.e09f; +0123456789.e09l; +0123456789.e09F; +0123456789.e09L; +0123456789.e09DL; 73 -0123456789.e09; -0123456789.e09f; -0123456789.e09l; -0123456789.e09F; -0123456789.e09L; -0123456789.e09DL; 74 75 0123456789.e+09; 0123456789.e+09f; 0123456789.e+09l; 0123456789.e+09F; 0123456789.e+09L; 0123456789.e+09DL; 76 +0123456789.e+09; +0123456789.e+09f; +0123456789.e+09l; +0123456789.e+09F; +0123456789.e+09L; +0123456789.e+09DL; 77 -0123456789.e+09; -0123456789.e+09f; -0123456789.e+09l; -0123456789.e+09F; -0123456789.e+09L; -0123456789.e+09DL; 78 79 0123456789.e-09; 0123456789.e-09f; 0123456789.e-09l; 0123456789.e-09F; 0123456789.e-09L; 0123456789.e-09DL; 80 +0123456789.e-09; +0123456789.e-09f; +0123456789.e-09l; +0123456789.e-09F; +0123456789.e-09L; +0123456789.e-09DL; 81 -0123456789.e-09; 
-0123456789.e-09f; -0123456789.e-09l; -0123456789.e-09F; -0123456789.e-09L; -0123456789.e-09DL; 82 83 .0123456789; .0123456789f; .0123456789l; .0123456789F; .0123456789L; .0123456789DL; 84 +.0123456789; +.0123456789f; +.0123456789l; +.0123456789F; +.0123456789L; +.0123456789DL; 85 -.0123456789; -.0123456789f; -.0123456789l; -.0123456789F; -.0123456789L; -.0123456789DL; 86 87 .0123456789e09; .0123456789e09f; .0123456789e09l; .0123456789e09F; .0123456789e09L; .0123456789e09DL; 88 +.0123456789e09; +.0123456789e09f; +.0123456789e09l; +.0123456789e09F; +.0123456789e09L; +.0123456789e09DL; 89 -.0123456789e09; -.0123456789e09f; -.0123456789e09l; -.0123456789e09F; -.0123456789e09L; -.0123456789e09DL; 90 91 .0123456789E+09; .0123456789E+09f; .0123456789E+09l; .0123456789E+09F; .0123456789E+09L; .0123456789E+09DL; 92 +.0123456789E+09; +.0123456789E+09f; +.0123456789E+09l; +.0123456789E+09F; +.0123456789E+09L; +.0123456789E+09DL; 93 -.0123456789E+09; -.0123456789E+09f; -.0123456789E+09l; -.0123456789E+09F; -.0123456789E+09L; -.0123456789E+09DL; 94 95 .0123456789E-09; .0123456789E-09f; .0123456789E-09l; .0123456789E-09F; .0123456789E-09L; .0123456789E-09DL; 96 -.0123456789E-09; -.0123456789E-09f; -.0123456789E-09l; -.0123456789E-09F; -.0123456789E-09L; -.0123456789E-09DL; 97 -.0123456789E-09; -.0123456789E-09f; -.0123456789E-09l; -.0123456789E-09F; -.0123456789E-09L; -.0123456789E-09DL; 98 99 0123456789.0123456789; 0123456789.0123456789f; 0123456789.0123456789l; 0123456789.0123456789F; 0123456789.0123456789L; 0123456789.0123456789DL; 100 +0123456789.0123456789; +0123456789.0123456789f; +0123456789.0123456789l; +0123456789.0123456789F; +0123456789.0123456789L; +0123456789.0123456789DL; 101 -0123456789.0123456789; -0123456789.0123456789f; -0123456789.0123456789l; -0123456789.0123456789F; -0123456789.0123456789L; -0123456789.0123456789DL; 102 103 0123456789.0123456789E09; 0123456789.0123456789E09f; 0123456789.0123456789E09l; 0123456789.0123456789E09F; 0123456789.0123456789E09L; 0123456789.0123456789E09DL; 104 +0123456789.0123456789E09; +0123456789.0123456789E09f; +0123456789.0123456789E09l; +0123456789.0123456789E09F; +0123456789.0123456789E09L; +0123456789.0123456789E09DL; 105 -0123456789.0123456789E09; -0123456789.0123456789E09f; -0123456789.0123456789E09l; -0123456789.0123456789E09F; -0123456789.0123456789E09L; -0123456789.0123456789E09DL; 106 107 0123456789.0123456789E+09; 0123456789.0123456789E+09f; 0123456789.0123456789E+09l; 0123456789.0123456789E+09F; 0123456789.0123456789E+09L; 0123456789.0123456789E+09DL; 108 +0123456789.0123456789E+09; +0123456789.0123456789E+09f; +0123456789.0123456789E+09l; +0123456789.0123456789E+09F; +0123456789.0123456789E+09L; +0123456789.0123456789E+09DL; 109 -0123456789.0123456789E+09; -0123456789.0123456789E+09f; -0123456789.0123456789E+09l; -0123456789.0123456789E+09F; -0123456789.0123456789E+09L; -0123456789.0123456789E+09DL; 110 111 0123456789.0123456789E-09; 0123456789.0123456789E-09f; 0123456789.0123456789E-09l; 0123456789.0123456789E-09F; 0123456789.0123456789E-09L; 0123456789.0123456789E-09DL; 112 +0123456789.0123456789E-09; +0123456789.0123456789E-09f; +0123456789.0123456789E-09l; +0123456789.0123456789E-09F; +0123456789.0123456789E-09L; +0123456789.0123456789E-09DL; 113 -0123456789.0123456789E-09; -0123456789.0123456789E-09f; -0123456789.0123456789E-09l; -0123456789.0123456789E-09F; -0123456789.0123456789E-09L; -0123456789.0123456789E-09DL; 117 #if ! 
defined( __aarch64__ ) // unsupported on ARM after gcc-9 118 0123456789.df; 0123456789.dd; 0123456789.dl; 0123456789.DF; 0123456789.DD; 0123456789.DL; 119 +0123456789.df; +0123456789.dd; +0123456789.dl; +0123456789.DF; +0123456789.DD; +0123456789.DL; 120 -0123456789.df; -0123456789.dd; -0123456789.dl; -0123456789.DF; -0123456789.DD; -0123456789.DL; 121 122 0123456789.e09df; 0123456789.e09dd; 0123456789.e09dl; 0123456789.e09DF; 0123456789.e09DD; 0123456789.e09DL; 123 +0123456789.e09df; +0123456789.e09dd; +0123456789.e09dl; +0123456789.e09DF; +0123456789.e09DD; +0123456789.e09DL; 124 -0123456789.e09df; -0123456789.e09dd; -0123456789.e09dl; -0123456789.e09DF; -0123456789.e09DD; -0123456789.e09DL; 125 126 0123456789.e+09df; 0123456789.e+09dd; 0123456789.e+09dl; 0123456789.e+09DF; 0123456789.e+09DD; 0123456789.e+09DL; 127 +0123456789.e+09df; +0123456789.e+09dd; +0123456789.e+09dl; +0123456789.e+09DF; +0123456789.e+09DD; +0123456789.e+09DL; 128 -0123456789.e+09df; -0123456789.e+09dd; -0123456789.e+09dl; -0123456789.e+09DF; -0123456789.e+09DD; -0123456789.e+09DL; 129 130 0123456789.e-09df; 0123456789.e-09dd; 0123456789.e-09dl; 0123456789.e-09DF; 0123456789.e-09DD; 0123456789.e-09DL; 131 +0123456789.e-09df; +0123456789.e-09dd; +0123456789.e-09dl; +0123456789.e-09DF; +0123456789.e-09DD; +0123456789.e-09DL; 132 -0123456789.e-09df; -0123456789.e-09dd; -0123456789.e-09dl; -0123456789.e-09DF; -0123456789.e-09DD; -0123456789.e-09DL; 133 134 .0123456789df; .0123456789dd; .0123456789dl; .0123456789DF; .0123456789DD; .0123456789DL; 135 +.0123456789df; +.0123456789dd; +.0123456789dl; +.0123456789DF; +.0123456789DD; +.0123456789DL; 136 -.0123456789df; -.0123456789dd; -.0123456789dl; -.0123456789DF; -.0123456789DD; -.0123456789DL; 137 138 .0123456789e09df; .0123456789e09dd; .0123456789e09dl; .0123456789e09DF; .0123456789e09DD; .0123456789e09DL; 139 +.0123456789e09df; +.0123456789e09dd; +.0123456789e09dl; +.0123456789e09DF; +.0123456789e09DD; +.0123456789e09DL; 140 -.0123456789e09df; -.0123456789e09dd; -.0123456789e09dl; -.0123456789e09DF; -.0123456789e09DD; -.0123456789e09DL; 141 142 .0123456789E+09df; .0123456789E+09dd; .0123456789E+09dl; .0123456789E+09DF; .0123456789E+09DD; .0123456789E+09DL; 143 +.0123456789E+09df; +.0123456789E+09dd; +.0123456789E+09dl; +.0123456789E+09DF; +.0123456789E+09DD; +.0123456789E+09DL; 144 -.0123456789E+09df; -.0123456789E+09dd; -.0123456789E+09dl; -.0123456789E+09DF; -.0123456789E+09DD; -.0123456789E+09DL; 145 146 .0123456789E-09df; .0123456789E-09dd; .0123456789E-09dl; .0123456789E-09DF; .0123456789E-09DD; .0123456789E-09DL; 147 -.0123456789E-09df; -.0123456789E-09dd; -.0123456789E-09dl; -.0123456789E-09DF; -.0123456789E-09DD; -.0123456789E-09DL; 148 -.0123456789E-09df; -.0123456789E-09dd; -.0123456789E-09dl; -.0123456789E-09DF; -.0123456789E-09DD; -.0123456789E-09DL; 149 150 0123456789.0123456789df; 0123456789.0123456789dd; 0123456789.0123456789dl; 0123456789.0123456789DF; 0123456789.0123456789DD; 0123456789.0123456789DL; 151 +0123456789.0123456789df; +0123456789.0123456789dd; +0123456789.0123456789dl; +0123456789.0123456789DF; +0123456789.0123456789DD; +0123456789.0123456789DL; 152 -0123456789.0123456789df; -0123456789.0123456789dd; -0123456789.0123456789dl; -0123456789.0123456789DF; -0123456789.0123456789DD; -0123456789.0123456789DL; 153 154 0123456789.0123456789E09df; 0123456789.0123456789E09dd; 0123456789.0123456789E09dl; 0123456789.0123456789E09DF; 0123456789.0123456789E09DD; 0123456789.0123456789E09DL; 155 +0123456789.0123456789E09df; +0123456789.0123456789E09dd; 
+0123456789.0123456789E09dl; +0123456789.0123456789E09DF; +0123456789.0123456789E09DD; +0123456789.0123456789E09DL; 156 -0123456789.0123456789E09df; -0123456789.0123456789E09dd; -0123456789.0123456789E09dl; -0123456789.0123456789E09DF; -0123456789.0123456789E09DD; -0123456789.0123456789E09DL; 157 158 0123456789.0123456789E+09df; 0123456789.0123456789E+09dd; 0123456789.0123456789E+09dl; 0123456789.0123456789E+09DF; 0123456789.0123456789E+09DD; 0123456789.0123456789E+09DL; 159 +0123456789.0123456789E+09df; +0123456789.0123456789E+09dd; +0123456789.0123456789E+09dl; +0123456789.0123456789E+09DF; +0123456789.0123456789E+09DD; +0123456789.0123456789E+09DL; 160 -0123456789.0123456789E+09df; -0123456789.0123456789E+09dd; -0123456789.0123456789E+09dl; -0123456789.0123456789E+09DF; -0123456789.0123456789E+09DD; -0123456789.0123456789E+09DL; 161 162 0123456789.0123456789E-09df; 0123456789.0123456789E-09dd; 0123456789.0123456789E-09dl; 0123456789.0123456789E-09DF; 0123456789.0123456789E-09DD; 0123456789.0123456789E-09DL; 163 +0123456789.0123456789E-09df; +0123456789.0123456789E-09dd; +0123456789.0123456789E-09dl; +0123456789.0123456789E-09DF; +0123456789.0123456789E-09DD; +0123456789.0123456789E-09DL; 164 -0123456789.0123456789E-09df; -0123456789.0123456789E-09dd; -0123456789.0123456789E-09dl; -0123456789.0123456789E-09DF; -0123456789.0123456789E-09DD; -0123456789.0123456789E-09DL; 165 #endif // ! __aarch64__ 114 166 115 167 // hexadecimal floating literals, must have exponent -
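The literal rows moved inside the #if above use the decimal floating-point suffixes from ISO/IEC TS 18661-2, which gcc accepts as an extension on targets with decimal-float support; the guard exists because, as the diff's own comment notes, these constants are unsupported on ARM after gcc-9. A minimal stand-alone illustration in plain C (not part of the test):

    /* Requires a gcc target with decimal floating-point, e.g. x86-64. */
    #if ! defined( __aarch64__ )
    _Decimal32  d32  = 0.5df;   /* df / DF -> _Decimal32  */
    _Decimal64  d64  = 0.5dd;   /* dd / DD -> _Decimal64  */
    _Decimal128 d128 = 0.5dl;   /* dl / DL -> _Decimal128 */
    #endif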
tests/math.cfa
r33e1c91 r929d925 
10 10 // Created On : Fri Apr 22 14:59:21 2016
11 11 // Last Modified By : Peter A. Buhr
12 // Last Modified On : Tue Apr 13 21:04:48 2021
13 // Update Count : 123
12 // Last Modified On : Fri Jun 18 17:02:44 2021
13 // Update Count : 124
14 14 //
15 15 
… … 
40 40 
41 41 	sout | "exp:" | exp( 1.0F ) | exp( 1.0D ) | exp( 1.0L ) | nonl;
42 	sout | exp( 1.0F+1.0FI ) | exp( 1.0D+1.0DI ) | exp( 1.0DL+1.0LI );
42 	sout | exp( 1.0F+1.0FI ) | exp( 1.0D+1.0DI ) | exp( 1.0L+1.0LI );
43 43 	sout | "exp2:" | exp2( 1.0F ) | exp2( 1.0D ) | exp2( 1.0L );
44 44 	sout | "expm1:" | expm1( 1.0F ) | expm1( 1.0D ) | expm1( 1.0L );
45 45 	sout | "pow:" | pow( 1.0F, 1.0F ) | pow( 1.0D, 1.0D ) | pow( 1.0L, 1.0L ) | nonl;
46 	sout | pow( 1.0F+1.0FI, 1.0F+1.0FI ) | pow( 1.0D+1.0DI, 1.0D+1.0DI ) | pow( 1.5DL+1.5LI, 1.5DL+1.5LI );
46 	sout | pow( 1.0F+1.0FI, 1.0F+1.0FI ) | pow( 1.0D+1.0DI, 1.0D+1.0DI ) | pow( 1.5L+1.5LI, 1.5L+1.5LI );
47 47 
48 48 	int b = 4;
… … 
68 68 
69 69 	sout | "log:" | log( 1.0F ) | log( 1.0D ) | log( 1.0L ) | nonl;
70 	sout | log( 1.0F+1.0FI ) | log( 1.0D+1.0DI ) | log( 1.0DL+1.0LI );
70 	sout | log( 1.0F+1.0FI ) | log( 1.0D+1.0DI ) | log( 1.0L+1.0LI );
71 71 	sout | "log2:" | log2( 1024 ) | log2( 2 \ 17u ) | log2( 2 \ 23u );
72 72 	sout | "log2:" | log2( 1024l ) | log2( 2l \ 17u ) | log2( 2l \ 23u );
… … 
82 82 
83 83 	sout | "sqrt:" | sqrt( 1.0F ) | sqrt( 1.0D ) | sqrt( 1.0L ) | nonl;
84 	sout | sqrt( 1.0F+1.0FI ) | sqrt( 1.0D+1.0DI ) | sqrt( 1.0DL+1.0LI );
84 	sout | sqrt( 1.0F+1.0FI ) | sqrt( 1.0D+1.0DI ) | sqrt( 1.0L+1.0LI );
85 85 	sout | "cbrt:" | cbrt( 27.0F ) | cbrt( 27.0D ) | cbrt( 27.0L );
86 86 	sout | "hypot:" | hypot( 1.0F, -1.0F ) | hypot( 1.0D, -1.0D ) | hypot( 1.0L, -1.0L );
… … 
89 89 
90 90 	sout | "sin:" | sin( 1.0F ) | sin( 1.0D ) | sin( 1.0L ) | nonl;
91 	sout | sin( 1.0F+1.0FI ) | sin( 1.0D+1.0DI ) | sin( 1.0DL+1.0LI );
91 	sout | sin( 1.0F+1.0FI ) | sin( 1.0D+1.0DI ) | sin( 1.0L+1.0LI );
92 92 	sout | "cos:" | cos( 1.0F ) | cos( 1.0D ) | cos( 1.0L ) | nonl;
93 	sout | cos( 1.0F+1.0FI ) | cos( 1.0D+1.0DI ) | cos( 1.0DL+1.0LI );
93 	sout | cos( 1.0F+1.0FI ) | cos( 1.0D+1.0DI ) | cos( 1.0L+1.0LI );
94 94 	sout | "tan:" | tan( 1.0F ) | tan( 1.0D ) | tan( 1.0L ) | nonl;
95 	sout | tan( 1.0F+1.0FI ) | tan( 1.0D+1.0DI ) | tan( 1.0DL+1.0LI );
95 	sout | tan( 1.0F+1.0FI ) | tan( 1.0D+1.0DI ) | tan( 1.0L+1.0LI );
96 96 	sout | "asin:" | asin( 1.0F ) | asin( 1.0D ) | asin( 1.0L ) | nonl;
97 	sout | asin( 1.0F+1.0FI ) | asin( 1.0D+1.0DI ) | asin( 1.0DL+1.0LI );
97 	sout | asin( 1.0F+1.0FI ) | asin( 1.0D+1.0DI ) | asin( 1.0L+1.0LI );
98 98 	sout | "acos:" | acos( 1.0F ) | acos( 1.0D ) | acos( 1.0L ) | nonl;
99 	sout | acos( 1.0F+1.0FI ) | acos( 1.0D+1.0DI ) | acos( 1.0DL+1.0LI );
99 	sout | acos( 1.0F+1.0FI ) | acos( 1.0D+1.0DI ) | acos( 1.0L+1.0LI );
100 100 	sout | "atan:" | atan( 1.0F ) | atan( 1.0D ) | atan( 1.0L ) | nonl;
101 	sout | atan( 1.0F+1.0FI ) | atan( 1.0D+1.0DI ) | atan( 1.0DL+1.0LI );
101 	sout | atan( 1.0F+1.0FI ) | atan( 1.0D+1.0DI ) | atan( 1.0L+1.0LI );
102 102 	sout | "atan2:" | atan2( 1.0F, 1.0F ) | atan2( 1.0D, 1.0D ) | atan2( 1.0L, 1.0L ) | nonl;
103 103 	sout | "atan:" | atan( 1.0F, 1.0F ) | atan( 1.0D, 1.0D ) | atan( 1.0L, 1.0L );
… … 
106 106 
107 107 	sout | "sinh:" | sinh( 1.0F ) | sinh( 1.0D ) | sinh( 1.0L ) | nonl;
108 	sout | sinh( 1.0F+1.0FI ) | sinh( 1.0D+1.0DI ) | sinh( 1.0DL+1.0LI );
108 	sout | sinh( 1.0F+1.0FI ) | sinh( 1.0D+1.0DI ) | sinh( 1.0L+1.0LI );
109 109 	sout | "cosh:" | cosh( 1.0F ) | cosh( 1.0D ) | cosh( 1.0L ) | nonl;
110 	sout | cosh( 1.0F+1.0FI ) | cosh( 1.0D+1.0DI ) | cosh( 1.0DL+1.0LI );
110 	sout | cosh( 1.0F+1.0FI ) | cosh( 1.0D+1.0DI ) | cosh( 1.0L+1.0LI );
111 111 	sout | "tanh:" | tanh( 1.0F ) | tanh( 1.0D ) | tanh( 1.0L ) | nonl;
112 	sout | tanh( 1.0F+1.0FI ) | tanh( 1.0D+1.0DI ) | tanh( 1.0DL+1.0LI );
112 	sout | tanh( 1.0F+1.0FI ) | tanh( 1.0D+1.0DI ) | tanh( 1.0L+1.0LI );
113 113 	sout | "acosh:" | acosh( 1.0F ) | acosh( 1.0D ) | acosh( 1.0L ) | nonl;
114 	sout | acosh( 1.0F+1.0FI ) | acosh( 1.0D+1.0DI ) | acosh( 1.0DL+1.0LI );
114 	sout | acosh( 1.0F+1.0FI ) | acosh( 1.0D+1.0DI ) | acosh( 1.0L+1.0LI );
115 115 	sout | "asinh:" | asinh( 1.0F ) | asinh( 1.0D ) | asinh( 1.0L ) | nonl;
116 	sout | asinh( 1.0F+1.0FI ) | asinh( 1.0D+1.0DI ) | asinh( 1.0DL+1.0LI );
116 	sout | asinh( 1.0F+1.0FI ) | asinh( 1.0D+1.0DI ) | asinh( 1.0L+1.0LI );
117 117 	sout | "atanh:" | atanh( 1.0F ) | atanh( 1.0D ) | atanh( 1.0L ) | nonl;
118 	sout | atanh( 1.0F+1.0FI ) | atanh( 1.0D+1.0DI ) | atanh( 1.0DL+1.0LI );
118 	sout | atanh( 1.0F+1.0FI ) | atanh( 1.0D+1.0DI ) | atanh( 1.0L+1.0LI );
119 119 
120 120 	//---------------------- Error / Gamma ----------------------
-
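Every edit in this file swaps the DL-suffixed operand (1.0DL) for a plain long-double one (1.0L) in the complex-valued calls, consistent with literals.cfa above moving DL-suffixed decimal literals behind the AArch64 guard. For comparison, the same complex long-double evaluation in plain ISO C would use <complex.h>; a sketch only, not part of the changeset:

    #include <complex.h>
    #include <stdio.h>

    int main(void) {
        long double complex z = 1.0L + 1.0L * I;   /* the CFA test spells this 1.0L+1.0LI */
        long double complex s = csinl(z);          /* complex sine at long double precision */
        printf("%Lg%+Lgi\n", creall(s), cimagl(s));
        return 0;
    }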
tests/test.py
r33e1c91 r929d925 
13 13 
14 14 import os
15 import psutil
16 15 import signal
17 16 
-
tests/typedefRedef.cfa
r33e1c91 r929d925 
45 45 typedef int X2;
46 46 
47 X2 value __attribute__((aligned(4 * sizeof(X2))));
48 
49 __attribute__((aligned(4 * sizeof(X2)))) struct rseq_cs {
50 	int foo;
51 };
52 
47 53 // xxx - this doesn't work yet due to parsing problems with generic types
48 54 // #ifdef __CFA__
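The lines added above check that a typedef name (X2) is accepted inside an attribute argument expression, both after a declarator and in prefix position before a struct definition. A stand-alone gnu-C reduction of the same idea (it uses the trailing attribute placement, which plain gcc honors unambiguously; the printed values assume a 4-byte int):

    #include <stdio.h>

    typedef int X2;

    /* the typedef name appears inside the attribute's constant expression */
    X2 value __attribute__((aligned(4 * sizeof(X2))));

    struct rseq_cs {
        int foo;
    } __attribute__((aligned(4 * sizeof(X2))));

    int main(void) {
        printf("%zu %zu\n", (size_t)__alignof__(value), (size_t)__alignof__(struct rseq_cs));
        return 0;   /* prints "16 16" where sizeof(int) == 4 */
    }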