Changes in / [33e1c91:929d925]


Ignore:
Files:
6 added
60 edited

Legend:

Unmodified
Added
Removed
  • Jenkins/FullBuild

    r33e1c91 r929d925  
    1818
    1919                                parallel (
    20                                         gcc_8_x86_new: { trigger_build( 'gcc-8',   'x86' ) },
    21                                         gcc_7_x86_new: { trigger_build( 'gcc-7',   'x86' ) },
    22                                         gcc_6_x86_new: { trigger_build( 'gcc-6',   'x86' ) },
    23                                         gcc_9_x64_new: { trigger_build( 'gcc-9',   'x64' ) },
    24                                         gcc_8_x64_new: { trigger_build( 'gcc-8',   'x64' ) },
    25                                         gcc_7_x64_new: { trigger_build( 'gcc-7',   'x64' ) },
    26                                         gcc_6_x64_new: { trigger_build( 'gcc-6',   'x64' ) },
    27                                         gcc_5_x64_new: { trigger_build( 'gcc-5',   'x64' ) },
    28                                         clang_x64_new: { trigger_build( 'clang',   'x64' ) },
     20                                        gcc_08_x86_new: { trigger_build( 'gcc-8',   'x86' ) },
     21                                        gcc_07_x86_new: { trigger_build( 'gcc-7',   'x86' ) },
     22                                        gcc_06_x86_new: { trigger_build( 'gcc-6',   'x86' ) },
     23                                        gcc_10_x64_new: { trigger_build( 'gcc-10',  'x64' ) },
     24                                        gcc_09_x64_new: { trigger_build( 'gcc-9',   'x64' ) },
     25                                        gcc_08_x64_new: { trigger_build( 'gcc-8',   'x64' ) },
     26                                        gcc_07_x64_new: { trigger_build( 'gcc-7',   'x64' ) },
     27                                        gcc_06_x64_new: { trigger_build( 'gcc-6',   'x64' ) },
     28                                        clang_x64_new:  { trigger_build( 'clang',   'x64' ) },
    2929                                )
    3030                        }
  • Jenkinsfile

    r33e1c91 r929d925  
    305305        BuildSettings(java.util.Collections$UnmodifiableMap param, String branch) {
    306306                switch( param.Compiler ) {
     307                        case 'gcc-11':
     308                                this.Compiler = new CC_Desc('gcc-11', 'g++-11', 'gcc-11', '-flto=auto')
     309                        break
     310                        case 'gcc-10':
     311                                this.Compiler = new CC_Desc('gcc-10', 'g++-10', 'gcc-10', '-flto=auto')
     312                        break
    307313                        case 'gcc-9':
    308314                                this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto')
     
    324330                        break
    325331                        case 'clang':
    326                                 this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-9', '-flto=thin -flto-jobs=0')
     332                                this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-10', '-flto=thin -flto-jobs=0')
    327333                        break
    328334                        default :
  • benchmark/io/io_uring.h

    r33e1c91 r929d925  
    11extern "C" {
    2         #ifndef _GNU_SOURCE         /* See feature_test_macros(7) */
    3         #define _GNU_SOURCE         /* See feature_test_macros(7) */
    4         #endif
    52        #include <errno.h>
    63        #include <stdio.h>
  • doc/theses/mubeen_zulfiqar_MMath/.gitignore

    r33e1c91 r929d925  
    11# Intermediate Results:
    2 out/
     2build/
    33
    44# Final Files:
  • doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex

    r33e1c91 r929d925  
    3535====================
    3636
    37 \section Performance Matrices of Memory Allocators
     37%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     38%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     39%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Performance Matrices
     40%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     41%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     42
     43\section{Performance Metrics of Memory Allocators}
    3844
    3945When it comes to memory allocators, there are no set standards of performance. Performance of a memory allocator depends highly on the usage pattern of the application. A memory allocator that is the best performer for a certain application X might be the worst for some other application which has a completely different memory usage pattern compared to the application X. It is extremely difficult to make one universally best memory allocator which will outperform every other memory allocator for every usage pattern. So, there is a lack of a set of standard benchmarks that are used to evaluate a memory allocator's performance.
    4046
    4147If we break down the goals of a memory allocator, there are two basic metrics on which a memory allocator's performance is evaluated.
    42 
    43 1. Memory Overhead
    44 2. Speed
    45 
    46         /subsection Memory Overhead
    47         Memory overhead is the extra memory that a memory allocator takes from OS which is not requested by the application. Ideally, an allocator should get just enough memory from OS that can fulfill application's request and should return this memory to OS as soon as applications frees it. But, allocators retain more memory compared to what application has asked for which causes memory overhead. Memory overhead can happen for various reasons.
    48 
    49                 /subsubsection Fragmentation
    50                 Fragmentation is one of the major reasons behind memory overhead. Fragmentation happens because of situations that are either necassary for proper functioning of the allocator such as internal memory management and book-keeping or are out of allocator's control such as application's usage pattern.
    51 
    52                         /subsubsubsection Internal Fragmentation
    53                         For internal book-keeping, allocators divide raw memory given by OS into chunks, blocks, or lists that can fulfill application's requested size. Allocators use memory given by OS for creating headers, footers etc. to store information about these chunks, blocks, or lists. This increases usage of memory in-addition to the memory requested by application as the allocators need to store their book-keeping information. This extra usage of memory for allocator's own book-keeping is called Internal Fragmentation. Although it cases memory overhead but this overhead is necassary for an allocator's proper funtioning.
    54 
     48\begin{enumerate}
     49\item
     50Memory Overhead
     51\item
     52Speed
     53\end{enumerate}
     54
     55\subsection{Memory Overhead}
     56Memory overhead is the extra memory that a memory allocator takes from OS which is not requested by the application. Ideally, an allocator should get just enough memory from OS that can fulfill application's request and should return this memory to OS as soon as applications frees it. But, allocators retain more memory compared to what application has asked for which causes memory overhead. Memory overhead can happen for various reasons.
     57
     58\subsubsection{Fragmentation}
     59Fragmentation is one of the major reasons behind memory overhead. Fragmentation happens because of situations that are either necessary for proper functioning of the allocator, such as internal memory management and book-keeping, or are out of the allocator's control, such as the application's usage pattern.
     60
     61\paragraph{Internal Fragmentation}
     62For internal book-keeping, allocators divide raw memory given by OS into chunks, blocks, or lists that can fulfill application's requested size. Allocators use memory given by OS for creating headers, footers etc. to store information about these chunks, blocks, or lists. This increases usage of memory in-addition to the memory requested by application as the allocators need to store their book-keeping information. This extra usage of memory for allocator's own book-keeping is called Internal Fragmentation. Although it causes memory overhead, this overhead is necessary for an allocator's proper functioning.
    5563
    5664*** FIX ME: Insert a figure of internal fragmentation with explanation
    5765
    58                         /subsubsubsection External Fragmentation
    59                         External fragmentation is the free bits of memory between or around chunks of memory that are currently in-use of the application. Segmentation in memory due to application's usage pattern causes external fragmentation. The memory which is part of external fragmentation is completely free as it is neither used by allocator's internal book-keeping nor by the application. Ideally, an allocator should return a segment of memory back to the OS as soon as application frees it. But, this is not always the case. Allocators get memory from OS in one of the two ways.
    60 
    61                         \begin{itemize}
    62                         \item
    63                         MMap: an allocator can ask OS for whole pages in mmap area. Then, the allocator segments the page internally and fulfills application's request.
    64                         \item
    65                         Heap: an allocator can ask OS for memory in heap area using system calls such as sbrk. Heap are grows downwards and shrinks upwards.
    66                         \begin{itemize}
    67 
    68                         If an allocator uses mmap area, it can only return extra memory back to OS if the whole page is free i.e. no chunk on the page is in-use of the application. Even if one chunk on the whole page is currently in-use of the application, the allocator has to retain the whole page.
    69 
    70                         If an allocator uses the heap area, it can only return the continous free memory at the end of the heap area that is currently in allocator's possession as heap area shrinks upwards. If there are free bits of memory in-between chunks of memory that are currently in-use of the application, the allocator can not return these free bits.
    71 
    72 *** FIX ME: Insert a figure of above scenrio with explanation
    73 
    74                         Even if the entire heap area is free except one small chunk at the end of heap area that is being used by the application, the allocator cannot return the free heap area back to the OS as it is not a continous region at the end of heap area.
    75 
    76 *** FIX ME: Insert a figure of above scenrio with explanation
    77 
    78                         Such scenerios cause external fragmentation but it is out of the allocator's control and depend on application's usage pattern.
    79 
    80                 /subsubsection Internal Memory Management
    81                 Allocators such as je-malloc (FIX ME: insert reference) pro-actively get some memory from the OS and divide it into chunks of certain sizes that can be used in-future to fulfill application's request. This causes memory overhead as these chunks are made before application's request. There is also the possibility that an application may not even request memory of these sizes during their whole life-time.
    82 
    83 *** FIX ME: Insert a figure of above scenrio with explanation
    84 
    85                 Allocators such as rp-malloc (FIX ME: insert reference) maintain lists or blocks of sized memory segments that is freed by the application for future use. These lists are maintained without any guarantee that application will even request these sizes again.
    86 
    87                 Such tactics are usually used to gain speed as allocator will not have to get raw memory from OS and manage it at the time of application's request but they do cause memory overhead.
    88 
    89         Fragmentation and managed sized chunks of free memory can lead to Heap Blowup as the allocator may not be able to use the fragments or sized free chunks of free memory to fulfill application's requests of other sizes.
    90 
    91         /subsection Speed
    92         When it comes to performance evaluation of any piece of software, its runtime is usually the first thing that is evaluated. The same is true for memory allocators but, in case of memory allocators, speed does not only mean the runtime of memory allocator's routines but there are other factors too.
    93 
    94                 /subsubsection Runtime Speed
    95                 Low runtime is the main goal of a memory allocator when it comes it proving its speed. Runtime is the time that it takes for a routine of memory allocator to complete its execution. As mentioned in (FIX ME: refernce to routines' list), there four basic routines that are used in memory allocation. Ideally, each routine of a memory allocator should be fast. Some memory allocator designs use pro-active measures (FIX ME: local refernce) to gain speed when allocating some memory to the application. Some memory allocators do memory allocation faster than memory freeing (FIX ME: graph refernce) while others show similar speed whether memory is allocated or freed.
    96 
    97                 /subsubsection Memory Access Speed
    98                 Runtime speed is not the only speed matrix in memory allocators. The memory that a memory allocator has allocated to the application also needs to be accessible as quick as possible. The application should be able to read/write allocated memory quickly. The allocation method of a memory allocator may introduce some delays when it comes to memory access speed, which is specially important in concurrent applications. Ideally, a memory allocator should allocate all memory on a cache-line to only one thread and no cache-line should be shared among multiple threads. If a memory allocator allocates memory to multple threads on a same cache line, then cache may get invalidated more frequesntly when two different threads running on two different processes will try to read/write the same memory region. On the other hand, if one cache-line is used by only one thread then the cache may get invalidated less frequently. This sharing of one cache-line among multiple threads is called false sharing (FIX ME: cite wasik).
    99 
    100                         /subsubsubsection Active False Sharing
    101                         Active false sharing is the sharing of one cache-line among multiple threads that is caused by memory allocator. It happens when two threads request memory from memory allocator and the allocator allocates memory to both of them on the same cache-line. After that, if the threads are running on different processes who have their own caches and both threads start reading/writing the allocated memory simultanously, their caches will start getting invalidated every time the other thread writes something to the memory. This will cause the application to slow down as the process has to load cache much more frequently.
    102 
    103 *** FIX ME: Insert a figure of above scenrio with explanation
    104 
    105                         /subsubsubsection Passive False Sharing
    106                         Passive false sharing is the kind of false sharing which is caused by the application and not the memory allocator. The memory allocator may preservce passive false sharing in future instead of eradicating it. But, passive false sharing is initiated by the application.
    107 
    108                                 /subsubsubsubsection Program Induced Passive False Sharing
    109                                 Program induced false sharing is completely out of memory allocator's control and is purely caused by the application. When a thread in the application creates multiple objects in the dynamic area and allocator allocates memory for these objects on the same cache-line as the objects are created by the same thread. Passive false sharing will occur if this thread passes one of these objects to another thread but it retains the rest of these objects or it passes some/all of the remaining objects to some third thread(s). Now, one cache-line is shared among multiple threads but it is caused by the application and not the allocator. It is out of allocator's control and has the similar performance impact as Active False Sharing (FIX ME: cite local) if these threads, who are sharing the same cache-line, start reading/writing the given objects simultanously.
     66\paragraph{External Fragmentation}
     67External fragmentation is the free bits of memory between or around chunks of memory that are currently in-use of the application. Segmentation in memory due to application's usage pattern causes external fragmentation. The memory which is part of external fragmentation is completely free as it is neither used by allocator's internal book-keeping nor by the application. Ideally, an allocator should return a segment of memory back to the OS as soon as application frees it. But, this is not always the case. Allocators get memory from OS in one of the two ways.
     68
     69\begin{itemize}
     70\item
     71MMap: an allocator can ask OS for whole pages in mmap area. Then, the allocator segments the page internally and fulfills application's request.
     72\item
     73Heap: an allocator can ask OS for memory in heap area using system calls such as sbrk. The heap area grows downwards and shrinks upwards.
     74\begin{itemize}
     75\item
     76If an allocator uses mmap area, it can only return extra memory back to OS if the whole page is free i.e. no chunk on the page is in-use of the application. Even if one chunk on the whole page is currently in-use of the application, the allocator has to retain the whole page.
     77\item
     78If an allocator uses the heap area, it can only return the continuous free memory at the end of the heap area that is currently in allocator's possession as heap area shrinks upwards. If there are free bits of memory in-between chunks of memory that are currently in-use of the application, the allocator cannot return these free bits.
     79
     80*** FIX ME: Insert a figure of above scenrio with explanation
     81\item
     82Even if the entire heap area is free except one small chunk at the end of heap area that is being used by the application, the allocator cannot return the free heap area back to the OS as it is not a continuous region at the end of heap area.
     83
     84*** FIX ME: Insert a figure of above scenrio with explanation
     85
     86\item
     87Such scenarios cause external fragmentation, but they are out of the allocator's control and depend on the application's usage pattern.
     88\end{itemize}
     89\end{itemize}
     90
     91\subsubsection{Internal Memory Management}
     92Allocators such as je-malloc (FIX ME: insert reference) pro-actively get some memory from the OS and divide it into chunks of certain sizes that can be used in-future to fulfill application's request. This causes memory overhead as these chunks are made before application's request. There is also the possibility that an application may not even request memory of these sizes during their whole life-time.
     93
     94*** FIX ME: Insert a figure of above scenrio with explanation
     95
     96Allocators such as rp-malloc (FIX ME: insert reference) maintain lists or blocks of sized memory segments that are freed by the application for future use. These lists are maintained without any guarantee that the application will even request these sizes again.
     97
     98Such tactics are usually used to gain speed as allocator will not have to get raw memory from OS and manage it at the time of application's request but they do cause memory overhead.
     99
     100Fragmentation and managed sized chunks of free memory can lead to Heap Blowup as the allocator may not be able to use the fragments or sized free chunks of free memory to fulfill application's requests of other sizes.
     101
     102\subsection{Speed}
     103When it comes to performance evaluation of any piece of software, its runtime is usually the first thing that is evaluated. The same is true for memory allocators but, in case of memory allocators, speed does not only mean the runtime of memory allocator's routines but there are other factors too.
     104
     105\subsubsection{Runtime Speed}
     106Low runtime is the main goal of a memory allocator when it comes to proving its speed. Runtime is the time that it takes for a routine of memory allocator to complete its execution. As mentioned in (FIX ME: reference to routines' list), there are four basic routines that are used in memory allocation. Ideally, each routine of a memory allocator should be fast. Some memory allocator designs use pro-active measures (FIX ME: local reference) to gain speed when allocating some memory to the application. Some memory allocators do memory allocation faster than memory freeing (FIX ME: graph reference) while others show similar speed whether memory is allocated or freed.
     107
     108\subsubsection{Memory Access Speed}
     109Runtime speed is not the only speed metric in memory allocators. The memory that a memory allocator has allocated to the application also needs to be accessible as quick as possible. The application should be able to read/write allocated memory quickly. The allocation method of a memory allocator may introduce some delays when it comes to memory access speed, which is especially important in concurrent applications. Ideally, a memory allocator should allocate all memory on a cache-line to only one thread and no cache-line should be shared among multiple threads. If a memory allocator allocates memory to multiple threads on a same cache line, then cache may get invalidated more frequently when two different threads running on two different processes will try to read/write the same memory region. On the other hand, if one cache-line is used by only one thread then the cache may get invalidated less frequently. This sharing of one cache-line among multiple threads is called false sharing (FIX ME: cite wasik).
     110
     111\paragraph{Active False Sharing}
     112Active false sharing is the sharing of one cache-line among multiple threads that is caused by the memory allocator. It happens when two threads request memory from the memory allocator and the allocator allocates memory to both of them on the same cache-line. After that, if the threads are running on different processes who have their own caches and both threads start reading/writing the allocated memory simultaneously, their caches will start getting invalidated every time the other thread writes something to the memory. This will cause the application to slow down as the process has to load cache much more frequently.
     113
     114*** FIX ME: Insert a figure of above scenrio with explanation
     115
     116\paragraph{Passive False Sharing}
     117Passive false sharing is the kind of false sharing which is caused by the application and not the memory allocator. The memory allocator may preserve passive false sharing in future instead of eradicating it. But, passive false sharing is initiated by the application.
     118
     119\subparagraph{Program Induced Passive False Sharing}
     120Program induced false sharing is completely out of memory allocator's control and is purely caused by the application. When a thread in the application creates multiple objects in the dynamic area and allocator allocates memory for these objects on the same cache-line as the objects are created by the same thread. Passive false sharing will occur if this thread passes one of these objects to another thread but it retains the rest of these objects or it passes some/all of the remaining objects to some third thread(s). Now, one cache-line is shared among multiple threads but it is caused by the application and not the allocator. It is out of allocator's control and has the similar performance impact as Active False Sharing (FIX ME: cite local) if these threads, who are sharing the same cache-line, start reading/writing the given objects simultaneously.
    110121
    111122*** FIX ME: Insert a figure of above scenrio 1 with explanation
     
    113124*** FIX ME: Insert a figure of above scenrio 2 with explanation
    114125
    115                                 /subsubsubsubsection Program Induced Allocator Preserved Passive False Sharing
    116                                 Program induced allocator preserved passive false sharing is another interesting case of passive false sharing. Both the application and the allocator are partially responsible for it. It starts the same as Program Induced False Sharing (FIX ME: cite local). Once, an application thread has created multiple dynamic objects on the same cache-line and ditributed these objects among multiple threads causing sharing of one cache-line among multiple threads (Program Induced Passive False Sharing). This kind of false sharing occurs when one of these threads, which got the object on the shared cache-line, frees the passed object then re-allocates another object but the allocator returns the same object (on the shared cache-line) that this thread just freed. Although, the application caused the false sharing to happen in the frst place however, to prevent furthur false sharing, the allocator should have returned the new object on some other cache-line which is only shared by the allocating thread. When it comes to performnce impact, this passive false sharing will slow down the application just like any other kind of false sharing if the threads sharing the cache-line start reading/writing the objects simultanously.
    117 
    118 *** FIX ME: Insert a figure of above scenrio with explanation
     126\subparagraph{Program Induced Allocator Preserved Passive False Sharing}
     127Program induced allocator preserved passive false sharing is another interesting case of passive false sharing. Both the application and the allocator are partially responsible for it. It starts the same as Program Induced False Sharing (FIX ME: cite local). Once, an application thread has created multiple dynamic objects on the same cache-line and distributed these objects among multiple threads causing sharing of one cache-line among multiple threads (Program Induced Passive False Sharing). This kind of false sharing occurs when one of these threads, which got the object on the shared cache-line, frees the passed object then re-allocates another object but the allocator returns the same object (on the shared cache-line) that this thread just freed. Although, the application caused the false sharing to happen in the first place however, to prevent further false sharing, the allocator should have returned the new object on some other cache-line which is only shared by the allocating thread. When it comes to performance impact, this passive false sharing will slow down the application just like any other kind of false sharing if the threads sharing the cache-line start reading/writing the objects simultaneously.
     128
     129
     130*** FIX ME: Insert a figure of above scenrio with explanation
     131
     132%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     133%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     134%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Micro Benchmark Suite
     135%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     136%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     137
     138\section{Micro Benchmark Suite}
     139The aim of micro benchmark suite is to create a set of programs that can evaluate a memory allocator based on the performance metrics described in (FIX ME: local cite). These programs can be taken as a standard to benchmark an allocator's basic goals. These programs give details of an allocator's memory overhead and speed under a certain allocation pattern. The speed of the allocator is benchmarked in different ways. Similarly, false sharing happening in an allocator is also measured in multiple ways. These benchmarks evaluate the allocator under a certain allocation pattern which is configurable and can be changed using a few knobs to observe an allocator's performance under a desired allocation pattern.
     140
     141Micro Benchmark Suite benchmarks an allocator's performance by allocating dynamic objects and, then, measuring specific metrics. The benchmark suite evaluates an allocator with a certain allocation pattern. Benchmarks have different knobs that can be used to change allocation pattern and evaluate an allocator under desired conditions. These can be set by giving commandline arguments to the benchmark on execution.
     142
     143Following is the list of available knobs.
     144
     145*** FIX ME: Add knobs items after finalize
     146
     147\subsection{Memory Benchmark}
     148Memory benchmark measures memory overhead of an allocator. It allocates a number of dynamic objects. Then, by reading /proc/self/maps, gets the total memory that the allocator has requested from the OS. Finally, it calculates the memory overhead by taking the difference between the memory the allocator has requested from the OS and the memory that the program has allocated.
     149*** FIX ME: Insert a figure of above benchmark with description
     150
     151\subsubsection{Relevant Knobs}
     152*** FIX ME: Insert Relevant Knobs
     153
     154\subsection{Speed Benchmark}
     155Speed benchmark calculates the runtime speed of an allocator's functions (FIX ME: cite allocator routines). It does so by measuring the runtime of allocator routines in two different ways.
     156
     157\subsubsection{Speed Time}
     158The time method does a certain amount of work by calling each routine of the allocator (FIX ME: cite allocator routines) a specific number of times. It calculates the total time it took to perform this workload. Then, it divides the time it took by the workload and calculates the average time taken by the allocator's routine.
     159*** FIX ME: Insert a figure of above benchmark with description
     160
     161\paragraph{Relevant Knobs}
     162*** FIX ME: Insert Relevant Knobs
     163
     164\subsubsection{Speed Workload}
     165The workload method uses the opposite approach. It calls the allocator's routines for a specific amount of time and measures how much work was done during that time. Then, similar to the time method, it divides the time by the workload done during that time and calculates the average time taken by the allocator's routine.
     166*** FIX ME: Insert a figure of above benchmark with description
     167
     168\paragraph{Relevant Knobs}
     169*** FIX ME: Insert Relevant Knobs
     170
     171\subsection{Cache Scratch}
     172Cache Scratch benchmark measures program induced allocator preserved passive false sharing (FIX ME CITE) in an allocator. It does so in two ways.
     173
     174\subsubsection{Cache Scratch Time}
     175Cache Scratch Time allocates dynamic objects. Then, it benchmarks program induced allocator preserved passive false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects.
     176*** FIX ME: Insert a figure of above benchmark with description
     177
     178\paragraph{Relevant Knobs}
     179*** FIX ME: Insert Relevant Knobs
     180
     181\subsubsection{Cache Scratch Layout}
     182Cache Scratch Layout also allocates dynamic objects. Then, it benchmarks program induced allocator preserved passive false sharing (FIX ME CITE) by using heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line.
     183*** FIX ME: Insert a figure of above benchmark with description
     184
     185\paragraph{Relevant Knobs}
     186*** FIX ME: Insert Relevant Knobs
     187
     188\subsection{Cache Thrash}
     189Cache Thrash benchmark measures allocator induced passive false sharing (FIX ME CITE) in an allocator. It also does so in two ways.
     190
     191\subsubsection{Cache Thrash Time}
     192Cache Thrash Time allocates dynamic objects. Then, it benchmarks allocator induced false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects.
     193*** FIX ME: Insert a figure of above benchmark with description
     194
     195\paragraph{Relevant Knobs}
     196*** FIX ME: Insert Relevant Knobs
     197
     198\subsubsection{Cache Thrash Layout}
     199Cache Thrash Layout also allocates dynamic objects. Then, it benchmarks allocator induced false sharing (FIX ME CITE) by using heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line.
     200*** FIX ME: Insert a figure of above benchmark with description
     201
     202\paragraph{Relevant Knobs}
     203*** FIX ME: Insert Relevant Knobs
     204
     205\section{Results}
     206*** FIX ME: add configuration details of memory allocators
     207
     208\subsection{Memory Benchmark}
     209
     210\subsubsection{Relevant Knobs}
     211
     212\subsection{Speed Benchmark}
     213
     214\subsubsection{Speed Time}
     215
     216\paragraph{Relevant Knobs}
     217
     218\subsubsection{Speed Workload}
     219
     220\paragraph{Relevant Knobs}
     221
     222\subsection{Cache Scratch}
     223
     224\subsubsection{Cache Scratch Time}
     225
     226\paragraph{Relevant Knobs}
     227
     228\subsubsection{Cache Scratch Layout}
     229
     230\paragraph{Relevant Knobs}
     231
     232\subsection{Cache Thrash}
     233
     234\subsubsection{Cache Thrash Time}
     235
     236\paragraph{Relevant Knobs}
     237
     238\subsubsection{Cache Thrash Layout}
     239
     240\paragraph{Relevant Knobs}
  • doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex

    r33e1c91 r929d925  
    165165% cfa macros used in the document
    166166\input{common}
     167%\usepackageinput{common}
    167168\CFAStyle                                               % CFA code-style for all languages
    168 \lstset{language=CFA,basicstyle=\linespread{0.9}\tt}    % CFA default language
     169\lstset{basicstyle=\linespread{0.9}\tt}                 % CFA typewriter font
    169170\newcommand{\PAB}[1]{{\color{red}PAB: #1}}
    170171
  • libcfa/prelude/defines.hfa.in

    r33e1c91 r929d925  
    171171#undef CFA_HAVE_LINUX_IO_URING_H
    172172
     173/* Defined if librseq support is present when compiling libcfathread. */
     174#undef CFA_HAVE_LINUX_LIBRSEQ
     175
     176/* Defined if rseq support is present when compiling libcfathread. */
     177#undef CFA_HAVE_LINUX_RSEQ_H
     178
    173179/* Defined if openat2 support is present when compiling libcfathread. */
    174180#undef CFA_HAVE_OPENAT2
     
    205211#undef HAVE_LINUX_IO_URING_H
    206212
     213/* Define to 1 if you have the <linux/rseq.h> header file. */
     214#undef HAVE_LINUX_RSEQ_H
     215
    207216/* Define to 1 if you have the <memory.h> header file. */
    208217#undef HAVE_MEMORY_H
  • libcfa/src/bits/signal.hfa

    r33e1c91 r929d925  
    2020
    2121#include <errno.h>
    22 #define __USE_GNU
    2322#include <signal.h>
    24 #undef __USE_GNU
    2523#include <stdlib.h>
    2624#include <string.h>
  • libcfa/src/concurrency/coroutine.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
    1718
    1819#include "coroutine.hfa"
  • libcfa/src/concurrency/io.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
    1718
    1819#if defined(__CFA_DEBUG__)
     
    2324
    2425#if defined(CFA_HAVE_LINUX_IO_URING_H)
    25         #define _GNU_SOURCE         /* See feature_test_macros(7) */
    2626        #include <errno.h>
    2727        #include <signal.h>
  • libcfa/src/concurrency/io/setup.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
    17 #define _GNU_SOURCE         /* See feature_test_macros(7) */
     17#define _GNU_SOURCE
    1818
    1919#if defined(__CFA_DEBUG__)
  • libcfa/src/concurrency/kernel.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
     18
    1719// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    1820
     
    278280
    279281                                // Spin a little on I/O, just in case
    280                                         for(5) {
     282                                for(5) {
    281283                                        __maybe_io_drain( this );
    282284                                        readyThread = pop_fast( this->cltr );
     
    285287
    286288                                // no luck, try stealing a few times
    287                                         for(5) {
     289                                for(5) {
    288290                                        if( __maybe_io_drain( this ) ) {
    289291                                                readyThread = pop_fast( this->cltr );
  • libcfa/src/concurrency/kernel.hfa

    r33e1c91 r929d925  
    6666                unsigned id;
    6767                unsigned target;
     68                unsigned last;
    6869                unsigned long long int cutoff;
    6970        } rdq;
  • libcfa/src/concurrency/kernel/startup.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
    1718
    1819// C Includes
    1920#include <errno.h>              // errno
     21#include <signal.h>
    2022#include <string.h>             // strerror
    2123#include <unistd.h>             // sysconf
     24
    2225extern "C" {
    2326      #include <limits.h>       // PTHREAD_STACK_MIN
     27        #include <unistd.h>       // syscall
    2428        #include <sys/eventfd.h>  // eventfd
    2529      #include <sys/mman.h>     // mprotect
     
    136140};
    137141
     142#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
     143        // No data needed
     144#elif defined(CFA_HAVE_LINUX_RSEQ_H)
     145        extern "Cforall" {
     146                __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq @= {
     147                        .cpu_id : RSEQ_CPU_ID_UNINITIALIZED,
     148                };
     149        }
     150#else
     151        // No data needed
     152#endif
     153
    138154//-----------------------------------------------------------------------------
    139155// Struct to steal stack
     
    468484        self_mon_p = &self_mon;
    469485        link.next = 0p;
    470         link.ts   = 0;
     486        link.ts   = -1llu;
    471487        preferred = -1u;
    472488        last_proc = 0p;
     
    497513        this.rdq.id  = -1u;
    498514        this.rdq.target = -1u;
     515        this.rdq.last = -1u;
    499516        this.rdq.cutoff = 0ull;
    500517        do_terminate = false;
  • libcfa/src/concurrency/kernel_private.hfa

    r33e1c91 r929d925  
    1616#pragma once
    1717
     18#if !defined(__cforall_thread__)
     19        #error kernel_private.hfa should only be included in libcfathread source
     20#endif
     21
    1822#include "kernel.hfa"
    1923#include "thread.hfa"
     
    2226#include "stats.hfa"
    2327
     28extern "C" {
     29#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
     30        #include <rseq/rseq.h>
     31#elif defined(CFA_HAVE_LINUX_RSEQ_H)
     32        #include <linux/rseq.h>
     33#else
     34        #ifndef _GNU_SOURCE
     35        #error kernel_private requires gnu_source
     36        #endif
     37        #include <sched.h>
     38#endif
     39}
     40
    2441//-----------------------------------------------------------------------------
    2542// Scheduler
    26 
    27 
    2843extern "C" {
    2944        void disable_interrupts() OPTIONAL_THREAD;
     
    3954
    4055//-----------------------------------------------------------------------------
     56// Hardware
     57
     58#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
     59        // No data needed
     60#elif defined(CFA_HAVE_LINUX_RSEQ_H)
     61        extern "Cforall" {
     62                extern __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq;
     63        }
     64#else
     65        // No data needed
     66#endif
     67
     68static inline int __kernel_getcpu() {
     69        /* paranoid */ verify( ! __preemption_enabled() );
     70#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
     71        return rseq_current_cpu();
     72#elif defined(CFA_HAVE_LINUX_RSEQ_H)
     73        int r = __cfaabi_rseq.cpu_id;
     74        /* paranoid */ verify( r >= 0 );
     75        return r;
     76#else
     77        return sched_getcpu();
     78#endif
     79}
     80
     81//-----------------------------------------------------------------------------
    4182// Processor
    4283void main(processorCtx_t *);
     
    4485void * __create_pthread( pthread_t *, void * (*)(void *), void * );
    4586void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
    46 
    47 
    4887
    4988extern cluster * mainCluster;
  • libcfa/src/concurrency/locks.cfa

    r33e1c91 r929d925  
    1616
    1717#define __cforall_thread__
     18#define _GNU_SOURCE
    1819
    1920#include "locks.hfa"
  • libcfa/src/concurrency/monitor.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
    1718
    1819#include "monitor.hfa"
  • libcfa/src/concurrency/mutex.cfa

    r33e1c91 r929d925  
    1717
    1818#define __cforall_thread__
     19#define _GNU_SOURCE
    1920
    2021#include "mutex.hfa"
  • libcfa/src/concurrency/preemption.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
     18
    1719// #define __CFA_DEBUG_PRINT_PREEMPTION__
    1820
  • libcfa/src/concurrency/ready_queue.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
     18
    1719// #define __CFA_DEBUG_PRINT_READY_QUEUE__
    1820
    1921
    20 #define USE_RELAXED_FIFO
     22// #define USE_RELAXED_FIFO
    2123// #define USE_WORK_STEALING
     24#define USE_CPU_WORK_STEALING
    2225
    2326#include "bits/defs.hfa"
     27#include "device/cpu.hfa"
    2428#include "kernel_private.hfa"
    2529
    26 #define _GNU_SOURCE
    2730#include "stdlib.hfa"
    2831#include "math.hfa"
    2932
     33#include <errno.h>
    3034#include <unistd.h>
     35
     36extern "C" {
     37        #include <sys/syscall.h>  // __NR_xxx
     38}
    3139
    3240#include "ready_subqueue.hfa"
     
    4654#endif
    4755
    48 #if   defined(USE_RELAXED_FIFO)
     56#if   defined(USE_CPU_WORK_STEALING)
     57        #define READYQ_SHARD_FACTOR 2
     58#elif defined(USE_RELAXED_FIFO)
    4959        #define BIAS 4
    5060        #define READYQ_SHARD_FACTOR 4
     
    8595}
    8696
     97#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
     98        // No forward declaration needed
     99        #define __kernel_rseq_register rseq_register_current_thread
     100        #define __kernel_rseq_unregister rseq_unregister_current_thread
     101#elif defined(CFA_HAVE_LINUX_RSEQ_H)
     102        void __kernel_raw_rseq_register  (void);
     103        void __kernel_raw_rseq_unregister(void);
     104
     105        #define __kernel_rseq_register __kernel_raw_rseq_register
     106        #define __kernel_rseq_unregister __kernel_raw_rseq_unregister
     107#else
     108        // No forward declaration needed
     109        // No initialization needed
     110        static inline void noop(void) {}
     111
     112        #define __kernel_rseq_register noop
     113        #define __kernel_rseq_unregister noop
     114#endif
     115
    87116//=======================================================================
    88117// Cluster wide reader-writer lock
     
    107136// Lock-Free registering/unregistering of threads
    108137unsigned register_proc_id( void ) with(*__scheduler_lock) {
     138        __kernel_rseq_register();
     139
    109140        __cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
    110141        bool * handle = (bool *)&kernelTLS().sched_lock;
     
    161192
    162193        __cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc);
     194
     195        __kernel_rseq_unregister();
    163196}
    164197
     
    214247//=======================================================================
    215248void ?{}(__ready_queue_t & this) with (this) {
    216         lanes.data  = 0p;
    217         lanes.tscs  = 0p;
    218         lanes.count = 0;
     249        #if defined(USE_CPU_WORK_STEALING)
     250                lanes.count = cpu_info.hthrd_count * READYQ_SHARD_FACTOR;
     251                lanes.data = alloc( lanes.count );
     252                lanes.tscs = alloc( lanes.count );
     253
     254                for( idx; (size_t)lanes.count ) {
     255                        (lanes.data[idx]){};
     256                        lanes.tscs[idx].tv = rdtscl();
     257                }
     258        #else
     259                lanes.data  = 0p;
     260                lanes.tscs  = 0p;
     261                lanes.count = 0;
     262        #endif
    219263}
    220264
    221265void ^?{}(__ready_queue_t & this) with (this) {
    222         verify( SEQUENTIAL_SHARD == lanes.count );
     266        #if !defined(USE_CPU_WORK_STEALING)
     267                verify( SEQUENTIAL_SHARD == lanes.count );
     268        #endif
     269
    223270        free(lanes.data);
    224271        free(lanes.tscs);
     
    226273
    227274//-----------------------------------------------------------------------
     275#if defined(USE_CPU_WORK_STEALING)
     276        __attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
     277                __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
     278
     279                processor * const proc = kernelTLS().this_processor;
     280                const bool external = !push_local || (!proc) || (cltr != proc->cltr);
     281
     282                const int cpu = __kernel_getcpu();
     283                /* paranoid */ verify(cpu >= 0);
     284                /* paranoid */ verify(cpu < cpu_info.hthrd_count);
     285                /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
     286
     287                const cpu_map_entry_t & map = cpu_info.llc_map[cpu];
     288                /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
     289                /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
     290                /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
     291
     292                const int start = map.self * READYQ_SHARD_FACTOR;
     293                unsigned i;
     294                do {
     295                        unsigned r;
     296                        if(unlikely(external)) { r = __tls_rand(); }
     297                        else { r = proc->rdq.its++; }
     298                        i = start + (r % READYQ_SHARD_FACTOR);
     299                        // If we can't lock it retry
     300                } while( !__atomic_try_acquire( &lanes.data[i].lock ) );
     301
     302                // Actually push it
     303                push(lanes.data[i], thrd);
     304
     305                // Unlock and return
     306                __atomic_unlock( &lanes.data[i].lock );
     307
     308                #if !defined(__CFA_NO_STATISTICS__)
     309                        if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
     310                        else __tls_stats()->ready.push.local.success++;
     311                #endif
     312
     313                __cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
     314
     315        }
     316
     317        // Pop from the ready queue from a given cluster
     318        __attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
     319                /* paranoid */ verify( lanes.count > 0 );
     320                /* paranoid */ verify( kernelTLS().this_processor );
     321
     322                const int cpu = __kernel_getcpu();
     323                /* paranoid */ verify(cpu >= 0);
     324                /* paranoid */ verify(cpu < cpu_info.hthrd_count);
     325                /* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
     326
     327                const cpu_map_entry_t & map = cpu_info.llc_map[cpu];
     328                /* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
     329                /* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
     330                /* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
     331
     332                processor * const proc = kernelTLS().this_processor;
     333                const int start = map.self * READYQ_SHARD_FACTOR;
     334
     335                // Did we already have a help target
     336                if(proc->rdq.target == -1u) {
     337                        // if We don't have a
     338                        unsigned long long min = ts(lanes.data[start]);
     339                        for(i; READYQ_SHARD_FACTOR) {
     340                                unsigned long long tsc = ts(lanes.data[start + i]);
     341                                if(tsc < min) min = tsc;
     342                        }
     343                        proc->rdq.cutoff = min;
     344
     345                        /* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores.
     346                        /* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores.
     347                        uint64_t chaos = __tls_rand();
     348                        uint64_t high_chaos = (chaos >> 32);
     349                        uint64_t  mid_chaos = (chaos >> 16) & 0xffff;
     350                        uint64_t  low_chaos = chaos & 0xffff;
     351
     352                        unsigned me = map.self;
     353                        unsigned cpu_chaos = map.start + (mid_chaos % map.count);
     354                        bool global = cpu_chaos == me;
     355
     356                        if(global) {
     357                                proc->rdq.target = high_chaos % lanes.count;
     358                        } else {
     359                                proc->rdq.target = (cpu_chaos * READYQ_SHARD_FACTOR) + (low_chaos % READYQ_SHARD_FACTOR);
     360                                /* paranoid */ verify(proc->rdq.target >= (map.start * READYQ_SHARD_FACTOR));
     361                                /* paranoid */ verify(proc->rdq.target <  ((map.start + map.count) * READYQ_SHARD_FACTOR));
     362                        }
     363
     364                        /* paranoid */ verify(proc->rdq.target != -1u);
     365                }
     366                else {
     367                        const unsigned long long bias = 0; //2_500_000_000;
     368                        const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff;
     369                        {
     370                                unsigned target = proc->rdq.target;
     371                                proc->rdq.target = -1u;
     372                                if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) {
     373                                        $thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
     374                                        proc->rdq.last = target;
     375                                        if(t) return t;
     376                                }
     377                        }
     378
     379                        unsigned last = proc->rdq.last;
     380                        if(last != -1u && lanes.tscs[last].tv < cutoff && ts(lanes.data[last]) < cutoff) {
     381                                $thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
     382                                if(t) return t;
     383                        }
     384                        else {
     385                                proc->rdq.last = -1u;
     386                        }
     387                }
     388
     389                for(READYQ_SHARD_FACTOR) {
     390                        unsigned i = start + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
     391                        if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
     392                }
     393
     394                // All lanes where empty return 0p
     395                return 0p;
     396        }
     397
     398        __attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
     399                processor * const proc = kernelTLS().this_processor;
     400                unsigned last = proc->rdq.last;
     401                if(last != -1u) {
     402                        struct $thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal));
     403                        if(t) return t;
     404                        proc->rdq.last = -1u;
     405                }
     406
     407                unsigned i = __tls_rand() % lanes.count;
     408                return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
     409        }
     410        __attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) {
     411                return search(cltr);
     412        }
     413#endif
    228414#if defined(USE_RELAXED_FIFO)
    229415        //-----------------------------------------------------------------------
     
    519705                                        if(is_empty(sl)) {
    520706                                                assert( sl.anchor.next == 0p );
    521                                                 assert( sl.anchor.ts   == 0 );
     707                                                assert( sl.anchor.ts   == -1llu );
    522708                                                assert( mock_head(sl)  == sl.prev );
    523709                                        } else {
    524710                                                assert( sl.anchor.next != 0p );
    525                                                 assert( sl.anchor.ts   != 0 );
     711                                                assert( sl.anchor.ts   != -1llu );
    526712                                                assert( mock_head(sl)  != sl.prev );
    527713                                        }
     
    573759                lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
    574760                for(i; lanes.count) {
    575                         unsigned long long tsc = ts(lanes.data[i]);
    576                         lanes.tscs[i].tv = tsc != 0 ? tsc : rdtscl();
     761                        unsigned long long tsc1 = ts(lanes.data[i]);
     762                        unsigned long long tsc2 = rdtscl()
     763                        lanes.tscs[i].tv = min(tsc1, tsc2);
    577764                }
    578765        #endif
    579766}
    580767
    581 // Grow the ready queue
    582 void ready_queue_grow(struct cluster * cltr) {
    583         size_t ncount;
    584         int target = cltr->procs.total;
    585 
    586         /* paranoid */ verify( ready_mutate_islocked() );
    587         __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
    588 
    589         // Make sure that everything is consistent
    590         /* paranoid */ check( cltr->ready_queue );
    591 
    592         // grow the ready queue
    593         with( cltr->ready_queue ) {
    594                 // Find new count
    595                 // Make sure we always have atleast 1 list
    596                 if(target >= 2) {
    597                         ncount = target * READYQ_SHARD_FACTOR;
    598                 } else {
    599                         ncount = SEQUENTIAL_SHARD;
    600                 }
    601 
    602                 // Allocate new array (uses realloc and memcpies the data)
    603                 lanes.data = alloc( ncount, lanes.data`realloc );
    604 
    605                 // Fix the moved data
    606                 for( idx; (size_t)lanes.count ) {
    607                         fix(lanes.data[idx]);
    608                 }
    609 
    610                 // Construct new data
    611                 for( idx; (size_t)lanes.count ~ ncount) {
    612                         (lanes.data[idx]){};
    613                 }
    614 
    615                 // Update original
    616                 lanes.count = ncount;
    617         }
    618 
    619         fix_times(cltr);
    620 
    621         reassign_cltr_id(cltr);
    622 
    623         // Make sure that everything is consistent
    624         /* paranoid */ check( cltr->ready_queue );
    625 
    626         __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
    627 
    628         /* paranoid */ verify( ready_mutate_islocked() );
    629 }
    630 
    631 // Shrink the ready queue
    632 void ready_queue_shrink(struct cluster * cltr) {
    633         /* paranoid */ verify( ready_mutate_islocked() );
    634         __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
    635 
    636         // Make sure that everything is consistent
    637         /* paranoid */ check( cltr->ready_queue );
    638 
    639         int target = cltr->procs.total;
    640 
    641         with( cltr->ready_queue ) {
    642                 // Remember old count
    643                 size_t ocount = lanes.count;
    644 
    645                 // Find new count
    646                 // Make sure we always have atleast 1 list
    647                 lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;
    648                 /* paranoid */ verify( ocount >= lanes.count );
    649                 /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
    650 
    651                 // for printing count the number of displaced threads
    652                 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
    653                         __attribute__((unused)) size_t displaced = 0;
    654                 #endif
    655 
    656                 // redistribute old data
    657                 for( idx; (size_t)lanes.count ~ ocount) {
    658                         // Lock is not strictly needed but makes checking invariants much easier
    659                         __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);
    660                         verify(locked);
    661 
    662                         // As long as we can pop from this lane to push the threads somewhere else in the queue
    663                         while(!is_empty(lanes.data[idx])) {
    664                                 struct $thread * thrd;
    665                                 unsigned long long _;
    666                                 [thrd, _] = pop(lanes.data[idx]);
    667 
    668                                 push(cltr, thrd, true);
    669 
    670                                 // for printing count the number of displaced threads
    671                                 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
    672                                         displaced++;
    673                                 #endif
    674                         }
    675 
    676                         // Unlock the lane
    677                         __atomic_unlock(&lanes.data[idx].lock);
    678 
    679                         // TODO print the queue statistics here
    680 
    681                         ^(lanes.data[idx]){};
    682                 }
    683 
    684                 __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
    685 
    686                 // Allocate new array (uses realloc and memcpies the data)
    687                 lanes.data = alloc( lanes.count, lanes.data`realloc );
    688 
    689                 // Fix the moved data
    690                 for( idx; (size_t)lanes.count ) {
    691                         fix(lanes.data[idx]);
    692                 }
    693         }
    694 
    695         fix_times(cltr);
    696 
    697         reassign_cltr_id(cltr);
    698 
    699         // Make sure that everything is consistent
    700         /* paranoid */ check( cltr->ready_queue );
    701 
    702         __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
    703         /* paranoid */ verify( ready_mutate_islocked() );
    704 }
     768#if defined(USE_CPU_WORK_STEALING)
     769        // ready_queue size is fixed in this case
     770        void ready_queue_grow(struct cluster * cltr) {}
     771        void ready_queue_shrink(struct cluster * cltr) {}
     772#else
     773        // Grow the ready queue
     774        void ready_queue_grow(struct cluster * cltr) {
     775                size_t ncount;
     776                int target = cltr->procs.total;
     777
     778                /* paranoid */ verify( ready_mutate_islocked() );
     779                __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
     780
     781                // Make sure that everything is consistent
     782                /* paranoid */ check( cltr->ready_queue );
     783
     784                // grow the ready queue
     785                with( cltr->ready_queue ) {
     786                        // Find new count
     787                        // Make sure we always have atleast 1 list
     788                        if(target >= 2) {
     789                                ncount = target * READYQ_SHARD_FACTOR;
     790                        } else {
     791                                ncount = SEQUENTIAL_SHARD;
     792                        }
     793
     794                        // Allocate new array (uses realloc and memcpies the data)
     795                        lanes.data = alloc( ncount, lanes.data`realloc );
     796
     797                        // Fix the moved data
     798                        for( idx; (size_t)lanes.count ) {
     799                                fix(lanes.data[idx]);
     800                        }
     801
     802                        // Construct new data
     803                        for( idx; (size_t)lanes.count ~ ncount) {
     804                                (lanes.data[idx]){};
     805                        }
     806
     807                        // Update original
     808                        lanes.count = ncount;
     809                }
     810
     811                fix_times(cltr);
     812
     813                reassign_cltr_id(cltr);
     814
     815                // Make sure that everything is consistent
     816                /* paranoid */ check( cltr->ready_queue );
     817
     818                __cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
     819
     820                /* paranoid */ verify( ready_mutate_islocked() );
     821        }
     822
     823        // Shrink the ready queue
     824        void ready_queue_shrink(struct cluster * cltr) {
     825                /* paranoid */ verify( ready_mutate_islocked() );
     826                __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
     827
     828                // Make sure that everything is consistent
     829                /* paranoid */ check( cltr->ready_queue );
     830
     831                int target = cltr->procs.total;
     832
     833                with( cltr->ready_queue ) {
     834                        // Remember old count
     835                        size_t ocount = lanes.count;
     836
     837                        // Find new count
     838                        // Make sure we always have atleast 1 list
     839                        lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;
     840                        /* paranoid */ verify( ocount >= lanes.count );
     841                        /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
     842
     843                        // for printing count the number of displaced threads
     844                        #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
     845                                __attribute__((unused)) size_t displaced = 0;
     846                        #endif
     847
     848                        // redistribute old data
     849                        for( idx; (size_t)lanes.count ~ ocount) {
     850                                // Lock is not strictly needed but makes checking invariants much easier
     851                                __attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);
     852                                verify(locked);
     853
     854                                // As long as we can pop from this lane to push the threads somewhere else in the queue
     855                                while(!is_empty(lanes.data[idx])) {
     856                                        struct $thread * thrd;
     857                                        unsigned long long _;
     858                                        [thrd, _] = pop(lanes.data[idx]);
     859
     860                                        push(cltr, thrd, true);
     861
     862                                        // for printing count the number of displaced threads
     863                                        #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
     864                                                displaced++;
     865                                        #endif
     866                                }
     867
     868                                // Unlock the lane
     869                                __atomic_unlock(&lanes.data[idx].lock);
     870
     871                                // TODO print the queue statistics here
     872
     873                                ^(lanes.data[idx]){};
     874                        }
     875
     876                        __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
     877
     878                        // Allocate new array (uses realloc and memcpies the data)
     879                        lanes.data = alloc( lanes.count, lanes.data`realloc );
     880
     881                        // Fix the moved data
     882                        for( idx; (size_t)lanes.count ) {
     883                                fix(lanes.data[idx]);
     884                        }
     885                }
     886
     887                fix_times(cltr);
     888
     889                reassign_cltr_id(cltr);
     890
     891                // Make sure that everything is consistent
     892                /* paranoid */ check( cltr->ready_queue );
     893
     894                __cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
     895                /* paranoid */ verify( ready_mutate_islocked() );
     896        }
     897#endif
    705898
    706899#if !defined(__CFA_NO_STATISTICS__)
     
    710903        }
    711904#endif
     905
     906
     907#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
     908        // No definition needed
     909#elif defined(CFA_HAVE_LINUX_RSEQ_H)
     910
     911        #if defined( __x86_64 ) || defined( __i386 )
     912                #define RSEQ_SIG        0x53053053
     913        #elif defined( __ARM_ARCH )
     914                #ifdef __ARMEB__
     915                #define RSEQ_SIG    0xf3def5e7      /* udf    #24035    ; 0x5de3 (ARMv6+) */
     916                #else
     917                #define RSEQ_SIG    0xe7f5def3      /* udf    #24035    ; 0x5de3 */
     918                #endif
     919        #endif
     920
     921        extern void __disable_interrupts_hard();
     922        extern void __enable_interrupts_hard();
     923
     924        void __kernel_raw_rseq_register  (void) {
     925                /* paranoid */ verify( __cfaabi_rseq.cpu_id == RSEQ_CPU_ID_UNINITIALIZED );
     926
     927                // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, (sigset_t *)0p, _NSIG / 8);
     928                int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, RSEQ_SIG);
     929                if(ret != 0) {
     930                        int e = errno;
     931                        switch(e) {
     932                        case EINVAL: abort("KERNEL ERROR: rseq register invalid argument");
     933                        case ENOSYS: abort("KERNEL ERROR: rseq register no supported");
     934                        case EFAULT: abort("KERNEL ERROR: rseq register with invalid argument");
     935                        case EBUSY : abort("KERNEL ERROR: rseq register already registered");
     936                        case EPERM : abort("KERNEL ERROR: rseq register sig  argument  on unregistration does not match the signature received on registration");
     937                        default: abort("KERNEL ERROR: rseq register unexpected return %d", e);
     938                        }
     939                }
     940        }
     941
     942        void __kernel_raw_rseq_unregister(void) {
     943                /* paranoid */ verify( __cfaabi_rseq.cpu_id >= 0 );
     944
     945                // int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, (sigset_t *)0p, _NSIG / 8);
     946                int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
     947                if(ret != 0) {
     948                        int e = errno;
     949                        switch(e) {
     950                        case EINVAL: abort("KERNEL ERROR: rseq unregister invalid argument");
     951                        case ENOSYS: abort("KERNEL ERROR: rseq unregister no supported");
     952                        case EFAULT: abort("KERNEL ERROR: rseq unregister with invalid argument");
     953                        case EBUSY : abort("KERNEL ERROR: rseq unregister already registered");
     954                        case EPERM : abort("KERNEL ERROR: rseq unregister sig  argument  on unregistration does not match the signature received on registration");
     955                        default: abort("KERNEL ERROR: rseq unregisteunexpected return %d", e);
     956                        }
     957                }
     958        }
     959#else
     960        // No definition needed
     961#endif
  • libcfa/src/concurrency/ready_subqueue.hfa

    r33e1c91 r929d925  
    3232        this.prev = mock_head(this);
    3333        this.anchor.next = 0p;
    34         this.anchor.ts   = 0;
     34        this.anchor.ts   = -1llu;
    3535        #if !defined(__CFA_NO_STATISTICS__)
    3636                this.cnt  = 0;
     
    4444        /* paranoid */ verify( &mock_head(this)->link.ts   == &this.anchor.ts   );
    4545        /* paranoid */ verify( mock_head(this)->link.next == 0p );
    46         /* paranoid */ verify( mock_head(this)->link.ts   == 0  );
     46        /* paranoid */ verify( mock_head(this)->link.ts   == -1llu  );
    4747        /* paranoid */ verify( mock_head(this) == this.prev );
    4848        /* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 );
     
    5555        // Make sure the list is empty
    5656        /* paranoid */ verify( this.anchor.next == 0p );
    57         /* paranoid */ verify( this.anchor.ts   == 0 );
     57        /* paranoid */ verify( this.anchor.ts   == -1llu );
    5858        /* paranoid */ verify( mock_head(this)  == this.prev );
    5959}
     
    6464        /* paranoid */ verify( this.lock );
    6565        /* paranoid */ verify( node->link.next == 0p );
    66         /* paranoid */ verify( node->link.ts   == 0  );
     66        /* paranoid */ verify( node->link.ts   == -1llu  );
    6767        /* paranoid */ verify( this.prev->link.next == 0p );
    68         /* paranoid */ verify( this.prev->link.ts   == 0  );
     68        /* paranoid */ verify( this.prev->link.ts   == -1llu  );
    6969        if( this.anchor.next == 0p ) {
    7070                /* paranoid */ verify( this.anchor.next == 0p );
    71                 /* paranoid */ verify( this.anchor.ts   == 0  );
     71                /* paranoid */ verify( this.anchor.ts   == -1llu );
     72                /* paranoid */ verify( this.anchor.ts   != 0  );
    7273                /* paranoid */ verify( this.prev == mock_head( this ) );
    7374        } else {
    7475                /* paranoid */ verify( this.anchor.next != 0p );
     76                /* paranoid */ verify( this.anchor.ts   != -1llu );
    7577                /* paranoid */ verify( this.anchor.ts   != 0  );
    7678                /* paranoid */ verify( this.prev != mock_head( this ) );
     
    9294        /* paranoid */ verify( this.lock );
    9395        /* paranoid */ verify( this.anchor.next != 0p );
     96        /* paranoid */ verify( this.anchor.ts   != -1llu );
    9497        /* paranoid */ verify( this.anchor.ts   != 0  );
    9598
     
    99102        this.anchor.next = node->link.next;
    100103        this.anchor.ts   = node->link.ts;
    101         bool is_empty = this.anchor.ts == 0;
     104        bool is_empty = this.anchor.next == 0p;
    102105        node->link.next = 0p;
    103         node->link.ts   = 0;
     106        node->link.ts   = -1llu;
    104107        #if !defined(__CFA_NO_STATISTICS__)
    105108                this.cnt--;
     
    110113
    111114        /* paranoid */ verify( node->link.next == 0p );
    112         /* paranoid */ verify( node->link.ts   == 0  );
     115        /* paranoid */ verify( node->link.ts   == -1llu  );
     116        /* paranoid */ verify( node->link.ts   != 0  );
     117        /* paranoid */ verify( this.anchor.ts  != 0  );
    113118        return [node, ts];
    114119}
     
    116121// Check whether or not list is empty
    117122static inline bool is_empty(__intrusive_lane_t & this) {
    118         return this.anchor.ts == 0;
     123        return this.anchor.next == 0p;
    119124}
    120125
     
    122127static inline unsigned long long ts(__intrusive_lane_t & this) {
    123128        // Cannot verify here since it may not be locked
     129        /* paranoid */ verify(this.anchor.ts != 0);
    124130        return this.anchor.ts;
    125131}
  • libcfa/src/concurrency/thread.cfa

    r33e1c91 r929d925  
    1515
    1616#define __cforall_thread__
     17#define _GNU_SOURCE
    1718
    1819#include "thread.hfa"
     
    3940        curr_cluster = &cl;
    4041        link.next = 0p;
    41         link.ts   = 0;
     42        link.ts   = -1llu;
    4243        preferred = -1u;
    4344        last_proc = 0p;
  • libcfa/src/containers/array.hfa

    r33e1c91 r929d925  
    11
    22
    3 // a type whose size is n
    4 #define Z(n) char[n]
    5 
    6 // the inverse of Z(-)
    7 #define z(N) sizeof(N)
    8 
    9 forall( T & ) struct tag {};
     3forall( __CFA_tysys_id_only_X & ) struct tag {};
    104#define ttag(T) ((tag(T)){})
    11 #define ztag(n) ttag(Z(n))
     5#define ztag(n) ttag(n)
    126
    137
     
    1812forall( [N], S & | sized(S), Timmed &, Tbase & ) {
    1913    struct arpk {
    20         S strides[z(N)];
     14        S strides[N];
    2115    };
    2216
     
    5650
    5751    static inline size_t ?`len( arpk(N, S, Timmed, Tbase) & a ) {
    58         return z(N);
     52        return N;
    5953    }
    6054
    6155    // workaround #226 (and array relevance thereof demonstrated in mike102/otype-slow-ndims.cfa)
    6256    static inline void ?{}( arpk(N, S, Timmed, Tbase) & this ) {
    63         void ?{}( S (&inner)[z(N)] ) {}
     57        void ?{}( S (&inner)[N] ) {}
    6458        ?{}(this.strides);
    6559    }
    6660    static inline void ^?{}( arpk(N, S, Timmed, Tbase) & this ) {
    67         void ^?{}( S (&inner)[z(N)] ) {}
     61        void ^?{}( S (&inner)[N] ) {}
    6862        ^?{}(this.strides);
    6963    }
  • libcfa/src/device/cpu.cfa

    r33e1c91 r929d925  
    253253        }
    254254
    255         #if defined(__CFA_WITH_VERIFY__)
    256                 // Check widths are consistent
    257                 for(i; 1~cpus) {
    258                         for(j; cache_levels) {
    259                                 verifyf(raw[0][j].width == raw[i][j].width, "Unexpected width %u for cpu %u, index %u. Expected %u.", raw[i][j].width, i, j, raw[0][j].width);
    260                         }
    261                 }
    262         #endif
    263 
    264255        return raw;
    265256}
    266257
     258struct llc_map_t {
     259        raw_cache_instance * raw;
     260        unsigned count;
     261        unsigned start;
     262};
     263
    267264// returns an allocate list of all the different distinct last level caches
    268 static [*idx_range_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
     265static [*llc_map_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
    269266        // Allocate at least one element
    270         idx_range_t * ranges = alloc();
     267        llc_map_t* ranges = alloc();
    271268        size_t range_cnt = 1;
    272269
    273270        // Initialize with element 0
    274         *ranges = raw[0][llc_idx].range;
     271        ranges->raw = &raw[0][llc_idx];
     272        ranges->count = 0;
     273        ranges->start = -1u;
    275274
    276275        // Go over all other cpus
    277276        CPU_LOOP: for(i; 1~cpus) {
    278277                // Check if the range is already there
    279                 idx_range_t candidate = raw[i][llc_idx].range;
     278                raw_cache_instance * candidate = &raw[i][llc_idx];
    280279                for(j; range_cnt) {
    281                         idx_range_t exist = ranges[j];
     280                        llc_map_t & exist = ranges[j];
    282281                        // If the range is already there just jump to the next cpu
    283                         if(0 == strcmp(candidate, exist)) continue CPU_LOOP;
     282                        if(0 == strcmp(candidate->range, exist.raw->range)) continue CPU_LOOP;
    284283                }
    285284
    286285                // The range wasn't there, added to the list
    287286                ranges = alloc(range_cnt + 1, ranges`realloc);
    288                 ranges[range_cnt] = candidate;
     287                ranges[range_cnt].raw = candidate;
     288                ranges[range_cnt].count = 0;
     289                ranges[range_cnt].start = -1u;
    289290                range_cnt++;
    290291        }
     
    296297struct cpu_pairing_t {
    297298        unsigned cpu;
    298         unsigned llc_id;
     299        unsigned id;
    299300};
    300301
    301302int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
    302         return lhs.llc_id < rhs.llc_id;
    303 }
    304 
    305 static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, idx_range_t * maps, size_t map_cnt) {
     303        return lhs.id < rhs.id;
     304}
     305
     306static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
    306307        cpu_pairing_t * pairings = alloc(cpus);
    307308
     
    310311                idx_range_t want = raw[i][0].range;
    311312                MAP_LOOP: for(j; map_cnt) {
    312                         if(0 != strcmp(want, maps[j])) continue MAP_LOOP;
    313 
    314                         pairings[i].llc_id = j;
     313                        if(0 != strcmp(want, maps[j].raw->range)) continue MAP_LOOP;
     314
     315                        pairings[i].id = j;
    315316                        continue CPU_LOOP;
    316317                }
     
    321322        return pairings;
    322323}
     324
     325#include <fstream.hfa>
    323326
    324327extern "C" {
     
    345348
    346349                // Find number of distinct cache instances
    347                 idx_range_t * maps;
     350                llc_map_t * maps;
    348351                size_t map_cnt;
    349352                [maps, map_cnt] =  distinct_llcs(cpus, cache_levels - llc, raw);
    350353
    351                 /* paranoid */ verify((map_cnt * raw[0][cache_levels - llc].width) == cpus);
     354                #if defined(__CFA_WITH_VERIFY__)
     355                // Verify that the caches cover the all the cpus
     356                {
     357                        unsigned width1 = 0;
     358                        unsigned width2 = 0;
     359                        for(i; map_cnt) {
     360                                const char * _;
     361                                width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
     362                                width2 += maps[i].raw->width;
     363                        }
     364                        verify(width1 == cpus);
     365                        verify(width2 == cpus);
     366                }
     367                #endif
    352368
    353369                // Get mappings from cpu to cache instance
     
    357373                qsort(pairings, cpus);
    358374
    359                 unsigned llc_width = raw[0][cache_levels - llc].width;
    360 
    361                 // From the mappins build the actual cpu map we want
     375                {
     376                        unsigned it = 0;
     377                        for(i; cpus) {
     378                                unsigned llc_id = pairings[i].id;
     379                                if(maps[llc_id].start == -1u) {
     380                                        maps[llc_id].start = it;
     381                                        it += maps[llc_id].raw->width;
     382                                        /* paranoid */ verify(maps[llc_id].start < it);
     383                                        /* paranoid */ verify(it != -1u);
     384                                }
     385                        }
     386                        /* paranoid */ verify(it == cpus);
     387                }
     388
     389                // From the mappings build the actual cpu map we want
    362390                struct cpu_map_entry_t * entries = alloc(cpus);
    363391                for(i; cpus) { entries[i].count = 0; }
    364392                for(i; cpus) {
     393                        /* paranoid */ verify(pairings[i].id < map_cnt);
    365394                        unsigned c = pairings[i].cpu;
    366                         entries[c].start = pairings[i].llc_id * llc_width;
    367                         entries[c].count = llc_width;
     395                        unsigned llc_id = pairings[i].id;
     396                        unsigned width = maps[llc_id].raw->width;
     397                        unsigned start = maps[llc_id].start;
     398                        unsigned self  = start + (maps[llc_id].count++);
     399                        entries[c].count = width;
     400                        entries[c].start = start;
     401                        entries[c].self  = self;
    368402                }
    369403
  • libcfa/src/device/cpu.hfa

    r33e1c91 r929d925  
    1717
    1818struct cpu_map_entry_t {
     19        unsigned self;
    1920        unsigned start;
    2021        unsigned count;
     
    2223
    2324struct cpu_info_t {
     25         // array of size [hthrd_count]
    2426        const cpu_map_entry_t * llc_map;
     27
     28         // Number of _hardware_ threads present in the system
    2529        size_t hthrd_count;
    2630};
  • libcfa/src/exception.c

    r33e1c91 r929d925  
    256256        // the whole stack.
    257257
     258#if defined( __x86_64 ) || defined( __i386 )
    258259        // We did not simply reach the end of the stack without finding a handler. This is an error.
    259260        if ( ret != _URC_END_OF_STACK ) {
     261#else // defined( __ARM_ARCH )
     262        // The return code from _Unwind_RaiseException seems to be corrupt on ARM at end of stack.
     263        // This workaround tries to keep default exception handling working.
     264        if ( ret == _URC_FATAL_PHASE1_ERROR || ret == _URC_FATAL_PHASE2_ERROR ) {
     265#endif
    260266                printf("UNWIND ERROR %d after raise exception\n", ret);
    261267                abort();
  • src/AST/Convert.cpp

    r33e1c91 r929d925  
    24152415        }
    24162416
     2417        virtual void visit( const DimensionExpr * old ) override final {
     2418                // DimensionExpr gets desugared away in Validate.
     2419                // As long as new-AST passes don't use it, this cheap-cheerful error
     2420                // detection helps ensure that these occurrences have been compiled
     2421                // away, as expected.  To move the DimensionExpr boundary downstream
     2422                // or move the new-AST translation boundary upstream, implement
     2423                // DimensionExpr in the new AST and implement a conversion.
     2424                (void) old;
     2425                assert(false && "DimensionExpr should not be present at new-AST boundary");
     2426        }
     2427
    24172428        virtual void visit( const AsmExpr * old ) override final {
    24182429                this->node = visitBaseExpr( old,
  • src/AST/Decl.cpp

    r33e1c91 r929d925  
    7878
    7979const char * TypeDecl::typeString() const {
    80         static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" };
     80        static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized length value" };
    8181        static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );
    8282        assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." );
  • src/AST/Decl.hpp

    r33e1c91 r929d925  
    175175class TypeDecl final : public NamedTypeDecl {
    176176  public:
    177         enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };
     177        enum Kind { Dtype, DStype, Otype, Ftype, Ttype, Dimension, NUMBER_OF_KINDS };
    178178
    179179        Kind kind;
  • src/AST/Pass.impl.hpp

    r33e1c91 r929d925  
    479479                        guard_symtab guard { *this };
    480480                        // implicit add __func__ identifier as specified in the C manual 6.4.2.2
    481                         static ast::ptr< ast::ObjectDecl > func{ new ast::ObjectDecl{ 
     481                        static ast::ptr< ast::ObjectDecl > func{ new ast::ObjectDecl{
    482482                                CodeLocation{}, "__func__",
    483483                                new ast::ArrayType{
     
    522522        VISIT({
    523523                guard_symtab guard { * this };
    524                 maybe_accept( node, &StructDecl::params  );
    525                 maybe_accept( node, &StructDecl::members );
     524                maybe_accept( node, &StructDecl::params     );
     525                maybe_accept( node, &StructDecl::members    );
     526                maybe_accept( node, &StructDecl::attributes );
    526527        })
    527528
     
    543544        VISIT({
    544545                guard_symtab guard { * this };
    545                 maybe_accept( node, &UnionDecl::params  );
    546                 maybe_accept( node, &UnionDecl::members );
     546                maybe_accept( node, &UnionDecl::params     );
     547                maybe_accept( node, &UnionDecl::members    );
     548                maybe_accept( node, &UnionDecl::attributes );
    547549        })
    548550
     
    562564        VISIT(
    563565                // unlike structs, traits, and unions, enums inject their members into the global scope
    564                 maybe_accept( node, &EnumDecl::params  );
    565                 maybe_accept( node, &EnumDecl::members );
     566                maybe_accept( node, &EnumDecl::params     );
     567                maybe_accept( node, &EnumDecl::members    );
     568                maybe_accept( node, &EnumDecl::attributes );
    566569        )
    567570
     
    577580        VISIT({
    578581                guard_symtab guard { *this };
    579                 maybe_accept( node, &TraitDecl::params  );
    580                 maybe_accept( node, &TraitDecl::members );
     582                maybe_accept( node, &TraitDecl::params     );
     583                maybe_accept( node, &TraitDecl::members    );
     584                maybe_accept( node, &TraitDecl::attributes );
    581585        })
    582586
  • src/CodeGen/CodeGenerator.cc

    r33e1c91 r929d925  
    589589                        output << nameExpr->get_name();
    590590                } // if
     591        }
     592
     593        void CodeGenerator::postvisit( DimensionExpr * dimensionExpr ) {
     594                extension( dimensionExpr );
     595                output << "/*non-type*/" << dimensionExpr->get_name();
    591596        }
    592597
  • src/CodeGen/CodeGenerator.h

    r33e1c91 r929d925  
    9292                void postvisit( TupleIndexExpr * tupleExpr );
    9393                void postvisit( TypeExpr *typeExpr );
     94                void postvisit( DimensionExpr *dimensionExpr );
    9495                void postvisit( AsmExpr * );
    9596                void postvisit( StmtExpr * );
  • src/Common/PassVisitor.h

    r33e1c91 r929d925  
    167167        virtual void visit( TypeExpr * typeExpr ) override final;
    168168        virtual void visit( const TypeExpr * typeExpr ) override final;
     169        virtual void visit( DimensionExpr * dimensionExpr ) override final;
     170        virtual void visit( const DimensionExpr * dimensionExpr ) override final;
    169171        virtual void visit( AsmExpr * asmExpr ) override final;
    170172        virtual void visit( const AsmExpr * asmExpr ) override final;
     
    309311        virtual Expression * mutate( CommaExpr * commaExpr ) override final;
    310312        virtual Expression * mutate( TypeExpr * typeExpr ) override final;
     313        virtual Expression * mutate( DimensionExpr * dimensionExpr ) override final;
    311314        virtual Expression * mutate( AsmExpr * asmExpr ) override final;
    312315        virtual Expression * mutate( ImplicitCopyCtorExpr * impCpCtorExpr ) override final;
     
    542545class WithIndexer {
    543546protected:
    544         WithIndexer() {}
     547        WithIndexer( bool trackIdentifiers = true ) : indexer(trackIdentifiers) {}
    545548        ~WithIndexer() {}
    546549
  • src/Common/PassVisitor.impl.h

    r33e1c91 r929d925  
    636636                maybeAccept_impl( node->parameters, *this );
    637637                maybeAccept_impl( node->members   , *this );
     638                maybeAccept_impl( node->attributes, *this );
    638639        }
    639640
     
    656657                maybeAccept_impl( node->parameters, *this );
    657658                maybeAccept_impl( node->members   , *this );
     659                maybeAccept_impl( node->attributes, *this );
    658660        }
    659661
     
    676678                maybeMutate_impl( node->parameters, *this );
    677679                maybeMutate_impl( node->members   , *this );
     680                maybeMutate_impl( node->attributes, *this );
    678681        }
    679682
     
    697700                maybeAccept_impl( node->parameters, *this );
    698701                maybeAccept_impl( node->members   , *this );
     702                maybeAccept_impl( node->attributes, *this );
    699703        }
    700704
     
    714718                maybeAccept_impl( node->parameters, *this );
    715719                maybeAccept_impl( node->members   , *this );
     720                maybeAccept_impl( node->attributes, *this );
    716721        }
    717722
     
    732737                maybeMutate_impl( node->parameters, *this );
    733738                maybeMutate_impl( node->members   , *this );
     739                maybeMutate_impl( node->attributes, *this );
    734740        }
    735741
     
    750756        maybeAccept_impl( node->parameters, *this );
    751757        maybeAccept_impl( node->members   , *this );
     758        maybeAccept_impl( node->attributes, *this );
    752759
    753760        VISIT_END( node );
     
    763770        maybeAccept_impl( node->parameters, *this );
    764771        maybeAccept_impl( node->members   , *this );
     772        maybeAccept_impl( node->attributes, *this );
    765773
    766774        VISIT_END( node );
     
    776784        maybeMutate_impl( node->parameters, *this );
    777785        maybeMutate_impl( node->members   , *this );
     786        maybeMutate_impl( node->attributes, *this );
    778787
    779788        MUTATE_END( Declaration, node );
     
    790799                maybeAccept_impl( node->parameters, *this );
    791800                maybeAccept_impl( node->members   , *this );
     801                maybeAccept_impl( node->attributes, *this );
    792802        }
    793803
     
    805815                maybeAccept_impl( node->parameters, *this );
    806816                maybeAccept_impl( node->members   , *this );
     817                maybeAccept_impl( node->attributes, *this );
    807818        }
    808819
     
    820831                maybeMutate_impl( node->parameters, *this );
    821832                maybeMutate_impl( node->members   , *this );
     833                maybeMutate_impl( node->attributes, *this );
    822834        }
    823835
     
    25072519
    25082520//--------------------------------------------------------------------------
     2521// DimensionExpr
     2522template< typename pass_type >
     2523void PassVisitor< pass_type >::visit( DimensionExpr * node ) {
     2524        VISIT_START( node );
     2525
     2526        indexerScopedAccept( node->result, *this );
     2527
     2528        VISIT_END( node );
     2529}
     2530
     2531template< typename pass_type >
     2532void PassVisitor< pass_type >::visit( const DimensionExpr * node ) {
     2533        VISIT_START( node );
     2534
     2535        indexerScopedAccept( node->result, *this );
     2536
     2537        VISIT_END( node );
     2538}
     2539
     2540template< typename pass_type >
     2541Expression * PassVisitor< pass_type >::mutate( DimensionExpr * node ) {
     2542        MUTATE_START( node );
     2543
     2544        indexerScopedMutate( node->env   , *this );
     2545        indexerScopedMutate( node->result, *this );
     2546
     2547        MUTATE_END( Expression, node );
     2548}
     2549
     2550//--------------------------------------------------------------------------
    25092551// AsmExpr
    25102552template< typename pass_type >
     
    31453187
    31463188        maybeAccept_impl( node->forall, *this );
    3147         // xxx - should PointerType visit/mutate dimension?
     3189        maybeAccept_impl( node->dimension, *this );
    31483190        maybeAccept_impl( node->base, *this );
    31493191
     
    31563198
    31573199        maybeAccept_impl( node->forall, *this );
    3158         // xxx - should PointerType visit/mutate dimension?
     3200        maybeAccept_impl( node->dimension, *this );
    31593201        maybeAccept_impl( node->base, *this );
    31603202
     
    31673209
    31683210        maybeMutate_impl( node->forall, *this );
    3169         // xxx - should PointerType visit/mutate dimension?
     3211        maybeMutate_impl( node->dimension, *this );
    31703212        maybeMutate_impl( node->base, *this );
    31713213
     
    38563898
    38573899//--------------------------------------------------------------------------
    3858 // Attribute
     3900// Constant
    38593901template< typename pass_type >
    38603902void PassVisitor< pass_type >::visit( Constant * node ) {
  • src/InitTweak/InitTweak.cc

    r33e1c91 r929d925  
    1010// Created On       : Fri May 13 11:26:36 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Dec 13 23:15:52 2019
    13 // Update Count     : 8
     12// Last Modified On : Wed Jun 16 20:57:22 2021
     13// Update Count     : 18
    1414//
    1515
     
    12171217        void addDataSectonAttribute( ObjectDecl * objDecl ) {
    12181218                objDecl->attributes.push_back(new Attribute("section", {
    1219                         new ConstantExpr( Constant::from_string(".data#") ),
    1220                 }));
     1219                        new ConstantExpr( Constant::from_string(".data"
     1220#if defined( __x86_64 ) || defined( __i386 ) // assembler comment to prevent assembler warning message
     1221                                        "#"
     1222#else // defined( __ARM_ARCH )
     1223                                        "//"
     1224#endif
     1225                                ))}));
    12211226        }
    12221227
    12231228        void addDataSectionAttribute( ast::ObjectDecl * objDecl ) {
    12241229                objDecl->attributes.push_back(new ast::Attribute("section", {
    1225                         ast::ConstantExpr::from_string(objDecl->location, ".data#"),
    1226                 }));
     1230                        ast::ConstantExpr::from_string(objDecl->location, ".data"
     1231#if defined( __x86_64 ) || defined( __i386 ) // assembler comment to prevent assembler warning message
     1232                                        "#"
     1233#else // defined( __ARM_ARCH )
     1234                                        "//"
     1235#endif
     1236                                )}));
    12271237        }
    12281238
  • src/Parser/DeclarationNode.cc

    r33e1c91 r929d925  
    10761076        if ( variable.tyClass != TypeDecl::NUMBER_OF_KINDS ) {
    10771077                // otype is internally converted to dtype + otype parameters
    1078                 static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::DStype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::Dtype };
     1078                static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::DStype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::Dimension };
    10791079                static_assert( sizeof(kindMap) / sizeof(kindMap[0]) == TypeDecl::NUMBER_OF_KINDS, "DeclarationNode::build: kindMap is out of sync." );
    10801080                assertf( variable.tyClass < sizeof(kindMap)/sizeof(kindMap[0]), "Variable's tyClass is out of bounds." );
    1081                 TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype || variable.tyClass == TypeDecl::ALtype, variable.initializer ? variable.initializer->buildType() : nullptr );
     1081                TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype, variable.initializer ? variable.initializer->buildType() : nullptr );
    10821082                buildList( variable.assertions, ret->get_assertions() );
    10831083                return ret;
  • src/Parser/ExpressionNode.cc

    r33e1c91 r929d925  
    509509} // build_varref
    510510
     511DimensionExpr * build_dimensionref( const string * name ) {
     512        DimensionExpr * expr = new DimensionExpr( *name );
     513        delete name;
     514        return expr;
      515} // build_dimensionref
    511516// TODO: get rid of this and OperKinds and reuse code from OperatorTable
    512517static const char * OperName[] = {                                              // must harmonize with OperKinds
  • src/Parser/ParseNode.h

    r33e1c91 r929d925  
    183183
    184184NameExpr * build_varref( const std::string * name );
     185DimensionExpr * build_dimensionref( const std::string * name );
    185186
    186187Expression * build_cast( DeclarationNode * decl_node, ExpressionNode * expr_node );
  • src/Parser/TypedefTable.cc

    r33e1c91 r929d925  
    1010// Created On       : Sat May 16 15:20:13 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Mar 15 20:56:47 2021
    13 // Update Count     : 260
     12// Last Modified On : Wed May 19 08:30:14 2021
     13// Update Count     : 262
    1414//
    1515
     
    3131        switch ( kind ) {
    3232          case IDENTIFIER: return "identifier";
     33          case TYPEDIMname: return "typedim";
    3334          case TYPEDEFname: return "typedef";
    3435          case TYPEGENname: return "typegen";
  • src/Parser/lex.ll

    r33e1c91 r929d925  
    1010 * Created On       : Sat Sep 22 08:58:10 2001
    1111 * Last Modified By : Peter A. Buhr
    12  * Last Modified On : Thu Apr  1 13:22:31 2021
    13  * Update Count     : 754
     12 * Last Modified On : Sun Jun 20 18:41:09 2021
     13 * Update Count     : 759
    1414 */
    1515
     
    117117hex_constant {hex_prefix}{hex_digits}{integer_suffix_opt}
    118118
    119                                 // GCC: D (double) and iI (imaginary) suffixes, and DL (long double)
     119                                // GCC: floating D (double), imaginary iI, and decimal floating DF, DD, DL
    120120exponent "_"?[eE]"_"?[+-]?{decimal_digits}
    121121floating_size 16|32|32x|64|64x|80|128|128x
    122122floating_length ([fFdDlLwWqQ]|[fF]{floating_size})
    123123floating_suffix ({floating_length}?[iI]?)|([iI]{floating_length})
    124 floating_suffix_opt ("_"?({floating_suffix}|"DL"))?
     124decimal_floating_suffix [dD][fFdDlL]
     125floating_suffix_opt ("_"?({floating_suffix}|{decimal_floating_suffix}))?
    125126decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
    126127floating_decimal {decimal_digits}"."{exponent}?{floating_suffix_opt}
     
    234235continue                { KEYWORD_RETURN(CONTINUE); }
    235236coroutine               { KEYWORD_RETURN(COROUTINE); }                  // CFA
     237_Decimal32              { KEYWORD_RETURN(DECIMAL32); }                  // GCC
     238_Decimal64              { KEYWORD_RETURN(DECIMAL64); }                  // GCC
     239_Decimal128             { KEYWORD_RETURN(DECIMAL128); }                 // GCC
    236240default                 { KEYWORD_RETURN(DEFAULT); }
    237241disable                 { KEYWORD_RETURN(DISABLE); }                    // CFA
  • src/Parser/parser.yy

    r33e1c91 r929d925  
    1010// Created On       : Sat Sep  1 20:22:55 2001
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Apr 26 18:41:54 2021
    13 // Update Count     : 4990
     12// Last Modified On : Sun Jun 20 18:46:51 2021
     13// Update Count     : 5023
    1414//
    1515
     
    269269%token INT128 UINT128 uuFLOAT80 uuFLOAT128                              // GCC
    270270%token uFLOAT16 uFLOAT32 uFLOAT32X uFLOAT64 uFLOAT64X uFLOAT128 // GCC
     271%token DECIMAL32 DECIMAL64 DECIMAL128                                   // GCC
    271272%token ZERO_T ONE_T                                                                             // CFA
    272273%token SIZEOF TYPEOF VALIST AUTO_TYPE                                   // GCC
     
    287288
    288289// names and constants: lexer differentiates between identifier and typedef names
    289 %token<tok> IDENTIFIER          QUOTED_IDENTIFIER       TYPEDEFname             TYPEGENname
     290%token<tok> IDENTIFIER          QUOTED_IDENTIFIER       TYPEDIMname             TYPEDEFname             TYPEGENname
    290291%token<tok> TIMEOUT                     WOR                                     CATCH                   RECOVER                 CATCHRESUME             FIXUP           FINALLY         // CFA
    291292%token<tok> INTEGERconstant     CHARACTERconstant       STRINGliteral
     
    586587        | quasi_keyword
    587588                { $$ = new ExpressionNode( build_varref( $1 ) ); }
     589        | TYPEDIMname                                                                           // CFA, generic length argument
     590                // { $$ = new ExpressionNode( new TypeExpr( maybeMoveBuildType( DeclarationNode::newFromTypedef( $1 ) ) ) ); }
     591                // { $$ = new ExpressionNode( build_varref( $1 ) ); }
     592                { $$ = new ExpressionNode( build_dimensionref( $1 ) ); }
    588593        | tuple
    589594        | '(' comma_expression ')'
     
    18871892        | uFLOAT128
    18881893                { $$ = DeclarationNode::newBasicType( DeclarationNode::uFloat128 ); }
     1894        | DECIMAL32
     1895                { SemanticError( yylloc, "_Decimal32 is currently unimplemented." ); $$ = nullptr; }
     1896        | DECIMAL64
     1897                { SemanticError( yylloc, "_Decimal64 is currently unimplemented." ); $$ = nullptr; }
     1898        | DECIMAL128
     1899                { SemanticError( yylloc, "_Decimal128 is currently unimplemented." ); $$ = nullptr; }
    18891900        | COMPLEX                                                                                       // C99
    18901901                { $$ = DeclarationNode::newComplexType( DeclarationNode::Complex ); }
     
    25352546        | '[' identifier_or_type_name ']'
    25362547                {
    2537                         typedefTable.addToScope( *$2, TYPEDEFname, "9" );
    2538                         $$ = DeclarationNode::newTypeParam( TypeDecl::ALtype, $2 );
     2548                        typedefTable.addToScope( *$2, TYPEDIMname, "9" );
     2549                        $$ = DeclarationNode::newTypeParam( TypeDecl::Dimension, $2 );
    25392550                }
    25402551        // | type_specifier identifier_parameter_declarator
     
    25902601                { $$ = new ExpressionNode( new TypeExpr( maybeMoveBuildType( $1 ) ) ); }
    25912602        | assignment_expression
    2592                 { SemanticError( yylloc, toString("Expression generic parameters are currently unimplemented: ", $1->build()) ); $$ = nullptr; }
    25932603        | type_list ',' type
    25942604                { $$ = (ExpressionNode *)($1->set_last( new ExpressionNode( new TypeExpr( maybeMoveBuildType( $3 ) ) ) )); }
    25952605        | type_list ',' assignment_expression
    2596                 { SemanticError( yylloc, toString("Expression generic parameters are currently unimplemented: ", $3->build()) ); $$ = nullptr; }
    2597                 // { $$ = (ExpressionNode *)( $1->set_last( $3 )); }
     2606                { $$ = (ExpressionNode *)( $1->set_last( $3 )); }
    25982607        ;
    25992608
  • src/SymTab/Indexer.cc

    r33e1c91 r929d925  
    7474        }
    7575
    76         Indexer::Indexer()
     76        Indexer::Indexer( bool trackIdentifiers )
    7777        : idTable(), typeTable(), structTable(), enumTable(), unionTable(), traitTable(),
    78           prevScope(), scope( 0 ), repScope( 0 ) { ++* stats().count; }
     78          prevScope(), scope( 0 ), repScope( 0 ), trackIdentifiers( trackIdentifiers ) { ++* stats().count; }
    7979
    8080        Indexer::~Indexer() {
     
    110110
    111111        void Indexer::lookupId( const std::string & id, std::list< IdData > &out ) const {
     112                assert( trackIdentifiers );
     113
    112114                ++* stats().lookup_calls;
    113115                if ( ! idTable ) return;
     
    434436                        const Declaration * deleteStmt ) {
    435437                ++* stats().add_calls;
     438                if ( ! trackIdentifiers ) return;
    436439                const std::string &name = decl->name;
    437440                if ( name == "" ) return;
  • src/SymTab/Indexer.h

    r33e1c91 r929d925  
    3131        class Indexer : public std::enable_shared_from_this<SymTab::Indexer> {
    3232        public:
    33                 explicit Indexer();
     33                explicit Indexer( bool trackIdentifiers = true );
    3434                virtual ~Indexer();
    3535
     
    180180                /// returns true if there exists a declaration with C linkage and the given name with a different mangled name
    181181                bool hasIncompatibleCDecl( const std::string & id, const std::string & mangleName ) const;
     182
     183            bool trackIdentifiers;
    182184        };
    183185} // namespace SymTab
  • src/SymTab/Validate.cc

    r33e1c91 r929d925  
    105105
    106106        struct FixQualifiedTypes final : public WithIndexer {
     107                FixQualifiedTypes() : WithIndexer(false) {}
    107108                Type * postmutate( QualifiedType * );
    108109        };
     
    174175        };
    175176
     177        /// Does early resolution on the expressions that give enumeration constants their values
     178        struct ResolveEnumInitializers final : public WithIndexer, public WithGuards, public WithVisitorRef<ResolveEnumInitializers>, public WithShortCircuiting {
     179                ResolveEnumInitializers( const Indexer * indexer );
     180                void postvisit( EnumDecl * enumDecl );
     181
     182          private:
     183                const Indexer * local_indexer;
     184
     185        };
     186
    176187        /// Replaces array and function types in forall lists by appropriate pointer type and assigns each Object and Function declaration a unique ID.
    177188        struct ForallPointerDecay_old final {
     
    260271                void previsit( StructInstType * inst );
    261272                void previsit( UnionInstType * inst );
     273        };
     274
      275        /// desugar declarations and uses of dimension parameters like [N],
      276        /// from type-system managed values, to tunneling via ordinary types,
     277        /// as char[-] in and sizeof(-) out
     278        struct TranslateDimensionGenericParameters : public WithIndexer, public WithGuards {
     279                static void translateDimensions( std::list< Declaration * > &translationUnit );
     280                TranslateDimensionGenericParameters();
     281
     282                bool nextVisitedNodeIsChildOfSUIT = false; // SUIT = Struct or Union -Inst Type
     283                bool visitingChildOfSUIT = false;
     284                void changeState_ChildOfSUIT( bool newVal );
     285                void premutate( StructInstType * sit );
     286                void premutate( UnionInstType * uit );
     287                void premutate( BaseSyntaxNode * node );
     288
     289                TypeDecl * postmutate( TypeDecl * td );
     290                Expression * postmutate( DimensionExpr * de );
     291                Expression * postmutate( Expression * e );
    262292        };
    263293
     
    307337                PassVisitor<EnumAndPointerDecay_old> epc;
    308338                PassVisitor<LinkReferenceToTypes_old> lrt( nullptr );
     339                PassVisitor<ResolveEnumInitializers> rei( nullptr );
    309340                PassVisitor<ForallPointerDecay_old> fpd;
    310341                PassVisitor<CompoundLiteral> compoundliteral;
     
    326357                        Stats::Heap::newPass("validate-B");
    327358                        Stats::Time::BlockGuard guard("validate-B");
    328                         Stats::Time::TimeBlock("Link Reference To Types", [&]() {
    329                                 acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions
    330                         });
    331                         Stats::Time::TimeBlock("Fix Qualified Types", [&]() {
    332                                 mutateAll( translationUnit, fixQual ); // must happen after LinkReferenceToTypes_old, because aggregate members are accessed
    333                         });
    334                         Stats::Time::TimeBlock("Hoist Structs", [&]() {
    335                                 HoistStruct::hoistStruct( translationUnit ); // must happen after EliminateTypedef, so that aggregate typedefs occur in the correct order
    336                         });
    337                         Stats::Time::TimeBlock("Eliminate Typedefs", [&]() {
    338                                 EliminateTypedef::eliminateTypedef( translationUnit ); //
    339                         });
     359                        acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions
     360                        mutateAll( translationUnit, fixQual ); // must happen after LinkReferenceToTypes_old, because aggregate members are accessed
     361                        HoistStruct::hoistStruct( translationUnit );
     362                        EliminateTypedef::eliminateTypedef( translationUnit );
    340363                }
    341364                {
    342365                        Stats::Heap::newPass("validate-C");
    343366                        Stats::Time::BlockGuard guard("validate-C");
    344                         acceptAll( translationUnit, genericParams );  // check as early as possible - can't happen before LinkReferenceToTypes_old
    345                         ReturnChecker::checkFunctionReturns( translationUnit );
    346                         InitTweak::fixReturnStatements( translationUnit ); // must happen before autogen
     367                        Stats::Time::TimeBlock("Validate Generic Parameters", [&]() {
     368                                acceptAll( translationUnit, genericParams );  // check as early as possible - can't happen before LinkReferenceToTypes_old; observed failing when attempted before eliminateTypedef
     369                        });
     370                        Stats::Time::TimeBlock("Translate Dimensions", [&]() {
     371                                TranslateDimensionGenericParameters::translateDimensions( translationUnit );
     372                        });
     373                        Stats::Time::TimeBlock("Resolve Enum Initializers", [&]() {
     374                                acceptAll( translationUnit, rei ); // must happen after translateDimensions because rei needs identifier lookup, which needs name mangling
     375                        });
     376                        Stats::Time::TimeBlock("Check Function Returns", [&]() {
     377                                ReturnChecker::checkFunctionReturns( translationUnit );
     378                        });
     379                        Stats::Time::TimeBlock("Fix Return Statements", [&]() {
     380                                InitTweak::fixReturnStatements( translationUnit ); // must happen before autogen
     381                        });
    347382                }
    348383                {
     
    644679        }
    645680
    646         LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) {
     681        LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) : WithIndexer( false ) {
    647682                if ( other_indexer ) {
    648683                        local_indexer = other_indexer;
     
    664699        }
    665700
    666         void checkGenericParameters( ReferenceToType * inst ) {
    667                 for ( Expression * param : inst->parameters ) {
    668                         if ( ! dynamic_cast< TypeExpr * >( param ) ) {
    669                                 SemanticError( inst, "Expression parameters for generic types are currently unsupported: " );
    670                         }
    671                 }
    672         }
    673 
    674701        void LinkReferenceToTypes_old::postvisit( StructInstType * structInst ) {
    675702                const StructDecl * st = local_indexer->lookupStruct( structInst->name );
     
    682709                        forwardStructs[ structInst->name ].push_back( structInst );
    683710                } // if
    684                 checkGenericParameters( structInst );
    685711        }
    686712
     
    695721                        forwardUnions[ unionInst->name ].push_back( unionInst );
    696722                } // if
    697                 checkGenericParameters( unionInst );
    698723        }
    699724
     
    807832                                forwardEnums.erase( fwds );
    808833                        } // if
    809 
    810                         for ( Declaration * member : enumDecl->members ) {
    811                                 ObjectDecl * field = strict_dynamic_cast<ObjectDecl *>( member );
    812                                 if ( field->init ) {
    813                                         // need to resolve enumerator initializers early so that other passes that determine if an expression is constexpr have the appropriate information.
    814                                         SingleInit * init = strict_dynamic_cast<SingleInit *>( field->init );
    815                                         ResolvExpr::findSingleExpression( init->value, new BasicType( Type::Qualifiers(), BasicType::SignedInt ), indexer );
    816                                 }
    817                         }
    818834                } // if
    819835        }
     
    878894                                typeInst->set_isFtype( typeDecl->kind == TypeDecl::Ftype );
    879895                        } // if
     896                } // if
     897        }
     898
     899        ResolveEnumInitializers::ResolveEnumInitializers( const Indexer * other_indexer ) : WithIndexer( true ) {
     900                if ( other_indexer ) {
     901                        local_indexer = other_indexer;
     902                } else {
     903                        local_indexer = &indexer;
     904                } // if
     905        }
     906
     907        void ResolveEnumInitializers::postvisit( EnumDecl * enumDecl ) {
     908                if ( enumDecl->body ) {
     909                        for ( Declaration * member : enumDecl->members ) {
     910                                ObjectDecl * field = strict_dynamic_cast<ObjectDecl *>( member );
     911                                if ( field->init ) {
     912                                        // need to resolve enumerator initializers early so that other passes that determine if an expression is constexpr have the appropriate information.
     913                                        SingleInit * init = strict_dynamic_cast<SingleInit *>( field->init );
     914                                        ResolvExpr::findSingleExpression( init->value, new BasicType( Type::Qualifiers(), BasicType::SignedInt ), indexer );
     915                                }
     916                        }
    880917                } // if
    881918        }
     
    11521189                GuardScope( typedeclNames );
    11531190                mutateAll( aggr->parameters, * visitor );
     1191                mutateAll( aggr->attributes, * visitor );
    11541192
    11551193                // unroll mutateAll for aggr->members so that implicit typedefs for nested types are added to the aggregate body.
     
    12201258                        }
    12211259                }
     1260        }
     1261
     1262        // Test for special name on a generic parameter.  Special treatment for the
     1263        // special name is a bootstrapping hack.  In most cases, the worlds of T's
      1264        // and of N's don't overlap (normal treatment).  The foundations in
     1265        // array.hfa use tagging for both types and dimensions.  Tagging treats
     1266        // its subject parameter even more opaquely than T&, which assumes it is
     1267        // possible to have a pointer/reference to such an object.  Tagging only
     1268        // seeks to identify the type-system resident at compile time.  Both N's
     1269        // and T's can make tags.  The tag definition uses the special name, which
      1270        // is treated as "an N or a T."  This feature is not intended to be used
     1271        // outside of the definition and immediate uses of a tag.
     1272        static inline bool isReservedTysysIdOnlyName( const std::string & name ) {
     1273                // name's prefix was __CFA_tysys_id_only, before it got wrapped in __..._generic
     1274                int foundAt = name.find("__CFA_tysys_id_only");
     1275                if (foundAt == 0) return true;
     1276                if (foundAt == 2 && name[0] == '_' && name[1] == '_') return true;
     1277                return false;
    12221278        }
    12231279
     
    12381294                        TypeSubstitution sub;
    12391295                        auto paramIter = params->begin();
    1240                         for ( size_t i = 0; paramIter != params->end(); ++paramIter, ++i ) {
    1241                                 if ( i < args.size() ) {
    1242                                         TypeExpr * expr = strict_dynamic_cast< TypeExpr * >( * std::next( args.begin(), i ) );
    1243                                         sub.add( (* paramIter)->get_name(), expr->get_type()->clone() );
    1244                                 } else if ( i == args.size() ) {
     1296                        auto argIter = args.begin();
     1297                        for ( ; paramIter != params->end(); ++paramIter, ++argIter ) {
     1298                                if ( argIter != args.end() ) {
     1299                                        TypeExpr * expr = dynamic_cast< TypeExpr * >( * argIter );
     1300                                        if ( expr ) {
     1301                                                sub.add( (* paramIter)->get_name(), expr->get_type()->clone() );
     1302                                        }
     1303                                } else {
    12451304                                        Type * defaultType = (* paramIter)->get_init();
    12461305                                        if ( defaultType ) {
    12471306                                                args.push_back( new TypeExpr( defaultType->clone() ) );
    12481307                                                sub.add( (* paramIter)->get_name(), defaultType->clone() );
     1308                                                argIter = std::prev(args.end());
     1309                                        } else {
     1310                                                SemanticError( inst, "Too few type arguments in generic type " );
    12491311                                        }
    12501312                                }
     1313                                assert( argIter != args.end() );
     1314                                bool typeParamDeclared = (*paramIter)->kind != TypeDecl::Kind::Dimension;
     1315                                bool typeArgGiven;
     1316                                if ( isReservedTysysIdOnlyName( (*paramIter)->name ) ) {
     1317                                        // coerce a match when declaration is reserved name, which means "either"
     1318                                        typeArgGiven = typeParamDeclared;
     1319                                } else {
     1320                                        typeArgGiven = dynamic_cast< TypeExpr * >( * argIter );
     1321                                }
     1322                                if ( ! typeParamDeclared &&   typeArgGiven ) SemanticError( inst, "Type argument given for value parameter: " );
     1323                                if (   typeParamDeclared && ! typeArgGiven ) SemanticError( inst, "Expression argument given for type parameter: " );
    12511324                        }
    12521325
    12531326                        sub.apply( inst );
    1254                         if ( args.size() < params->size() ) SemanticError( inst, "Too few type arguments in generic type " );
    12551327                        if ( args.size() > params->size() ) SemanticError( inst, "Too many type arguments in generic type " );
    12561328                }
     
    12631335        void ValidateGenericParameters::previsit( UnionInstType * inst ) {
    12641336                validateGeneric( inst );
     1337        }
     1338
     1339        void TranslateDimensionGenericParameters::translateDimensions( std::list< Declaration * > &translationUnit ) {
     1340                PassVisitor<TranslateDimensionGenericParameters> translator;
     1341                mutateAll( translationUnit, translator );
     1342        }
     1343
     1344        TranslateDimensionGenericParameters::TranslateDimensionGenericParameters() : WithIndexer( false ) {}
     1345
     1346        // Declaration of type variable:           forall( [N] )          ->  forall( N & | sized( N ) )
     1347        TypeDecl * TranslateDimensionGenericParameters::postmutate( TypeDecl * td ) {
     1348                if ( td->kind == TypeDecl::Dimension ) {
     1349                        td->kind = TypeDecl::Dtype;
     1350                        if ( ! isReservedTysysIdOnlyName( td->name ) ) {
     1351                                td->sized = true;
     1352                        }
     1353                }
     1354                return td;
     1355        }
     1356
     1357        // Situational awareness:
     1358        // array( float, [[currentExpr]]     )  has  visitingChildOfSUIT == true
     1359        // array( float, [[currentExpr]] - 1 )  has  visitingChildOfSUIT == false
     1360        // size_t x =    [[currentExpr]]        has  visitingChildOfSUIT == false
     1361        void TranslateDimensionGenericParameters::changeState_ChildOfSUIT( bool newVal ) {
     1362                GuardValue( nextVisitedNodeIsChildOfSUIT );
     1363                GuardValue( visitingChildOfSUIT );
     1364                visitingChildOfSUIT = nextVisitedNodeIsChildOfSUIT;
     1365                nextVisitedNodeIsChildOfSUIT = newVal;
     1366        }
     1367        void TranslateDimensionGenericParameters::premutate( StructInstType * sit ) {
     1368                (void) sit;
     1369                changeState_ChildOfSUIT(true);
     1370        }
     1371        void TranslateDimensionGenericParameters::premutate( UnionInstType * uit ) {
     1372                (void) uit;
     1373                changeState_ChildOfSUIT(true);
     1374        }
     1375        void TranslateDimensionGenericParameters::premutate( BaseSyntaxNode * node ) {
     1376                (void) node;
     1377                changeState_ChildOfSUIT(false);
     1378        }
     1379
     1380        // Passing values as dimension arguments:  array( float,     7 )  -> array( float, char[             7 ] )
     1381        // Consuming dimension parameters:         size_t x =    N - 1 ;  -> size_t x =          sizeof(N) - 1   ;
     1382        // Intertwined reality:                    array( float, N     )  -> array( float,              N        )
     1383        //                                         array( float, N - 1 )  -> array( float, char[ sizeof(N) - 1 ] )
     1384        // Intertwined case 1 is not just an optimization.
     1385        // Avoiding char[sizeof(-)] is necessary to enable the call of f to bind the value of N, in:
     1386        //   forall([N]) void f( array(float, N) & );
     1387        //   array(float, 7) a;
     1388        //   f(a);
     1389
     1390        Expression * TranslateDimensionGenericParameters::postmutate( DimensionExpr * de ) {
     1391                // Expression de is an occurrence of N in LHS of above examples.
     1392                // Look up the name that de references.
     1393                // If we are in a struct body, then this reference can be to an entry of the stuct's forall list.
     1394                // Whether or not we are in a struct body, this reference can be to an entry of a containing function's forall list.
     1395                // If we are in a struct body, then the stuct's forall declarations are innermost (functions don't occur in structs).
     1396                // Thus, a potential struct's declaration is highest priority.
     1397                // A struct's forall declarations are already renamed with _generic_ suffix.  Try that name variant first.
     1398
     1399                std::string useName = "__" + de->name + "_generic_";
     1400                TypeDecl * namedParamDecl = const_cast<TypeDecl *>( strict_dynamic_cast<const TypeDecl *, nullptr >( indexer.lookupType( useName ) ) );
     1401
     1402                if ( ! namedParamDecl ) {
     1403                        useName = de->name;
     1404                        namedParamDecl = const_cast<TypeDecl *>( strict_dynamic_cast<const TypeDecl *, nullptr >( indexer.lookupType( useName ) ) );
     1405                }
     1406
     1407                // Expect to find it always.  A misspelled name would have been parsed as an identifier.
     1408                assert( namedParamDecl && "Type-system-managed value name not found in symbol table" );
     1409
     1410                delete de;
     1411
     1412                TypeInstType * refToDecl = new TypeInstType( 0, useName, namedParamDecl );
     1413
     1414                if ( visitingChildOfSUIT ) {
     1415                        // As in postmutate( Expression * ), topmost expression needs a TypeExpr wrapper
     1416                        // But avoid ArrayType-Sizeof
     1417                        return new TypeExpr( refToDecl );
     1418                } else {
     1419                        // the N occurrence is being used directly as a runtime value,
     1420                        // if we are in a type instantiation, then the N is within a bigger value computation
     1421                        return new SizeofExpr( refToDecl );
     1422                }
     1423        }
     1424
     1425        Expression * TranslateDimensionGenericParameters::postmutate( Expression * e ) {
     1426                if ( visitingChildOfSUIT ) {
     1427                        // e is an expression used as an argument to instantiate a type
     1428                        if (! dynamic_cast< TypeExpr * >( e ) ) {
     1429                                // e is a value expression
     1430                                // but not a DimensionExpr, which has a distinct postmutate
     1431                                Type * typeExprContent = new ArrayType( 0, new BasicType( 0, BasicType::Char ), e, true, false );
     1432                                TypeExpr * result = new TypeExpr( typeExprContent );
     1433                                return result;
     1434                        }
     1435                }
     1436                return e;
    12651437        }
    12661438
  • src/SynTree/Declaration.h

    r33e1c91 r929d925  
    201201        typedef NamedTypeDecl Parent;
    202202  public:
    203         enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };
     203        enum Kind { Dtype, DStype, Otype, Ftype, Ttype, Dimension, NUMBER_OF_KINDS };
    204204
    205205        Kind kind;
  • src/SynTree/Expression.h

    r33e1c91 r929d925  
    587587};
    588588
     589/// DimensionExpr represents a type-system provided value used in an expression ( forrall([N]) ... N + 1 )
     590class DimensionExpr : public Expression {
     591  public:
     592        std::string name;
     593
     594        DimensionExpr( std::string name );
     595        DimensionExpr( const DimensionExpr & other );
     596        virtual ~DimensionExpr();
     597
     598        const std::string & get_name() const { return name; }
     599        void set_name( std::string newValue ) { name = newValue; }
     600
     601        virtual DimensionExpr * clone() const override { return new DimensionExpr( * this ); }
     602        virtual void accept( Visitor & v ) override { v.visit( this ); }
     603        virtual void accept( Visitor & v ) const override { v.visit( this ); }
     604        virtual Expression * acceptMutator( Mutator & m ) override { return m.mutate( this ); }
     605        virtual void print( std::ostream & os, Indenter indent = {} ) const override;
     606};
     607
    589608/// AsmExpr represents a GCC 'asm constraint operand' used in an asm statement: [output] "=f" (result)
    590609class AsmExpr : public Expression {
  • src/SynTree/Mutator.h

    r33e1c91 r929d925  
    8080        virtual Expression * mutate( CommaExpr * commaExpr ) = 0;
    8181        virtual Expression * mutate( TypeExpr * typeExpr ) = 0;
     82        virtual Expression * mutate( DimensionExpr * dimensionExpr ) = 0;
    8283        virtual Expression * mutate( AsmExpr * asmExpr ) = 0;
    8384        virtual Expression * mutate( ImplicitCopyCtorExpr * impCpCtorExpr ) = 0;
  • src/SynTree/SynTree.h

    r33e1c91 r929d925  
    8585class CommaExpr;
    8686class TypeExpr;
     87class DimensionExpr;
    8788class AsmExpr;
    8889class ImplicitCopyCtorExpr;
  • src/SynTree/TypeDecl.cc

    r33e1c91 r929d925  
    3333
    3434const char * TypeDecl::typeString() const {
    35         static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" };
     35        static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized length value" };
    3636        static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );
    3737        assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." );
  • src/SynTree/TypeExpr.cc

    r33e1c91 r929d925  
    3535}
    3636
     37DimensionExpr::DimensionExpr( std::string name ) : Expression(), name(name) {
     38        assertf(name != "0", "Zero is not a valid name");
     39        assertf(name != "1", "One is not a valid name");
     40}
     41
     42DimensionExpr::DimensionExpr( const DimensionExpr & other ) : Expression( other ), name( other.name ) {
     43}
     44
     45DimensionExpr::~DimensionExpr() {}
     46
     47void DimensionExpr::print( std::ostream & os, Indenter indent ) const {
     48        os << "Type-Sys Value: " << get_name();
     49        Expression::print( os, indent );
     50}
    3751// Local Variables: //
    3852// tab-width: 4 //
  • src/SynTree/Visitor.h

    r33e1c91 r929d925  
    135135        virtual void visit( TypeExpr * node ) { visit( const_cast<const TypeExpr *>(node) ); }
    136136        virtual void visit( const TypeExpr * typeExpr ) = 0;
     137        virtual void visit( DimensionExpr * node ) { visit( const_cast<const DimensionExpr *>(node) ); }
     138        virtual void visit( const DimensionExpr * typeExpr ) = 0;
    137139        virtual void visit( AsmExpr * node ) { visit( const_cast<const AsmExpr *>(node) ); }
    138140        virtual void visit( const AsmExpr * asmExpr ) = 0;
  • tests/.expect/typedefRedef-ERR1.txt

    r33e1c91 r929d925  
    1 typedefRedef.cfa:69:25: warning: Compiled
     1typedefRedef.cfa:75:25: warning: Compiled
    22typedefRedef.cfa:4:1 error: Cannot redefine typedef: Foo
    3 typedefRedef.cfa:59:1 error: Cannot redefine typedef: ARR
     3typedefRedef.cfa:65:1 error: Cannot redefine typedef: ARR
  • tests/.expect/typedefRedef.txt

    r33e1c91 r929d925  
    1 typedefRedef.cfa:69:25: warning: Compiled
     1typedefRedef.cfa:75:25: warning: Compiled
  • tests/array-container/array-basic.cfa

    r33e1c91 r929d925  
    6161forall( [Nw], [Nx], [Ny], [Nz] )
    6262void fillHelloData( array( float, Nw, Nx, Ny, Nz ) & wxyz ) {
    63     for (w; z(Nw))
    64     for (x; z(Nx))
    65     for (y; z(Ny))
    66     for (z; z(Nz))
     63    for (w; Nw)
     64    for (x; Nx)
     65    for (y; Ny)
     66    for (z; Nz)
    6767        wxyz[w][x][y][z] = getMagicNumber(w, x, y, z);
    6868}
    6969
    70 forall( [Zn]
     70forall( [N]
    7171      , S & | sized(S)
    7272      )
    73 float total1d_low( arpk(Zn, S, float, float ) & a ) {
     73float total1d_low( arpk(N, S, float, float ) & a ) {
    7474    float total = 0.0f;
    75     for (i; z(Zn))
     75    for (i; N)
    7676        total += a[i];
    7777    return total;
     
    9898
    9999    expect = 0;
    100     for (i; z(Nw))
     100    for (i; Nw)
    101101        expect += getMagicNumber( i, slice_ix, slice_ix, slice_ix );
    102102    printf("expect Ws             = %f\n", expect);
     
    117117
    118118    expect = 0;
    119     for (i; z(Nx))
     119    for (i; Nx)
    120120        expect += getMagicNumber( slice_ix, i, slice_ix, slice_ix );
    121121    printf("expect Xs             = %f\n", expect);
  • tests/array-container/array-md-sbscr-cases.cfa

    r33e1c91 r929d925  
    2020forall( [Nw], [Nx], [Ny], [Nz] )
    2121void fillHelloData( array( float, Nw, Nx, Ny, Nz ) & wxyz ) {
    22     for (w; z(Nw))
    23     for (x; z(Nx))
    24     for (y; z(Ny))
    25     for (z; z(Nz))
     22    for (w; Nw)
     23    for (x; Nx)
     24    for (y; Ny)
     25    for (z; Nz)
    2626        wxyz[w][x][y][z] = getMagicNumber(w, x, y, z);
    2727}
     
    246246    assert(( wxyz[[2,  3,  4,  5]] == valExpected ));
    247247
    248     for ( i; z(Nw) ) {
     248    for ( i; Nw ) {
    249249        assert(( wxyz[[ i, 3, 4, 5 ]] == getMagicNumber(i, 3, 4, 5) ));
    250250    }
    251251
    252     for ( i; z(Nx) ) {
     252    for ( i; Nx ) {
    253253        assert(( wxyz[[ 2, i, 4, 5 ]] == getMagicNumber(2, i, 4, 5) ));
    254254    }
    255255
    256     for ( i; z(Ny) ) {
     256    for ( i; Ny ) {
    257257        assert(( wxyz[[ 2, 3, i, 5 ]] == getMagicNumber(2, 3, i, 5) ));
    258258    }
    259259
    260     for ( i; z(Nz) ) {
     260    for ( i; Nz ) {
    261261        assert(( wxyz[[ 2, 3, 4, i ]] == getMagicNumber(2, 3, 4, i) ));
    262262    }
    263263
    264     for ( i; z(Nw) ) {
     264    for ( i; Nw ) {
    265265        assert(( wxyz[[ i, all, 4, 5 ]][3] == getMagicNumber(i, 3, 4, 5) ));
    266266    }
    267267
    268     for ( i; z(Nw) ) {
     268    for ( i; Nw ) {
    269269        assert(( wxyz[[ all, 3, 4, 5 ]][i] == getMagicNumber(i, 3, 4, 5) ));
    270270    }
  • tests/device/cpu.cfa

    r33e1c91 r929d925  
    1717#include <fstream.hfa>
    1818#include <device/cpu.hfa>
     19#include <stdlib.hfa>
     20
     21#include <errno.h>
     22#include <stdio.h>
     23#include <string.h>
     24#include <unistd.h>
     25
    1926extern "C" {
     27        #include <dirent.h>
     28        #include <sys/types.h>
     29        #include <sys/stat.h>
    2030        #include <sys/sysinfo.h>
     31        #include <fcntl.h>
     32}
     33
     34// go through a directory calling fn on each file
     35static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
     36        // open the directory
     37        DIR *dir = opendir(path);
     38        if(dir == 0p) { return ENOTDIR; }
     39
     40        // call fn for each
     41        struct dirent * ent;
     42        while ((ent = readdir(dir)) != 0p) {
     43                fn( ent );
     44        }
     45
     46        // no longer need this
     47        closedir(dir);
     48        return 0;
     49}
     50
     51// count the number of directories with the specified prefix
     52// the directories counted have the form '[prefix]N' where prefix is the parameter
     53// and N is an base 10 integer.
     54static int count_prefix_dirs(const char * path, const char * prefix) {
     55        // read the directory and find the cpu count
     56        // and make sure everything is as expected
     57        int max = -1;
     58        int count = 0;
     59        void lambda(struct dirent * ent) {
     60                // were are looking for prefixX, where X is a number
     61                // check that it starts with 'cpu
     62                char * s = strstr(ent->d_name, prefix);
     63                if(s == 0p) { return; }
     64                if(s != ent->d_name) { return; }
     65
     66                // check that the next part is a number
     67                s += strlen(prefix);
     68                char * end;
     69                long int val = strtol(s, &end, 10);
     70                if(*end != '\0' || val < 0) { return; }
     71
     72                // check that it's a directory
     73                if(ent->d_type != DT_DIR) { return; }
     74
     75                // it's a match!
     76                max = max(val, max);
     77                count++;
     78        }
     79        iterate_dir(path, lambda);
     80
     81        /* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);
     82
     83        return count;
     84}
     85
     86// Count number of cache *indexes* in the system
     87// cache indexes are distinct from cache level as Data or Instruction cache
     88// can share a level but not an index
     89// PITFALL: assumes all cpus have the same indexes as cpu0
     90static int count_cache_indexes(void) {
     91        return count_prefix_dirs("/sys/devices/system/cpu/cpu0/cache", "index");
     92}
     93
     94// read information about a spcficic cache index/cpu file into the output buffer
     95static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
     96        // Pick the file we want and read it
     97        char buf[128];
     98        /* paranoid */ __attribute__((unused)) int len =
     99        snprintf(buf, 128, "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
     100        /* paranoid */ verifyf(len > 0, "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);
     101
     102        int fd = open(buf, 0, O_RDONLY);
     103        /* paranoid */ verifyf(fd > 0, "Could not open file '%s'", buf);
     104
     105        ssize_t r = read(fd, out, out_len);
     106        /* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);
     107
     108        /* paranoid */ __attribute__((unused)) int ret =
     109        close(fd);
     110        /* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
     111
     112        out[r-1] = '\0';
     113        return r-1;
     114}
     115
     116unsigned find_idx() {
     117        int idxs = count_cache_indexes();
     118
     119        unsigned found_level = 0;
     120        unsigned found = -1u;
     121        for(i; idxs) {
     122                unsigned idx = idxs - 1 - i;
     123                char buf[32];
     124
     125                // Level is the cache level: higher means bigger and slower
     126                read_cpuidxinfo_into(0, idx, "level", buf, 32);
     127                char * end;
     128                unsigned long level = strtoul(buf, &end, 10);
     129                /* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, that doesn't sound right", 0);
     130                /* paranoid */ verify(*end == '\0');
     131
     132                if(found_level < level) {
     133                        found_level = level;
     134                        found = idx;
     135                }
     136        }
     137
     138        /* paranoid */ verify(found != -1u);
     139        return found;
    21140}
    22141
    23142int main() {
     143        //-----------------------------------------------------------------------
    24144        int ret1 = get_nprocs();
    25145        int ret2 = cpu_info.hthrd_count;
     
    31151        }
    32152
     153        //-----------------------------------------------------------------------
     154        // Make sure no one has the same self
     155        for(ime; cpu_info.hthrd_count) {
     156                unsigned me = cpu_info.llc_map[ime].self;
     157                {
     158                        unsigned s = cpu_info.llc_map[ime].start;
     159                        unsigned e = s + cpu_info.llc_map[ime].count;
     160                        if(me < s || me >= e) {
     161                                sout | "CPU" | ime | "outside of it's own map: " | s | "<=" | me | "<" | e;
     162                        }
     163                }
     164
     165
     166                for(ithem; cpu_info.hthrd_count) {
     167                        if(ime == ithem) continue;
     168
     169                        unsigned them = cpu_info.llc_map[ithem].self;
     170                        if(me == them) {
     171                                sout | "CPU" | ime | "has conflicting self id with" | ithem | "(" | me | ")";
     172                        }
     173                }
     174        }
     175
     176
     177        //-----------------------------------------------------------------------
     178        unsigned idx = find_idx();
     179        // For all procs check mapping is consistent
     180        for(cpu_me; cpu_info.hthrd_count) {
     181                char buf_me[32];
     182                size_t len_me = read_cpuidxinfo_into(cpu_me, idx, "shared_cpu_list", buf_me, 32);
     183                for(cpu_them; cpu_info.hthrd_count) {
     184                        if(cpu_me == cpu_them) continue;
     185                        char buf_them[32];
     186                        size_t len_them = read_cpuidxinfo_into(cpu_them, idx, "shared_cpu_list", buf_them, 32);
     187
     188                        bool match_file = len_them == len_me && 0 == strncmp(buf_them, buf_me, len_me);
     189                        bool match_info = cpu_info.llc_map[cpu_me].start == cpu_info.llc_map[cpu_them].start && cpu_info.llc_map[cpu_me].count == cpu_info.llc_map[cpu_them].count;
     190
     191                        if(match_file != match_info) {
     192                                sout | "CPU" | cpu_me | "and" | cpu_them | "have inconsitent file and cpu_info";
     193                                sout | cpu_me | ": <" | cpu_info.llc_map[cpu_me  ].start | "," | cpu_info.llc_map[cpu_me  ].count | "> '" | buf_me   | "'";
     194                                sout | cpu_me | ": <" | cpu_info.llc_map[cpu_them].start | "," | cpu_info.llc_map[cpu_them].count | "> '" | buf_them | "'";
     195                        }
     196                }
     197        }
    33198}
  • tests/literals.cfa

    r33e1c91 r929d925  
    1010// Created On       : Sat Sep  9 16:34:38 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Aug 29 10:57:56 2020
    13 // Update Count     : 226
     12// Last Modified On : Sat Jun 19 15:47:49 2021
     13// Update Count     : 237
    1414//
    1515
     
    6363        -0X0123456789ABCDEF;  -0X0123456789ABCDEFu;  -0X0123456789ABCDEFl;  -0X0123456789ABCDEFll;  -0X0123456789ABCDEFul;  -0X0123456789ABCDEFlu;  -0X0123456789ABCDEFull;  -0X0123456789ABCDEFllu;
    6464
     65// floating literals
     66
     67         0123456789.;   0123456789.f;   0123456789.d;   0123456789.l;   0123456789.F;   0123456789.D;   0123456789.L;
     68        +0123456789.;  +0123456789.f;  +0123456789.d;  +0123456789.l;  +0123456789.F;  +0123456789.D;  +0123456789.L;
     69        -0123456789.;  -0123456789.f;  -0123456789.d;  -0123456789.l;  -0123456789.F;  -0123456789.D;  -0123456789.L;
     70
     71         0123456789.e09;   0123456789.e09f;   0123456789.e09d;   0123456789.e09l;   0123456789.e09F;   0123456789.e09D;   0123456789.e09L;
     72        +0123456789.e09;  +0123456789.e09f;  +0123456789.e09d;  +0123456789.e09l;  +0123456789.e09F;  +0123456789.e09D;  +0123456789.e09L;
     73        -0123456789.e09;  -0123456789.e09f;  -0123456789.e09d;  -0123456789.e09l;  -0123456789.e09F;  -0123456789.e09D;  -0123456789.e09L;
     74                                                             
     75         0123456789.e+09;   0123456789.e+09f;   0123456789.e+09d;   0123456789.e+09l;   0123456789.e+09F;   0123456789.e+09D;   0123456789.e+09L;
     76        +0123456789.e+09;  +0123456789.e+09f;  +0123456789.e+09d;  +0123456789.e+09l;  +0123456789.e+09F;  +0123456789.e+09D;  +0123456789.e+09L;
     77        -0123456789.e+09;  -0123456789.e+09f;  -0123456789.e+09d;  -0123456789.e+09l;  -0123456789.e+09F;  -0123456789.e+09D;  -0123456789.e+09L;
     78                                                             
     79         0123456789.e-09;   0123456789.e-09f;   0123456789.e-09d;   0123456789.e-09l;   0123456789.e-09F;   0123456789.e-09D;   0123456789.e-09L;
     80        +0123456789.e-09;  +0123456789.e-09f;  +0123456789.e-09d;  +0123456789.e-09l;  +0123456789.e-09F;  +0123456789.e-09D;  +0123456789.e-09L;
     81        -0123456789.e-09;  -0123456789.e-09f;  -0123456789.e-09d;  -0123456789.e-09l;  -0123456789.e-09F;  -0123456789.e-09D;  -0123456789.e-09L;
     82
     83         .0123456789;   .0123456789f;   .0123456789d;   .0123456789l;   .0123456789F;   .0123456789D;   .0123456789L;
     84        +.0123456789;  +.0123456789f;  +.0123456789d;  +.0123456789l;  +.0123456789F;  +.0123456789D;  +.0123456789L;
     85        -.0123456789;  -.0123456789f;  -.0123456789d;  -.0123456789l;  -.0123456789F;  -.0123456789D;  -.0123456789L;
     86
     87         .0123456789e09;   .0123456789e09f;   .0123456789e09d;   .0123456789e09l;   .0123456789e09F;   .0123456789e09D;   .0123456789e09L;
     88        +.0123456789e09;  +.0123456789e09f;  +.0123456789e09d;  +.0123456789e09l;  +.0123456789e09F;  +.0123456789e09D;  +.0123456789e09L;
     89        -.0123456789e09;  -.0123456789e09f;  -.0123456789e09d;  -.0123456789e09l;  -.0123456789e09F;  -.0123456789e09D;  -.0123456789e09L;
     90                                                             
     91         .0123456789E+09;   .0123456789E+09f;   .0123456789E+09d;   .0123456789E+09l;   .0123456789E+09F;   .0123456789E+09D;   .0123456789E+09L;
     92        +.0123456789E+09;  +.0123456789E+09f;  +.0123456789E+09d;  +.0123456789E+09l;  +.0123456789E+09F;  +.0123456789E+09D;  +.0123456789E+09L;
     93        -.0123456789E+09;  -.0123456789E+09f;  -.0123456789E+09d;  -.0123456789E+09l;  -.0123456789E+09F;  -.0123456789E+09D;  -.0123456789E+09L;
     94                                                             
     95         .0123456789E-09;   .0123456789E-09f;   .0123456789E-09d;   .0123456789E-09l;   .0123456789E-09F;   .0123456789E-09D;   .0123456789E-09L;
     96        -.0123456789E-09;  -.0123456789E-09f;  -.0123456789E-09d;  -.0123456789E-09l;  -.0123456789E-09F;  -.0123456789E-09D;  -.0123456789E-09L;
     97        -.0123456789E-09;  -.0123456789E-09f;  -.0123456789E-09d;  -.0123456789E-09l;  -.0123456789E-09F;  -.0123456789E-09D;  -.0123456789E-09L;
     98
     99         0123456789.0123456789;   0123456789.0123456789f;   0123456789.0123456789d;   0123456789.0123456789l;   0123456789.0123456789F;   0123456789.0123456789D;   0123456789.0123456789L;
     100        +0123456789.0123456789;  +0123456789.0123456789f;  +0123456789.0123456789d;  +0123456789.0123456789l;  +0123456789.0123456789F;  +0123456789.0123456789D;  +0123456789.0123456789L;
     101        -0123456789.0123456789;  -0123456789.0123456789f;  -0123456789.0123456789d;  -0123456789.0123456789l;  -0123456789.0123456789F;  -0123456789.0123456789D;  -0123456789.0123456789L;
     102
     103         0123456789.0123456789E09;   0123456789.0123456789E09f;   0123456789.0123456789E09d;   0123456789.0123456789E09l;   0123456789.0123456789E09F;   0123456789.0123456789E09D;   0123456789.0123456789E09L;
     104        +0123456789.0123456789E09;  +0123456789.0123456789E09f;  +0123456789.0123456789E09d;  +0123456789.0123456789E09l;  +0123456789.0123456789E09F;  +0123456789.0123456789E09D;  +0123456789.0123456789E09L;
     105        -0123456789.0123456789E09;  -0123456789.0123456789E09f;  -0123456789.0123456789E09d;  -0123456789.0123456789E09l;  -0123456789.0123456789E09F;  -0123456789.0123456789E09D;  -0123456789.0123456789E09L;
     106                                                                                         
     107         0123456789.0123456789E+09;   0123456789.0123456789E+09f;   0123456789.0123456789E+09d;   0123456789.0123456789E+09l;   0123456789.0123456789E+09F;   0123456789.0123456789E+09D;   0123456789.0123456789E+09L;
     108        +0123456789.0123456789E+09;  +0123456789.0123456789E+09f;  +0123456789.0123456789E+09d;  +0123456789.0123456789E+09l;  +0123456789.0123456789E+09F;  +0123456789.0123456789E+09D;  +0123456789.0123456789E+09L;
     109        -0123456789.0123456789E+09;  -0123456789.0123456789E+09f;  -0123456789.0123456789E+09d;  -0123456789.0123456789E+09l;  -0123456789.0123456789E+09F;  -0123456789.0123456789E+09D;  -0123456789.0123456789E+09L;
     110                                                                                         
     111         0123456789.0123456789E-09;   0123456789.0123456789E-09f;   0123456789.0123456789E-09d;   0123456789.0123456789E-09l;   0123456789.0123456789E-09F;   0123456789.0123456789E-09D;   0123456789.0123456789E-09L;
     112        +0123456789.0123456789E-09;  +0123456789.0123456789E-09f;  +0123456789.0123456789E-09d;  +0123456789.0123456789E-09l;  +0123456789.0123456789E-09F;  +0123456789.0123456789E-09D;  +0123456789.0123456789E-09L;
     113        -0123456789.0123456789E-09;  -0123456789.0123456789E-09f;  -0123456789.0123456789E-09d;  -0123456789.0123456789E-09l;  -0123456789.0123456789E-09F;  -0123456789.0123456789E-09D;  -0123456789.0123456789E-09L;
     114
    65115// decimal floating literals
    66116
    67          0123456789.;   0123456789.f;   0123456789.l;   0123456789.F;   0123456789.L;   0123456789.DL;
    68         +0123456789.;  +0123456789.f;  +0123456789.l;  +0123456789.F;  +0123456789.L;  +0123456789.DL;
    69         -0123456789.;  -0123456789.f;  -0123456789.l;  -0123456789.F;  -0123456789.L;  -0123456789.DL;
    70 
    71          0123456789.e09;   0123456789.e09f;   0123456789.e09l;   0123456789.e09F;   0123456789.e09L;   0123456789.e09DL;
    72         +0123456789.e09;  +0123456789.e09f;  +0123456789.e09l;  +0123456789.e09F;  +0123456789.e09L;  +0123456789.e09DL;
    73         -0123456789.e09;  -0123456789.e09f;  -0123456789.e09l;  -0123456789.e09F;  -0123456789.e09L;  -0123456789.e09DL;
    74 
    75          0123456789.e+09;   0123456789.e+09f;   0123456789.e+09l;   0123456789.e+09F;   0123456789.e+09L;   0123456789.e+09DL;
    76         +0123456789.e+09;  +0123456789.e+09f;  +0123456789.e+09l;  +0123456789.e+09F;  +0123456789.e+09L;  +0123456789.e+09DL;
    77         -0123456789.e+09;  -0123456789.e+09f;  -0123456789.e+09l;  -0123456789.e+09F;  -0123456789.e+09L;  -0123456789.e+09DL;
    78 
    79          0123456789.e-09;   0123456789.e-09f;   0123456789.e-09l;   0123456789.e-09F;   0123456789.e-09L;   0123456789.e-09DL;
    80         +0123456789.e-09;  +0123456789.e-09f;  +0123456789.e-09l;  +0123456789.e-09F;  +0123456789.e-09L;  +0123456789.e-09DL;
    81         -0123456789.e-09;  -0123456789.e-09f;  -0123456789.e-09l;  -0123456789.e-09F;  -0123456789.e-09L;  -0123456789.e-09DL;
    82 
    83          .0123456789;   .0123456789f;   .0123456789l;   .0123456789F;   .0123456789L;   .0123456789DL;
    84         +.0123456789;  +.0123456789f;  +.0123456789l;  +.0123456789F;  +.0123456789L;  +.0123456789DL;
    85         -.0123456789;  -.0123456789f;  -.0123456789l;  -.0123456789F;  -.0123456789L;  -.0123456789DL;
    86 
    87          .0123456789e09;   .0123456789e09f;   .0123456789e09l;   .0123456789e09F;   .0123456789e09L;   .0123456789e09DL;
    88         +.0123456789e09;  +.0123456789e09f;  +.0123456789e09l;  +.0123456789e09F;  +.0123456789e09L;  +.0123456789e09DL;
    89         -.0123456789e09;  -.0123456789e09f;  -.0123456789e09l;  -.0123456789e09F;  -.0123456789e09L;  -.0123456789e09DL;
    90 
    91          .0123456789E+09;   .0123456789E+09f;   .0123456789E+09l;   .0123456789E+09F;   .0123456789E+09L;   .0123456789E+09DL;
    92         +.0123456789E+09;  +.0123456789E+09f;  +.0123456789E+09l;  +.0123456789E+09F;  +.0123456789E+09L;  +.0123456789E+09DL;
    93         -.0123456789E+09;  -.0123456789E+09f;  -.0123456789E+09l;  -.0123456789E+09F;  -.0123456789E+09L;  -.0123456789E+09DL;
    94 
    95          .0123456789E-09;   .0123456789E-09f;   .0123456789E-09l;   .0123456789E-09F;   .0123456789E-09L;   .0123456789E-09DL;
    96         -.0123456789E-09;  -.0123456789E-09f;  -.0123456789E-09l;  -.0123456789E-09F;  -.0123456789E-09L;  -.0123456789E-09DL;
    97         -.0123456789E-09;  -.0123456789E-09f;  -.0123456789E-09l;  -.0123456789E-09F;  -.0123456789E-09L;  -.0123456789E-09DL;
    98 
    99          0123456789.0123456789;   0123456789.0123456789f;   0123456789.0123456789l;   0123456789.0123456789F;   0123456789.0123456789L;   0123456789.0123456789DL;
    100         +0123456789.0123456789;  +0123456789.0123456789f;  +0123456789.0123456789l;  +0123456789.0123456789F;  +0123456789.0123456789L;  +0123456789.0123456789DL;
    101         -0123456789.0123456789;  -0123456789.0123456789f;  -0123456789.0123456789l;  -0123456789.0123456789F;  -0123456789.0123456789L;  -0123456789.0123456789DL;
    102 
    103          0123456789.0123456789E09;   0123456789.0123456789E09f;   0123456789.0123456789E09l;   0123456789.0123456789E09F;   0123456789.0123456789E09L;   0123456789.0123456789E09DL;
    104         +0123456789.0123456789E09;  +0123456789.0123456789E09f;  +0123456789.0123456789E09l;  +0123456789.0123456789E09F;  +0123456789.0123456789E09L;  +0123456789.0123456789E09DL;
    105         -0123456789.0123456789E09;  -0123456789.0123456789E09f;  -0123456789.0123456789E09l;  -0123456789.0123456789E09F;  -0123456789.0123456789E09L;  -0123456789.0123456789E09DL;
    106 
    107          0123456789.0123456789E+09;   0123456789.0123456789E+09f;   0123456789.0123456789E+09l;   0123456789.0123456789E+09F;   0123456789.0123456789E+09L;   0123456789.0123456789E+09DL;
    108         +0123456789.0123456789E+09;  +0123456789.0123456789E+09f;  +0123456789.0123456789E+09l;  +0123456789.0123456789E+09F;  +0123456789.0123456789E+09L;  +0123456789.0123456789E+09DL;
    109         -0123456789.0123456789E+09;  -0123456789.0123456789E+09f;  -0123456789.0123456789E+09l;  -0123456789.0123456789E+09F;  -0123456789.0123456789E+09L;  -0123456789.0123456789E+09DL;
    110 
    111          0123456789.0123456789E-09;   0123456789.0123456789E-09f;   0123456789.0123456789E-09l;   0123456789.0123456789E-09F;   0123456789.0123456789E-09L;   0123456789.0123456789E-09DL;
    112         +0123456789.0123456789E-09;  +0123456789.0123456789E-09f;  +0123456789.0123456789E-09l;  +0123456789.0123456789E-09F;  +0123456789.0123456789E-09L;  +0123456789.0123456789E-09DL;
    113         -0123456789.0123456789E-09;  -0123456789.0123456789E-09f;  -0123456789.0123456789E-09l;  -0123456789.0123456789E-09F;  -0123456789.0123456789E-09L;  -0123456789.0123456789E-09DL;
     117#if ! defined( __aarch64__ )                                                    // unsupported on ARM after gcc-9
     118         0123456789.df;   0123456789.dd;   0123456789.dl;   0123456789.DF;   0123456789.DD;   0123456789.DL;
     119        +0123456789.df;  +0123456789.dd;  +0123456789.dl;  +0123456789.DF;  +0123456789.DD;  +0123456789.DL;
     120        -0123456789.df;  -0123456789.dd;  -0123456789.dl;  -0123456789.DF;  -0123456789.DD;  -0123456789.DL;
     121
     122         0123456789.e09df;   0123456789.e09dd;   0123456789.e09dl;   0123456789.e09DF;   0123456789.e09DD;   0123456789.e09DL;
     123        +0123456789.e09df;  +0123456789.e09dd;  +0123456789.e09dl;  +0123456789.e09DF;  +0123456789.e09DD;  +0123456789.e09DL;
     124        -0123456789.e09df;  -0123456789.e09dd;  -0123456789.e09dl;  -0123456789.e09DF;  -0123456789.e09DD;  -0123456789.e09DL;
     125                                                                     
     126         0123456789.e+09df;   0123456789.e+09dd;  0123456789.e+09dl;   0123456789.e+09DF;   0123456789.e+09DD;   0123456789.e+09DL;
     127        +0123456789.e+09df;  +0123456789.e+09dd; +0123456789.e+09dl;  +0123456789.e+09DF;  +0123456789.e+09DD;  +0123456789.e+09DL;
     128        -0123456789.e+09df;  -0123456789.e+09dd; -0123456789.e+09dl;  -0123456789.e+09DF;  -0123456789.e+09DD;  -0123456789.e+09DL;
     129                                                                     
     130         0123456789.e-09df;   0123456789.e-09dd;  0123456789.e-09dl;   0123456789.e-09DF;   0123456789.e-09DD;   0123456789.e-09DL;
     131        +0123456789.e-09df;  +0123456789.e-09dd; +0123456789.e-09dl;  +0123456789.e-09DF;  +0123456789.e-09DD;  +0123456789.e-09DL;
     132        -0123456789.e-09df;  -0123456789.e-09dd; -0123456789.e-09dl;  -0123456789.e-09DF;  -0123456789.e-09DD;  -0123456789.e-09DL;
     133
     134         .0123456789df;   .0123456789dd;   .0123456789dl;   .0123456789DF;   .0123456789DD;   .0123456789DL;
     135        +.0123456789df;  +.0123456789dd;  +.0123456789dl;  +.0123456789DF;  +.0123456789DD;  +.0123456789DL;
     136        -.0123456789df;  -.0123456789dd;  -.0123456789dl;  -.0123456789DF;  -.0123456789DD;  -.0123456789DL;
     137
     138         .0123456789e09df;   .0123456789e09dd;   .0123456789e09dl;   .0123456789e09DF;   .0123456789e09DD;   .0123456789e09DL;
     139        +.0123456789e09df;  +.0123456789e09dd;  +.0123456789e09dl;  +.0123456789e09DF;  +.0123456789e09DD;  +.0123456789e09DL;
     140        -.0123456789e09df;  -.0123456789e09dd;  -.0123456789e09dl;  -.0123456789e09DF;  -.0123456789e09DD;  -.0123456789e09DL;
     141                                                               
     142         .0123456789E+09df;   .0123456789E+09dd;   .0123456789E+09dl;   .0123456789E+09DF;   .0123456789E+09DD;   .0123456789E+09DL;
     143        +.0123456789E+09df;  +.0123456789E+09dd;  +.0123456789E+09dl;  +.0123456789E+09DF;  +.0123456789E+09DD;  +.0123456789E+09DL;
     144        -.0123456789E+09df;  -.0123456789E+09dd;  -.0123456789E+09dl;  -.0123456789E+09DF;  -.0123456789E+09DD;  -.0123456789E+09DL;
     145                                                               
     146         .0123456789E-09df;   .0123456789E-09dd;   .0123456789E-09dl;   .0123456789E-09DF;   .0123456789E-09DD;   .0123456789E-09DL;
     147        -.0123456789E-09df;  -.0123456789E-09dd;  -.0123456789E-09dl;  -.0123456789E-09DF;  -.0123456789E-09DD;  -.0123456789E-09DL;
     148        -.0123456789E-09df;  -.0123456789E-09dd;  -.0123456789E-09dl;  -.0123456789E-09DF;  -.0123456789E-09DD;  -.0123456789E-09DL;
     149
     150         0123456789.0123456789df;   0123456789.0123456789dd;   0123456789.0123456789dl;   0123456789.0123456789DF;   0123456789.0123456789DD;   0123456789.0123456789DL;
     151        +0123456789.0123456789df;  +0123456789.0123456789dd;  +0123456789.0123456789dl;  +0123456789.0123456789DF;  +0123456789.0123456789DD;  +0123456789.0123456789DL;
     152        -0123456789.0123456789df;  -0123456789.0123456789dd;  -0123456789.0123456789dl;  -0123456789.0123456789DF;  -0123456789.0123456789DD;  -0123456789.0123456789DL;
     153
     154         0123456789.0123456789E09df;   0123456789.0123456789E09dd;   0123456789.0123456789E09dl;   0123456789.0123456789E09DF;   0123456789.0123456789E09DD;   0123456789.0123456789E09DL;
     155        +0123456789.0123456789E09df;  +0123456789.0123456789E09dd;  +0123456789.0123456789E09dl;  +0123456789.0123456789E09DF;  +0123456789.0123456789E09DD;  +0123456789.0123456789E09DL;
     156        -0123456789.0123456789E09df;  -0123456789.0123456789E09dd;  -0123456789.0123456789E09dl;  -0123456789.0123456789E09DF;  -0123456789.0123456789E09DD;  -0123456789.0123456789E09DL;
     157                                                                                               
     158         0123456789.0123456789E+09df;   0123456789.0123456789E+09dd;   0123456789.0123456789E+09dl;   0123456789.0123456789E+09DF;   0123456789.0123456789E+09DD;   0123456789.0123456789E+09DL;
     159        +0123456789.0123456789E+09df;  +0123456789.0123456789E+09dd;  +0123456789.0123456789E+09dl;  +0123456789.0123456789E+09DF;  +0123456789.0123456789E+09DD;  +0123456789.0123456789E+09DL;
     160        -0123456789.0123456789E+09df;  -0123456789.0123456789E+09dd;  -0123456789.0123456789E+09dl;  -0123456789.0123456789E+09DF;  -0123456789.0123456789E+09DD;  -0123456789.0123456789E+09DL;
     161                                                                                               
     162         0123456789.0123456789E-09df;   0123456789.0123456789E-09dd;   0123456789.0123456789E-09dl;   0123456789.0123456789E-09DF;   0123456789.0123456789E-09DD;   0123456789.0123456789E-09DL;
     163        +0123456789.0123456789E-09df;  +0123456789.0123456789E-09dd;  +0123456789.0123456789E-09dl;  +0123456789.0123456789E-09DF;  +0123456789.0123456789E-09DD;  +0123456789.0123456789E-09DL;
     164        -0123456789.0123456789E-09df;  -0123456789.0123456789E-09dd;  -0123456789.0123456789E-09dl;  -0123456789.0123456789E-09DF;  -0123456789.0123456789E-09DD;  -0123456789.0123456789E-09DL;
     165#endif // ! __aarch64__
    114166
    115167// hexadecimal floating literals, must have exponent
  • tests/math.cfa

    r33e1c91 r929d925  
    1010// Created On       : Fri Apr 22 14:59:21 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Apr 13 21:04:48 2021
    13 // Update Count     : 123
     12// Last Modified On : Fri Jun 18 17:02:44 2021
     13// Update Count     : 124
    1414//
    1515
     
    4040
    4141        sout | "exp:" | exp( 1.0F ) | exp( 1.0D ) | exp( 1.0L ) | nonl;
    42         sout | exp( 1.0F+1.0FI ) | exp( 1.0D+1.0DI ) | exp( 1.0DL+1.0LI );
     42        sout | exp( 1.0F+1.0FI ) | exp( 1.0D+1.0DI ) | exp( 1.0L+1.0LI );
    4343        sout | "exp2:" | exp2( 1.0F ) | exp2( 1.0D ) | exp2( 1.0L );
    4444        sout | "expm1:" | expm1( 1.0F ) | expm1( 1.0D ) | expm1( 1.0L );
    4545        sout | "pow:" | pow( 1.0F, 1.0F ) | pow( 1.0D, 1.0D ) | pow( 1.0L, 1.0L ) | nonl;
    46         sout | pow( 1.0F+1.0FI, 1.0F+1.0FI ) | pow( 1.0D+1.0DI, 1.0D+1.0DI ) | pow( 1.5DL+1.5LI, 1.5DL+1.5LI );
     46        sout | pow( 1.0F+1.0FI, 1.0F+1.0FI ) | pow( 1.0D+1.0DI, 1.0D+1.0DI ) | pow( 1.5L+1.5LI, 1.5L+1.5LI );
    4747
    4848        int b = 4;
     
    6868
    6969        sout | "log:" | log( 1.0F ) | log( 1.0D ) | log( 1.0L ) | nonl;
    70         sout | log( 1.0F+1.0FI ) | log( 1.0D+1.0DI ) | log( 1.0DL+1.0LI );
     70        sout | log( 1.0F+1.0FI ) | log( 1.0D+1.0DI ) | log( 1.0L+1.0LI );
    7171        sout | "log2:" | log2( 1024 ) | log2( 2 \ 17u ) | log2( 2 \ 23u );
    7272        sout | "log2:" | log2( 1024l ) | log2( 2l \ 17u ) | log2( 2l \ 23u );
     
    8282
    8383        sout | "sqrt:" | sqrt( 1.0F ) | sqrt( 1.0D ) | sqrt( 1.0L ) | nonl;
    84         sout | sqrt( 1.0F+1.0FI ) | sqrt( 1.0D+1.0DI ) | sqrt( 1.0DL+1.0LI );
     84        sout | sqrt( 1.0F+1.0FI ) | sqrt( 1.0D+1.0DI ) | sqrt( 1.0L+1.0LI );
    8585        sout | "cbrt:" | cbrt( 27.0F ) | cbrt( 27.0D ) | cbrt( 27.0L );
    8686        sout | "hypot:" | hypot( 1.0F, -1.0F ) | hypot( 1.0D, -1.0D ) | hypot( 1.0L, -1.0L );
     
    8989
    9090        sout | "sin:" | sin( 1.0F ) | sin( 1.0D ) | sin( 1.0L ) | nonl;
    91         sout | sin( 1.0F+1.0FI ) | sin( 1.0D+1.0DI ) | sin( 1.0DL+1.0LI );
     91        sout | sin( 1.0F+1.0FI ) | sin( 1.0D+1.0DI ) | sin( 1.0L+1.0LI );
    9292        sout | "cos:" | cos( 1.0F ) | cos( 1.0D ) | cos( 1.0L ) | nonl;
    93         sout | cos( 1.0F+1.0FI ) | cos( 1.0D+1.0DI ) | cos( 1.0DL+1.0LI );
     93        sout | cos( 1.0F+1.0FI ) | cos( 1.0D+1.0DI ) | cos( 1.0L+1.0LI );
    9494        sout | "tan:" | tan( 1.0F ) | tan( 1.0D ) | tan( 1.0L ) | nonl;
    95         sout | tan( 1.0F+1.0FI ) | tan( 1.0D+1.0DI ) | tan( 1.0DL+1.0LI );
     95        sout | tan( 1.0F+1.0FI ) | tan( 1.0D+1.0DI ) | tan( 1.0L+1.0LI );
    9696        sout | "asin:" | asin( 1.0F ) | asin( 1.0D ) | asin( 1.0L ) | nonl;
    97         sout | asin( 1.0F+1.0FI ) | asin( 1.0D+1.0DI ) | asin( 1.0DL+1.0LI );
     97        sout | asin( 1.0F+1.0FI ) | asin( 1.0D+1.0DI ) | asin( 1.0L+1.0LI );
    9898        sout | "acos:" | acos( 1.0F ) | acos( 1.0D ) | acos( 1.0L ) | nonl;
    99         sout | acos( 1.0F+1.0FI ) | acos( 1.0D+1.0DI ) | acos( 1.0DL+1.0LI );
     99        sout | acos( 1.0F+1.0FI ) | acos( 1.0D+1.0DI ) | acos( 1.0L+1.0LI );
    100100        sout | "atan:" | atan( 1.0F ) | atan( 1.0D ) | atan( 1.0L ) | nonl;
    101         sout | atan( 1.0F+1.0FI ) | atan( 1.0D+1.0DI ) | atan( 1.0DL+1.0LI );
     101        sout | atan( 1.0F+1.0FI ) | atan( 1.0D+1.0DI ) | atan( 1.0L+1.0LI );
    102102        sout | "atan2:" | atan2( 1.0F, 1.0F ) | atan2( 1.0D, 1.0D ) | atan2( 1.0L, 1.0L ) | nonl;
    103103        sout | "atan:" | atan( 1.0F, 1.0F ) | atan( 1.0D, 1.0D ) | atan( 1.0L, 1.0L );
     
    106106
    107107        sout | "sinh:" | sinh( 1.0F ) | sinh( 1.0D ) | sinh( 1.0L ) | nonl;
    108         sout | sinh( 1.0F+1.0FI ) | sinh( 1.0D+1.0DI ) | sinh( 1.0DL+1.0LI );
     108        sout | sinh( 1.0F+1.0FI ) | sinh( 1.0D+1.0DI ) | sinh( 1.0L+1.0LI );
    109109        sout | "cosh:" | cosh( 1.0F ) | cosh( 1.0D ) | cosh( 1.0L ) | nonl;
    110         sout | cosh( 1.0F+1.0FI ) | cosh( 1.0D+1.0DI ) | cosh( 1.0DL+1.0LI );
     110        sout | cosh( 1.0F+1.0FI ) | cosh( 1.0D+1.0DI ) | cosh( 1.0L+1.0LI );
    111111        sout | "tanh:" | tanh( 1.0F ) | tanh( 1.0D ) | tanh( 1.0L ) | nonl;
    112         sout | tanh( 1.0F+1.0FI ) | tanh( 1.0D+1.0DI ) | tanh( 1.0DL+1.0LI );
     112        sout | tanh( 1.0F+1.0FI ) | tanh( 1.0D+1.0DI ) | tanh( 1.0L+1.0LI );
    113113        sout | "acosh:" | acosh( 1.0F ) | acosh( 1.0D ) | acosh( 1.0L ) | nonl;
    114         sout | acosh( 1.0F+1.0FI ) | acosh( 1.0D+1.0DI ) | acosh( 1.0DL+1.0LI );
     114        sout | acosh( 1.0F+1.0FI ) | acosh( 1.0D+1.0DI ) | acosh( 1.0L+1.0LI );
    115115        sout | "asinh:" | asinh( 1.0F ) | asinh( 1.0D ) | asinh( 1.0L ) | nonl;
    116         sout | asinh( 1.0F+1.0FI ) | asinh( 1.0D+1.0DI ) | asinh( 1.0DL+1.0LI );
     116        sout | asinh( 1.0F+1.0FI ) | asinh( 1.0D+1.0DI ) | asinh( 1.0L+1.0LI );
    117117        sout | "atanh:" | atanh( 1.0F ) | atanh( 1.0D ) | atanh( 1.0L ) | nonl;
    118         sout | atanh( 1.0F+1.0FI ) | atanh( 1.0D+1.0DI ) | atanh( 1.0DL+1.0LI );
     118        sout | atanh( 1.0F+1.0FI ) | atanh( 1.0D+1.0DI ) | atanh( 1.0L+1.0LI );
    119119
    120120        //---------------------- Error / Gamma ----------------------
  • tests/test.py

    r33e1c91 r929d925  
    1313
    1414import os
    15 import psutil
    1615import signal
    1716
  • tests/typedefRedef.cfa

    r33e1c91 r929d925  
    4545typedef int X2;
    4646
     47X2 value  __attribute__((aligned(4 * sizeof(X2))));
     48
     49__attribute__((aligned(4 * sizeof(X2)))) struct rseq_cs {
     50        int foo;
     51};
     52
    4753// xxx - this doesn't work yet due to parsing problems with generic types
    4854// #ifdef __CFA__
Note: See TracChangeset for help on using the changeset viewer.