Changeset cc7bbe6
- Timestamp:
- Feb 23, 2022, 11:24:34 AM (4 years ago)
- Branches:
- ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
- Children:
- 08ed947
- Parents:
- f5a51db (diff), 3a038fa (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - Files:
-
- 47 added
- 2 deleted
- 59 edited
- 2 moved
Legend:
- Unmodified
- Added
- Removed
-
Jenkinsfile
rf5a51db rcc7bbe6 161 161 Tools.BuildStage('Test: full', Settings.RunAllTests) { 162 162 dir (BuildDir) { 163 jopt = "" 164 if( Settings.Architecture.node == 'x86' ) { 165 jopt = "-j2" 166 } 163 167 //Run the tests from the tests directory 164 sh """make --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=yes archiveerrors=${BuildDir}/tests/crashes/full-debug"""165 sh """make --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=no archiveerrors=${BuildDir}/tests/crashes/full-nodebug"""168 sh """make ${jopt} --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=yes archiveerrors=${BuildDir}/tests/crashes/full-debug""" 169 sh """make ${jopt} --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=no archiveerrors=${BuildDir}/tests/crashes/full-nodebug""" 166 170 } 167 171 } -
benchmark/io/http/Makefile.am
rf5a51db rcc7bbe6 50 50 .dummy_hackxx.cpp: 51 51 @echo "int bar() { return 0; }" > ${@} 52 53 # add dependency of cfa files 54 nodist_httpforall_OBJECTS = $(addsuffix .o, $(basename $(filter %.cfa,$(nodist_httpforall_SOURCES)))) 55 $(nodist_httpforall_OBJECTS) : @CFACC@ @CFACPP@ 56 57 # .deps inclusion is not done automatically by automake for new languages 58 nodist_httpforall_DEPENDS = $(join \ 59 $(addsuffix $(DEPDIR)/ , $(dir $(nodist_httpforall_OBJECTS) ) ), \ 60 $(notdir ${nodist_httpforall_OBJECTS:.o=.Po}) \ 61 ) 62 63 -include $(nodist_httpforall_DEPENDS) 64 65 list_libdeps: 66 echo "objects: " $(nodist_httpforall_OBJECTS) 67 echo "depends: " $(nodist_httpforall_DEPENDS) -
doc/LaTeXmacros/common.sty
rf5a51db rcc7bbe6 11 11 %% Created On : Sat Apr 9 10:06:17 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Mon May 31 09:08:37 202114 %% Update Count : 56 513 %% Last Modified On : Mon Feb 7 23:00:46 2022 14 %% Update Count : 569 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 43 43 \newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon 44 44 \newcommand{\CC}[1][]{\protect\CCIcon{#1}\xspace} % C++ symbolic name 45 \newcommand{\Cpp}[1][]{\CC{#1}} % C++ synonym 45 46 % numbers disallowed in latex variables names => use number names 46 47 \newcommand{\CCeleven}{\protect\CCIcon{11}\xspace} % C++11 symbolic name … … 51 52 52 53 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 54 55 \usepackage{pslatex} % reduce size of san serif font 56 \usepackage{relsize} % must be after change to small or selects old size 57 \usepackage{rotating} 58 \usepackage{calc} % latex arithmetic 53 59 54 60 % remove special-character warning in PDF side-bar names … … 71 77 \newlength{\parindentlnth} 72 78 \setlength{\parindentlnth}{\parindent} 73 74 \usepackage{pslatex} % reduce size of san serif font75 \usepackage{relsize} % must be after change to small or selects old size76 \usepackage{rotating}77 \usepackage{calc} % latex arithmetic78 79 79 80 % reduce size of chapter/section titles … … 151 152 \newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi} 152 153 154 % \snake{<identifier>} 155 % Improves writing of snake case (or any convention that uses _) by allowing 156 % line breaks after _. Disables commands inside the block and formats the 157 % identifier to look like code. 158 \newcommand*\snake[1]{\snakefont{\expandafter\snake@next\detokenize{#1}\@nil}} 159 160 % \snakefont{<text>} 161 % Command used by \snake, you may renew the command to change its formating. 
162 \newcommand*\snakefont[1]{\LstBasicStyle{#1}} 163 164 % Thanks Manuel of TeX Stack exchange. (I got the base pattern from one of 165 % their answers.) Note: \@nil should never be defined. 166 \newcommand*\snake@next[1]{\ifx\@nil#1\else 167 \expandafter\ifx\string_#1\string_\allowbreak\else#1\fi 168 \expandafter\snake@next\fi 169 } 170 171 % \lang{<language>}{<code>} 172 % Use the listings package to format the snipit of <code> in <language>. 173 \newcommand{\lang}[2]{\lstinline[language=#1]{#2}} 174 153 175 % Latin abbreviation 154 176 \newcommand{\abbrevFont}{\textit} % set empty for no italics … … 263 285 extendedchars=true, % allow ASCII characters in the range 128-255 264 286 escapechar=\$, % LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-' 265 mathescape=false, % LaTeX math escape in CFA code $...$287 mathescape=false, % disable LaTeX math escape in CFA code $...$ 266 288 keepspaces=true, % 267 289 showstringspaces=false, % do not show spaces with cup … … 308 330 }{} 309 331 % inline code @...@ (at symbol) 310 \makeatother311 332 \lstMakeShortInline@ % single-character for \lstinline 312 \makeatletter313 333 \fi% 314 334 -
doc/LaTeXmacros/common.tex
rf5a51db rcc7bbe6 11 11 %% Created On : Sat Apr 9 10:06:17 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Mon May 31 09:10:49 202114 %% Update Count : 5 4613 %% Last Modified On : Mon Feb 7 23:00:08 2022 14 %% Update Count : 552 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 … … 43 43 \newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon 44 44 \newcommand{\CC}[1][]{\protect\CCIcon{#1}\xspace} % C++ symbolic name 45 \newcommand{\Cpp}[1][]{\CC{#1}} % C++ synonym 45 46 % numbers disallowed in latex variables names => use number names 46 47 \newcommand{\CCeleven}{\protect\CCIcon{11}\xspace} % C++11 symbolic name … … 51 52 52 53 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 54 55 \usepackage{pslatex} % reduce size of san serif font 56 \usepackage{relsize} % must be after change to small or selects old size 57 \usepackage{rotating} 58 \usepackage{calc} % latex arithmetic 53 59 54 60 % remove special-character warning in PDF side-bar names … … 72 78 \newlength{\parindentlnth} 73 79 \setlength{\parindentlnth}{\parindent} 74 75 \usepackage{pslatex} % reduce size of san serif font76 \usepackage{relsize} % must be after change to small or selects old size77 \usepackage{rotating}78 \usepackage{calc} % latex arithmetic79 80 80 81 % reduce size of chapter/section titles … … 152 153 \newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi} 153 154 155 % \snake{<identifier>} 156 % Improves writing of snake case (or any convention that uses _) by allowing 157 % line breaks after _. Disables commands inside the block and formats the 158 % identifier to look like code. 159 \newcommand*\snake[1]{\snakefont{\expandafter\snake@next\detokenize{#1}\@nil}} 160 161 % \snakefont{<text>} 162 % Command used by \snake, you may renew the command to change its formating. 
163 \newcommand*\snakefont[1]{\LstBasicStyle{#1}} 164 165 % Thanks Manuel of TeX Stack exchange. (I got the base pattern from one of 166 % their answers.) Note: \@nil should never be defined. 167 \newcommand*\snake@next[1]{\ifx\@nil#1\else 168 \expandafter\ifx\string_#1\string_\allowbreak\else#1\fi 169 \expandafter\snake@next\fi 170 } 171 172 % \lang{<language>}{<code>} 173 % Use the listings package to format the snipit of <code> in <language>. 174 \newcommand{\lang}[2]{\lstinline[language=#1]{#2}} 175 154 176 % Latin abbreviation 155 177 \newcommand{\abbrevFont}{\textit} % set empty for no italics … … 268 290 extendedchars=true, % allow ASCII characters in the range 128-255 269 291 escapechar=\$, % LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-' 270 mathescape=false, % LaTeX math escape in CFA code $...$292 mathescape=false, % disable LaTeX math escape in CFA code $...$ 271 293 keepspaces=true, % 272 294 showstringspaces=false, % do not show spaces with cup -
doc/theses/mubeen_zulfiqar_MMath/Makefile
rf5a51db rcc7bbe6 1 DOC = uw-ethesis.pdf2 BASE = ${DOC:%.pdf=%} # remove suffix3 1 # directory for latex clutter files 4 BUILD = build 5 TEXSRC = $(wildcard *.tex) 6 FIGSRC = $(wildcard *.fig) 7 BIBSRC = $(wildcard *.bib) 8 TEXLIB = .:../../LaTeXmacros:${BUILD}: # common latex macros 9 BIBLIB = .:../../bibliography # common citation repository 2 Build = build 3 Figures = figures 4 Pictures = pictures 5 TeXSRC = ${wildcard *.tex} 6 FigSRC = ${notdir ${wildcard ${Figures}/*.fig}} 7 PicSRC = ${notdir ${wildcard ${Pictures}/*.fig}} 8 BIBSRC = ${wildcard *.bib} 9 TeXLIB = .:../../LaTeXmacros:${Build}: # common latex macros 10 BibLIB = .:../../bibliography # common citation repository 10 11 11 12 MAKEFLAGS = --no-print-directory # --silent 12 VPATH = ${B UILD}13 VPATH = ${Build} ${Figures} ${Pictures} # extra search path for file names used in document 13 14 14 15 ### Special Rules: … … 18 19 19 20 ### Commands: 20 LATEX = TEXINPUTS=${TEXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${BUILD} 21 BIBTEX = BIBINPUTS=${BIBLIB} bibtex 22 #GLOSSARY = INDEXSTYLE=${BUILD} makeglossaries-lite 21 22 LaTeX = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${Build} 23 BibTeX = BIBINPUTS=${BibLIB} bibtex 24 #Glossary = INDEXSTYLE=${Build} makeglossaries-lite 23 25 24 26 ### Rules and Recipes: 25 27 28 DOC = uw-ethesis.pdf 29 BASE = ${DOC:%.pdf=%} # remove suffix 30 26 31 all: ${DOC} 27 32 28 ${BUILD}/%.dvi: ${TEXSRC} ${FIGSRC:%.fig=%.tex} ${BIBSRC} Makefile | ${BUILD} 29 ${LATEX} ${BASE} 30 ${BIBTEX} ${BUILD}/${BASE} 31 ${LATEX} ${BASE} 32 # ${GLOSSARY} ${BUILD}/${BASE} 33 # ${LATEX} ${BASE} 33 clean: 34 @rm -frv ${DOC} ${Build} 34 35 35 ${BUILD}: 36 # File Dependencies # 37 38 ${Build}/%.dvi : ${TeXSRC} ${FigSRC:%.fig=%.tex} ${PicSRC:%.fig=%.pstex} ${BIBSRC} Makefile | ${Build} 39 ${LaTeX} ${BASE} 40 ${BibTeX} ${Build}/${BASE} 41 ${LaTeX} ${BASE} 42 # if nedded, run latex again to get citations 43 if fgrep -s "LaTeX Warning: 
Citation" ${basename $@}.log ; then ${LaTeX} ${BASE} ; fi 44 # ${Glossary} ${Build}/${BASE} 45 # ${LaTeX} ${BASE} 46 47 ${Build}: 36 48 mkdir $@ 37 49 38 %.pdf : ${B UILD}/%.ps | ${BUILD}50 %.pdf : ${Build}/%.ps | ${Build} 39 51 ps2pdf $< 40 52 41 %.ps : %.dvi | ${B UILD}53 %.ps : %.dvi | ${Build} 42 54 dvips $< -o $@ 43 55 44 %.tex : %.fig | ${B UILD}45 fig2dev -L eepic $< > ${B UILD}/$@56 %.tex : %.fig | ${Build} 57 fig2dev -L eepic $< > ${Build}/$@ 46 58 47 %.ps : %.fig | ${B UILD}48 fig2dev -L ps $< > ${B UILD}/$@59 %.ps : %.fig | ${Build} 60 fig2dev -L ps $< > ${Build}/$@ 49 61 50 %.pstex : %.fig | ${BUILD} 51 fig2dev -L pstex $< > ${BUILD}/$@ 52 fig2dev -L pstex_t -p ${BUILD}/$@ $< > ${BUILD}/$@_t 53 54 clean: 55 @rm -frv ${DOC} ${BUILD} *.fig.bak 62 %.pstex : %.fig | ${Build} 63 fig2dev -L pstex $< > ${Build}/$@ 64 fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t -
doc/theses/mubeen_zulfiqar_MMath/allocator.tex
rf5a51db rcc7bbe6 24 24 \end{itemize} 25 25 26 The new features added to uHeapLmmm (incl. @malloc \_size@ routine)26 The new features added to uHeapLmmm (incl. @malloc_size@ routine) 27 27 \CFA alloc interface with examples. 28 28 … … 99 99 \begin{itemize} 100 100 \item 101 The bump allocation is concurrent as memory taken from sbrk is sharded across all heaps as bump allocation reserve. The lock on bump allocation (on memory taken from sbrk) will only be contended if KTs >N. The contention on sbrk area is less likely as it will only happen in the case if heaps assigned to two KTs get short of bump allocation reserve simultanously.102 \item 103 N heaps are created at the start of the program and destroyed at the end of program. When a KT is created, we only assign it to one of the heaps. When a KT is destroyed, we only dissociate it from the assigned heap but we do not destroy that heap. That heap will go back to our pool-of-heaps, ready to be used by some new KT. And if that heap was shared among multiple KTs (like the case of KTs >N) then, on deletion of one KT, that heap will be still in-use of the other KTs. This will prevent creation and deletion of heaps during run-time as heaps are re-usable which helps in keeping low-memory footprint.101 The bump allocation is concurrent as memory taken from sbrk is sharded across all heaps as bump allocation reserve. The lock on bump allocation (on memory taken from sbrk) will only be contended if KTs $<$ N. The contention on sbrk area is less likely as it will only happen in the case if heaps assigned to two KTs get short of bump allocation reserve simultanously. 102 \item 103 N heaps are created at the start of the program and destroyed at the end of program. When a KT is created, we only assign it to one of the heaps. When a KT is destroyed, we only dissociate it from the assigned heap but we do not destroy that heap. That heap will go back to our pool-of-heaps, ready to be used by some new KT. 
And if that heap was shared among multiple KTs (like the case of KTs $<$ N) then, on deletion of one KT, that heap will be still in-use of the other KTs. This will prevent creation and deletion of heaps during run-time as heaps are re-usable which helps in keeping low-memory footprint. 104 104 \item 105 105 It is possible to use sharing and stealing techniques to share/find unused storage, when a free list is unused or empty. … … 113 113 114 114 \section{Added Features and Methods} 115 To improve the UHeapLmmm allocator (FIX ME: cite uHeapLmmm) interface and make it more user friendly, we added a few more routines to the C allocator. Also, we built a CFA (FIX ME: cite cforall) interface on top of C interface to increase the usability of the allocator.115 To improve the UHeapLmmm allocator (FIX ME: cite uHeapLmmm) interface and make it more user friendly, we added a few more routines to the C allocator. Also, we built a \CFA (FIX ME: cite cforall) interface on top of C interface to increase the usability of the allocator. 116 116 117 117 \subsection{C Interface} 118 118 We added a few more features and routines to the allocator's C interface that can make the allocator more usable to the programmers. THese features will programmer more control on the dynamic memory allocation. 119 119 120 \subsubsection void * aalloc( size\_t dim, size\_t elemSize ) 121 aalloc is an extension of malloc. It allows programmer to allocate a dynamic array of objects without calculating the total size of array explicitly. The only alternate of this routine in the other allocators is calloc but calloc also fills the dynamic memory with 0 which makes it slower for a programmer who only wants to dynamically allocate an array of objects without filling it with 0. 122 \paragraph{Usage} 123 aalloc takes two parameters. 124 125 \begin{itemize} 126 \item 127 dim: number of objects in the array 128 \item 129 elemSize: size of the object in the array. 
130 \end{itemize} 131 It returns address of dynamic object allocatoed on heap that can contain dim number of objects of the size elemSize. On failure, it returns NULL pointer. 132 133 \subsubsection void * resize( void * oaddr, size\_t size ) 134 resize is an extension of relloc. It allows programmer to reuse a cuurently allocated dynamic object with a new size requirement. Its alternate in the other allocators is realloc but relloc also copy the data in old object to the new object which makes it slower for the programmer who only wants to reuse an old dynamic object for a new size requirement but does not want to preserve the data in the old object to the new object. 135 \paragraph{Usage} 136 resize takes two parameters. 137 138 \begin{itemize} 139 \item 140 oaddr: the address of the old object that needs to be resized. 141 \item 142 size: the new size requirement of the to which the old object needs to be resized. 143 \end{itemize} 144 It returns an object that is of the size given but it does not preserve the data in the old object. On failure, it returns NULL pointer. 145 146 \subsubsection void * resize( void * oaddr, size\_t nalign, size\_t size ) 147 This resize is an extension of the above resize (FIX ME: cite above resize). In addition to resizing the size of of an old object, it can also realign the old object to a new alignment requirement. 120 \subsection{Out of Memory} 121 122 Most allocators use @nullptr@ to indicate an allocation failure, specifically out of memory; 123 hence the need to return an alternate value for a zero-sized allocation. 124 The alternative is to abort a program when out of memory. 125 In theory, notifying the programmer allows recovery; 126 in practice, it is almost impossible to gracefully when out of memory, so the cheaper approach of returning @nullptr@ for a zero-sized allocation is chosen. 127 128 129 \subsection{\lstinline{void * aalloc( size_t dim, size_t elemSize )}} 130 @aalloc@ is an extension of malloc. 
It allows programmer to allocate a dynamic array of objects without calculating the total size of array explicitly. The only alternate of this routine in the other allocators is calloc but calloc also fills the dynamic memory with 0 which makes it slower for a programmer who only wants to dynamically allocate an array of objects without filling it with 0. 131 \paragraph{Usage} 132 @aalloc@ takes two parameters. 133 134 \begin{itemize} 135 \item 136 @dim@: number of objects in the array 137 \item 138 @elemSize@: size of the object in the array. 139 \end{itemize} 140 It returns address of dynamic object allocatoed on heap that can contain dim number of objects of the size elemSize. On failure, it returns a @NULL@ pointer. 141 142 \subsection{\lstinline{void * resize( void * oaddr, size_t size )}} 143 @resize@ is an extension of relloc. It allows programmer to reuse a cuurently allocated dynamic object with a new size requirement. Its alternate in the other allocators is @realloc@ but relloc also copy the data in old object to the new object which makes it slower for the programmer who only wants to reuse an old dynamic object for a new size requirement but does not want to preserve the data in the old object to the new object. 144 \paragraph{Usage} 145 @resize@ takes two parameters. 146 147 \begin{itemize} 148 \item 149 @oaddr@: the address of the old object that needs to be resized. 150 \item 151 @size@: the new size requirement of the to which the old object needs to be resized. 152 \end{itemize} 153 It returns an object that is of the size given but it does not preserve the data in the old object. On failure, it returns a @NULL@ pointer. 154 155 \subsection{\lstinline{void * resize( void * oaddr, size_t nalign, size_t size )}} 156 This @resize@ is an extension of the above @resize@ (FIX ME: cite above resize). In addition to resizing the size of of an old object, it can also realign the old object to a new alignment requirement. 
148 157 \paragraph{Usage} 149 158 This resize takes three parameters. It takes an additional parameter of nalign as compared to the above resize (FIX ME: cite above resize). … … 151 160 \begin{itemize} 152 161 \item 153 oaddr: the address of the old object that needs to be resized.154 \item 155 nalign: the new alignment to which the old object needs to be realigned.156 \item 157 size: the new size requirement of the to which the old object needs to be resized.158 \end{itemize} 159 It returns an object with the size and alignment given in the parameters. On failure, it returns a NULLpointer.160 161 \subs ubsection void * amemalign( size\_t alignment, size\_t dim, size\_t elemSize )162 @oaddr@: the address of the old object that needs to be resized. 163 \item 164 @nalign@: the new alignment to which the old object needs to be realigned. 165 \item 166 @size@: the new size requirement of the to which the old object needs to be resized. 167 \end{itemize} 168 It returns an object with the size and alignment given in the parameters. On failure, it returns a @NULL@ pointer. 169 170 \subsection{\lstinline{void * amemalign( size_t alignment, size_t dim, size_t elemSize )}} 162 171 amemalign is a hybrid of memalign and aalloc. It allows programmer to allocate an aligned dynamic array of objects without calculating the total size of the array explicitly. It frees the programmer from calculating the total size of the array. 163 172 \paragraph{Usage} … … 166 175 \begin{itemize} 167 176 \item 168 alignment: the alignment to which the dynamic array needs to be aligned.169 \item 170 dim: number of objects in the array171 \item 172 elemSize: size of the object in the array.173 \end{itemize} 174 It returns a dynamic array of objects that has the capacity to contain dim number of objects of the size of elemSize. The returned dynamic array is aligned to the given alignment. 
On failure, it returns NULLpointer.175 176 \subs ubsection void * cmemalign( size\_t alignment, size\_t dim, size\_t elemSize )177 @alignment@: the alignment to which the dynamic array needs to be aligned. 178 \item 179 @dim@: number of objects in the array 180 \item 181 @elemSize@: size of the object in the array. 182 \end{itemize} 183 It returns a dynamic array of objects that has the capacity to contain dim number of objects of the size of elemSize. The returned dynamic array is aligned to the given alignment. On failure, it returns a @NULL@ pointer. 184 185 \subsection{\lstinline{void * cmemalign( size_t alignment, size_t dim, size_t elemSize )}} 177 186 cmemalign is a hybrid of amemalign and calloc. It allows programmer to allocate an aligned dynamic array of objects that is 0 filled. The current way to do this in other allocators is to allocate an aligned object with memalign and then fill it with 0 explicitly. This routine provides both features of aligning and 0 filling, implicitly. 178 187 \paragraph{Usage} … … 181 190 \begin{itemize} 182 191 \item 183 alignment: the alignment to which the dynamic array needs to be aligned.184 \item 185 dim: number of objects in the array186 \item 187 elemSize: size of the object in the array.188 \end{itemize} 189 It returns a dynamic array of objects that has the capacity to contain dim number of objects of the size of elemSize. The returned dynamic array is aligned to the given alignment and is 0 filled. On failure, it returns NULLpointer.190 191 \subs ubsection size\_t malloc\_alignment( void * addr )192 malloc\_alignmentreturns the alignment of a currently allocated dynamic object. It allows the programmer in memory management and personal bookkeeping. 
It helps the programmer in verofying the alignment of a dynamic object especially in a scenerio similar to prudcer-consumer where a producer allocates a dynamic object and the consumer needs to assure that the dynamic object was allocated with the required alignment.193 \paragraph{Usage} 194 malloc\_alignmenttakes one parameters.195 196 \begin{itemize} 197 \item 198 addr: the address of the currently allocated dynamic object.199 \end{itemize} 200 malloc\_alignmentreturns the alignment of the given dynamic object. On failure, it return the value of default alignment of the uHeapLmmm allocator.201 202 \subs ubsection bool malloc\_zero\_fill( void * addr )203 malloc\_zero\_fillreturns whether a currently allocated dynamic object was initially zero filled at the time of allocation. It allows the programmer in memory management and personal bookkeeping. It helps the programmer in verifying the zero filled property of a dynamic object especially in a scenerio similar to prudcer-consumer where a producer allocates a dynamic object and the consumer needs to assure that the dynamic object was zero filled at the time of allocation.204 \paragraph{Usage} 205 malloc\_zero\_filltakes one parameters.206 207 \begin{itemize} 208 \item 209 addr: the address of the currently allocated dynamic object.210 \end{itemize} 211 malloc\_zero\_fillreturns true if the dynamic object was initially zero filled and return false otherwise. On failure, it returns false.212 213 \subs ubsection size\_t malloc\_size( void * addr )214 malloc\_size returns the allocation size of a currently allocated dynamic object. It allows the programmer in memory management and personal bookkeeping. It helps the programmer in verofying the alignment of a dynamic object especially in a scenerio similar to prudcer-consumer where a producer allocates a dynamic object and the consumer needs to assure that the dynamic object was allocated with the required size. 
Its current alternate in the other allocators is malloc\_usable\_size. But, malloc\_size is different from malloc\_usable\_size as malloc\_usabe\_size returns the total data capacity of dynamic object including the extra space at the end of the dynamic object. On the other hand, malloc\_sizereturns the size that was given to the allocator at the allocation of the dynamic object. This size is updated when an object is realloced, resized, or passed through a similar allocator routine.215 \paragraph{Usage} 216 malloc\_sizetakes one parameters.217 218 \begin{itemize} 219 \item 220 addr: the address of the currently allocated dynamic object.221 \end{itemize} 222 malloc\_sizereturns the allocation size of the given dynamic object. On failure, it return zero.223 224 \subs ubsection void * realloc( void * oaddr, size\_t nalign, size\_t size )225 This realloc is an extension of the default realloc (FIX ME: cite default realloc). In addition to reallocating an old object and preserving the data in old object, it can also realign the old object to a new alignment requirement.226 \paragraph{Usage} 227 This realloc takes three parameters. It takes an additional parameter of nalign as compared to the default realloc.228 229 \begin{itemize} 230 \item 231 oaddr: the address of the old object that needs to be reallocated.232 \item 233 nalign: the new alignment to which the old object needs to be realigned.234 \item 235 size: the new size requirement of the to which the old object needs to be resized.236 \end{itemize} 237 It returns an object with the size and alignment given in the parameters that preserves the data in the old object. On failure, it returns a NULLpointer.238 239 \subsection{ CFA Malloc Interface}240 We added some routines to the malloc interface of CFA. These routines can only be used in CFA and not in our standalone uHeapLmmm allocator as these routines use some features that are only provided byCFA and not by C. 
It makes the allocator even more usable to the programmers.241 CFA provides the liberty to know the returned type of a call to the allocator. So, mainly in these added routines, we removed the object size parameter from the routine as allocator can calculate the size of the object from the returned type.242 243 \subs ubsection T * malloc( void )192 @alignment@: the alignment to which the dynamic array needs to be aligned. 193 \item 194 @dim@: number of objects in the array 195 \item 196 @elemSize@: size of the object in the array. 197 \end{itemize} 198 It returns a dynamic array of objects that has the capacity to contain dim number of objects of the size of elemSize. The returned dynamic array is aligned to the given alignment and is 0 filled. On failure, it returns a @NULL@ pointer. 199 200 \subsection{\lstinline{size_t malloc_alignment( void * addr )}} 201 @malloc_alignment@ returns the alignment of a currently allocated dynamic object. It allows the programmer in memory management and personal bookkeeping. It helps the programmer in verofying the alignment of a dynamic object especially in a scenerio similar to prudcer-consumer where a producer allocates a dynamic object and the consumer needs to assure that the dynamic object was allocated with the required alignment. 202 \paragraph{Usage} 203 @malloc_alignment@ takes one parameters. 204 205 \begin{itemize} 206 \item 207 @addr@: the address of the currently allocated dynamic object. 208 \end{itemize} 209 @malloc_alignment@ returns the alignment of the given dynamic object. On failure, it return the value of default alignment of the uHeapLmmm allocator. 210 211 \subsection{\lstinline{bool malloc_zero_fill( void * addr )}} 212 @malloc_zero_fill@ returns whether a currently allocated dynamic object was initially zero filled at the time of allocation. It allows the programmer in memory management and personal bookkeeping. 
It helps the programmer in verifying the zero filled property of a dynamic object especially in a scenerio similar to prudcer-consumer where a producer allocates a dynamic object and the consumer needs to assure that the dynamic object was zero filled at the time of allocation. 213 \paragraph{Usage} 214 @malloc_zero_fill@ takes one parameters. 215 216 \begin{itemize} 217 \item 218 @addr@: the address of the currently allocated dynamic object. 219 \end{itemize} 220 @malloc_zero_fill@ returns true if the dynamic object was initially zero filled and return false otherwise. On failure, it returns false. 221 222 \subsection{\lstinline{size_t malloc_size( void * addr )}} 223 @malloc_size@ returns the allocation size of a currently allocated dynamic object. It allows the programmer in memory management and personal bookkeeping. It helps the programmer in verofying the alignment of a dynamic object especially in a scenerio similar to prudcer-consumer where a producer allocates a dynamic object and the consumer needs to assure that the dynamic object was allocated with the required size. Its current alternate in the other allocators is @malloc_usable_size@. But, @malloc_size@ is different from @malloc_usable_size@ as @malloc_usabe_size@ returns the total data capacity of dynamic object including the extra space at the end of the dynamic object. On the other hand, @malloc_size@ returns the size that was given to the allocator at the allocation of the dynamic object. This size is updated when an object is realloced, resized, or passed through a similar allocator routine. 224 \paragraph{Usage} 225 @malloc_size@ takes one parameters. 226 227 \begin{itemize} 228 \item 229 @addr@: the address of the currently allocated dynamic object. 230 \end{itemize} 231 @malloc_size@ returns the allocation size of the given dynamic object. On failure, it return zero. 
232 233 \subsection{\lstinline{void * realloc( void * oaddr, size_t nalign, size_t size )}} 234 This @realloc@ is an extension of the default @realloc@ (FIX ME: cite default @realloc@). In addition to reallocating an old object and preserving the data in old object, it can also realign the old object to a new alignment requirement. 235 \paragraph{Usage} 236 This @realloc@ takes three parameters. It takes an additional parameter of nalign as compared to the default @realloc@. 237 238 \begin{itemize} 239 \item 240 @oaddr@: the address of the old object that needs to be reallocated. 241 \item 242 @nalign@: the new alignment to which the old object needs to be realigned. 243 \item 244 @size@: the new size requirement of the to which the old object needs to be resized. 245 \end{itemize} 246 It returns an object with the size and alignment given in the parameters that preserves the data in the old object. On failure, it returns a @NULL@ pointer. 247 248 \subsection{\CFA Malloc Interface} 249 We added some routines to the malloc interface of \CFA. These routines can only be used in \CFA and not in our standalone uHeapLmmm allocator as these routines use some features that are only provided by \CFA and not by C. It makes the allocator even more usable to the programmers. 250 \CFA provides the liberty to know the returned type of a call to the allocator. So, mainly in these added routines, we removed the object size parameter from the routine as allocator can calculate the size of the object from the returned type. 251 252 \subsection{\lstinline{T * malloc( void )}} 244 253 This malloc is a simplified polymorphic form of defualt malloc (FIX ME: cite malloc). It does not take any parameter as compared to default malloc that takes one parameter. 245 254 \paragraph{Usage} 246 255 This malloc takes no parameters. 247 It returns a dynamic object of the size of type T. 
On failure, it return NULLpointer.248 249 \subs ubsection T * aalloc( size\_t dim )256 It returns a dynamic object of the size of type @T@. On failure, it returns a @NULL@ pointer. 257 258 \subsection{\lstinline{T * aalloc( size_t dim )}} 250 259 This aalloc is a simplified polymorphic form of above aalloc (FIX ME: cite aalloc). It takes one parameter as compared to the above aalloc that takes two parameters. 251 260 \paragraph{Usage} … … 254 263 \begin{itemize} 255 264 \item 256 dim: required number of objects in the array.257 \end{itemize} 258 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type T. On failure, it return NULLpointer.259 260 \subs ubsection T * calloc( size\_t dim )265 @dim@: required number of objects in the array. 266 \end{itemize} 267 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type @T@. On failure, it returns a @NULL@ pointer. 268 269 \subsection{\lstinline{T * calloc( size_t dim )}} 261 270 This calloc is a simplified polymorphic form of defualt calloc (FIX ME: cite calloc). It takes one parameter as compared to the default calloc that takes two parameters. 262 271 \paragraph{Usage} … … 265 274 \begin{itemize} 266 275 \item 267 dim: required number of objects in the array.268 \end{itemize} 269 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type T. On failure, it return NULLpointer.270 271 \subs ubsection T * resize( T * ptr, size\_t size )272 This resize is a simplified polymorphic form of above resize (FIX ME: cite resize with alignment). It takes two parameters as compared to the above resize that takes three parameters. It frees the programmer from explicitly mentioning the alignment of the allocation as CFA provides gives allocator the liberty to get the alignment of the returned type.276 @dim@: required number of objects in the array. 
277 \end{itemize} 278 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type @T@. On failure, it returns a @NULL@ pointer. 279 280 \subsection{\lstinline{T * resize( T * ptr, size_t size )}} 281 This resize is a simplified polymorphic form of above resize (FIX ME: cite resize with alignment). It takes two parameters as compared to the above resize that takes three parameters. It frees the programmer from explicitly mentioning the alignment of the allocation as \CFA provides gives allocator the liberty to get the alignment of the returned type. 273 282 \paragraph{Usage} 274 283 This resize takes two parameters. … … 276 285 \begin{itemize} 277 286 \item 278 ptr: address of the old object.279 \item 280 size: the required size of the new object.281 \end{itemize} 282 It returns a dynamic object of the size given in paramters. The returned object is aligned to the alignemtn of type T. On failure, it return NULLpointer.283 284 \subs ubsection T * realloc( T * ptr, size\_t size )285 This realloc is a simplified polymorphic form of defualt realloc (FIX ME: cite realloc with align). It takes two parameters as compared to the above realloc that takes three parameters. It frees the programmer from explicitly mentioning the alignment of the allocation asCFA provides gives allocator the liberty to get the alignment of the returned type.286 \paragraph{Usage} 287 This realloctakes two parameters.288 289 \begin{itemize} 290 \item 291 ptr: address of the old object.292 \item 293 size: the required size of the new object.294 \end{itemize} 295 It returns a dynamic object of the size given in paramters that preserves the data in the given object. The returned object is aligned to the alignemtn of type T. On failure, it return NULLpointer.296 297 \subs ubsection T * memalign( size\_t align )287 @ptr@: address of the old object. 288 \item 289 @size@: the required size of the new object. 
290 \end{itemize} 291 It returns a dynamic object of the size given in paramters. The returned object is aligned to the alignemtn of type @T@. On failure, it returns a @NULL@ pointer. 292 293 \subsection{\lstinline{T * realloc( T * ptr, size_t size )}} 294 This @realloc@ is a simplified polymorphic form of defualt @realloc@ (FIX ME: cite @realloc@ with align). It takes two parameters as compared to the above @realloc@ that takes three parameters. It frees the programmer from explicitly mentioning the alignment of the allocation as \CFA provides gives allocator the liberty to get the alignment of the returned type. 295 \paragraph{Usage} 296 This @realloc@ takes two parameters. 297 298 \begin{itemize} 299 \item 300 @ptr@: address of the old object. 301 \item 302 @size@: the required size of the new object. 303 \end{itemize} 304 It returns a dynamic object of the size given in paramters that preserves the data in the given object. The returned object is aligned to the alignemtn of type @T@. On failure, it returns a @NULL@ pointer. 305 306 \subsection{\lstinline{T * memalign( size_t align )}} 298 307 This memalign is a simplified polymorphic form of defualt memalign (FIX ME: cite memalign). It takes one parameters as compared to the default memalign that takes two parameters. 299 308 \paragraph{Usage} … … 302 311 \begin{itemize} 303 312 \item 304 align: the required alignment of the dynamic object.305 \end{itemize} 306 It returns a dynamic object of the size of type T that is aligned to given parameter align. On failure, it return NULLpointer.307 308 \subs ubsection T * amemalign( size\_t align, size\_t dim )313 @align@: the required alignment of the dynamic object. 314 \end{itemize} 315 It returns a dynamic object of the size of type @T@ that is aligned to given parameter align. On failure, it returns a @NULL@ pointer. 
316 317 \subsection{\lstinline{T * amemalign( size_t align, size_t dim )}} 309 318 This amemalign is a simplified polymorphic form of above amemalign (FIX ME: cite amemalign). It takes two parameter as compared to the above amemalign that takes three parameters. 310 319 \paragraph{Usage} … … 313 322 \begin{itemize} 314 323 \item 315 align: required alignment of the dynamic array.316 \item 317 dim: required number of objects in the array.318 \end{itemize} 319 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type T. The returned object is aligned to the given parameter align. On failure, it return NULLpointer.320 321 \subs ubsection T * cmemalign( size\_t align, size\_t dim )324 @align@: required alignment of the dynamic array. 325 \item 326 @dim@: required number of objects in the array. 327 \end{itemize} 328 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type @T@. The returned object is aligned to the given parameter align. On failure, it returns a @NULL@ pointer. 329 330 \subsection{\lstinline{T * cmemalign( size_t align, size_t dim )}} 322 331 This cmemalign is a simplified polymorphic form of above cmemalign (FIX ME: cite cmemalign). It takes two parameter as compared to the above cmemalign that takes three parameters. 323 332 \paragraph{Usage} … … 326 335 \begin{itemize} 327 336 \item 328 align: required alignment of the dynamic array. 329 \item 330 dim: required number of objects in the array. 331 \end{itemize} 332 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type T. The returned object is aligned to the given parameter align and is zero filled. On failure, it return NULL pointer. 333 334 \subsubsection T * aligned\_alloc( size\_t align ) 335 This aligned\_alloc is a simplified polymorphic form of defualt aligned\_alloc (FIX ME: cite aligned\_alloc). 
It takes one parameter as compared to the default aligned\_alloc that takes two parameters. 336 \paragraph{Usage} 337 This aligned\_alloc takes one parameter. 338 339 \begin{itemize} 340 \item 341 align: required alignment of the dynamic object. 342 \end{itemize} 343 It returns a dynamic object of the size of type T that is aligned to the given parameter. On failure, it return NULL pointer. 344 345 \subsubsection int posix\_memalign( T ** ptr, size\_t align ) 346 This posix\_memalign is a simplified polymorphic form of defualt posix\_memalign (FIX ME: cite posix\_memalign). It takes two parameters as compared to the default posix\_memalign that takes three parameters. 347 \paragraph{Usage} 348 This posix\_memalign takes two parameter. 349 350 \begin{itemize} 351 \item 352 ptr: variable address to store the address of the allocated object. 353 \item 354 align: required alignment of the dynamic object. 355 \end{itemize} 356 357 It stores address of the dynamic object of the size of type T in given parameter ptr. This object is aligned to the given parameter. On failure, it return NULL pointer. 358 359 \subsubsection T * valloc( void ) 360 This valloc is a simplified polymorphic form of defualt valloc (FIX ME: cite valloc). It takes no parameters as compared to the default valloc that takes one parameter. 361 \paragraph{Usage} 362 valloc takes no parameters. 363 It returns a dynamic object of the size of type T that is aligned to the page size. On failure, it return NULL pointer. 364 365 \subsubsection T * pvalloc( void ) 366 This pcvalloc is a simplified polymorphic form of defualt pcvalloc (FIX ME: cite pcvalloc). It takes no parameters as compared to the default pcvalloc that takes one parameter. 367 \paragraph{Usage} 368 pvalloc takes no parameters. 369 It returns a dynamic object of the size that is calcutaed by rouding the size of type T. The returned object is also aligned to the page size. On failure, it return NULL pointer. 
370 371 \subsection Alloc Interface 372 In addition to improve allocator interface both for CFA and our standalone allocator uHeapLmmm in C. We also added a new alloc interface in CFA that increases usability of dynamic memory allocation. 337 @align@: required alignment of the dynamic array. 338 \item 339 @dim@: required number of objects in the array. 340 \end{itemize} 341 It returns a dynamic object that has the capacity to contain dim number of objects, each of the size of type @T@. The returned object is aligned to the given parameter align and is zero filled. On failure, it returns a @NULL@ pointer. 342 343 \subsection{\lstinline{T * aligned_alloc( size_t align )}} 344 This @aligned_alloc@ is a simplified polymorphic form of defualt @aligned_alloc@ (FIX ME: cite @aligned_alloc@). It takes one parameter as compared to the default @aligned_alloc@ that takes two parameters. 345 \paragraph{Usage} 346 This @aligned_alloc@ takes one parameter. 347 348 \begin{itemize} 349 \item 350 @align@: required alignment of the dynamic object. 351 \end{itemize} 352 It returns a dynamic object of the size of type @T@ that is aligned to the given parameter. On failure, it returns a @NULL@ pointer. 353 354 \subsection{\lstinline{int posix_memalign( T ** ptr, size_t align )}} 355 This @posix_memalign@ is a simplified polymorphic form of defualt @posix_memalign@ (FIX ME: cite @posix_memalign@). It takes two parameters as compared to the default @posix_memalign@ that takes three parameters. 356 \paragraph{Usage} 357 This @posix_memalign@ takes two parameter. 358 359 \begin{itemize} 360 \item 361 @ptr@: variable address to store the address of the allocated object. 362 \item 363 @align@: required alignment of the dynamic object. 364 \end{itemize} 365 366 It stores address of the dynamic object of the size of type @T@ in given parameter ptr. This object is aligned to the given parameter. On failure, it returns a @NULL@ pointer. 
367 368 \subsection{\lstinline{T * valloc( void )}} 369 This @valloc@ is a simplified polymorphic form of defualt @valloc@ (FIX ME: cite @valloc@). It takes no parameters as compared to the default @valloc@ that takes one parameter. 370 \paragraph{Usage} 371 @valloc@ takes no parameters. 372 It returns a dynamic object of the size of type @T@ that is aligned to the page size. On failure, it returns a @NULL@ pointer. 373 374 \subsection{\lstinline{T * pvalloc( void )}} 375 \paragraph{Usage} 376 @pvalloc@ takes no parameters. 377 It returns a dynamic object of the size that is calcutaed by rouding the size of type @T@. The returned object is also aligned to the page size. On failure, it returns a @NULL@ pointer. 378 379 \subsection{Alloc Interface} 380 In addition to improve allocator interface both for \CFA and our standalone allocator uHeapLmmm in C. We also added a new alloc interface in \CFA that increases usability of dynamic memory allocation. 373 381 This interface helps programmers in three major ways. 374 382 … … 379 387 Parametre Positions: alloc interface frees programmers from remembering parameter postions in call to routines. 380 388 \item 381 Object Size: alloc interface does not require programmer to mention the object size as CFA allows allocator to determince the object size from returned type of alloc call.382 \end{itemize} 383 384 Alloc interface uses polymorphism, backtick routines (FIX ME: cite backtick) and ttype parameters of CFA (FIX ME: cite ttype) to provide a very simple dynamic memory allocation interface to the programmers. The new interfece has just one routine name alloc that can be used to perform a wide range of dynamic allocations. The parameters use backtick functions to provide a similar-to named parameters feature for our alloc interface so that programmers do not have to remember parameter positions in alloc call except the position of dimension (dim) parameter.385 386 \subs ubsection{Routine: T * alloc( ... 
)}387 Call to alloc wihout any parameter returns one object of size of type Tallocated dynamically.389 Object Size: alloc interface does not require programmer to mention the object size as \CFA allows allocator to determince the object size from returned type of alloc call. 390 \end{itemize} 391 392 Alloc interface uses polymorphism, backtick routines (FIX ME: cite backtick) and ttype parameters of \CFA (FIX ME: cite ttype) to provide a very simple dynamic memory allocation interface to the programmers. The new interfece has just one routine name alloc that can be used to perform a wide range of dynamic allocations. The parameters use backtick functions to provide a similar-to named parameters feature for our alloc interface so that programmers do not have to remember parameter positions in alloc call except the position of dimension (dim) parameter. 393 394 \subsection{Routine: \lstinline{T * alloc( ... )}} 395 Call to alloc wihout any parameter returns one object of size of type @T@ allocated dynamically. 388 396 Only the dimension (dim) parameter for array allocation has the fixed position in the alloc routine. If programmer wants to allocate an array of objects that the required number of members in the array has to be given as the first parameter to the alloc routine. 389 alocc routine accepts six kinds of arguments. Using different combinations of tha parameters, different kind of allocations can be performed. Any combincation of parameters can be used together except `realloc and `resize that should not be used simultanously in one call to routine as it creates ambiguity about whether to reallocate or resize a currently allocated dynamic object. If both `resize and `reallocare used in a call to alloc then the latter one will take effect or unexpected resulted might be produced.397 alocc routine accepts six kinds of arguments. Using different combinations of tha parameters, different kind of allocations can be performed. 
Any combincation of parameters can be used together except @`realloc@ and @`resize@ that should not be used simultanously in one call to routine as it creates ambiguity about whether to reallocate or resize a currently allocated dynamic object. If both @`resize@ and @`realloc@ are used in a call to alloc then the latter one will take effect or unexpected resulted might be produced. 390 398 391 399 \paragraph{Dim} 392 This is the only parameter in the alloc routine that has a fixed-position and it is also the only parameter that does not use a backtick function. It has to be passed at the first position to alloc call in-case of an array allocation of objects of type T.393 It represents the required number of members in the array allocation as in CFA's aalloc (FIX ME: cite aalloc).394 This parameter should be of type size\_t.395 396 Example: int a = alloc( 5 )400 This is the only parameter in the alloc routine that has a fixed-position and it is also the only parameter that does not use a backtick function. It has to be passed at the first position to alloc call in-case of an array allocation of objects of type @T@. 401 It represents the required number of members in the array allocation as in \CFA's aalloc (FIX ME: cite aalloc). 402 This parameter should be of type @size_t@. 403 404 Example: @int a = alloc( 5 )@ 397 405 This call will return a dynamic array of five integers. 398 406 399 407 \paragraph{Align} 400 This parameter is position-free and uses a backtick routine align ( `align). The parameter passed with `align should be of type size\_t. If the alignment parameter is not a power of two or is less than the default alignment of the allocator (that can be found out using routine libAlign inCFA) then the passed alignment parameter will be rejected and the default alignment will be used.401 402 Example: int b = alloc( 5 , 64`align )408 This parameter is position-free and uses a backtick routine align (@`align@). 
The parameter passed with @`align@ should be of type @size_t@. If the alignment parameter is not a power of two or is less than the default alignment of the allocator (that can be found out using routine libAlign in \CFA) then the passed alignment parameter will be rejected and the default alignment will be used. 409 410 Example: @int b = alloc( 5 , 64`align )@ 403 411 This call will return a dynamic array of five integers. It will align the allocated object to 64. 404 412 405 413 \paragraph{Fill} 406 This parameter is position-free and uses a backtick routine fill ( `fill). In case of realloc, only the extra space after copying the data in the old object will be filled with given parameter.414 This parameter is position-free and uses a backtick routine fill (@`fill@). In case of @realloc@, only the extra space after copying the data in the old object will be filled with given parameter. 407 415 Three types of parameters can be passed using `fill. 408 416 409 417 \begin{itemize} 410 418 \item 411 char: A char can be passed with `fillto fill the whole dynamic allocation with the given char recursively till the end of required allocation.412 \item 413 Object of returned type: An object of type of returned type can be passed with `fillto fill the whole dynamic allocation with the given object recursively till the end of required allocation.414 \item 415 Dynamic object of returned type: A dynamic object of type of returned type can be passed with `fill to fill the dynamic allocation with the given dynamic object. In this case, the allocated memory is not filled recursively till the end of allocation. The filling happen untill the end object passed to `fillor the end of requested allocation reaches.416 \end{itemize} 417 418 Example: int b = alloc( 5 , 'a'`fill )419 @char@: A char can be passed with @`fill@ to fill the whole dynamic allocation with the given char recursively till the end of required allocation. 
420 \item 421 Object of returned type: An object of type of returned type can be passed with @`fill@ to fill the whole dynamic allocation with the given object recursively till the end of required allocation. 422 \item 423 Dynamic object of returned type: A dynamic object of type of returned type can be passed with @`fill@ to fill the dynamic allocation with the given dynamic object. In this case, the allocated memory is not filled recursively till the end of allocation. The filling happen untill the end object passed to @`fill@ or the end of requested allocation reaches. 424 \end{itemize} 425 426 Example: @int b = alloc( 5 , 'a'`fill )@ 419 427 This call will return a dynamic array of five integers. It will fill the allocated object with character 'a' recursively till the end of requested allocation size. 420 428 421 Example: int b = alloc( 5 , 4`fill )429 Example: @int b = alloc( 5 , 4`fill )@ 422 430 This call will return a dynamic array of five integers. It will fill the allocated object with integer 4 recursively till the end of requested allocation size. 423 431 424 Example: int b = alloc( 5 , a`fill ) where ais a pointer of int type432 Example: @int b = alloc( 5 , a`fill )@ where @a@ is a pointer of int type 425 433 This call will return a dynamic array of five integers. It will copy data in a to the returned object non-recursively untill end of a or the newly allocated object is reached. 426 434 427 435 \paragraph{Resize} 428 This parameter is position-free and uses a backtick routine resize ( `resize). It represents the old dynamic object (oaddr) that the programmer wants to436 This parameter is position-free and uses a backtick routine resize (@`resize@). It represents the old dynamic object (oaddr) that the programmer wants to 429 437 \begin{itemize} 430 438 \item … … 435 443 fill with something. 436 444 \end{itemize} 437 The data in old dynamic object will not be preserved in the new object. 
The type of object passed to `resizeand the returned type of alloc call can be different.438 439 Example: int b = alloc( 5 , a`resize )445 The data in old dynamic object will not be preserved in the new object. The type of object passed to @`resize@ and the returned type of alloc call can be different. 446 447 Example: @int b = alloc( 5 , a`resize )@ 440 448 This call will resize object a to a dynamic array that can contain 5 integers. 441 449 442 Example: int b = alloc( 5 , a`resize , 32`align )450 Example: @int b = alloc( 5 , a`resize , 32`align )@ 443 451 This call will resize object a to a dynamic array that can contain 5 integers. The returned object will also be aligned to 32. 444 452 445 Example: int b = alloc( 5 , a`resize , 32`align , 2`fill)453 Example: @int b = alloc( 5 , a`resize , 32`align , 2`fill )@ 446 454 This call will resize object a to a dynamic array that can contain 5 integers. The returned object will also be aligned to 32 and will be filled with 2. 447 455 448 456 \paragraph{Realloc} 449 This parameter is position-free and uses a backtick routine realloc (`realloc). It represents the old dynamic object (oaddr) that the programmer wants to457 This parameter is position-free and uses a backtick routine @realloc@ (@`realloc@). It represents the old dynamic object (oaddr) that the programmer wants to 450 458 \begin{itemize} 451 459 \item … … 456 464 fill with something. 457 465 \end{itemize} 458 The data in old dynamic object will be preserved in the new object. The type of object passed to `reallocand the returned type of alloc call cannot be different.459 460 Example: int b = alloc( 5 , a`realloc )466 The data in old dynamic object will be preserved in the new object. The type of object passed to @`realloc@ and the returned type of alloc call cannot be different. 467 468 Example: @int b = alloc( 5 , a`realloc )@ 461 469 This call will realloc object a to a dynamic array that can contain 5 integers. 
462 470 463 Example: int b = alloc( 5 , a`realloc , 32`align )471 Example: @int b = alloc( 5 , a`realloc , 32`align )@ 464 472 This call will realloc object a to a dynamic array that can contain 5 integers. The returned object will also be aligned to 32. 465 473 466 Example: int b = alloc( 5 , a`realloc , 32`align , 2`fill)474 Example: @int b = alloc( 5 , a`realloc , 32`align , 2`fill )@ 467 475 This call will resize object a to a dynamic array that can contain 5 integers. The returned object will also be aligned to 32. The extra space after copying data of a to the returned object will be filled with 2. -
doc/theses/mubeen_zulfiqar_MMath/background.tex
rf5a51db rcc7bbe6 1 1 \chapter{Background} 2 3 4 A program dynamically allocates and deallocates the storage for a variable, referred to as an \newterm{object}, through calls such as @malloc@ and @free@ in C, and @new@ and @delete@ in \CC. 5 Space for each allocated object comes from the dynamic-allocation zone. 6 A \newterm{memory allocator} contains a complex data-structure and code that manages the layout of objects in the dynamic-allocation zone. 7 The management goals are to make allocation/deallocation operations as fast as possible while densely packing objects to make efficient use of memory. 8 Objects in C/\CC cannot be moved to aid the packing process, only adjacent free storage can be \newterm{coalesced} into larger free areas. 9 The allocator grows or shrinks the dynamic-allocation zone to obtain storage for objects and reduce memory usage via operating-system calls, such as @mmap@ or @sbrk@ in UNIX. 10 11 12 \section{Allocator Components} 13 \label{s:AllocatorComponents} 14 15 \VRef[Figure]{f:AllocatorComponents} shows the two important data components for a memory allocator, management and storage, collectively called the \newterm{heap}. 16 The \newterm{management data} is a data structure located at a known memory address and contains all information necessary to manage the storage data. 17 The management data starts with fixed-sized information in the static-data memory that flows into the dynamic-allocation memory. 18 The \newterm{storage data} is composed of allocated and freed objects, and \newterm{reserved memory}. 19 Allocated objects (white) are variable sized, and allocated and maintained by the program; 20 \ie only the program knows the location of allocated storage, not the memory allocator. 
21 \begin{figure}[h] 22 \centering 23 \input{AllocatorComponents} 24 \caption{Allocator Components (Heap)} 25 \label{f:AllocatorComponents} 26 \end{figure} 27 Freed objects (light grey) are memory deallocated by the program, which are linked into one or more lists facilitating easy location for new allocations. 28 Often the free list is chained internally so it does not consume additional storage, \ie the link fields are placed at known locations in the unused memory blocks. 29 Reserved memory (dark grey) is one or more blocks of memory obtained from the operating system but not yet allocated to the program; 30 if there are multiple reserved blocks, they are also chained together, usually internally. 31 32 Allocated and freed objects typically have additional management data embedded within them. 33 \VRef[Figure]{f:AllocatedObject} shows an allocated object with a header, trailer, and alignment padding and spacing around the object. 34 The header contains information about the object, \eg size, type, etc. 35 The trailer may be used to simplify an allocation implementation, \eg coalescing, and/or for security purposes to mark the end of an object. 36 An object may be preceded by padding to ensure proper alignment. 37 Some algorithms quantize allocation requests into distinct sizes resulting in additional spacing after objects less than the quantized value. 38 When padding and spacing are necessary, neither can be used to satisfy a future allocation request while the current allocation exists. 39 A free object also contains management data, \eg size, chaining, etc. 40 The amount of management data for a free node defines the minimum allocation size, \eg if 16 bytes are needed for a free-list node, any allocation request less than 16 bytes must be rounded up, otherwise the free list cannot use internal chaining. 
41 The information in an allocated or freed object is overwritten when it transitions from allocated to freed and vice-versa by new management information and possibly data. 42 43 \begin{figure} 44 \centering 45 \input{AllocatedObject} 46 \caption{Allocated Object} 47 \label{f:AllocatedObject} 48 \end{figure} 49 50 51 \section{Single-Threaded Memory-Allocator} 52 \label{s:SingleThreadedMemoryAllocator} 53 54 A single-threaded memory-allocator does not run any threads itself, but is used by a single-threaded program. 55 Because the memory allocator is only executed by a single thread, concurrency issues do not exist. 56 The primary issues in designing a single-threaded memory-allocator are fragmentation and locality. 57 58 59 \subsection{Fragmentation} 60 \label{s:Fragmentation} 61 62 Fragmentation is memory requested from the operating system but not used by the program; 63 hence, allocated objects are not fragmentation. 64 \VRef[Figure]{f:InternalExternalFragmentation} shows fragmentation is divided into two forms: internal or external. 65 66 \begin{figure} 67 \centering 68 \input{IntExtFragmentation} 69 \caption{Internal and External Fragmentation} 70 \label{f:InternalExternalFragmentation} 71 \end{figure} 72 73 \newterm{Internal fragmentation} is memory space that is allocated to the program, but is not intended to be accessed by the program, such as headers, trailers, padding, and spacing around an allocated object. 74 This memory is typically used by the allocator for management purposes or required by the architecture for correctness, \eg alignment. 75 Internal fragmentation is problematic when management space is a significant proportion of an allocated object. 76 For example, if internal fragmentation is as large as the object being managed, then the memory usage for that object is doubled. 77 An allocator should strive to keep internal management information to a minimum. 
78 79 \newterm{External fragmentation} is all memory space reserved from the operating system but not allocated to the program~\cite{Wilson95,Lim98,Siebert00}, which includes freed objects, all external management data, and reserved memory. 80 This memory is problematic in two ways: heap blowup and highly fragmented memory. 81 \newterm{Heap blowup} occurs when memory freed by the program is not reused for future allocations leading to potentially unbounded external fragmentation growth~\cite{Berger00}. 82 Heap blowup can occur due to allocator policies that are too restrictive in reusing freed memory and/or no coalescing of free storage. 83 Memory can become \newterm{highly fragmented} after multiple allocations and deallocations of objects. 84 \VRef[Figure]{f:MemoryFragmentation} shows an example of how a small block of memory fragments as objects are allocated and deallocated over time. 85 Blocks of free memory become smaller and non-contiguous making them less useful in serving allocation requests. 86 Memory is highly fragmented when the sizes of most free blocks are unusable. 87 For example, \VRef[Figure]{f:Contiguous} and \VRef[Figure]{f:HighlyFragmented} have the same quantity of external fragmentation, but \VRef[Figure]{f:HighlyFragmented} is highly fragmented. 88 If there is a request to allocate a large object, \VRef[Figure]{f:Contiguous} is more likely to be able to satisfy it with existing free memory, while \VRef[Figure]{f:HighlyFragmented} likely has to request more memory from the operating system. 
89 90 \begin{figure} 91 \centering 92 \input{MemoryFragmentation} 93 \caption{Memory Fragmentation} 94 \label{f:MemoryFragmentation} 95 \vspace{10pt} 96 \subfigure[Contiguous]{ 97 \input{ContigFragmentation} 98 \label{f:Contiguous} 99 } % subfigure 100 \subfigure[Highly Fragmented]{ 101 \input{NonContigFragmentation} 102 \label{f:HighlyFragmented} 103 } % subfigure 104 \caption{Fragmentation Quality} 105 \label{f:FragmentationQuality} 106 \end{figure} 107 108 For a single-threaded memory allocator, three basic approaches for controlling fragmentation have been identified~\cite{Johnstone99}. 109 The first approach is a \newterm{sequential-fit algorithm} with one list of free objects that is searched for a block large enough to fit a requested object size. 110 Different search policies determine the free object selected, \eg the first free object large enough or closest to the requested size. 111 Any storage larger than the request can become spacing after the object or be split into a smaller free object. 112 The cost of the search depends on the shape and quality of the free list, \eg a linear versus a binary-tree free-list, a sorted versus unsorted free-list. 113 114 The second approach is a \newterm{segregated} or \newterm{binning algorithm} with a set of lists for different sized freed objects. 115 When an object is allocated, the requested size is rounded up to the nearest bin-size, possibly with spacing after the object. 116 A binning algorithm is fast at finding free memory of the appropriate size and allocating it, since the first free object on the free list is used. 117 The fewer bin-sizes, the fewer lists need to be searched and maintained; 118 however, the bin sizes are less likely to closely fit the requested object size, leading to more internal fragmentation. 119 The more bin-sizes, the longer the search and the less likely free objects are to be reused, leading to more external fragmentation and potentially heap blowup. 
120 A variation of the binning algorithm allows objects to be allocated to the requested size, but when an object is freed, it is placed on the free list of the next smallest or equal bin-size. 121 For example, with bin sizes of 8 and 16 bytes, a request for 12 bytes allocates only 12 bytes, but when the object is freed, it is placed on the 8-byte bin-list. 122 For subsequent requests, the bin free-lists contain objects of different sizes, ranging from one bin-size to the next (8-16 in this example), and a sequential-fit algorithm may be used to find an object large enough for the requested size on the associated bin list. 123 124 The third approach is \newterm{splitting} and \newterm{coalescing algorithms}. 125 When an object is allocated, if there are no free objects of the requested size, a larger free object may be split into two smaller objects to satisfy the allocation request without obtaining more memory from the operating system. 126 For example, in the buddy system, a block of free memory is split into two equal chunks, one of those chunks is again split into two equal chunks, and so on until a block just large enough to fit the requested object is created. 127 When an object is deallocated it is coalesced with the objects immediately before and after it in memory, if they are free, turning them into one larger object. 128 Coalescing can be done eagerly at each deallocation or lazily when an allocation cannot be fulfilled. 129 In all cases, coalescing increases allocation latency, hence some allocations can cause unbounded delays during coalescing. 130 While coalescing does not reduce external fragmentation, the coalesced blocks improve fragmentation quality so future allocations are less likely to cause heap blowup. 131 Splitting and coalescing can be used with other algorithms to avoid highly fragmented memory. 
132 133 134 \subsection{Locality} 135 \label{s:Locality} 136 137 The principle of locality recognizes that programs tend to reference a small set of data, called a working set, for a certain period of time, where a working set is composed of temporal and spatial accesses~\cite{Denning05}. 138 Temporal clustering implies a group of objects are accessed repeatedly within a short time period, while spatial clustering implies a group of objects physically close together (nearby addresses) are accessed repeatedly within a short time period. 139 Temporal locality commonly occurs during an iterative computation with a fixed set of disjoint variables, while spatial locality commonly occurs when traversing an array. 140 141 Hardware takes advantage of temporal and spatial locality through multiple levels of caching (\ie memory hierarchy). 142 When an object is accessed, the memory physically located around the object is also cached with the expectation that the current and nearby objects will be referenced within a short period of time. 143 For example, entire cache lines are transferred between memory and cache and entire virtual-memory pages are transferred between disk and memory. 144 A program exhibiting good locality has better performance due to fewer cache misses and page faults\footnote{With the advent of large RAM memory, paging is becoming less of an issue in modern programming.}. 145 146 Temporal locality is largely controlled by how a program accesses its variables~\cite{Feng05}. 147 Nevertheless, a memory allocator can have some indirect influence on temporal locality and largely dictates spatial locality. 148 For temporal locality, an allocator can return storage for new allocations that was just freed as these memory locations are still \emph{warm} in the memory hierarchy. 149 For spatial locality, an allocator can place objects used together close together in memory, so the working set of the program fits into the fewest possible cache lines and pages. 
150 However, usage patterns are different for every program as is the underlying hardware memory architecture; 151 hence, no general-purpose memory-allocator can provide ideal locality for every program on every computer. 152 153 There are a number of ways a memory allocator can degrade locality by increasing the working set. 154 For example, a memory allocator may access multiple free objects before finding one to satisfy an allocation request (\eg sequential-fit algorithm). 155 If there are a (large) number of objects accessed in very different areas of memory, the allocator may perturb the program's memory hierarchy causing multiple cache or page misses~\cite{Grunwald93}. 156 Another way locality can be degraded is by spatially separating related data. 157 For example, in a binning allocator, objects of different sizes are allocated from different bins that may be located in different pages of memory. 158 159 160 \section{Multi-Threaded Memory-Allocator} 161 \label{s:MultiThreadedMemoryAllocator} 162 163 A multi-threaded memory-allocator does not run any threads itself, but is used by a multi-threaded program. 164 In addition to single-threaded design issues of locality and fragmentation, a multi-threaded allocator may be simultaneously accessed by multiple threads, and hence, must deal with concurrency issues such as mutual exclusion, false sharing, and additional forms of heap blowup. 165 166 167 \subsection{Mutual Exclusion} 168 \label{s:MutualExclusion} 169 170 \newterm{Mutual exclusion} provides sequential access to the shared management data of the heap. 171 There are two performance issues for mutual exclusion. 172 First is the overhead necessary to perform (at least) a hardware atomic operation every time a shared resource is accessed. 173 Second is when multiple threads contend for a shared resource simultaneously, and hence, some threads must wait until the resource is released. 
174 Contention can be reduced in a number of ways: 175 using multiple fine-grained locks versus a single lock, spreading the contention across a number of locks; 176 using trylock and generating new storage if the lock is busy, yielding a classic space versus time tradeoff; 177 using one of the many lock-free approaches for reducing contention on basic data-structure operations~\cite{Oyama99}. 178 However, all of these approaches have degenerate cases where contention occurs. 179 180 181 \subsection{False Sharing} 182 \label{s:FalseSharing} 183 184 False sharing is a dynamic phenomenon leading to cache thrashing. 185 When two or more threads on separate CPUs simultaneously change different objects sharing a cache line, the change invalidates the other thread's associated cache, even though these threads may be uninterested in the other modified object. 186 False sharing can occur in three different ways: program induced, allocator-induced active, and allocator-induced passive; 187 a memory allocator can only affect the latter two. 188 189 \paragraph{\newterm{Program-induced false-sharing}} occurs when one thread passes an object sharing a cache line to another thread, and both threads modify the respective objects. 190 \VRef[Figure]{f:ProgramInducedFalseSharing} shows when Task$_1$ passes Object$_2$ to Task$_2$, a false-sharing situation forms when Task$_1$ modifies Object$_1$ and Task$_2$ modifies Object$_2$. 191 Changes to Object$_1$ invalidate CPU$_2$'s cache line, and changes to Object$_2$ invalidate CPU$_1$'s cache line. 
192 193 \begin{figure} 194 \centering 195 \subfigure[Program-Induced False-Sharing]{ 196 \input{ProgramFalseSharing} 197 \label{f:ProgramInducedFalseSharing} 198 } \\ 199 \vspace{5pt} 200 \subfigure[Allocator-Induced Active False-Sharing]{ 201 \input{AllocInducedActiveFalseSharing} 202 \label{f:AllocatorInducedActiveFalseSharing} 203 } \\ 204 \vspace{5pt} 205 \subfigure[Allocator-Induced Passive False-Sharing]{ 206 \input{AllocInducedPassiveFalseSharing} 207 \label{f:AllocatorInducedPassiveFalseSharing} 208 } % subfigure 209 \caption{False Sharing} 210 \label{f:FalseSharing} 211 \end{figure} 212 213 \paragraph{\newterm{Allocator-induced active false-sharing}} occurs when objects are allocated within the same cache line but to different threads. 214 For example, in \VRef[Figure]{f:AllocatorInducedActiveFalseSharing}, each task allocates an object and loads a cache-line of memory into its associated cache. 215 Again, changes to Object$_1$ invalidate CPU$_2$'s cache line, and changes to Object$_2$ invalidate CPU$_1$'s cache line. 216 217 \paragraph{\newterm{Allocator-induced passive false-sharing}} is another form of allocator-induced false-sharing caused by program-induced false-sharing. 218 When an object in a program-induced false-sharing situation is deallocated, a future allocation of that object may cause passive false-sharing. 219 For example, in \VRef[Figure]{f:AllocatorInducedPassiveFalseSharing}, Task$_1$ passes Object$_2$ to Task$_2$, and Task$_2$ subsequently deallocates Object$_2$. 220 Allocator-induced passive false-sharing occurs when Object$_2$ is reallocated to Task$_2$ while Task$_1$ is still using Object$_1$. 221 222 223 \subsection{Heap Blowup} 224 \label{s:HeapBlowup} 225 226 In a multi-threaded program, heap blowup can occur when memory freed by one thread is inaccessible to other threads due to the allocation strategy. 227 Specific examples are presented in later sections. 
228 229 230 \section{Multi-Threaded Memory-Allocator Features} 231 \label{s:MultiThreadedMemoryAllocatorFeatures} 232 233 By analyzing a suite of existing allocators (see \VRef{s:ExistingAllocators}), the following salient features were identified: 234 \begin{list}{\arabic{enumi}.}{\usecounter{enumi}\topsep=0.5ex\parsep=0pt\itemsep=0pt} 235 \item multiple heaps 236 \begin{list}{\alph{enumii})}{\usecounter{enumii}\topsep=0.5ex\parsep=0pt\itemsep=0pt} 237 \item with or without a global heap 238 \item with or without ownership 239 \end{list} 240 \item object containers 241 \begin{list}{\alph{enumii})}{\usecounter{enumii}\topsep=0.5ex\parsep=0pt\itemsep=0pt} 242 \item with or without ownership 243 \item fixed or variable sized 244 \item global or local free-lists 245 \end{list} 246 \item hybrid private/public heap 247 \item allocation buffer 248 \item lock-free operations 249 \end{list} 250 The first feature, multiple heaps, pertains to different kinds of heaps. 251 The second feature, object containers, pertains to the organization of objects within the storage area. 252 The remaining features apply to different parts of the allocator design or implementation. 253 254 255 \section{Multiple Heaps} 256 \label{s:MultipleHeaps} 257 258 A single-threaded allocator has at most one thread and heap, while a multi-threaded allocator has potentially multiple threads and heaps. 259 The multiple threads cause complexity, and multiple heaps are a mechanism for dealing with the complexity. 260 The spectrum ranges from multiple threads using a single heap, denoted as T:1 (see \VRef[Figure]{f:SingleHeap}), to multiple threads sharing multiple heaps, denoted as T:H (see \VRef[Figure]{f:SharedHeaps}), to one thread per heap, denoted as 1:1 (see \VRef[Figure]{f:PerThreadHeap}), which is almost back to a single-threaded allocator. 261 262 In the T:1 model, all threads allocate and deallocate objects from one heap. 
263 Memory is obtained from the freed objects or reserved memory in the heap, or from the operating system (OS); 264 the heap may also return freed memory to the operating system. 265 The arrows indicate the direction memory conceptually moves for each kind of operation: allocation moves memory along the path from the heap/operating-system to the user application, while deallocation moves memory along the path from the application back to the heap/operating-system. 266 To safely handle concurrency, a single heap uses locking to provide mutual exclusion. 267 Whether using a single lock for all heap operations or fine-grained locking for different operations, a single heap may be a significant source of contention for programs with a large amount of memory allocation. 268 269 \begin{figure} 270 \centering 271 \subfigure[T:1]{ 272 % \input{SingleHeap.pstex_t} 273 \input{SingleHeap} 274 \label{f:SingleHeap} 275 } % subfigure 276 \vrule 277 \subfigure[T:H]{ 278 % \input{MultipleHeaps.pstex_t} 279 \input{SharedHeaps} 280 \label{f:SharedHeaps} 281 } % subfigure 282 \vrule 283 \subfigure[1:1]{ 284 % \input{MultipleHeapsGlobal.pstex_t} 285 \input{PerThreadHeap} 286 \label{f:PerThreadHeap} 287 } % subfigure 288 \caption{Multiple Heaps, Thread:Heap Relationship} 289 \end{figure} 290 291 In the T:H model, each thread allocates storage from several heaps depending on certain criteria, with the goal of reducing contention by spreading allocations/deallocations across the heaps. 292 The decision on when to create a new heap and which heap a thread allocates from depends on the allocator design. 293 The performance goal is to reduce the ratio of heaps to threads. 294 In general, locking is required, since more than one thread may concurrently access a heap during its lifetime, but contention is reduced because fewer threads access a specific heap. 
295 Two examples of this approach are: 296 \begin{description} 297 \item[heap pool:] 298 Multiple heaps are managed in a pool, starting with a single or a fixed number of heaps that increase\-/decrease depending on contention\-/space issues. 299 At creation, a thread is associated with a heap from the pool. 300 When the thread attempts an allocation and its associated heap is locked (contention), it scans for an unlocked heap in the pool. 301 If an unlocked heap is found, the thread changes its association and uses that heap. 302 If all heaps are locked, the thread may create a new heap, use it, and then place the new heap into the pool; 303 or the thread can block waiting for a heap to become available. 304 While the heap-pool approach often minimizes the number of extant heaps, the worst case can result in more heaps than threads; 305 \eg if the number of threads is large at startup with many allocations creating a large number of heaps and then the number of threads reduces. 306 \item[kernel threads:] 307 Each kernel thread (CPU) executing an application has its own heap. 308 A thread allocates/deallocates from/to the heap of the kernel thread on which it is executing. 309 Special precautions must be taken to handle or prevent the case where a thread is preempted during allocation/deallocation and restarts execution on a different kernel thread~\cite{Dice02}. 310 \end{description} 311 312 In the 1:1 model (thread heaps), each thread has its own heap, which eliminates contention and locking because no thread accesses another thread's heap. 313 An additional benefit of thread heaps is improved locality due to better memory layout. 314 As each thread only allocates from its heap, all objects for a thread are more consolidated in the storage area for that heap, better utilizing each CPU's cache and accessing fewer pages. 315 In contrast, the T:H model spreads each thread's objects over a larger area in different heaps. 
316 Thread heaps can also eliminate allocator-induced active false-sharing, if memory is acquired so it does not overlap at crucial boundaries with memory for another thread's heap. 317 For example, assume page boundaries coincide with cache line boundaries, then if a thread heap always acquires pages of memory, no two threads share a page or cache line unless pointers are passed among them. 318 Hence, allocator-induced active false-sharing in \VRef[Figure]{f:AllocatorInducedActiveFalseSharing} cannot occur because the memory for thread heaps never overlaps. 319 320 Threads using multiple heaps need to determine the specific heap to access for an allocation/deallocation, \ie association of thread to heap. 321 A number of techniques are used to establish this association. 322 The simplest approach is for each thread to have a pointer to its associated heap (or to administrative information that points to the heap), and this pointer changes if the association changes. 323 For threading systems with thread-local/specific storage, the heap pointer/data is created using this mechanism; 324 otherwise, the heap routines must use approaches like hashing the thread's stack-pointer or thread-id to find its associated heap. 325 326 The storage management for multiple heaps is more complex than for a single heap (see \VRef[Figure]{f:AllocatorComponents}). 327 \VRef[Figure]{f:MultipleHeapStorage} illustrates the general storage layout for multiple heaps. 328 Allocated and free objects are labelled by the thread or heap they are associated with. 329 (Links between free objects are removed for simplicity.) 330 The management information in the static zone must be able to locate all heaps in the dynamic zone. 331 The management information for the heaps must reside in the dynamic-allocation zone if there are a variable number. 332 Each heap in the dynamic zone is composed of a list of free objects and a pointer to its reserved memory. 
333 An alternative implementation is for all heaps to share one reserved memory, which requires a separate lock for the reserved storage to ensure mutual exclusion when acquiring new memory. 334 Because multiple threads can allocate/free/reallocate adjacent storage, all forms of false sharing may occur. 335 Other storage-management options are to use @mmap@ to set aside (large) areas of virtual memory for each heap and suballocate each heap's storage within that area. 336 337 \begin{figure} 338 \centering 339 \input{MultipleHeapsStorage} 340 \caption{Multiple-Heap Storage} 341 \label{f:MultipleHeapStorage} 342 \end{figure} 343 344 Multiple heaps increase external fragmentation as the ratio of heaps to threads increases, which can lead to heap blowup. 345 The external fragmentation experienced by a program with a single heap is now multiplied by the number of heaps, since each heap manages its own free storage and allocates its own reserved memory. 346 Additionally, objects freed by one heap cannot be reused by other threads, except indirectly by returning free memory to the operating system, which can be expensive. 347 (Depending on how the operating system provides dynamic storage to an application, returning storage may be difficult or impossible, \eg the contiguous @sbrk@ area in Unix.) 348 In the worst case, a program in which objects are allocated from one heap but deallocated to another heap means these freed objects are never reused. 349 350 Adding a \newterm{global heap} (G) attempts to reduce the cost of obtaining/returning memory among heaps (sharing) by buffering storage within the application address-space. 351 Now, each heap obtains and returns storage to/from the global heap rather than the operating system. 352 Storage is obtained from the global heap only when a heap allocation cannot be fulfilled, and returned to the global heap when a heap's free memory exceeds some threshold. 
353 Similarly, the global heap buffers this memory, obtaining and returning storage to/from the operating system as necessary. 354 The global heap does not have its own thread and makes no internal allocation requests; 355 instead, it uses the application thread, which called one of the multiple heaps and then the global heap, to perform operations. 356 Hence, the worst-case cost of a memory operation includes all these steps. 357 With respect to heap blowup, the global heap provides an indirect mechanism to move free memory among heaps, which usually has a much lower cost than interacting with the operating system to achieve the same goal and is independent of the mechanism used by the operating system to present dynamic memory to an address space. 358 359 However, since any thread may indirectly perform a memory operation on the global heap, it is a shared resource that requires locking. 360 A single lock can be used to protect the global heap or fine-grained locking can be used to reduce contention. 361 In general, the cost is minimal since the majority of memory operations are completed without the use of the global heap. 362 363 For thread heaps, when a kernel/user-thread terminates, there are two options for handling its heap. 364 First is to free all objects in the heap to the global heap and destroy the thread heap. 365 Second is to place the thread heap on a list of available heaps and reuse it for a new kernel/user thread in the future. 366 Destroying the thread heap immediately may reduce external fragmentation sooner, since all free objects are freed to the global heap and may be reused by other threads. 367 Alternatively, reusing thread heaps may improve performance if the inheriting thread makes similar allocation requests as the thread that previously held the thread heap. 
368 369 As multiple heaps are a key feature for a multi-threaded allocator, all further discussion assumes multiple heaps with a global heap to eliminate direct interaction with the operating system. 370 371 372 \subsection{Ownership} 373 \label{s:Ownership} 374 375 \newterm{Ownership} defines which heap an object is returned-to on deallocation. 376 If a thread returns an object to the heap it was originally allocated from, the heap has ownership of its objects. 377 Alternatively, a thread can return an object to the heap it is currently allocating from, which can be any heap accessible during a thread's lifetime. 378 \VRef[Figure]{f:HeapsOwnership} shows an example of multiple heaps (minus the global heap) with and without ownership. 379 Again, the arrows indicate the direction memory conceptually moves for each kind of operation. 380 For the 1:1 thread:heap relationship, a thread only allocates from its own heap, and without ownership, a thread only frees objects to its own heap, which means the heap is private to its owner thread and does not require any locking, called a \newterm{private heap}. 381 For the T:1/T:H models with or without ownership or the 1:1 model with ownership, a thread may free objects to different heaps, which makes each heap publicly accessible to all threads, called a \newterm{public heap}. 382 383 \begin{figure} 384 \centering 385 \subfigure[Ownership]{ 386 \input{MultipleHeapsOwnership} 387 } % subfigure 388 \hspace{0.25in} 389 \subfigure[No Ownership]{ 390 \input{MultipleHeapsNoOwnership} 391 } % subfigure 392 \caption{Heap Ownership} 393 \label{f:HeapsOwnership} 394 \end{figure} 395 396 \VRef[Figure]{f:MultipleHeapStorageOwnership} shows the effect of ownership on storage layout. 397 (For simplicity assume the heaps all use the same size of reserved storage.) 
398 In contrast to \VRef[Figure]{f:MultipleHeapStorage}, each reserved area used by a heap only contains free storage for that particular heap because threads must return free objects back to the owner heap. 399 Again, because multiple threads can allocate/free/reallocate adjacent storage in the same heap, all forms of false sharing may occur. 400 The exception is for the 1:1 model if reserved memory does not overlap a cache-line because all allocated storage within a used area is associated with a single thread. 401 In this case, there is no allocator-induced active false-sharing (see \VRef[Figure]{f:AllocatorInducedActiveFalseSharing}) because two adjacent allocated objects used by different threads cannot share a cache-line. 402 As well, there is no allocator-induced passive false-sharing (see \VRef[Figure]{f:AllocatorInducedPassiveFalseSharing}) because free objects are returned to the owner heap, so an object freed by one thread cannot be reallocated to a different thread. 403 % Passive false-sharing may still occur, if delayed ownership is used (see below). 404 405 \begin{figure} 406 \centering 407 \input{MultipleHeapsOwnershipStorage.pstex_t} 408 \caption{Multiple-Heap Storage with Ownership} 409 \label{f:MultipleHeapStorageOwnership} 410 \end{figure} 411 412 The main advantage of ownership is preventing heap blowup by returning storage for reuse by the owner heap. 413 Ownership prevents the classical problem where one thread performs allocations from one heap, passes the object to another thread, and the receiving thread deallocates the object to another heap, hence draining the initial heap of storage. 414 As well, allocator-induced passive false-sharing is eliminated because returning an object to its owner heap means it can never be allocated to another thread. 
415 For example, in \VRef[Figure]{f:AllocatorInducedPassiveFalseSharing}, the deallocation by Task$_2$ returns Object$_2$ back to Task$_1$'s heap; 416 hence a subsequent allocation by Task$_2$ cannot return this storage. 417 The disadvantage of ownership is deallocating to another task's heap so heaps are no longer private and require locks to provide safe concurrent access. 418 419 Object ownership can be immediate or delayed, meaning objects may be returned to the owner heap immediately at deallocation or after some delay. 420 A thread may delay the return by storing objects it does not own on a separate free list. 421 Delaying can improve performance by batching objects for return to their owner heap and possibly reallocating these objects if storage runs out on the current heap. 422 However, reallocation can result in passive false-sharing. 423 For example, in \VRef[Figure]{f:AllocatorInducedPassiveFalseSharing}, Object$_2$ may be deallocated to Task$_2$'s heap initially. 424 If Task$_2$ reallocates Object$_2$ before it is returned to its owner heap, then passive false-sharing may occur. 425 426 427 \section{Object Containers} 428 \label{s:ObjectContainers} 429 430 One approach for managing objects places headers/trailers around individual objects, meaning memory adjacent to the object is reserved for object-management information, as shown in \VRef[Figure]{f:ObjectHeaders}. 431 However, this approach leads to poor cache usage, since only a portion of the cache line is holding useful information from the program's perspective. 432 Spatial locality is also negatively affected. 433 While the header and object are together in memory, they are generally not accessed together; 434 \eg the object is accessed by the program when it is allocated, while the header is accessed by the allocator when the object is free. 435 This difference in usage patterns can lead to poor cache locality~\cite{Feng05}. 
436 Additionally, placing headers on individual objects can lead to redundant management information. 437 For example, if a header stores only the object size, then all objects with the same size have identical headers. 438 439 \begin{figure} 440 \centering 441 \subfigure[Object Headers]{ 442 \input{ObjectHeaders} 443 \label{f:ObjectHeaders} 444 } % subfigure 445 \\ 446 \subfigure[Object Container]{ 447 \input{Container} 448 \label{f:ObjectContainer} 449 } % subfigure 450 \caption{Header Placement} 451 \label{f:HeaderPlacement} 452 \end{figure} 453 454 An alternative approach for managing objects factors common header/trailer information to a separate location in memory and organizes associated free storage into blocks called \newterm{object containers} (\newterm{superblocks} in~\cite{Berger00}), as in \VRef[Figure]{f:ObjectContainer}. 455 The header for the container holds information necessary for all objects in the container; 456 a trailer may also be used at the end of the container. 457 Similar to the approach described for thread heaps in \VRef{s:MultipleHeaps}, if container boundaries do not overlap with memory of another container at crucial boundaries and all objects in a container are allocated to the same thread, allocator-induced active false-sharing is avoided. 458 459 The difficulty with object containers lies in finding the object header/trailer given only the object address, since that is normally the only information passed to the deallocation operation. 460 One way to do this is to start containers on aligned addresses in memory, then truncate the lower bits of the object address to obtain the header address (or round up and subtract the trailer size to obtain the trailer address). 461 For example, if an object at address 0xFC28\,EF08 is freed and containers are aligned on 64\,KB (0x0001\,0000) addresses, then the container header is at 0xFC28\,0000. 
462 463 Normally, a container has homogeneous objects of fixed size, with fixed information in the header that applies to all container objects (\eg object size and ownership). 464 This approach greatly reduces internal fragmentation since far fewer headers are required, and potentially increases spatial locality as a cache line or page holds more objects since the objects are closer together due to the lack of headers. 465 However, although similar objects are close spatially within the same container, different sized objects are further apart in separate containers. 466 Depending on the program, this may or may not improve locality. 467 If the program uses several objects from a small number of containers in its working set, then locality is improved since fewer cache lines and pages are required. 468 If the program uses many containers, there is poor locality, as both caching and paging increase. 469 Another drawback is that external fragmentation may be increased since containers reserve space for objects that may never be allocated by the program, \ie there are often multiple containers for each size only partially full. 470 However, external fragmentation can be reduced by using small containers. 471 472 Containers with heterogeneous objects implies different headers describing them, which complicates the problem of locating a specific header solely by an address. 473 A couple of solutions can be used to implement containers with heterogeneous objects. 474 However, the problem with allowing objects of different sizes is that the number of objects, and therefore headers, in a single container is unpredictable. 475 One solution allocates headers at one end of the container, while allocating objects from the other end of the container; 476 when the headers meet the objects, the container is full. 477 Freed objects cannot be split or coalesced since this causes the number of headers to change. 
478 The difficulty in this strategy remains in finding the header for a specific object; 479 in general, a search is necessary to find the object's header among the container headers. 480 A second solution combines the use of container headers and individual object headers. 481 Each object header stores the object's heterogeneous information, such as its size, while the container header stores the homogeneous information, such as the owner when using ownership. 482 This approach allows containers to hold different types of objects, but does not completely separate headers from objects. 483 The benefit of the container in this case is to reduce some redundant information that is factored into the container header. 484 485 In summary, object containers trade off internal fragmentation for external fragmentation by isolating common administration information to remove/reduce internal fragmentation, but at the cost of external fragmentation as some portion of a container may not be used and this portion is unusable for other kinds of allocations. 486 A consequence of this tradeoff is its effect on spatial locality, which can produce positive or negative results depending on program access-patterns. 487 488 489 \subsection{Container Ownership} 490 \label{s:ContainerOwnership} 491 492 Without ownership, objects in a container are deallocated to the heap currently associated with the thread that frees the object. 493 Thus, different objects in a container may be on different heap free-lists (see \VRef[Figure]{f:ContainerNoOwnershipFreelist}). 494 With ownership, all objects in a container belong to the same heap (see \VRef[Figure]{f:ContainerOwnershipFreelist}), so ownership of an object is determined by the container owner. 495 If multiple threads can allocate/free/reallocate adjacent storage in the same heap, all forms of false sharing may occur. 496 Only with the 1:1 model and ownership is active and passive false-sharing avoided (see \VRef{s:Ownership}). 
497 Passive false-sharing may still occur, if delayed ownership is used. 498 499 \begin{figure} 500 \centering 501 \subfigure[No Ownership]{ 502 \input{ContainerNoOwnershipFreelist} 503 \label{f:ContainerNoOwnershipFreelist} 504 } % subfigure 505 \vrule 506 \subfigure[Ownership]{ 507 \input{ContainerOwnershipFreelist} 508 \label{f:ContainerOwnershipFreelist} 509 } % subfigure 510 \caption{Free-list Structure with Container Ownership} 511 \end{figure} 512 513 A fragmented heap has multiple containers that may be partially or completely free. 514 A completely free container can become reserved storage and be reset to allocate objects of a new size. 515 When a heap reaches a threshold of free objects, it moves some free storage to the global heap for reuse to prevent heap blowup. 516 Without ownership, when a heap frees objects to the global heap, individual objects must be passed, and placed on the global-heap's free-list. 517 Containers cannot be freed to the global heap unless completely free, because a partially full container still holds objects allocated to the program. 518 519 When a container changes ownership, the ownership of all objects within it changes as well. 520 Moving a container involves moving all objects on the heap's free-list in that container to the new owner. 521 This approach can reduce contention for the global heap, since each request for objects from the global heap returns a container rather than individual objects. 522 523 Additional restrictions may be applied to the movement of containers to prevent active false-sharing. 524 For example, in \VRef[Figure]{f:ContainerFalseSharing1}, a container being used by Task$_1$ changes ownership, through the global heap. 525 In \VRef[Figure]{f:ContainerFalseSharing2}, when Task$_2$ allocates an object from the newly acquired container it is actively false-sharing even though no objects are passed among threads. 526 Note, once the object is freed by Task$_1$, no more false sharing can occur until the container changes ownership again. 
527 To prevent this form of false sharing, container movement may be restricted to when all objects in the container are free. 528 One implementation approach that increases the freedom to return a free container to the operating system involves allocating containers using a call like @mmap@, which allows memory at an arbitrary address to be returned versus only storage at the end of the contiguous @sbrk@ area. 529 530 \begin{figure} 531 \centering 532 \subfigure[]{ 533 \input{ContainerFalseSharing1} 534 \label{f:ContainerFalseSharing1} 535 } % subfigure 536 \subfigure[]{ 537 \input{ContainerFalseSharing2} 538 \label{f:ContainerFalseSharing2} 539 } % subfigure 540 \caption{Active False-Sharing using Containers} 541 \label{f:ActiveFalseSharingContainers} 542 \end{figure} 543 544 Using containers with ownership increases external fragmentation since a new container for a requested object size must be allocated separately for each thread requesting it. 545 In \VRef[Figure]{f:ExternalFragmentationContainerOwnership}, using object ownership allocates 80\% more space than without ownership. 546 547 \begin{figure} 548 \centering 549 \subfigure[No Ownership]{ 550 \input{ContainerNoOwnership} 551 } % subfigure 552 \\ 553 \subfigure[Ownership]{ 554 \input{ContainerOwnership} 555 } % subfigure 556 \caption{External Fragmentation with Container Ownership} 557 \label{f:ExternalFragmentationContainerOwnership} 558 \end{figure} 559 560 561 \subsection{Container Size} 562 \label{s:ContainerSize} 563 564 One way to control the external fragmentation caused by allocating a large container for a small number of requested objects is to vary the size of the container. 565 As described earlier, container boundaries need to be aligned on addresses that are a power of two to allow easy location of the header (by truncating lower bits). 566 Aligning containers in this manner also determines the size of the container. 
567 However, the size of the container has different implications for the allocator. 568 569 The larger the container, the fewer containers are needed, and hence, the fewer headers need to be maintained in memory, improving both internal fragmentation and potentially performance. 570 However, with more objects in a container, there may be more objects that are unallocated, increasing external fragmentation. 571 With smaller containers, not only are there more containers, but a second new problem arises where objects are larger than the container. 572 In general, large objects, \eg greater than 64\,KB, are allocated directly from the operating system and are returned immediately to the operating system to reduce long-term external fragmentation. 573 If the container size is small, \eg 1\,KB, then a 1.5\,KB object is treated as a large object, which is likely to be inappropriate. 574 Ideally, it is best to use smaller containers for smaller objects, and larger containers for medium objects, which leads to the issue of locating the container header. 575 576 In order to find the container header when using different sized containers, a super container is used (see~\VRef[Figure]{f:SuperContainers}). 577 The super container spans several containers, contains a header with information for finding each container header, and starts on an aligned address. 578 Super-container headers are found using the same method used to find container headers by dropping the lower bits of an object address. 579 The containers within a super container may be different sizes or all the same size. 580 If the containers in the super container are different sizes, then the super-container header must be searched to determine the specific container for an object given its address. 581 If all containers in the super container are the same size, \eg 16KB, then a specific container header can be found by a simple calculation. 
582 The free space at the end of a super container is used to allocate new containers. 583 584 \begin{figure} 585 \centering 586 \input{SuperContainers} 587 % \includegraphics{diagrams/supercontainer.eps} 588 \caption{Super Containers} 589 \label{f:SuperContainers} 590 \end{figure} 591 592 Minimal internal and external fragmentation is achieved by having as few containers as possible, each being as full as possible. 593 It is also possible to achieve additional benefit by using larger containers for popular small sizes, as it reduces the number of containers with associated headers. 594 However, this approach assumes it is possible for an allocator to determine in advance which sizes are popular. 595 Keeping statistics on requested sizes allows the allocator to make a dynamic decision about which sizes are popular. 596 For example, after receiving a number of allocation requests for a particular size, that size is considered a popular request size and larger containers are allocated for that size. 597 If the decision is incorrect, larger containers than necessary are allocated that remain mostly unused. 598 A programmer may be able to inform the allocator about popular object sizes, using a mechanism like @mallopt@, in order to select an appropriate container size for each object size. 599 600 601 \subsection{Container Free-Lists} 602 \label{s:containersfreelists} 603 604 The container header allows an alternate approach for managing the heap's free-list. 605 Rather than maintain a global free-list throughout the heap (see~\VRef[Figure]{f:GlobalFreeListAmongContainers}), the containers are linked through their headers and only the local free objects within a container are linked together (see~\VRef[Figure]{f:LocalFreeListWithinContainers}). 606 Note, maintaining free lists within a container assumes all free objects in the container are associated with the same heap; 607 thus, this approach only applies to containers with ownership. 
608 609 This alternate free-list approach can greatly reduce the complexity of moving all freed objects belonging to a container to another heap. 610 To move a container using a global free-list, as in \VRef[Figure]{f:GlobalFreeListAmongContainers}, the free list is first searched to find all objects within the container. 611 Each object is then removed from the free list and linked together to form a local free-list for the move to the new heap. 612 With local free-lists in containers, as in \VRef[Figure]{f:LocalFreeListWithinContainers}, the container is simply removed from one heap's free list and placed on the new heap's free list. 613 Thus, when using local free-lists, the operation of moving containers is reduced from $O(N)$ to $O(1)$. 614 The cost is adding information to a header, which increases the header size, and therefore internal fragmentation. 615 616 \begin{figure} 617 \centering 618 \subfigure[Global Free-List Among Containers]{ 619 \input{FreeListAmongContainers} 620 \label{f:GlobalFreeListAmongContainers} 621 } % subfigure 622 \hspace{0.25in} 623 \subfigure[Local Free-List Within Containers]{ 624 \input{FreeListWithinContainers} 625 \label{f:LocalFreeListWithinContainers} 626 } % subfigure 627 \caption{Container Free-List Structure} 628 \label{f:ContainerFreeListStructure} 629 \end{figure} 630 631 When all objects in the container are the same size, a single free-list is sufficient. 632 However, when objects in the container are different size, the header needs a free list for each size class when using a binning allocation algorithm, which can be a significant increase in the container-header size. 633 The alternative is to use a different allocation algorithm with a single free-list, such as a sequential-fit allocation-algorithm. 
634 635 636 \subsection{Hybrid Private/Public Heap} 637 \label{s:HybridPrivatePublicHeap} 638 639 Section~\Vref{s:Ownership} discusses advantages and disadvantages of public heaps (T:H model and with ownership) and private heaps (thread heaps with ownership). 640 For thread heaps with ownership, it is possible to combine these approaches into a hybrid approach with both private and public heaps (see~\VRef[Figure]{f:HybridPrivatePublicHeap}). 641 The main goal of the hybrid approach is to eliminate locking on thread-local allocation/deallocation, while providing ownership to prevent heap blowup. 642 In the hybrid approach, a task first allocates from its private heap and second from its public heap if no free memory exists in the private heap. 643 Similarly, a task first deallocates an object to its private heap, and second to the public heap. 644 Both private and public heaps can allocate/deallocate to/from the global heap if there is no free memory or excess free memory, although an implementation may choose to funnel all interaction with the global heap through one of the heaps. 645 Note, deallocation from the private to the public (dashed line) is unlikely because there are no obvious advantages unless the public heap provides the only interface to the global heap. 646 Finally, when a task frees an object it does not own, the object is either freed immediately to its owner's public heap or put in the freeing task's private heap for delayed ownership, which allows the freeing task to temporarily reuse an object before returning it to its owner or batch objects for an owner heap into a single return. 
647 648 \begin{figure} 649 \centering 650 \input{PrivatePublicHeaps.pstex_t} 651 \caption{Hybrid Private/Public Heap for Per-thread Heaps} 652 \label{f:HybridPrivatePublicHeap} 653 % \vspace{10pt} 654 % \input{RemoteFreeList.pstex_t} 655 % \caption{Remote Free-List} 656 % \label{f:RemoteFreeList} 657 \end{figure} 658 659 As mentioned, an implementation may have only one heap deal with the global heap, so the other heap can be simplified. 660 For example, if only the private heap interacts with the global heap, the public heap can be reduced to a lock-protected free-list of objects deallocated by other threads due to ownership, called a \newterm{remote free-list}. 661 To avoid heap blowup, the private heap allocates from the remote free-list when it reaches some threshold or it has no free storage. 662 Since the remote free-list is occasionally cleared during an allocation, this adds to that cost. 663 Clearing the remote free-list is $O(1)$ if the list can simply be added to the end of the private-heap's free-list, or $O(N)$ if some action must be performed for each freed object. 664 665 If only the public heap interacts with other threads and the global heap, the private heap can handle thread-local allocations and deallocations without locking. 666 In this scenario, the private heap must deallocate storage after reaching a certain threshold to the public heap (and then eventually to the global heap from the public heap) or heap blowup can occur. 667 If the public heap does the major management, the private heap can be simplified to provide high-performance thread-local allocations and deallocations. 668 669 The main disadvantage of each thread having both a private and public heap is the complexity of managing two heaps and their interactions in an allocator. 670 Interestingly, heap implementations often focus on either a private or public heap, giving the impression a single versus a hybrid approach is being used. 
671 In many cases, the hybrid approach is actually being used, but the simpler heap is just folded into the complex heap, even though the operations logically belong in separate heaps. 672 For example, a remote free-list is actually a simple public-heap, but may be implemented as an integral component of the complex private-heap in an allocator, masking the presence of a hybrid approach. 673 674 675 \section{Allocation Buffer} 676 \label{s:AllocationBuffer} 677 678 An allocation buffer is reserved memory (see~\VRef{s:AllocatorComponents}) not yet allocated to the program, and is used for allocating objects when the free list is empty. 679 That is, rather than requesting new storage for a single object, an entire buffer is requested from which multiple objects are allocated later. 680 Both thread and global heaps may use an allocation buffer, resulting in allocation from the buffer before requesting objects (containers) from the global heap or operating system, respectively. 681 The allocation buffer reduces contention and the number of global/operating-system calls. 682 For coalescing, a buffer is split into smaller objects by allocations, and recomposed into larger buffer areas during deallocations. 683 684 Allocation buffers are useful initially when there are no freed objects in a heap because many allocations usually occur when a thread starts. 685 Furthermore, to prevent heap blowup, objects should be reused before allocating a new allocation buffer. 686 Thus, allocation buffers are often allocated more frequently at program/thread start, and then their use often diminishes. 687 688 Using an allocation buffer with a thread heap avoids active false-sharing, since all objects in the allocation buffer are allocated to the same thread. 689 For example, if all objects sharing a cache line come from the same allocation buffer, then these objects are allocated to the same thread, avoiding active false-sharing. 
690 Active false-sharing may still occur if objects are freed to the global heap and reused by another heap. 691 692 Allocation buffers may increase external fragmentation, since some memory in the allocation buffer may never be allocated. 693 A smaller allocation buffer reduces the amount of external fragmentation, but increases the number of calls to the global heap or operating system. 694 The allocation buffer also slightly increases internal fragmentation, since a pointer is necessary to locate the next free object in the buffer. 695 696 The unused part of a container, neither allocated or freed, is an allocation buffer. 697 For example, when a container is created, rather than placing all objects within the container on the free list, the objects form an allocation buffer and are allocated from the buffer as allocation requests are made. 698 This lazy method of constructing objects is beneficial in terms of paging and caching. 699 For example, although an entire container, possibly spanning several pages, is allocated from the operating system, only a small part of the container is used in the working set of the allocator, reducing the number of pages and cache lines that are brought into higher levels of cache. 700 701 702 \section{Lock-Free Operations} 703 \label{s:LockFreeOperations} 704 705 A lock-free algorithm guarantees safe concurrent-access to a data structure, so that at least one thread can make progress in the system, but an individual task has no bound to execution, and hence, may starve~\cite[pp.~745--746]{Herlihy93}. 706 % A wait-free algorithm puts a finite bound on the number of steps any thread takes to complete an operation, so an individual task cannot starve 707 Lock-free operations can be used in an allocator to reduce or eliminate the use of locks. 708 Locks are a problem for high contention or if the thread holding the lock is preempted and other threads attempt to use that lock. 
709 With respect to the heap, these situations are unlikely unless all threads makes extremely high use of dynamic-memory allocation, which can be an indication of poor design. 710 Nevertheless, lock-free algorithms can reduce the number of context switches, since a thread does not yield/block while waiting for a lock; 711 on the other hand, a thread may busy-wait for an unbounded period. 712 Finally, lock-free implementations have greater complexity and hardware dependency. 713 Lock-free algorithms can be applied most easily to simple free-lists, \eg remote free-list, to allow lock-free insertion and removal from the head of a stack. 714 Implementing lock-free operations for more complex data-structures (queue~\cite{Valois94}/deque~\cite{Sundell08}) is more complex. 715 Michael~\cite{Michael04} and Gidenstam \etal \cite{Gidenstam05} have created lock-free variations of the Hoard allocator. 716 2 717 3 718 \noindent … … 23 738 ==================== 24 739 25 \section{Background}26 27 740 % FIXME: cite wasik 28 741 \cite{wasik.thesis} 29 742 30 \s ubsection{Memory Allocation}31 With dynamic allocation being an important feature of C, there are many stand alone memory allocators that have been designed for different purposes. For this thesis, we chose 7 of the most popular and widely used memory allocators.743 \section{Existing Memory Allocators} 744 With dynamic allocation being an important feature of C, there are many stand-alone memory allocators that have been designed for different purposes. For this thesis, we chose 7 of the most popular and widely used memory allocators. 32 745 33 746 \paragraph{dlmalloc} … … 35 748 36 749 \paragraph{hoard} 37 Hoard (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and using a heap layer framework. It has per-thre d heaps that have thread-local free-lists, and a gloabl shared heap. 
(FIX ME: cite wasik)750 Hoard (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and using a heap layer framework. It has per-thread heaps that have thread-local free-lists, and a global shared heap. (FIX ME: cite wasik) 38 751 39 752 \paragraph{jemalloc} … … 44 757 45 758 \paragraph{rpmalloc} 46 rpmalloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses per-thread heap. Each heap has multiple size-classes and each size-c alss contains memory regions of the relevant size.759 rpmalloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses per-thread heap. Each heap has multiple size-classes and each size-class contains memory regions of the relevant size. 47 760 48 761 \paragraph{tbb malloc} … … 51 764 \paragraph{tc malloc} 52 765 tcmalloc (FIX ME: cite allocator) is a thread-safe allocator. It uses per-thread cache to store free objects that prevents contention on shared resources in multi-threaded application. A central free-list is used to refill per-thread cache when it gets empty. 53 54 \subsection{Benchmarks}55 There are multiple benchmarks that are built individually and evaluate different aspects of a memory allocator. But, there is not standard set of benchamrks that can be used to evaluate multiple aspects of memory allocators.56 57 \paragraph{threadtest}58 (FIX ME: cite benchmark and hoard) Each thread repeatedly allocates and then deallocates 100,000 objects. Runtime of the benchmark evaluates its efficiency.59 60 \paragraph{shbench}61 (FIX ME: cite benchmark and hoard) Each thread allocates and randomly frees a number of random-sized objects. It is a stress test that also uses runtime to determine efficiency of the allocator.62 63 \paragraph{larson}64 (FIX ME: cite benchmark and hoard) Larson simulates a server environment. Multiple threads are created where each thread allocator and free a number of objects within a size range. 
Some objects are passed from threads to the child threads to free. It caluculates memory operations per second as an indicator of memory allocator's performance. -
doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex
rf5a51db rcc7bbe6 41 41 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 42 43 44 \section{Benchmarks} 45 There are multiple benchmarks that are built individually and evaluate different aspects of a memory allocator. But, there is not standard set of benchamrks that can be used to evaluate multiple aspects of memory allocators. 46 47 \paragraph{threadtest} 48 (FIX ME: cite benchmark and hoard) Each thread repeatedly allocates and then deallocates 100,000 objects. Runtime of the benchmark evaluates its efficiency. 49 50 \paragraph{shbench} 51 (FIX ME: cite benchmark and hoard) Each thread allocates and randomly frees a number of random-sized objects. It is a stress test that also uses runtime to determine efficiency of the allocator. 52 53 \paragraph{larson} 54 (FIX ME: cite benchmark and hoard) Larson simulates a server environment. Multiple threads are created where each thread allocator and free a number of objects within a size range. Some objects are passed from threads to the child threads to free. It caluculates memory operations per second as an indicator of memory allocator's performance. 55 56 43 57 \section{Performance Matrices of Memory Allocators} 44 58 -
doc/theses/mubeen_zulfiqar_MMath/intro.tex
rf5a51db rcc7bbe6 1 1 \chapter{Introduction} 2 2 3 % Shared-memory multi-processor computers are ubiquitous and important for improving application performance. 4 % However, writing programs that take advantage of multiple processors is not an easy task~\cite{Alexandrescu01b}, \eg shared resources can become a bottleneck when increasing (scaling) threads. 5 % One crucial shared resource is program memory, since it is used by all threads in a shared-memory concurrent-program~\cite{Berger00}. 6 % Therefore, providing high-performance, scalable memory-management is important for virtually all shared-memory multi-threaded programs. 7 8 \vspace*{-23pt} 9 Memory management takes a sequence of program generated allocation/deallocation requests and attempts to satisfy them within a fixed-sized block of memory while minimizing the total amount of memory used. 10 A general-purpose dynamic-allocation algorithm cannot anticipate future allocation requests so its output is rarely optimal. 11 However, memory allocators do take advantage of regularities in allocation patterns for typical programs to produce excellent results, both in time and space (similar to LRU paging). 12 In general, allocators use a number of similar techniques, each optimizing specific allocation patterns. 13 Nevertheless, memory allocators are a series of compromises, occasionally with some static or dynamic tuning parameters to optimize specific program-request patterns. 14 15 16 \section{Memory Structure} 17 \label{s:MemoryStructure} 18 19 \VRef[Figure]{f:ProgramAddressSpace} shows the typical layout of a program's address space divided into the following zones (right to left): static code/data, dynamic allocation, dynamic code/data, and stack, with free memory surrounding the dynamic code/data~\cite{memlayout}. 20 Static code and data are placed into memory at load time from the executable and are fixed-sized at runtime. 
21 Dynamic-allocation memory starts empty and grows/shrinks as the program dynamically creates/deletes variables with independent lifetime. 22 The programming-language's runtime manages this area, where management complexity is a function of the mechanism for deleting variables. 23 Dynamic code/data memory is managed by the dynamic loader for libraries loaded at runtime, which is complex especially in a multi-threaded program~\cite{Huang06}. 24 However, changes to the dynamic code/data space are typically infrequent, many occurring at program startup, and are largely outside of a program's control. 25 Stack memory is managed by the program call-mechanism using a simple LIFO technique, which works well for sequential programs. 26 For multi-threaded programs (and coroutines), a new stack is created for each thread; 27 these thread stacks are commonly created in dynamic-allocation memory. 28 This thesis focuses on management of the dynamic-allocation memory. 29 30 \begin{figure} 31 \centering 32 \input{AddressSpace} 33 \vspace{-5pt} 34 \caption{Program Address Space Divided into Zones} 35 \label{f:ProgramAddressSpace} 36 \end{figure} 37 38 39 \section{Dynamic Memory-Management} 40 \label{s:DynamicMemoryManagement} 41 42 Modern programming languages manage dynamic-allocation memory in different ways. 43 Some languages, such as Lisp~\cite{CommonLisp}, Java~\cite{Java}, Haskell~\cite{Haskell}, Go~\cite{Go}, provide explicit allocation but \emph{implicit} deallocation of data through garbage collection~\cite{Wilson92}. 44 In general, garbage collection supports memory compaction, where dynamic (live) data is moved during runtime to better utilize space. 45 However, moving data requires finding pointers to it and updating them to reflect new data locations. 46 Programming languages such as C~\cite{C}, \CC~\cite{C++}, and Rust~\cite{Rust} provide the programmer with explicit allocation \emph{and} deallocation of data. 
47 These languages cannot find and subsequently move live data because pointers can be created to any storage zone, including internal components of allocated objects, and may contain temporary invalid values generated by pointer arithmetic. 48 Attempts have been made to perform quasi garbage collection in C/\CC~\cite{Boehm88}, but it is a compromise. 49 This thesis only examines dynamic memory-management with \emph{explicit} deallocation. 50 While garbage collection and compaction are not part of this work, many of the results are applicable to the allocation phase in any memory-management approach. 51 52 Most programs use a general-purpose allocator, often the one provided implicitly by the programming-language's runtime. 53 When this allocator proves inadequate, programmers often write specialized allocators for specific needs. 54 C and \CC allow easy replacement of the default memory allocator with an alternative specialized or general-purpose memory-allocator. 55 (Jikes RVM MMTk~\cite{MMTk} provides a similar generalization for the Java virtual machine.) 56 However, high-performance memory-allocators for kernel and user multi-threaded programs are still being designed and improved. 57 For this reason, several alternative general-purpose allocators have been written for C/\CC with the goal of scaling in a multi-threaded program~\cite{Berger00,mtmalloc,streamflow,tcmalloc}. 58 This thesis examines the design of high-performance allocators for use by kernel and user multi-threaded applications written in C/\CC. 
59 60 61 \section{Contributions} 62 \label{s:Contributions} 63 64 This work provides the following contributions in the area of concurrent dynamic allocation: 65 \begin{enumerate}[leftmargin=*] 66 \item 67 Implementation of a new stand-lone concurrent low-latency memory-allocator ($\approx$1,200 lines of code) for C/\CC programs using kernel threads (1:1 threading), and specialized versions of the allocator for programming languages \uC and \CFA using user-level threads running over multiple kernel threads (M:N threading). 68 69 \item 70 Adopt returning of @nullptr@ for a zero-sized allocation, rather than an actual memory address, both of which can be passed to @free@. 71 72 \item 73 Extended the standard C heap functionality by preserving with each allocation its original request size versus the amount allocated, if an allocation is zero fill, and the allocation alignment. 74 75 \item 76 Use the zero fill and alignment as \emph{sticky} properties for @realloc@, to realign existing storage, or preserve existing zero-fill and alignment when storage is copied. 77 Without this extension, it is unsafe to @realloc@ storage initially allocated with zero-fill/alignment as these properties are not preserved when copying. 78 This silent generation of a problem is unintuitive to programmers and difficult to locate because it is transient. 79 80 \item 81 Provide additional heap operations to complete programmer expectation with respect to accessing different allocation properties. 82 \begin{itemize} 83 \item 84 @resize( oaddr, size )@ re-purpose an old allocation for a new type \emph{without} preserving fill or alignment. 85 \item 86 @resize( oaddr, alignment, size )@ re-purpose an old allocation with new alignment but \emph{without} preserving fill. 87 \item 88 @realloc( oaddr, alignment, size )@ same as previous @realloc@ but adding or changing alignment. 89 \item 90 @aalloc( dim, elemSize )@ same as @calloc@ except memory is \emph{not} zero filled. 
91 \item 92 @amemalign( alignment, dim, elemSize )@ same as @aalloc@ with memory alignment. 93 \item 94 @cmemalign( alignment, dim, elemSize )@ same as @calloc@ with memory alignment. 95 \end{itemize} 96 97 \item 98 Provide additional query operations to access information about an allocation: 99 \begin{itemize} 100 \item 101 @malloc_alignment( addr )@ returns the alignment of the allocation pointed-to by @addr@. 102 If the allocation is not aligned or @addr@ is the @nulladdr@, the minimal alignment is returned. 103 \item 104 @malloc_zero_fill( addr )@ returns a boolean result indicating if the memory pointed-to by @addr@ is allocated with zero fill, e.g., by @calloc@/@cmemalign@. 105 \item 106 @malloc_size( addr )@ returns the size of the memory allocation pointed-to by @addr@. 107 \item 108 @malloc_usable_size( addr )@ returns the usable size of the memory pointed-to by @addr@, i.e., the bin size containing the allocation, where @malloc_size( addr )@ $\le$ @malloc_usable_size( addr )@. 109 \end{itemize} 110 111 \item 112 Provide mostly contention-free allocation and free operations via a heap-per-kernel-thread implementation. 113 114 \item 115 Provide complete, fast, and contention-free allocation statistics to help understand program behaviour: 116 \begin{itemize} 117 \item 118 @malloc_stats()@ print memory-allocation statistics on the file-descriptor set by @malloc_stats_fd@. 119 \item 120 @malloc_info( options, stream )@ print memory-allocation statistics as an XML string on the specified file-descriptor set by @malloc_stats_fd@. 121 \item 122 @malloc_stats_fd( fd )@ set file-descriptor number for printing memory-allocation statistics (default @STDERR_FILENO@). 123 This file descriptor is used implicitly by @malloc_stats@ and @malloc_info@. 124 \end{itemize} 125 126 \item 127 Provide extensive runtime checks to valid allocation operations and identify the amount of unfreed storage at program termination. 
128 129 \item 130 Build 4 different versions of the allocator: 131 \begin{itemize} 132 \item 133 static or dynamic linking 134 \item 135 statistic/debugging (testing) or no statistic/debugging (performance) 136 \end{itemize} 137 A program may link to any of these 4 versions of the allocator often without recompilation. 138 (It is possible to separate statistics and debugging, giving 8 different versions.) 139 140 \item 141 A micro-benchmark test-suite for comparing allocators rather than relying on a suite of arbitrary programs. 142 These micro-benchmarks have adjustment knobs to simulate allocation patterns hard-coded into arbitrary test programs 143 \end{enumerate} 144 145 \begin{comment} 3 146 \noindent 4 147 ==================== … … 26 169 27 170 \section{Introduction} 28 Dynamic memory allocation and management is one of the core features of C. It gives programmer the freedom to allocate, free, use, and manage dynamic memory himself. The programmer is not given the complete control of the dynamic memory management instead an interface of memory allocator is given to the progr mmer that can be used to allocate/free dynamic memory for the application's use.29 30 Memory allocator is a layer between th rprogrammer and the system. Allocator gets dynamic memory from the system in heap/mmap area of application storage and manages it for programmer's use.31 32 GNU C Library (FIX ME: cite this) provides an interchangeable memory allocator that can be replaced with a custom memory allocator that supports required features and fulfills application's custom needs. It also allows others to innovate in memory allocation and design their own memory allocator. GNU C Library has set guidelines that should be followed when designing a stand alone memory allocator. GNU C Library requires new memory allocators to have atlease following set of functions in their allocator's interface:171 Dynamic memory allocation and management is one of the core features of C. 
It gives the programmer the freedom to allocate, free, use, and manage dynamic memory himself. The programmer is not given the complete control of the dynamic memory management; instead, an interface of memory allocator is given to the programmer that can be used to allocate/free dynamic memory for the application's use. 172 173 Memory allocator is a layer between the programmer and the system. Allocator gets dynamic memory from the system in heap/mmap area of application storage and manages it for programmer's use. 174 175 GNU C Library (FIX ME: cite this) provides an interchangeable memory allocator that can be replaced with a custom memory allocator that supports required features and fulfills application's custom needs. It also allows others to innovate in memory allocation and design their own memory allocator. GNU C Library has set guidelines that should be followed when designing a stand-alone memory allocator. GNU C Library requires new memory allocators to have at least the following set of functions in their allocator's interface: 33 176 34 177 \begin{itemize} … … 43 186 \end{itemize} 44 187 45 In addition to the above functions, GNU C Library also provides some more functions to increase the usability of the dynamic memory allocator. Most stand-alone allocators also provide all or some of the above additional functions. 46 189 47 190 \begin{itemize} … … 60 203 \end{itemize} 61 204 62 With the rise of concurrent applications, memory allocators should be able to fulfill dynamic memory requests from multiple threads in parallel without causing contention on shared resources. 
There needs to be a set of a standard benchmarks that can be used to evaluate an allocator's performance in different scenerios.205 With the rise of concurrent applications, memory allocators should be able to fulfill dynamic memory requests from multiple threads in parallel without causing contention on shared resources. There needs to be a set of standard benchmarks that can be used to evaluate an allocator's performance in different scenarios. 63 206 64 207 \section{Research Objectives} … … 69 212 Design a lightweight concurrent memory allocator with added features and usability that are currently not present in the other memory allocators. 70 213 \item 71 Design a suite of benchmarks to evalute multiple aspects of a memory allocator.214 Design a suite of benchmarks to evaluate multiple aspects of a memory allocator. 72 215 \end{itemize} 73 216 74 217 \section{An outline of the thesis} 75 218 LAST FIX ME: add outline at the end 219 \end{comment} -
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.bib
rf5a51db rcc7bbe6 34 34 year = "2008" 35 35 } 36 37 @article{Sleator85, 38 author = {Sleator, Daniel Dominic and Tarjan, Robert Endre}, 39 title = {Self-Adjusting Binary Search Trees}, 40 journal = jacm, 41 volume = 32, 42 number = 3, 43 year = 1985, 44 issn = {0004-5411}, 45 pages = {652-686}, 46 doi = {http://doi.acm.org.proxy.lib.uwaterloo.ca/10.1145/3828.3835}, 47 address = {New York, NY, USA}, 48 } 49 50 @article{Berger00, 51 author = {Emery D. Berger and Kathryn S. McKinley and Robert D. Blumofe and Paul R. Wilson}, 52 title = {Hoard: A Scalable Memory Allocator for Multithreaded Applications}, 53 booktitle = {International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX)}, 54 journal = sigplan, 55 volume = 35, 56 number = 11, 57 month = nov, 58 year = 2000, 59 pages = {117-128}, 60 note = {International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX)}, 61 } 62 63 @inproceedings{berger02reconsidering, 64 author = {Emery D. Berger and Benjamin G. Zorn and Kathryn S. 
McKinley}, 65 title = {Reconsidering Custom Memory Allocation}, 66 booktitle = {Proceedings of the 17th ACM SIGPLAN Conference on Object-Oriented Programming: Systems, Languages, and Applications (OOPSLA) 2002}, 67 month = nov, 68 year = 2002, 69 location = {Seattle, Washington, USA}, 70 publisher = {ACM}, 71 address = {New York, NY, USA}, 72 } 73 74 @article{larson99memory, 75 author = {Per-{\AA}ke Larson and Murali Krishnan}, 76 title = {Memory Allocation for Long-Running Server Applications}, 77 journal = sigplan, 78 volume = 34, 79 number = 3, 80 pages = {176-185}, 81 year = 1999, 82 url = {http://citeseer.ist.psu.edu/article/larson98memory.html} 83 } 84 85 @techreport{gidpt04, 86 author = {Anders Gidenstam and Marina Papatriantafilou and Philippas Tsigas}, 87 title = {Allocating Memory in a Lock-Free Manner}, 88 number = {2004-04}, 89 institution = {Computing Science}, 90 address = {Chalmers University of Technology}, 91 year = 2004, 92 url = {http://citeseer.ist.psu.edu/gidenstam04allocating.html} 93 } 94 95 @phdthesis{berger02thesis, 96 author = {Emery Berger}, 97 title = {Memory Management for High-Performance Applications}, 98 school = {The University of Texas at Austin}, 99 year = 2002, 100 month = aug, 101 url = {http://citeseer.ist.psu.edu/article/berger02memory.html} 102 } 103 104 @misc{sgimisc, 105 author = {SGI}, 106 title = {The Standard Template Library for {C++}}, 107 note = {\textsf{www.sgi.com/\-tech/\-stl/\-Allocators.html}}, 108 } 109 110 @misc{dlmalloc, 111 author = {Doug Lea}, 112 title = {dlmalloc version 2.8.4}, 113 month = may, 114 year = 2009, 115 note = {\textsf{ftp://g.oswego.edu/\-pub/\-misc/\-malloc.c}}, 116 } 117 118 @misc{ptmalloc2, 119 author = {Wolfram Gloger}, 120 title = {ptmalloc version 2}, 121 month = jun, 122 year = 2006, 123 note = {\textsf{http://www.malloc.de/\-malloc/\-ptmalloc2-current.tar.gz}}, 124 } 125 126 @misc{nedmalloc, 127 author = {Niall Douglas}, 128 title = {nedmalloc version 1.06 Beta}, 129 month = jan, 130 
year = 2010, 131 note = {\textsf{http://\-prdownloads.\-sourceforge.\-net/\-nedmalloc/\-nedmalloc\_v1.06beta1\_svn1151.zip}}, 132 } 133 134 @misc{hoard, 135 author = {Emery D. Berger}, 136 title = {hoard version 3.8}, 137 month = nov, 138 year = 2009, 139 note = {\textsf{http://www.cs.umass.edu/\-$\sim$emery/\-hoard/\-hoard-3.8/\-source/hoard-38.tar.gz}}, 140 } 141 142 @comment{mtmalloc, 143 author = {Greg Nakhimovsky}, 144 title = {Improving Scalability of Multithreaded Dynamic Memory Allocation}, 145 journal = {Dr. Dobb's}, 146 month = jul, 147 year = 2001, 148 url = {http://www.ddj.com/mobile/184404685?pgno=1} 149 } 150 151 @misc{mtmalloc, 152 key = {mtmalloc}, 153 title = {mtmalloc.c}, 154 year = 2009, 155 note = {\textsf{http://src.opensolaris.org/\-source/\-xref/\-onnv/\-onnv-gate/\-usr/\-src/\-lib/\-libmtmalloc/\-common/\-mtmalloc.c}}, 156 } 157 158 @misc{tcmalloc, 159 author = {Sanjay Ghemawat and Paul Menage}, 160 title = {tcmalloc version 1.5}, 161 month = jan, 162 year = 2010, 163 note = {\textsf{http://google-perftools.\-googlecode.\-com/\-files/\-google-perftools-1.5.tar.gz}}, 164 } 165 166 @inproceedings{streamflow, 167 author = {Scott Schneider and Christos D. Antonopoulos and Dimitrios S. Nikolopoulos}, 168 title = {Scalable Locality-Conscious Multithreaded Memory Allocation}, 169 booktitle = {International Symposium on Memory Management (ISMM'06)}, 170 month = jun, 171 year = 2006, 172 pages = {84-94}, 173 location = {Ottawa, Ontario, Canada}, 174 publisher = {ACM}, 175 address = {New York, NY, USA}, 176 } 177 178 @misc{streamflowweb, 179 author = {Scott Schneider and Christos Antonopoulos and Dimitrios Nikolopoulos}, 180 title = {Streamflow}, 181 note = {\textsf{http://people.cs.vt.edu/\-\char`\~scschnei/\-streamflow}}, 182 } 183 184 @inproceedings{Blumofe94, 185 author = {R. Blumofe and C. 
Leiserson}, 186 title = {Scheduling Multithreaded Computations by Work Stealing}, 187 booktitle = {Proceedings of the 35th Annual Symposium on Foundations of Computer Science, Santa Fe, New Mexico.}, 188 pages = {356-368}, 189 year = 1994, 190 month = nov, 191 url = {http://citeseer.ist.psu.edu/article/blumofe94scheduling.html} 192 } 193 194 @article{Johnstone99, 195 author = {Mark S. Johnstone and Paul R. Wilson}, 196 title = {The Memory Fragmentation Problem: Solved?}, 197 journal = sigplan, 198 volume = 34, 199 number = 3, 200 pages = {26-36}, 201 year = 1999, 202 } 203 204 @inproceedings{Grunwald93, 205 author = {Dirk Grunwald and Benjamin G. Zorn and Robert Henderson}, 206 title = {Improving the Cache Locality of Memory Allocation}, 207 booktitle = {{SIGPLAN} Conference on Programming Language Design and Implementation}, 208 pages = {177-186}, 209 year = 1993, 210 url = {http://citeseer.ist.psu.edu/grunwald93improving.html} 211 } 212 213 @inproceedings{Wilson95, 214 author = {Wilson, Paul R. and Johnstone, Mark S. and Neely, Michael and Boles, David}, 215 title = {Dynamic Storage Allocation: A Survey and Critical Review}, 216 booktitle = {Proc. Int. Workshop on Memory Management}, 217 address = {Kinross Scotland, UK}, 218 year = 1995, 219 url = {http://citeseer.ist.psu.edu/wilson95dynamic.html} 220 } 221 222 @inproceedings{Siebert00, 223 author = {Fridtjof Siebert}, 224 title = {Eliminating External Fragmentation in a Non-moving Garbage Collector for Java}, 225 booktitle = {CASES '00: Proceedings of the 2000 international conference on Compilers, architecture, and synthesis for embedded systems}, 226 year = 2000, 227 isbn = {1-58113-338-3}, 228 pages = {9-17}, 229 location = {San Jose, California, United States}, 230 doi = {http://doi.acm.org.proxy.lib.uwaterloo.ca/10.1145/354880.354883}, 231 publisher = {ACM Press}, 232 address = {New York, NY, USA} 233 } 234 235 @inproceedings{Lim98, 236 author = {Tian F. Lim and Przemyslaw Pardyak and Brian N. 
Bershad}, 237 title = {A Memory-Efficient Real-Time Non-copying Garbage Collector}, 238 booktitle = {ISMM '98: Proceedings of the 1st international symposium on Memory management}, 239 year = 1998, 240 isbn = {1-58113-114-3}, 241 pages = {118-129}, 242 location = {Vancouver, British Columbia, Canada}, 243 doi = {http://doi.acm.org.proxy.lib.uwaterloo.ca/10.1145/286860.286873}, 244 publisher = {ACM Press}, 245 address = {New York, NY, USA} 246 } 247 248 @article{Chang01, 249 author = {J. Morris Chang and Woo Hyong Lee and Witawas Srisa-an}, 250 title = {A Study of the Allocation Behavior of {C++} Programs}, 251 journal = {J. Syst. Softw.}, 252 volume = 57, 253 number = 2, 254 year = 2001, 255 issn = {0164-1212}, 256 pages = {107-118}, 257 doi = {http://dx.doi.org/10.1016/S0164-1212(00)00122-9}, 258 publisher = {Elsevier Science Inc.}, 259 address = {New York, NY, USA} 260 } 261 262 @article{Herlihy93, 263 author = {Maurice Herlihy}, 264 title = {A Methodology for Implementing Highly Concurrent Data Objects}, 265 journal = toplas, 266 volume = 15, 267 number = 5, 268 year = 1993, 269 issn = {0164-0925}, 270 pages = {745-770}, 271 doi = {http://doi.acm.org.proxy.lib.uwaterloo.ca/10.1145/161468.161469}, 272 publisher = {ACM Press}, 273 address = {New York, NY, USA} 274 } 275 276 @article{Denning05, 277 author = {Peter J. Denning}, 278 title = {The Locality Principle}, 279 journal = cacm, 280 volume = 48, 281 number = 7, 282 year = 2005, 283 issn = {0001-0782}, 284 pages = {19-24}, 285 doi = {http://doi.acm.org.proxy.lib.uwaterloo.ca/10.1145/1070838.1070856}, 286 publisher = {ACM Press}, 287 address = {New York, NY, USA} 288 } 289 290 @misc{wilson-locality, 291 author = {Paul R. Wilson}, 292 title = {Locality of Reference, Patterns in Program Behavior, Memory Management, and Memory Hierarchies}, 293 url = {http://citeseer.ist.psu.edu/337869.html} 294 } 295 296 @inproceedings{Feng05, 297 author = {Yi Feng and Emery D. 
Berger}, 298 title = {A Locality-Improving Dynamic Memory Allocator}, 299 booktitle = {Proceedings of the 2005 Workshop on Memory System Performance}, 300 location = {Chicago, Illinois}, 301 publisher = {ACM}, 302 address = {New York, NY, USA}, 303 month = jun, 304 year = 2005, 305 pages = {68-77}, 306 } 307 308 @inproceedings{grunwald-locality, 309 author = {Dirk Grunwald and Benjamin Zorn and Robert Henderson}, 310 title = {Improving the Cache Locality of Memory Allocation}, 311 booktitle = {PLDI '93: Proceedings of the ACM SIGPLAN 1993 conference on Programming language design and implementation}, 312 year = 1993, 313 isbn = {0-89791-598-4}, 314 pages = {177-186}, 315 location = {Albuquerque, New Mexico, United States}, 316 doi = {http://doi.acm.org.proxy.lib.uwaterloo.ca/10.1145/155090.155107}, 317 publisher = {ACM Press}, 318 address = {New York, NY, USA} 319 } 320 321 @article{Alexandrescu01b, 322 author = {Andrei Alexandrescu}, 323 title = {{volatile} -- Multithreaded Programmer's Best Friend}, 324 journal = {Dr. Dobb's}, 325 month = feb, 326 year = 2001, 327 url = {http://www.ddj.com/cpp/184403766} 328 } 329 330 @article{Attardi03, 331 author = {Joseph Attardi and Neelakanth Nadgir}, 332 title = {A Comparison of Memory Allocators in Multiprocessors}, 333 journal = {Sun Developer Network}, 334 month = jun, 335 year = 2003, 336 note = {\textsf{http://developers.sun.com/\-solaris/\-articles/\-multiproc/\-multiproc.html}}, 337 } 338 339 @unpublished{memlayout, 340 author = {Peter Jay Salzman}, 341 title = {Memory Layout and the Stack}, 342 journal = {Using GNU's GDB Debugger}, 343 note = {\textsf{http://dirac.org/\-linux/\-gdb/\-02a-Memory\_Layout\_And\_The\_Stack.php}}, 344 } 345 346 @unpublished{Ferguson07, 347 author = {Justin N. 
Ferguson}, 348 title = {Understanding the Heap by Breaking It}, 349 note = {\textsf{https://www.blackhat.com/\-presentations/\-bh-usa-07/Ferguson/\-Whitepaper/\-bh-usa-07-ferguson-WP.pdf}}, 350 } 351 352 @inproceedings{Huang06, 353 author = {Xianglong Huang and Brian T Lewis and Kathryn S McKinley}, 354 title = {Dynamic Code Management: Improving Whole Program Code Locality in Managed Runtimes}, 355 booktitle = {VEE '06: Proceedings of the 2nd international conference on Virtual execution environments}, 356 year = 2006, 357 isbn = {1-59593-332-6}, 358 pages = {133-143}, 359 location = {Ottawa, Ontario, Canada}, 360 doi = {http://doi.acm.org/10.1145/1134760.1134779}, 361 publisher = {ACM Press}, 362 address = {New York, NY, USA} 363 } 364 365 @inproceedings{Herlihy03, 366 author = {M. Herlihy and V. Luchangco and M. Moir}, 367 title = {Obstruction-free Synchronization: Double-ended Queues as an Example}, 368 booktitle = {Proceedings of the 23rd IEEE International Conference on Distributed Computing Systems}, 369 year = 2003, 370 month = may, 371 url = {http://www.cs.brown.edu/~mph/publications.html} 372 } 373 374 @techreport{Detlefs93, 375 author = {David L. Detlefs and Al Dosser and Benjamin Zorn}, 376 title = {Memory Allocation Costs in Large {C} and {C++} Programs}, 377 number = {CU-CS-665-93}, 378 institution = {University of Colorado}, 379 address = {130 Lytton Avenue, Palo Alto, CA 94301 and Campus Box 430, Boulder, CO 80309}, 380 year = 1993, 381 url = {http://citeseer.ist.psu.edu/detlefs93memory.html} 382 } 383 384 @inproceedings{Oyama99, 385 author = {Y. Oyama and K. Taura and A. 
Yonezawa}, 386 title = {Executing Parallel Programs With Synchronization Bottlenecks Efficiently}, 387 booktitle = {Proceedings of International Workshop on Parallel and Distributed Computing for Symbolic and Irregular Applications (PDSIA '99)}, 388 year = {1999}, 389 pages = {182--204}, 390 publisher = {World Scientific}, 391 address = {Sendai, Japan}, 392 } 393 394 @inproceedings{Dice02, 395 author = {Dave Dice and Alex Garthwaite}, 396 title = {Mostly Lock-Free Malloc}, 397 booktitle = {Proceedings of the 3rd international symposium on Memory management (ISMM'02)}, 398 month = jun, 399 year = 2002, 400 pages = {163-174}, 401 location = {Berlin, Germany}, 402 publisher = {ACM}, 403 address = {New York, NY, USA}, 404 } -
doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex
rf5a51db rcc7bbe6 85 85 \usepackage{comment} % Removes large sections of the document. 86 86 \usepackage{tabularx} 87 \usepackage{subfigure} 87 88 88 89 % Hyperlinks make it very easy to navigate an electronic document. … … 168 169 %\usepackageinput{common} 169 170 \CFAStyle % CFA code-style for all languages 170 \lstset{basicstyle=\linespread{0.9}\tt} % CFA typewriter font 171 \lstset{basicstyle=\linespread{0.9}\sf} % CFA typewriter font 172 \newcommand{\uC}{$\mu$\CC} 171 173 \newcommand{\PAB}[1]{{\color{red}PAB: #1}} 172 174 … … 224 226 \addcontentsline{toc}{chapter}{\textbf{References}} 225 227 226 \bibliography{ uw-ethesis,pl}228 \bibliography{pl,uw-ethesis} 227 229 % Tip: You can create multiple .bib files to organize your references. 228 230 % Just list them all in the \bibliogaphy command, separated by commas (no spaces). -
doc/user/user.tex
rf5a51db rcc7bbe6 11 11 %% Created On : Wed Apr 6 14:53:29 2016 12 12 %% Last Modified By : Peter A. Buhr 13 %% Last Modified On : Sun Oct 10 12:45:00 202114 %% Update Count : 5 09513 %% Last Modified On : Mon Feb 14 17:20:39 2022 14 %% Update Count : 5382 15 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 16 17 17 % requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended 18 18 19 \documentclass[twoside ,11pt]{article}19 \documentclass[twoside]{article} 20 20 21 21 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% … … 40 40 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_ 41 41 % green highlighting ¢...¢ (cent symbol) emacs: C-q M-" 42 % LaTex escape §...§ (section symbol) emacs: C-q M-'42 % LaTex escape ...§ (section symbol) emacs: C-q M-' 43 43 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^ 44 44 % math escape $...$ (dollar symbol) … … 85 85 \newcommand{\B}[1]{{\Textbf[blue]{#1}}} 86 86 \newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}} 87 \newcommand{\Sp}{\R{\textvisiblespace}} 87 88 \newcommand{\KWC}{K-W C\xspace} 88 89 … … 156 157 One of the main design philosophies of \CFA is to ``\Index{describe not prescribe}'', which means \CFA tries to provide a pathway from low-level C programming to high-level \CFA programming, but it does not force programmers to ``do the right thing''. 157 158 Programmers can cautiously add \CFA extensions to their C programs in any order and at any time to incrementally move towards safer, higher-level programming. 
158 A programmer is always free to reach back to C from \CFA, for any reason, and in many cases, new \CFA features can be locally switched back to the reC counterpart.159 There is no notion or requirement for \emph{rewriting} a legacy C program in\CFA;159 A programmer is always free to reach back to C from \CFA, for any reason, and in many cases, new \CFA features can be locally switched back to their C counterpart. 160 There is no notion or requirement for \emph{rewriting} a legacy C program to \CFA; 160 161 instead, a programmer evolves a legacy program into \CFA by incrementally incorporating \CFA features. 161 162 As well, new programs can be written in \CFA using a combination of C and \CFA features. … … 163 164 164 165 \Index*[C++]{\CC{}}~\cite{c++:v1} had a similar goal 30 years ago, allowing object-oriented programming to be incrementally added to C. 165 However, \CC currently has the disadvantages of a strong object-oriented bias, multiple legacy design-choices that cannot be updated, and active divergence of the language model from C, requiring significant effort and training to incrementally add \CC to a C-based project.166 However, \CC currently has the disadvantages of a strong object-oriented bias, multiple legacy design-choices that are difficult to update, and active divergence of the language model from C, requiring significant effort and training to incrementally add \CC to a C-based project. 166 167 In contrast, \CFA has 30 years of hindsight and a clean starting point. 167 168 168 169 Like \Index*[C++]{\CC{}}, there may be both old and new ways to achieve the same effect. 169 170 For example, the following programs compare the C, \CFA, and \CC I/O mechanisms, where the programs output the same result. 
170 \begin{ center}171 \begin{flushleft} 171 172 \begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}} 172 \multicolumn{1}{ c@{\hspace{1em}}}{\textbf{C}} & \multicolumn{1}{c}{\textbf{\CFA}} & \multicolumn{1}{c}{\textbf{\CC}} \\173 \begin{cfa} 173 \multicolumn{1}{@{}c@{\hspace{1em}}}{\textbf{C}} & \multicolumn{1}{c}{\textbf{\CFA}} & \multicolumn{1}{c@{}}{\textbf{\CC}} \\ 174 \begin{cfa}[tabsize=3] 174 175 #include <stdio.h>$\indexc{stdio.h}$ 175 176 … … 180 181 \end{cfa} 181 182 & 182 \begin{cfa} 183 \begin{cfa}[tabsize=3] 183 184 #include <fstream>$\indexc{fstream}$ 184 185 … … 189 190 \end{cfa} 190 191 & 191 \begin{cfa} 192 \begin{cfa}[tabsize=3] 192 193 #include <iostream>$\indexc{iostream}$ 193 194 using namespace std; 194 195 int main() { 195 196 int x = 0, y = 1, z = 2; 196 ®cout <<x<<" "<<y<<" "<<z<<endl;®197 ®cout << x << ' ' << y << ' ' << z << endl;® 197 198 } 198 199 \end{cfa} 199 200 \end{tabular} 200 \end{ center}201 \end{flushleft} 201 202 While \CFA I/O \see{\VRef{s:StreamIOLibrary}} looks similar to \Index*[C++]{\CC{}}, there are important differences, such as automatic spacing between variables and an implicit newline at the end of the expression list, similar to \Index*{Python}~\cite{Python}. 
202 203 … … 238 239 however, it largely extended the C language, and did not address many of C's existing problems.\footnote{% 239 240 Two important existing problems addressed were changing the type of character literals from ©int© to ©char© and enumerator from ©int© to the type of its enumerators.} 240 \Index*{Fortran}~\cite{Fortran08}, \Index*{ Ada}~\cite{Ada12}, and \Index*{Cobol}~\cite{Cobol14} are examples of programming languages that took an evolutionary approach, where modern language-features (\eg objects, concurrency) are added and problems fixed within the framework of the existing language.241 \Index*{Fortran}~\cite{Fortran08}, \Index*{Cobol}~\cite{Cobol14}, and \Index*{Ada}~\cite{Ada12} are examples of programming languages that took an evolutionary approach, where modern language-features (\eg objects, concurrency) are added and problems fixed within the framework of the existing language. 241 242 \Index*{Java}~\cite{Java8}, \Index*{Go}~\cite{Go}, \Index*{Rust}~\cite{Rust} and \Index*{D}~\cite{D} are examples of the revolutionary approach for modernizing C/\CC, resulting in a new language rather than an extension of the descendent. 242 243 These languages have different syntax and semantics from C, do not interoperate directly with C, and are not systems languages because of restrictive memory-management or garbage collection. … … 333 334 long double _Complex ®abs®( long double _Complex ); 334 335 \end{cfa} 335 The problem is \Index{name clash} between the C name ©abs© and the \CFA names ©abs©, resulting in two name linkages\index{C linkage}: ©extern "C"© and ©extern "Cforall"© (default).336 The problem is a \Index{name clash} between the C name ©abs© and the \CFA names ©abs©, resulting in two name linkages\index{C linkage}: ©extern "C"© and ©extern "Cforall"© (default). 336 337 Overloaded names must use \newterm{name mangling}\index{mangling!name} to create unique names that are different from unmangled C names. 
337 338 Hence, there is the same need as in \CC to know if a name is a C or \CFA name, so it can be correctly formed. … … 377 378 The program is linked with the debugging version of the runtime system. 378 379 The debug version performs runtime checks to aid the debugging phase of a \CFA program, but can substantially slow program execution. 379 The runtime checks should only be removed after theprogram is completely debugged.380 The runtime checks should only be removed after a program is completely debugged. 380 381 \textbf{This option is the default.} 381 382 … … 452 453 cfa $test$.cfa -XCFA -P -XCFA parse -XCFA -n # show program parse without prelude 453 454 \end{lstlisting} 455 Alternatively, multiple flages can be specified separated with commas and \emph{without} spaces. 456 \begin{lstlisting}[language=sh,{moredelim=**[is][\protect\color{red}]{®}{®}}] 457 cfa $test$.cfa -XCFA®,®-Pparse®,®-n # show program parse without prelude 458 \end{lstlisting} 454 459 \begin{description}[topsep=5pt,itemsep=0pt,parsep=0pt] 455 460 \item … … 533 538 double ®``®forall = 3.5; 534 539 \end{cfa} 535 536 Existing C programs with keyword clashes can be converted by enclosing keyword identifiers in backquotes, and eventually the identifier name can be changed to a non-keyword name. 540 Existing C programs with keyword clashes can be converted by prefixing the keyword identifiers with double backquotes, and eventually the identifier name can be changed to a non-keyword name. 537 541 \VRef[Figure]{f:HeaderFileInterposition} shows how clashes in existing C header-files \see{\VRef{s:StandardHeaders}} can be handled using preprocessor \newterm{interposition}: ©#include_next© and ©-I filename©. 538 542 Several common C header-files with keyword clashes are fixed in the standard \CFA header-library, so there is a seamless programming-experience. 
… … 627 631 \subsection{\texorpdfstring{\LstKeywordStyle{if} / \LstKeywordStyle{while} Statement}{if / while Statement}} 628 632 629 The ©if©/©while© expression allows declarations, similar to ©for©declaration expression.\footnote{630 Declarations in the ©do©-©while© condition are not useful because they appear after the loop body.}633 The \Indexc{if}/\Indexc{while} expression allows declarations, similar to \Indexc{for} declaration expression.\footnote{ 634 Declarations in the \Indexc{do}-©while© condition are not useful because they appear after the loop body.} 631 635 \begin{cfa} 632 636 if ( ®int x = f()® ) ... $\C{// x != 0}$ … … 640 644 while ( ®struct S { int i; } x = { f() }; x.i < 4® ) ... $\C{// relational expression}$ 641 645 \end{cfa} 642 Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical ©&&©operator.643 The scope of the declaration(s) is local to the ©if© statement but exist within both the \emph{then} and \emph{else} clauses.646 Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical \Indexc{&&} operator. 647 The scope of the declaration(s) is local to the ©if©/©while© statement, \ie in both \emph{then} and \emph{else} clauses for ©if©, and loop body for ©while©. 644 648 \CC only provides a single declaration always compared ©!=© to 0. 645 649 … … 649 653 \label{s:caseClause} 650 654 651 C restricts the ©case© clause of a ©switch©statement to a single value.655 C restricts the \Indexc{case} clause of a \Indexc{switch} statement to a single value. 652 656 For multiple ©case© clauses associated with the same statement, it is necessary to have multiple ©case© clauses rather than multiple values. 
653 Requiring a ©case© clause for each value does not seem to bein the spirit of brevity normally associated with C.654 Therefore, the ©case© clause is extended with a list of values , as in:657 Requiring a ©case© clause for each value is not in the spirit of brevity normally associated with C. 658 Therefore, the ©case© clause is extended with a list of values. 655 659 \begin{cquote} 656 660 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}} … … 703 707 \subsection{\texorpdfstring{\LstKeywordStyle{switch} Statement}{switch Statement}} 704 708 705 C allows a number of questionable forms for the ©switch©statement:709 C allows a number of questionable forms for the \Indexc{switch} statement: 706 710 \begin{enumerate} 707 711 \item 708 By default, the end of a ©case©clause\footnote{712 By default, the end of a \Indexc{case} clause\footnote{ 709 713 In this section, the term \emph{case clause} refers to either a ©case© or ©default© clause.} 710 714 \emph{falls through} to the next ©case© clause in the ©switch© statement; 711 to exit a ©switch© statement from a ©case© clause requires explicitly terminating the clause with a transfer statement, most commonly ©break©:715 to exit a ©switch© statement from a ©case© clause requires explicitly terminating the clause with a transfer statement, most commonly \Indexc{break}: 712 716 \begin{cfa} 713 717 switch ( i ) { 714 718 case 1: 715 719 ... 716 // fall-through720 $\R{\LstCommentStyle{// fall-through}}$ 717 721 case 2: 718 722 ... 719 break;// exit switch statement723 ®break;® // exit switch statement 720 724 } 721 725 \end{cfa} … … 763 767 } 764 768 \end{cfa} 765 This situation better handled without fall-through by allowinga list of case values \see{\VRef{s:caseClause}}.769 This situation is better handled by a list of case values \see{\VRef{s:caseClause}}. 
766 770 While fall-through itself is not a problem, the problem occurs when fall-through is the default, as this semantics is unintuitive to many programmers and is different from most programming languages with a ©switch© statement. 767 771 Hence, default fall-through semantics results in a large number of programming errors as programmers often \emph{forget} the ©break© statement at the end of a ©case© clause, resulting in inadvertent fall-through. … … 777 781 ... 778 782 } // if 779 case 2:780 while ( j < 5 ) {781 ...782 ®case 3:® // transfer into "while" statement783 ...784 } // while785 } // switch786 783 \end{cfa} 787 784 This usage branches into control structures, which is known to cause both comprehension and technical difficulties. … … 789 786 The technical problem results from the inability to ensure declaration and initialization of variables when blocks are not entered at the beginning. 790 787 There are few arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it. 791 Nevertheless, C does have an idiom where this capability is used, known as ``\Index*{Duff's device}''~\cite{Duff83}: 788 789 This C idiom is known as ``\Index*{Duff's device}''~\cite{Duff83}, from this example: 792 790 \begin{cfa} 793 791 register int n = (count + 7) / 8; … … 858 856 still works. 859 857 Nevertheless, reversing the default action would have a non-trivial effect on case actions that compound, such as the above example of processing shell arguments. 
860 Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called ©choose©, with no implicit fall-through semantics and an explicit fall-through if the last statement of a case-clause ends with the new keyword ©fallthrough©/©fallthru©, \eg:858 Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called \Indexc{choose}, with no implicit fall-through semantics and an explicit fall-through if the last statement of a case-clause ends with the new keyword \Indexc{fallthrough}/\Indexc{fallthru}, \eg: 861 859 \begin{cfa} 862 860 ®choose® ( i ) { … … 885 883 Therefore, no change is made for this issue. 886 884 \item 887 Dealing with unreachable code in a ©switch©/©choose© body is solved by restricting declarations and associatedinitialization to the start of statement body, which is executed \emph{before} the transfer to the appropriate ©case© clause\footnote{885 Dealing with unreachable code in a ©switch©/©choose© body is solved by restricting declarations and initialization to the start of statement body, which is executed \emph{before} the transfer to the appropriate ©case© clause\footnote{ 888 886 Essentially, these declarations are hoisted before the ©switch©/©choose© statement and both declarations and statement are surrounded by a compound statement.} and precluding statements before the first ©case© clause. 889 887 Further declarations at the same nesting level as the statement body are disallowed to ensure every transfer into the body is sound. 
… … 908 906 \subsection{Non-terminating and Labelled \texorpdfstring{\LstKeywordStyle{fallthrough}}{Non-terminating and Labelled fallthrough}} 909 907 910 The ©fallthrough© clause may be non-terminating within a ©case©clause or have a target label to common code from multiple case clauses.908 The \Indexc{fallthrough} clause may be non-terminating within a \Indexc{case} clause or have a target label to common code from multiple case clauses. 911 909 \begin{center} 912 910 \begin{tabular}{@{}lll@{}} … … 960 958 \end{tabular} 961 959 \end{center} 962 The target label must be below the ©fallthrough©and may not be nested in a control structure, and963 the target label must be at the same or higher level as the containing ©case©clause and located at964 the same level as a ©case© clause; the target label may be case ©default©, but only associated965 with the current ©switch©/©choose©statement.960 The target label must be below the \Indexc{fallthrough} and may not be nested in a control structure, and 961 the target label must be at the same or higher level as the containing \Indexc{case} clause and located at 962 the same level as a ©case© clause; the target label may be case \Indexc{default}, but only associated 963 with the current \Indexc{switch}/\Indexc{choose} statement. 966 964 967 965 \begin{figure} … … 1076 1074 Looping a fixed number of times, possibly with a loop index, occurs frequently. 1077 1075 \CFA condenses simply looping to facilitate coding speed and safety. 
1078 The ©for©/©while©/©do-while©loop-control is augmented as follows \see{examples in \VRef[Figure]{f:LoopControlExamples}}:1076 The \Indexc{for}, \Indexc{while}, and \Indexc{do} loop-control is augmented as follows \see{examples in \VRef[Figure]{f:LoopControlExamples}}: 1079 1077 \begin{itemize}[itemsep=0pt] 1080 1078 \item … … 1145 1143 \subsection{\texorpdfstring{Labelled \LstKeywordStyle{continue} / \LstKeywordStyle{break} Statement}{Labelled continue / break Statement}} 1146 1144 1147 C ©continue© and ©break©statements, for altering control flow, are restricted to one level of nesting for a particular control structure.1145 C \Indexc{continue} and \Indexc{break} statements, for altering control flow, are restricted to one level of nesting for a particular control structure. 1148 1146 This restriction forces programmers to use \Indexc{goto} to achieve the equivalent control-flow for more than one level of nesting. 1149 1147 To prevent having to switch to the ©goto©, \CFA extends the \Indexc{continue}\index{continue@©continue©!labelled}\index{labelled!continue@©continue©} and \Indexc{break}\index{break@©break©!labelled}\index{labelled!break@©break©} with a target label to support static multi-level exit\index{multi-level exit}\index{static multi-level exit}~\cite{Buhr85}, as in Java. 1150 For both ©continue© and ©break©, the target label must be directly associated with a ©for©, ©while© or ©do©statement;1151 for ©break©, the target label can also be associated with a ©switch©, ©if©or compound (©{}©) statement.1148 For both ©continue© and ©break©, the target label must be directly associated with a \Indexc{for}, \Indexc{while} or \Indexc{do} statement; 1149 for ©break©, the target label can also be associated with a \Indexc{switch}, \Indexc{if} or compound (©{}©) statement. 1152 1150 \VRef[Figure]{f:MultiLevelExit} shows a comparison between labelled ©continue© and ©break© and the corresponding C equivalent using ©goto© and labels. 
1153 1151 The innermost loop has 8 exit points, which cause continuation or termination of one or more of the 7 \Index{nested control-structure}s. … … 1224 1222 \end{figure} 1225 1223 1226 Both labelled ©continue© and ©break© are a ©goto©\index{goto@©goto©!restricted} restricted in the following ways:1224 Both labelled \Indexc{continue} and \Indexc{break} are a \Indexc{goto}\index{goto@©goto©!restricted} restricted in the following ways: 1227 1225 \begin{itemize} 1228 1226 \item … … 1240 1238 1241 1239 1240 \subsection{\texorpdfstring{Extended \LstKeywordStyle{else}}{Extended else}} 1241 \label{s:ExtendedElse} 1242 \index{extended ©else©} 1243 1244 The ©if© statement has an optional ©else© clause executed if the conditional is false. 1245 This concept is extended to the \Indexc{while}, \Indexc{for}, and \Indexc{do} looping constructs (like Python). 1246 Hence, if the loop conditional becomes false, looping stops and the corresponding ©else© clause is executed, if present. 1247 1248 The following example is a linear search for the key 3 in an array, where finding the key is handled with a ©break© and not finding with the ©else© clause on the loop construct. 1249 \begin{cquote} 1250 \begin{cfa} 1251 int a[10]; 1252 \end{cfa} 1253 \begin{tabular}{@{}lll@{}} 1254 \begin{cfa} 1255 1256 while ( int i = 0; i < 10 ) { 1257 if ( a[i] == 3 ) break; // found 1258 i += 1; 1259 } ®else® { // i == 10 1260 sout | "not found"; 1261 } 1262 \end{cfa} 1263 & 1264 \begin{cfa} 1265 1266 for ( i; 10 ) { 1267 if ( a[i] == 3 ) break; // found 1268 1269 } ®else® { // i == 10 1270 sout | "not found"; 1271 } 1272 \end{cfa} 1273 & 1274 \begin{cfa} 1275 int i = 0; 1276 do { 1277 if ( a[i] == 3 ) break; // found 1278 i += 1; 1279 } while( i < 10 ) ®else® { // i == 10 1280 sout | "not found"; 1281 } 1282 \end{cfa} 1283 \end{tabular} 1284 \end{cquote} 1285 Note, \Index{dangling else} now occurs with \Indexc{if}, \Indexc{while}, \Indexc{for}, \Indexc{do}, and \Indexc{waitfor}. 
1286 1287 1242 1288 %\subsection{\texorpdfstring{\protect\lstinline{with} Statement}{with Statement}} 1243 1289 \subsection{\texorpdfstring{\LstKeywordStyle{with} Statement}{with Statement}} … … 1266 1312 Therefore, reducing aggregate qualification is a useful language design goal. 1267 1313 1268 C allows unnamed nested aggregates thatopen their scope into the containing aggregate.1314 C partially addresses the problem by eliminating qualification for enumerated types and unnamed \emph{nested} aggregates, which open their scope into the containing aggregate. 1269 1315 This feature is used to group fields for attributes and/or with ©union© aggregates. 1270 1316 \begin{cfa} 1271 1317 struct S { 1272 struct { int g, h; } __attribute__(( aligned(64) ));1318 struct $\R{\LstCommentStyle{/* unnamed */}}$ { int g, h; } __attribute__(( aligned(64) )); 1273 1319 int tag; 1274 union {1320 union $\R{\LstCommentStyle{/* unnamed */}}$ { 1275 1321 struct { char c1, c2; } __attribute__(( aligned(128) )); 1276 1322 struct { int i1, i2; }; 1277 1323 struct { double d1, d2; }; 1278 1324 }; 1279 }; 1280 s.g; s.h; s.tag; s.c1; s.c2; s.i1; s.i2; s.d1; s.d2; 1325 } s; 1326 enum { R, G, B }; 1327 s.g; s.h; s.tag = R; s.c1; s.c2; s.i1 = G; s.i2 = B; s.d1; s.d2; 1281 1328 \end{cfa} 1282 1329 … … 1323 1370 \end{cfa} 1324 1371 where qualification is only necessary to disambiguate the shadowed variable ©i©. 1325 1326 In detail, the ©with© statement may appear as the body of a function or nested within a function body. 1372 In detail, the ©with© statement may form a function body or be nested within a function body. 1373 1327 1374 The ©with© clause takes a list of expressions, where each expression provides an aggregate type and object. 1328 1375 (Enumerations are already opened.) … … 1333 1380 \end{cfa} 1334 1381 The expression object is the implicit qualifier for the open structure-fields. 
1382 1335 1383 \CFA's ability to overload variables \see{\VRef{s:VariableOverload}} and use the left-side of assignment in type resolution means most fields with the same name but different types are automatically disambiguated, eliminating qualification. 1336 1384 All expressions in the expression list are open in parallel within the compound statement. … … 1362 1410 \end{cfa} 1363 1411 A cast or qualification can be used to disambiguate variables within a ©with© \emph{statement}. 1364 A cast can be used to disambiguate among overload variables in a ©with© \emph{expression}:1412 A cast can also be used to disambiguate among overload variables in a ©with© \emph{expression}: 1365 1413 \begin{cfa} 1366 1414 with ( w ) { ... } $\C{// ambiguous, same name and no context}$ … … 1371 1419 Finally, there is an interesting problem between parameters and the function-body ©with©, \eg: 1372 1420 \begin{cfa} 1373 void ?{}( S & s, int i ) with ( s ) { $\C{// constructor}$ 1374 ®s.i = i;® j = 3; m = 5.5; $\C{// initialize fields}$ 1375 } 1376 \end{cfa} 1377 Here, the assignment ©s.i = i© means ©s.i = s.i©, which is meaningless, and there is no mechanism to qualify the parameter ©i©, making the assignment impossible using the function-body ©with©. 1378 To solve this problem, parameters are treated like an initialized aggregate: 1379 \begin{cfa} 1380 struct Params { 1381 S & s; 1382 int i; 1421 void f( S & s, char c ) with ( s ) { 1422 ®s.c = c;® i = 3; d = 5.5; $\C{// initialize fields}$ 1423 } 1424 \end{cfa} 1425 Here, the assignment ©s.c = c© means ©s.c = s.c©, which is meaningless, and there is no mechanism to qualify the parameter ©c©, making the assignment impossible using the function-body ©with©. 
1426 To solve this problem, parameters \emph{not} explicitly opened are treated like an initialized aggregate: 1427 \begin{cfa} 1428 struct Params { $\C{// s explicitly opened so S \& s elided}$ 1429 char c; 1383 1430 } params; 1384 1431 \end{cfa} 1385 1432 and implicitly opened \emph{after} a function-body open, to give them higher priority: 1386 1433 \begin{cfa} 1387 void ?{}( S & s, int ®i® ) with ( s ) ®with( $\emph{\R{params}}$ )® { // syntax not allowed, illustration only1388 s. i = ®i®; j = 3; m= 5.5;1434 void f( S & s, char ®c® ) with ( s ) ®with( $\emph{\R{params}}$ )® { // syntax not allowed, illustration only 1435 s.c = ®c;® i = 3; d = 5.5; 1389 1436 } 1390 1437 \end{cfa} 1391 1438 This implicit semantic matches with programmer expectation. 1392 1393 1439 1394 1440 … … 3397 3443 This requirement is the same as for comma expressions in argument lists. 3398 3444 3399 Type qualifiers, \ie const and volatile, may modify a tuple type.3400 The meaning is t he same as for a type qualifier modifying an aggregate type [Int99, x 6.5.2.3(7),x 6.7.3(11)], \ie the qualifier is distributedacross all of the types in the tuple, \eg:3445 Type qualifiers, \ie ©const© and ©volatile©, may modify a tuple type. 3446 The meaning is to distribute the qualifier across all of the types in the tuple, \eg: 3401 3447 \begin{cfa} 3402 3448 const volatile [ int, float, const int ] x; … … 3597 3643 Stream ©exit© implicitly returns ©EXIT_FAILURE© to the shell. 
3598 3644 \begin{cfa} 3599 ®exit® | "x (" | x | ") negative value."; // terminate and return EXIT_FAILURE to shell3600 ®abort® | "x (" | x | ") negative value."; // terminate and generate stack trace and core file3645 ®exit® | "x (" | x | ") negative value."; // terminate and return EXIT_FAILURE to shell 3646 ®abort® | "x (" | x | ") negative value."; // terminate and generate stack trace and core file 3601 3647 \end{cfa} 3602 3648 Note, \CFA stream variables ©stdin©, ©stdout©, ©stderr©, ©exit©, and ©abort© overload C variables ©stdin©, ©stdout©, ©stderr©, and functions ©exit© and ©abort©, respectively. … … 4267 4313 sout | '1' | '2' | '3'; 4268 4314 sout | 1 | "" | 2 | "" | 3; 4269 sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x =" | 4 | "x $" | 5 | "x £" | 6 | "x¥"4270 | 7 | "x ¡" | 8 | "x ¿" | 9 | "x«" | 10;4315 sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x =" | 4 | "x $" | 5 | "x £" | 6 | "x Â¥" 4316 | 7 | "x ¡" | 8 | "x ¿" | 9 | "x «" | 10; 4271 4317 sout | 1 | ", x" | 2 | ". x" | 3 | "; x" | 4 | "! x" | 5 | "? x" | 6 | "% x" 4272 | 7 | " ¢ x" | 8 | "» x" | 9 | ") x" | 10 | "] x" | 11 | "} x";4318 | 7 | "¢ x" | 8 | "» x" | 9 | ") x" | 10 | "] x" | 11 | "} x"; 4273 4319 sout | "x`" | 1 | "`x'" | 2 | "'x\"" | 3 | "\"x:" | 4 | ":x " | 5 | " x\t" | 6 | "\tx"; 4274 4320 sout | "x ( " | 1 | " ) x" | 2 | " , x" | 3 | " :x: " | 4; … … 4446 4492 The common usage is the short form of the mutex statement\index{ostream@©ostream©!mutex@©mutex©} to lock a stream during a single cascaded I/O expression, \eg: 4447 4493 \begin{cfa} 4448 $\emph{thread\(_1\)}$ : ®mutex( )® sout | "abc " | "def ";4449 $\emph{thread\(_2\)}$ : ®mutex( )® sout | "uvw " | "xyz ";4494 $\emph{thread\(_1\)}$ : ®mutex( sout )® sout | "abc " | "def "; 4495 $\emph{thread\(_2\)}$ : ®mutex( sout )® sout | "uvw " | "xyz "; 4450 4496 \end{cfa} 4451 4497 Now, the order of the thread execution is still non-deterministic, but the output is constrained to two possible lines in either order. 
… … 4470 4516 ®mutex( sout )® { 4471 4517 sout | 1; 4472 ®mutex( ) sout® | 2 | 3; $\C{// unnecessary, but ok because of recursive lock}$4518 ®mutex( sout ) sout® | 2 | 3; $\C{// unnecessary, but ok because of recursive lock}$ 4473 4519 sout | 4; 4474 4520 } // implicitly release sout lock … … 4482 4528 int x, y, z, w; 4483 4529 sin | x; 4484 ®mutex( ) sin® | y | z;$\C{// unnecessary, but ok because of recursive lock}$4530 ®mutex( sin )® sin | y | z; $\C{// unnecessary, but ok because of recursive lock}$ 4485 4531 sin | w; 4486 4532 } // implicitly release sin lock … … 4491 4537 \Textbf{WARNING:} The general problem of \Index{nested locking} can occur if routines are called in an I/O sequence that block, \eg: 4492 4538 \begin{cfa} 4493 ®mutex( ) sout®| "data:" | rtn( mon ); $\C{// mutex call on monitor}$4539 ®mutex( sout )® sout | "data:" | rtn( mon ); $\C{// mutex call on monitor}$ 4494 4540 \end{cfa} 4495 4541 If the thread executing the I/O expression blocks in the monitor with the ©sout© lock, other threads writing to ©sout© also block until the thread holding the lock is unblocked and releases it. … … 4498 4544 \begin{cfa} 4499 4545 int ®data® = rtn( mon ); 4500 mutex() sout | "data:" | ®data®; 4501 \end{cfa} 4546 mutex( sout ) sout | "data:" | ®data®; 4547 \end{cfa} 4548 4549 4550 \subsection{Locale} 4551 \index{stream!locale} 4552 \index{locale!stream} 4553 4554 Cultures use different syntax, called a \newterm{locale}, for printing numbers so they are easier to read, \eg: 4555 \begin{cfa} 4556 12®,®345®.®123 $\C[1.25in]{// comma separator, period decimal-point}$ 4557 12®.®345®,®123 $\C{// period separator, comma decimal-point}$ 4558 12$\Sp$345®,®123®.® $\C{// space separator, comma decimal-point, period terminator}\CRT$ 4559 \end{cfa} 4560 A locale is selected with function ©setlocale©, and the corresponding locale package \emph{must} be installed on the underlying system; 4561 ©setlocale© returns ©0p© if the requested locale is unavailable. 
4562 Furthermore, a locale covers the syntax for many cultural items, \eg address, measurement, money, etc. 4563 This discussion applies to item ©LC_NUMERIC© for formatting non-monetary integral and floating-point values. 4564 \VRef[Figure]{f:StreamLocale} shows selecting different cultural syntax, which may be associated with one or more countries. 4565 4566 \begin{figure} 4567 \begin{cfa} 4568 #include <fstream.hfa> 4569 #include <locale.h> $\C{// setlocale}$ 4570 #include <stdlib.h> $\C{// getenv}$ 4571 4572 int main() { 4573 void print() { 4574 sout | 12 | 123 | 1234 | 12345 | 123456 | 1234567; 4575 sout | 12. | 123.1 | 1234.12 | 12345.123 | 123456.1234 | 1234567.12345; 4576 sout | nl; 4577 } 4578 sout | "Default locale off"; 4579 print(); 4580 sout | "Locale on" | ®setlocale( LC_NUMERIC, getenv( "LANG" ) )®; // enable local locale 4581 print(); 4582 sout | "German" | ®setlocale( LC_NUMERIC, "de_DE.UTF-8" )®; // enable German locale 4583 print(); 4584 sout | "Ukraine" | ®setlocale( LC_NUMERIC, "uk_UA.utf8" )®; // enable Ukraine locale 4585 print(); 4586 sout | "Default locale off" | ®setlocale( LC_NUMERIC, "C" )®; // disable locale 4587 print(); 4588 } 4589 4590 Default locale off 4591 12 123 1234 12345 123456 1234567 4592 12. 123.1 1234.12 12345.123 123456.1234 1234567.12345 4593 4594 Locale on en_US.UTF-8 4595 12 123 1®,®234 12®,®345 123®,®456 1®,®234®,®567 4596 12®.® 123®.®1 1®,®234®.®12 12®,®345®.®123 123®,®456®.®1234 1®,®234®,®567®.®12345 4597 4598 German de_DE.UTF-8 4599 12 123 1®.®234 12®.®345 123®.®456 1®.®234®.®567 4600 12®.® 123®,®1®.® 1®.®234®,®12 12®.®345®,®123 123®.®456®,®1234 1®.®234®.®567®,®12345 4601 4602 Ukraine uk_UA.utf8 4603 12 123 1 234 12 345 123 456 1 234 567 4604 12®.® 123®,®1®.® 1$\Sp$234®,®12®.® 12$\Sp$ 345®,®123®.® 123$\Sp$ 456®,®1234®.® 1$\Sp$ 234$\Sp$567®,®12345®.® 4605 4606 Default locale off C 4607 12 123 1234 12345 123456 1234567 4608 12. 
123.1 1234.12 12345.123 123456.1234 1234567.12345 4609 \end{cfa} 4610 \caption{Stream Locale} 4611 \label{f:StreamLocale} 4612 \end{figure} 4502 4613 4503 4614 … … 4555 4666 \end{figure} 4556 4667 4668 4557 4669 \begin{comment} 4558 4670 \section{Types} … … 4637 4749 4638 4750 4639 \s ubsection{Structures}4751 \section{Structures} 4640 4752 4641 4753 Structures in \CFA are basically the same as structures in C. … … 5270 5382 \subsection{Coroutine} 5271 5383 5272 \Index{Coroutines} are the precursor to t asks.5384 \Index{Coroutines} are the precursor to threads. 5273 5385 \VRef[Figure]{f:FibonacciCoroutine} shows a coroutine that computes the \Index*{Fibonacci} numbers. 5274 5386 … … 5372 5484 5373 5485 5374 \subsection{T asks}5486 \subsection{Threads} 5375 5487 5376 5488 \CFA also provides a simple mechanism for creating and utilizing user level threads. 5377 A t askprovides mutual exclusion like a monitor, and also has its own execution state and a thread of control.5378 Similar to a monitor, a t askis defined like a structure:5489 A thread provides mutual exclusion like a monitor, and also has its own execution state and a thread of control. 5490 Similar to a monitor, a thread is defined like a structure: 5379 5491 5380 5492 \begin{figure} … … 5420 5532 } 5421 5533 \end{cfa} 5422 \caption{Simple T asks}5423 \label{f:SimpleT asks}5534 \caption{Simple Threads} 5535 \label{f:SimpleThreads} 5424 5536 \end{figure} 5425 5537 … … 6788 6900 In \CFA, there are ambiguous cases with dereference and operator identifiers, \eg ©int *?*?()©, where the string ©*?*?© can be interpreted as: 6789 6901 \begin{cfa} 6790 *?$\ R{\textvisiblespace}$*? $\C{// dereference operator, dereference operator}$6791 *$\ R{\textvisiblespace}$?*? $\C{// dereference, multiplication operator}$6902 *?$\Sp$*? $\C{// dereference operator, dereference operator}$ 6903 *$\Sp$?*? 
$\C{// dereference, multiplication operator}$ 6792 6904 \end{cfa} 6793 6905 By default, the first interpretation is selected, which does not yield a meaningful parse. … … 6813 6925 Therefore, it is necessary to disambiguate these cases with a space: 6814 6926 \begin{cfa} 6815 i++$\ R{\textvisiblespace}$? i : 0;6816 i?$\ R{\textvisiblespace}$++i : 0;6927 i++$\Sp$? i : 0; 6928 i?$\Sp$++i : 0; 6817 6929 \end{cfa} 6818 6930 … … 7430 7542 char random( void );$\indexc{random}$ 7431 7543 char random( char u ); $\C{// [0,u)}$ 7432 char random( char l, char u ); $\C{// [l,u )}$7544 char random( char l, char u ); $\C{// [l,u]}$ 7433 7545 int random( void ); 7434 7546 int random( int u ); $\C{// [0,u)}$ 7435 int random( int l, int u ); $\C{// [l,u )}$7547 int random( int l, int u ); $\C{// [l,u]}$ 7436 7548 unsigned int random( void ); 7437 7549 unsigned int random( unsigned int u ); $\C{// [0,u)}$ 7438 unsigned int random( unsigned int l, unsigned int u ); $\C{// [l,u )}$7550 unsigned int random( unsigned int l, unsigned int u ); $\C{// [l,u]}$ 7439 7551 long int random( void ); 7440 7552 long int random( long int u ); $\C{// [0,u)}$ 7441 long int random( long int l, long int u ); $\C{// [l,u )}$7553 long int random( long int l, long int u ); $\C{// [l,u]}$ 7442 7554 unsigned long int random( void ); 7443 7555 unsigned long int random( unsigned long int u ); $\C{// [0,u)}$ 7444 unsigned long int random( unsigned long int l, unsigned long int u ); $\C{// [l,u )}$7556 unsigned long int random( unsigned long int l, unsigned long int u ); $\C{// [l,u]}$ 7445 7557 float random( void ); $\C{// [0.0, 1.0)}$ 7446 7558 double random( void ); $\C{// [0.0, 1.0)}$ … … 8106 8218 8107 8219 8220 \section{Pseudo Random Number Generator} 8221 \label{s:PRNG} 8222 8223 Random numbers are values generated independently, i.e., new values do not depend on previous values (independent trials), \eg lottery numbers, shuffled cards, dice roll, coin flip. 
8224 While a primary goal of programming is computing values that are \emph{not} random, random values are useful in simulation, cryptography, games, etc. 8225 A random-number generator is an algorithm that computes independent values. 8226 If the algorithm uses deterministic computation (a predictable sequence of values), it generates \emph{pseudo} random numbers versus \emph{true} random numbers. 8227 8228 All \newterm{pseudo random-number generators} (\newterm{PRNG}) involve some technique to scramble bits of a value, \eg multiplicative recurrence: 8229 \begin{cfa} 8230 rand = 36973 * (rand & 65535) + (rand >> 16); // scramble bits 8231 \end{cfa} 8232 Multiplication of large values adds new least-significant bits and drops most-significant bits. 8233 \begin{quote} 8234 \begin{tabular}{@{}r|l@{}} 8235 bits 63--32 (most) & bits 31--0 (least) \\ 8236 \hline 8237 0x0 & 0x3e8e36 \\ 8238 0x5f & 0x718c25e1 \\ 8239 0xad3e & 0x7b5f1dbe \\ 8240 0xbc3b & 0xac69ff19 \\ 8241 0x1070f & 0x2d258dc6 \\ 8242 \end{tabular} 8243 \end{quote} 8244 By dropping bits 63--32, bits 31--0 become scrambled after each multiply. 8245 The least-significant bits \emph{appear} random but the same bits are always generated given a fixed starting value, called the \newterm{seed} (value 0x3e8e36 above). 8246 Hence, if a program uses the same seed, the same sequence of pseudo-random values is generated from the PRNG. 8247 Often the seed is set to another random value like a program's process identifier (©getpid©\index{getpid@©getpid©}) or time when the program is run; 8248 hence, one random value bootstraps another. 8249 Finally, a PRNG usually generates a range of large values, \eg ©[0, UINT_MAX]©, which are scaled using the modulus operator, \eg ©prng() % 5© produces random values in the range 0--4. 
8250 8251 \CFA provides a sequential PRNG type only accessible by a single thread (not thread-safe) and a set of global and companion thread PRNG functions accessible by multiple threads without contention. 8252 \begin{itemize} 8253 \item 8254 The ©PRNG© type is for sequential programs, like coroutining: 8255 \begin{cfa} 8256 struct PRNG { ... }; $\C[3.75in]{// opaque type}$ 8257 void ?{}( PRNG & prng ); $\C{// random seed}$ 8258 void ?{}( PRNG & prng, uint32_t seed ); $\C{// fixed seed}$ 8259 void set_seed( PRNG & prng, uint32_t seed ); $\C{// set seed}$ 8260 uint32_t get_seed( PRNG & prng ); $\C{// get seed}$ 8261 uint32_t prng( PRNG & prng ); $\C{// [0,UINT\_MAX]}$ 8262 uint32_t prng( PRNG & prng, uint32_t u ); $\C{// [0,u)}$ 8263 uint32_t prng( PRNG & prng, uint32_t l, uint32_t u ); $\C{// [l,u]}$ 8264 uint32_t calls( PRNG & prng ); $\C{// number of calls}\CRT$ 8265 \end{cfa} 8266 A ©PRNG© object is used to randomize behaviour or values during execution, \eg in games, a character makes a random move or an object takes on a random value. 8267 In this scenario, it is useful to have multiple ©PRNG© objects, \eg one per player or object. 8268 However, sequential execution is still repeatable given the same starting seeds for all ©PRNG©s. 8269 \VRef[Figure]{f:SequentialPRNG} shows an example that creates two sequential ©PRNG©s, sets both to the same seed (1009), and illustrates the three forms for generating random values, where both ©PRNG©s generate the same sequence of values. 8270 8271 \begin{figure} 8272 \begin{cfa} 8273 PRNG prng1, prng2; 8274 ®set_seed( prng1, 1009 )®; ®set_seed( prng2, 1009 )®; 8275 for ( 10 ) { 8276 // Do not cascade prng calls because side-effect functions called in arbitrary order. 
8277 sout | nlOff | ®prng( prng1 )®; sout | ®prng( prng1, 5 )®; sout | ®prng( prng1, 0, 5 )® | '\t'; 8278 sout | ®prng( prng2 )®; sout | ®prng( prng2, 5 )®; sout | ®prng( prng2, 0, 5 )® | nlOn; 8279 } 8280 \end{cfa} 8281 \begin{cquote} 8282 \begin{tabular}{@{}ll@{}} 8283 \begin{cfa} 8284 37301721 2 2 8285 1681308562 1 3 8286 290112364 3 2 8287 1852700364 4 3 8288 733221210 1 3 8289 1775396023 2 3 8290 123981445 2 3 8291 2062557687 2 0 8292 283934808 1 0 8293 672325890 1 3 8294 \end{cfa} 8295 & 8296 \begin{cfa} 8297 37301721 2 2 8298 1681308562 1 3 8299 290112364 3 2 8300 1852700364 4 3 8301 733221210 1 3 8302 1775396023 2 3 8303 123981445 2 3 8304 2062557687 2 0 8305 283934808 1 0 8306 672325890 1 3 8307 \end{cfa} 8308 \end{tabular} 8309 \end{cquote} 8310 \caption{Sequential PRNG} 8311 \label{f:SequentialPRNG} 8312 \end{figure} 8313 8314 \item 8315 The PRNG global and companion thread functions are for concurrent programming, such as randomizing execution in short-running programs, \eg ©yield( prng() % 5 )©. 8316 \begin{cfa} 8317 void set_seed( uint32_t seed ); $\C[3.75in]{// set global seed}$ 8318 uint32_t get_seed(); $\C{// get global seed}$ 8319 // SLOWER 8320 uint32_t prng(); $\C{// [0,UINT\_MAX]}$ 8321 uint32_t prng( uint32_t u ); $\C{// [0,u)}$ 8322 uint32_t prng( uint32_t l, uint32_t u ); $\C{// [l,u]}$ 8323 // FASTER 8324 uint32_t prng( $thread\LstStringStyle{\textdollar}$ & th ); $\C{// [0,UINT\_MAX]}$ 8325 uint32_t prng( $thread\LstStringStyle{\textdollar}$ & th, uint32_t u ); $\C{// [0,u)}$ 8326 uint32_t prng( $thread\LstStringStyle{\textdollar}$ & th, uint32_t l, uint32_t u ); $\C{// [l,u]}\CRT$ 8327 \end{cfa} 8328 The only difference between the two sets of ©prng© routines is performance. 8329 8330 Because concurrent execution is non-deterministic, seeding the concurrent PRNG is less important, as repeatable execution is impossible. 8331 Hence, there is one system-wide PRNG (global seed) but each \CFA thread has its own non-contended PRNG state. 
8332 If the global seed is set, threads start with this seed, until it is reset and then threads start with the reset seed. 8333 Hence, these threads generate the same sequence of random numbers from their specific starting seed. 8334 If the global seed is \emph{not} set, threads start with a random seed, until the global seed is set. 8335 Hence, these threads generate different sequences of random numbers. 8336 If each thread needs its own seed, use a sequential ©PRNG© in each thread. 8337 The slower ©prng© functions \emph{without} a thread argument call ©active_thread© internally to indirectly access the current thread's PRNG state, while the faster ©prng© functions \emph{with} a thread argument directly access the thread through the thread parameter. 8338 If a thread pointer is available, \eg in thread main, eliminating the call to ©active_thread© significantly reduces the cost of accessing the thread's PRNG state. 8339 \VRef[Figure]{f:ConcurrentPRNG} shows an example using the slower/faster concurrent PRNG in the program main and a thread. 
8340 8341 \begin{figure} 8342 \begin{cfa} 8343 thread T {}; 8344 void main( ®T & th® ) { // thread address 8345 for ( i; 10 ) { 8346 sout | nlOff | ®prng()®; sout | ®prng( 5 )®; sout | ®prng( 0, 5 )® | '\t'; // SLOWER 8347 sout | nlOff | ®prng( th )®; sout | ®prng( th, 5 )®; sout | ®prng( th, 0, 5 )® | nlOn; // FASTER 8348 } 8349 } 8350 int main() { 8351 set_seed( 1009 ); 8352 $\R{thread\LstStringStyle{\textdollar}}$ ®& th = *active_thread()®; // program-main thread-address 8353 for ( i; 10 ) { 8354 sout | nlOff | ®prng()®; sout | ®prng( 5 )®; sout | ®prng( 0, 5 )® | '\t'; // SLOWER 8355 sout | nlOff | ®prng( th )®; sout | ®prng( th, 5 )®; sout | ®prng( th, 0, 5 )® | nlOn; // FASTER 8356 } 8357 sout | nl; 8358 T t; // run thread 8359 } 8360 \end{cfa} 8361 \begin{cquote} 8362 \begin{tabular}{@{}ll@{}} 8363 \begin{cfa} 8364 37301721 2 2 8365 290112364 3 2 8366 733221210 1 3 8367 123981445 2 3 8368 283934808 1 0 8369 1414344101 1 3 8370 871831898 3 4 8371 2142057611 4 4 8372 802117363 0 4 8373 2346353643 1 3 8374 \end{cfa} 8375 & 8376 \begin{cfa} 8377 1681308562 1 3 8378 1852700364 4 3 8379 1775396023 2 3 8380 2062557687 2 0 8381 672325890 1 3 8382 873424536 3 4 8383 866783532 0 1 8384 17310256 2 5 8385 492964499 0 0 8386 2143013105 3 2 8387 \end{cfa} 8388 \end{tabular} 8389 \begin{cfa} 8390 // same output as above from thread t 8391 \end{cfa} 8392 \end{cquote} 8393 \caption{Concurrent PRNG} 8394 \label{f:ConcurrentPRNG} 8395 \end{figure} 8396 \end{itemize} 8397 8398 8108 8399 \section{Multi-precision Integers} 8109 8400 \label{s:MultiPrecisionIntegers} … … 8310 8601 \end{tabular} 8311 8602 \end{cquote} 8312 \small 8603 8313 8604 \begin{cfa} 8314 8605 Factorial Numbers -
driver/Makefile.am
rf5a51db rcc7bbe6 19 19 20 20 # applies to both programs 21 AM_CXXFLAGS = @HOST_FLAGS@ -Wall - O2 -g -std=c++14 -I${abs_top_srcdir}/src -I${abs_top_srcdir}/src/include21 AM_CXXFLAGS = @HOST_FLAGS@ -Wall -Wextra -Werror=return-type -O2 -g -std=c++14 -I${abs_top_srcdir}/src -I${abs_top_srcdir}/src/include 22 22 23 23 # don't install cfa directly -
driver/cc1.cc
rf5a51db rcc7bbe6 10 10 // Created On : Fri Aug 26 14:23:51 2005 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Jul 21 09:46:24 202113 // Update Count : 4 1912 // Last Modified On : Thu Feb 17 18:04:23 2022 13 // Update Count : 422 14 14 // 15 15 … … 61 61 static string __CFA_FLAGPREFIX__( "__CFA_FLAG" ); // "__CFA_FLAG__=" suffix 62 62 63 static void checkEnv1( const char * args[], int & nargs ) {// stage 163 static void checkEnv1() { // stage 1 64 64 extern char ** environ; 65 65 … … 155 155 cerr << "Stage1" << endl; 156 156 #endif // __DEBUG_H__ 157 checkEnv1( args, nargs );// arguments passed via environment variables157 checkEnv1(); // arguments passed via environment variables 158 158 #ifdef __DEBUG_H__ 159 159 for ( int i = 1; i < argc; i += 1 ) { -
libcfa/prelude/Makefile.am
rf5a51db rcc7bbe6 26 26 27 27 CC = @LOCAL_CFACC@ 28 AM_CFLAGS = -g -Wall -W no-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@28 AM_CFLAGS = -g -Wall -Werror=return-type -Wno-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@ 29 29 AM_CFAFLAGS = @CONFIG_CFAFLAGS@ 30 30 -
libcfa/src/Makefile.am
rf5a51db rcc7bbe6 33 33 # The built sources must not depend on the installed inst_headers_src 34 34 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr -I$(srcdir)/concurrency $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@ 35 AM_CFLAGS = -g -Wall -W no-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@36 AM_CCASFLAGS = -g -Wall -W no-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@35 AM_CFLAGS = -g -Wall -Werror=return-type -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@ 36 AM_CCASFLAGS = -g -Wall -Werror=return-type -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@ 37 37 CFACC = @CFACC@ 38 38 -
libcfa/src/concurrency/io/setup.cfa
rf5a51db rcc7bbe6 32 32 33 33 void __cfa_io_start( processor * proc ) {} 34 bool __cfa_io_flush( processor * proc, int ) { }34 bool __cfa_io_flush( processor * proc, int ) { return false; } 35 35 void __cfa_io_stop ( processor * proc ) {} 36 36 -
libcfa/src/concurrency/kernel.hfa
rf5a51db rcc7bbe6 173 173 174 174 static inline void ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; } 175 static inline void ^?{}(__timestamp_t & this) {}175 static inline void ^?{}(__timestamp_t &) {} 176 176 177 177 struct __attribute__((aligned(128))) __ready_queue_caches_t; -
libcfa/src/concurrency/kernel/startup.cfa
rf5a51db rcc7bbe6 18 18 19 19 // C Includes 20 #include <errno.h> 20 #include <errno.h> // errno 21 21 #include <signal.h> 22 #include <string.h> 23 #include <unistd.h> 22 #include <string.h> // strerror 23 #include <unistd.h> // sysconf 24 24 25 25 extern "C" { 26 #include <limits.h>// PTHREAD_STACK_MIN27 #include <unistd.h> 28 #include <sys/eventfd.h> 29 #include <sys/mman.h>// mprotect30 #include <sys/resource.h>// getrlimit26 #include <limits.h> // PTHREAD_STACK_MIN 27 #include <unistd.h> // syscall 28 #include <sys/eventfd.h> // eventfd 29 #include <sys/mman.h> // mprotect 30 #include <sys/resource.h> // getrlimit 31 31 } 32 32 33 33 // CFA Includes 34 34 #include "kernel_private.hfa" 35 #include "startup.hfa" 35 #include "startup.hfa" // STARTUP_PRIORITY_XXX 36 36 #include "limits.hfa" 37 37 #include "math.hfa" … … 102 102 extern void __wake_proc(processor *); 103 103 extern int cfa_main_returned; // from interpose.cfa 104 extern uint32_t __global_random_seed;104 uint32_t __global_random_prime = 4_294_967_291u, __global_random_mask = false; 105 105 106 106 //----------------------------------------------------------------------------- … … 490 490 preferred = ready_queue_new_preferred(); 491 491 last_proc = 0p; 492 random_state = __global_random_ seed;492 random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl(); 493 493 #if defined( __CFA_WITH_VERIFY__ ) 494 494 canary = 0x0D15EA5E0D15EA5Ep; … … 736 736 check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute 737 737 738 size_t stacksize = DEFAULT_STACK_SIZE;738 size_t stacksize = max( PTHREAD_STACK_MIN, DEFAULT_STACK_SIZE ); 739 739 740 740 void * stack; -
libcfa/src/concurrency/preemption.cfa
rf5a51db rcc7bbe6 10 10 // Created On : Mon Jun 5 14:20:42 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Nov 6 07:42:13 202013 // Update Count : 5 412 // Last Modified On : Thu Feb 17 11:18:57 2022 13 // Update Count : 59 14 14 // 15 15 … … 243 243 //---------- 244 244 // special case for preemption since used often 245 bool __preemption_enabled() {245 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() { 246 246 // create a assembler label before 247 247 // marked as clobber all to avoid movement -
libcfa/src/concurrency/thread.cfa
rf5a51db rcc7bbe6 10 10 // Created On : Tue Jan 17 12:27:26 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 13 20:11:55202213 // Update Count : 4212 // Last Modified On : Sat Feb 12 15:24:18 2022 13 // Update Count : 66 14 14 // 15 15 … … 25 25 #include "invoke.h" 26 26 27 extern uint32_t __global_random_seed ;27 extern uint32_t __global_random_seed, __global_random_prime, __global_random_mask; 28 28 29 29 //----------------------------------------------------------------------------- … … 45 45 preferred = ready_queue_new_preferred(); 46 46 last_proc = 0p; 47 random_state = __global_random_ seed;47 random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl(); 48 48 #if defined( __CFA_WITH_VERIFY__ ) 49 49 canary = 0x0D15EA5E0D15EA5Ep; … … 176 176 177 177 void set_seed( uint32_t seed ) { 178 active_thread()->random_state = __global_random_seed = seed; 179 GENERATOR( active_thread()->random_state ); 178 uint32_t & state = active_thread()->random_state; 179 state = __global_random_seed = seed; 180 GENERATOR( state ); 181 __global_random_prime = state; 182 __global_random_mask = true; 180 183 } // set_seed 181 uint32_t prng( void ) { return GENERATOR( active_thread()->random_state ); } // [0,UINT_MAX] 184 185 uint32_t prng( void ) { // [0,UINT_MAX] 186 uint32_t & state = active_thread()->random_state; 187 return GENERATOR( state ); 188 } // prng 182 189 183 190 // Local Variables: // -
libcfa/src/concurrency/thread.hfa
rf5a51db rcc7bbe6 10 10 // Created On : Tue Jan 17 12:27:26 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 6 16:40:16202213 // Update Count : 712 // Last Modified On : Fri Feb 11 16:34:07 2022 13 // Update Count : 20 14 14 // 15 15 … … 130 130 T & join( T & this ); 131 131 132 //---------- 133 // prng 134 static inline { 135 uint32_t prng( thread$ & th ) __attribute__(( warn_unused_result )) { return LCG( th.random_state ); } // [0,UINT_MAX] 136 uint32_t prng( thread$ & th, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th ) % u; } // [0,u) 137 uint32_t prng( thread$ & th, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th, u - l + 1 ) + l; } // [l,u] 138 forall( T & | is_thread(T) ) { 139 uint32_t prng( T & th ) __attribute__(( warn_unused_result )) { return prng( (thread &)th ); } // [0,UINT_MAX] 140 uint32_t prng( T & th, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th ) % u; } // [0,u) 141 uint32_t prng( T & th, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th, u - l + 1 ) + l; } // [l,u] 142 } // distribution 143 } // distribution 144 132 145 // Local Variables: // 133 146 // mode: c // -
libcfa/src/math.trait.hfa
rf5a51db rcc7bbe6 16 16 #pragma once 17 17 18 trait Not( T) {19 void ?{}( T&, zero_t );20 int !?( T);18 trait Not( U ) { 19 void ?{}( U &, zero_t ); 20 int !?( U ); 21 21 }; // Not 22 22 … … 26 26 }; // Equality 27 27 28 trait Relational( T | Equality( T) ) {29 int ?<?( T, T);30 int ?<=?( T, T);31 int ?>?( T, T);32 int ?>=?( T, T);28 trait Relational( U | Equality( U ) ) { 29 int ?<?( U, U ); 30 int ?<=?( U, U ); 31 int ?>?( U, U ); 32 int ?>=?( U, U ); 33 33 }; // Relational 34 34 … … 39 39 }; // Signed 40 40 41 trait Additive( T | Signed( T) ) {42 T ?+?( T, T);43 T ?-?( T, T);44 T ?+=?( T &, T);45 T ?-=?( T &, T);41 trait Additive( U | Signed( U ) ) { 42 U ?+?( U, U ); 43 U ?-?( U, U ); 44 U ?+=?( U &, U ); 45 U ?-=?( U &, U ); 46 46 }; // Additive 47 47 … … 49 49 void ?{}( T &, one_t ); 50 50 // T ?++( T & ); 51 // T ++?( T & );51 // T ++?( T & ); 52 52 // T ?--( T & ); 53 53 // T --?( T & ); 54 54 }; // Incdec 55 55 56 trait Multiplicative( T | Incdec( T) ) {57 T ?*?( T, T);58 T ?/?( T, T);59 T ?%?( T, T);60 T ?/=?( T &, T);56 trait Multiplicative( U | Incdec( U ) ) { 57 U ?*?( U, U ); 58 U ?/?( U, U ); 59 U ?%?( U, U ); 60 U ?/=?( U &, U ); 61 61 }; // Multiplicative 62 62 -
libcfa/src/stdlib.cfa
rf5a51db rcc7bbe6 10 10 // Created On : Thu Jan 28 17:10:29 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 13 21:38:30202213 // Update Count : 59312 // Last Modified On : Thu Feb 10 22:41:39 2022 13 // Update Count : 602 14 14 // 15 15 … … 226 226 227 227 uint32_t __global_random_seed; // sequential/concurrent 228 uint32_t __global_random_state; // sequential only228 uint32_t __global_random_state; // sequential only 229 229 230 230 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; GENERATOR( state ); } // set seed 231 uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); } 232 233 void set_seed( uint32_t seed ) { __global_random_seed = seed; GENERATOR( __global_random_state ); } 231 232 void set_seed( uint32_t seed ) { __global_random_state = __global_random_seed = seed; GENERATOR( __global_random_state ); } 234 233 uint32_t get_seed() { return __global_random_seed; } 235 234 uint32_t prng( void ) { return GENERATOR( __global_random_state ); } // [0,UINT_MAX] -
libcfa/src/stdlib.hfa
rf5a51db rcc7bbe6 10 10 // Created On : Thu Jan 28 17:12:35 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Jan 13 21:34:46202213 // Update Count : 6 3612 // Last Modified On : Sat Feb 12 17:22:25 2022 13 // Update Count : 643 14 14 // 15 15 … … 18 18 #include "bits/defs.hfa" // OPTIONAL_THREAD 19 19 #include "bits/align.hfa" // libAlign 20 #include "bits/random.hfa" // prng 20 21 21 22 #include <stdlib.h> // *alloc, strto*, ato* … … 208 209 209 210 forall( TT... | { T * alloc_internal$( void *, T *, size_t, size_t, S_fill(T), TT ); } ) { 210 T * alloc_internal$( void * , T * Realloc, size_t Align, size_t Dim, S_fill(T) Fill, T_resize Resize, TT rest) {211 T * alloc_internal$( void * , T * , size_t Align, size_t Dim, S_fill(T) Fill, T_resize Resize, TT rest) { 211 212 return alloc_internal$( Resize, (T*)0p, Align, Dim, Fill, rest); 212 213 } 213 214 214 T * alloc_internal$( void * Resize, T * , size_t Align, size_t Dim, S_fill(T) Fill, S_realloc(T) Realloc, TT rest) {215 T * alloc_internal$( void * , T * , size_t Align, size_t Dim, S_fill(T) Fill, S_realloc(T) Realloc, TT rest) { 215 216 return alloc_internal$( (void*)0p, Realloc, Align, Dim, Fill, rest); 216 217 } … … 388 389 // Declaration : 389 390 // PRNG sprng = { 1009 } - set starting seed versus random seed 390 // 391 // 391 392 // Interface : 392 393 // set_seed( sprng, 1009 ) - set starting seed for ALL kernel threads versus random seed … … 410 411 411 412 void set_seed( PRNG & prng, uint32_t seed_ ); 412 uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]413 413 static inline { 414 void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); }// random seed415 void ?{}( PRNG & prng, uint32_t seed ) {set_seed( prng, seed ); } // fixed seed414 void ?{}( PRNG & prng ) with( prng ) { callcnt = 0; set_seed( prng, rdtscl() ); } // random seed 415 void ?{}( PRNG & prng, uint32_t seed ) with( prng ) { callcnt = 0; set_seed( prng, seed ); } // fixed seed 416 416 
uint32_t get_seed( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed 417 uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return LCG( state ); } // [0,UINT_MAX] 417 418 uint32_t prng( PRNG & prng, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u) 418 419 uint32_t prng( PRNG & prng, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u] -
longrun_tests/Makefile.am
rf5a51db rcc7bbe6 36 36 -g \ 37 37 -Wall \ 38 -Wextra \ 39 -Werror=return-type 38 40 -Wno-unused-function \ 39 41 -quiet \ -
src/AST/Convert.hpp
rf5a51db rcc7bbe6 20 20 class Declaration; 21 21 namespace ast { 22 structTranslationUnit;22 class TranslationUnit; 23 23 }; 24 24 -
src/AST/Decl.cpp
rf5a51db rcc7bbe6 39 39 if ( uniqueId ) return; // ensure only set once 40 40 uniqueId = ++lastUniqueId; 41 idMap[ uniqueId ] = this; 41 // The extra readonly pointer is causing some reference counting issues. 42 // idMap[ uniqueId ] = this; 42 43 } 43 44 44 45 readonly<Decl> Decl::fromId( UniqueId id ) { 46 // Right now this map is always empty, so don't use it. 47 assert( false ); 45 48 IdMapType::const_iterator i = idMap.find( id ); 46 49 if ( i != idMap.end() ) return i->second; -
src/AST/Fwd.hpp
rf5a51db rcc7bbe6 140 140 typedef unsigned int UniqueId; 141 141 142 structTranslationUnit;142 class TranslationUnit; 143 143 // TODO: Get from the TranslationUnit: 144 144 extern ptr<Type> sizeType; -
src/AST/Pass.hpp
rf5a51db rcc7bbe6 239 239 private: 240 240 241 // Regular nodes 241 __pass::result1<ast::Stmt> call_accept( const ast::Stmt * ); 242 __pass::result1<ast::Expr> call_accept( const ast::Expr * ); 243 244 /// This has a `type` member that is the return type for the 245 /// generic call_accept if the generic call_accept is defined. 242 246 template< typename node_t > 243 struct result1 { 244 bool differs; 245 const node_t * value; 246 247 template< typename object_t, typename super_t, typename field_t > 248 void apply(object_t *, field_t super_t::* field); 249 }; 250 251 result1<ast::Stmt> call_accept( const ast::Stmt * ); 252 result1<ast::Expr> call_accept( const ast::Expr * ); 247 using generic_call_accept_result = 248 std::enable_if< 249 !std::is_base_of<ast::Expr, node_t>::value && 250 !std::is_base_of<ast::Stmt, node_t>::value 251 , __pass::result1< 252 typename std::remove_pointer< typename std::result_of< 253 decltype(&node_t::accept)(node_t*, type&) >::type >::type 254 > 255 >; 253 256 254 257 template< typename node_t > 255 258 auto call_accept( const node_t * node ) 256 -> typename std::enable_if< 257 !std::is_base_of<ast::Expr, node_t>::value && 258 !std::is_base_of<ast::Stmt, node_t>::value 259 , result1< 260 typename std::remove_pointer< decltype( node->accept(*this) ) >::type 261 > 262 >::type; 259 -> typename generic_call_accept_result<node_t>::type; 263 260 264 261 // requests WithStmtsToAdd directly add to this statement, as if it is a compound. 
265 result1<ast::Stmt> call_accept_as_compound(const ast::Stmt *); 266 267 template<typename it_t, template <class...> class container_t> 268 static inline void take_all_delta( it_t it, container_t<ast::ptr<ast::Decl>> * decls, bool * mutated = nullptr ) { 269 if(empty(decls)) return; 270 271 std::transform(decls->begin(), decls->end(), it, [](ast::ptr<ast::Decl>&& decl) -> auto { 272 auto loc = decl->location; 273 auto stmt = new DeclStmt( loc, decl.release() ); 274 return { {stmt}, -1, false }; 275 }); 276 decls->clear(); 277 if(mutated) *mutated = true; 278 } 279 280 // Container of statements 262 __pass::result1<ast::Stmt> call_accept_as_compound(const ast::Stmt *); 263 281 264 template< template <class...> class container_t > 282 struct resultNstmt { 283 struct delta { 284 ptr<Stmt> nval; 285 ssize_t old_idx; 286 bool is_old; 287 288 delta(const Stmt * s, ssize_t i, bool old) : nval{s}, old_idx{i}, is_old{old} {} 289 }; 290 291 bool differs; 292 container_t< delta > values; 293 294 resultNstmt() : differs(false), values{} {} 295 resultNstmt(bool diff, container_t< delta > && vals) : differs(diff), values(vals) {} 296 297 template< typename object_t, typename super_t, typename field_t > 298 void apply(object_t *, field_t super_t::* field); 299 300 template< template <class...> class incontainer_t > 301 void take_all( incontainer_t<ast::ptr<ast::Stmt>> * stmts ) { 302 if(!stmts || stmts->empty()) return; 303 304 std::transform(stmts->begin(), stmts->end(), std::back_inserter( values ), [](ast::ptr<ast::Stmt>& decl) -> delta { 305 return delta( decl.release(), -1, false ); 306 }); 307 stmts->clear(); 308 differs = true; 309 } 310 311 template< template <class...> class incontainer_t > 312 void take_all( incontainer_t<ast::ptr<ast::Decl>> * decls ) { 313 if(!decls || decls->empty()) return; 314 315 std::transform(decls->begin(), decls->end(), std::back_inserter( values ), [](ast::ptr<ast::Decl>& decl) -> auto { 316 auto loc = decl->location; 317 auto stmt = new 
DeclStmt( loc, decl.release() ); 318 return delta( stmt, -1, false ); 319 }); 320 decls->clear(); 321 differs = true; 322 } 323 }; 324 325 template< template <class...> class container_t > 326 resultNstmt<container_t> call_accept( const container_t< ptr<Stmt> > & ); 327 328 // Container of something 265 __pass::resultNstmt<container_t> call_accept( const container_t< ptr<Stmt> > & ); 266 329 267 template< template <class...> class container_t, typename node_t > 330 struct resultN { 331 bool differs; 332 container_t<ptr<node_t>> values; 333 334 template< typename object_t, typename super_t, typename field_t > 335 void apply(object_t *, field_t super_t::* field); 336 }; 337 338 template< template <class...> class container_t, typename node_t > 339 resultN< container_t, node_t > call_accept( const container_t< ptr<node_t> > & container ); 268 __pass::resultN< container_t, node_t > call_accept( const container_t< ptr<node_t> > & container ); 340 269 341 270 public: -
src/AST/Pass.impl.hpp
rf5a51db rcc7bbe6 111 111 } 112 112 113 114 113 //------------------------------ 115 114 /// Check if value was mutated, different for pointers and containers … … 125 124 } 126 125 127 128 template< typename core_t >129 126 template< typename node_t > 130 127 template< typename object_t, typename super_t, typename field_t > 131 void ast::Pass< core_t >::result1< node_t >::apply(object_t * object, field_t super_t::* field) {128 void __pass::result1< node_t >::apply( object_t * object, field_t super_t::* field ) { 132 129 object->*field = value; 133 130 } … … 136 133 template< typename node_t > 137 134 auto ast::Pass< core_t >::call_accept( const node_t * node ) 138 -> typename std::enable_if< 139 !std::is_base_of<ast::Expr, node_t>::value && 140 !std::is_base_of<ast::Stmt, node_t>::value 141 , ast::Pass< core_t >::result1< 142 typename std::remove_pointer< decltype( node->accept(*this) ) >::type 143 > 144 >::type 135 -> typename ast::Pass< core_t >::template generic_call_accept_result<node_t>::type 145 136 { 146 137 __pedantic_pass_assert( __visit_children() ); … … 151 142 152 143 auto nval = node->accept( *this ); 153 ast::Pass< core_t >::result1<144 __pass::result1< 154 145 typename std::remove_pointer< decltype( node->accept(*this) ) >::type 155 146 > res; … … 160 151 161 152 template< typename core_t > 162 ast::Pass< core_t >::result1<ast::Expr> ast::Pass< core_t >::call_accept( const ast::Expr * expr ) {153 __pass::template result1<ast::Expr> ast::Pass< core_t >::call_accept( const ast::Expr * expr ) { 163 154 __pedantic_pass_assert( __visit_children() ); 164 155 __pedantic_pass_assert( expr ); … … 174 165 175 166 template< typename core_t > 176 ast::Pass< core_t >::result1<ast::Stmt> ast::Pass< core_t >::call_accept( const ast::Stmt * stmt ) {167 __pass::template result1<ast::Stmt> ast::Pass< core_t >::call_accept( const ast::Stmt * stmt ) { 177 168 __pedantic_pass_assert( __visit_children() ); 178 169 __pedantic_pass_assert( stmt ); … … 183 174 184 175 
template< typename core_t > 185 ast::Pass< core_t >::result1<ast::Stmt> ast::Pass< core_t >::call_accept_as_compound( const ast::Stmt * stmt ) {176 __pass::template result1<ast::Stmt> ast::Pass< core_t >::call_accept_as_compound( const ast::Stmt * stmt ) { 186 177 __pedantic_pass_assert( __visit_children() ); 187 178 __pedantic_pass_assert( stmt ); … … 233 224 } 234 225 235 template< typename core_t >236 226 template< template <class...> class container_t > 237 227 template< typename object_t, typename super_t, typename field_t > 238 void ast::Pass< core_t >::resultNstmt<container_t>::apply(object_t * object, field_t super_t::* field) {228 void __pass::resultNstmt<container_t>::apply(object_t * object, field_t super_t::* field) { 239 229 auto & container = object->*field; 240 230 __pedantic_pass_assert( container.size() <= values.size() ); … … 243 233 244 234 container_t<ptr<Stmt>> nvals; 245 for (delta & d : values) {246 if ( d.is_old ) {235 for (delta & d : values) { 236 if ( d.is_old ) { 247 237 __pedantic_pass_assert( cit.idx <= d.old_idx ); 248 238 std::advance( cit, d.old_idx - cit.idx ); 249 239 nvals.push_back( std::move( (*cit).val) ); 250 240 } else { 251 nvals.push_back( std::move(d.n val) );241 nvals.push_back( std::move(d.new_val) ); 252 242 } 253 243 } 254 244 255 object->*field = std::move(nvals); 245 container = std::move(nvals); 246 } 247 248 template< template <class...> class container_t > 249 template< template <class...> class incontainer_t > 250 void __pass::resultNstmt< container_t >::take_all( incontainer_t<ptr<Stmt>> * stmts ) { 251 if (!stmts || stmts->empty()) return; 252 253 std::transform(stmts->begin(), stmts->end(), std::back_inserter( values ), 254 [](ast::ptr<ast::Stmt>& stmt) -> delta { 255 return delta( stmt.release(), -1, false ); 256 }); 257 stmts->clear(); 258 differs = true; 259 } 260 261 template< template<class...> class container_t > 262 template< template<class...> class incontainer_t > 263 void __pass::resultNstmt< 
container_t >::take_all( incontainer_t<ptr<Decl>> * decls ) { 264 if (!decls || decls->empty()) return; 265 266 std::transform(decls->begin(), decls->end(), std::back_inserter( values ), 267 [](ast::ptr<ast::Decl>& decl) -> delta { 268 auto loc = decl->location; 269 auto stmt = new DeclStmt( loc, decl.release() ); 270 return delta( stmt, -1, false ); 271 }); 272 decls->clear(); 273 differs = true; 256 274 } 257 275 258 276 template< typename core_t > 259 277 template< template <class...> class container_t > 260 ast::Pass< core_t >::resultNstmt<container_t> ast::Pass< core_t >::call_accept( const container_t< ptr<Stmt> > & statements ) {278 __pass::template resultNstmt<container_t> ast::Pass< core_t >::call_accept( const container_t< ptr<Stmt> > & statements ) { 261 279 __pedantic_pass_assert( __visit_children() ); 262 280 if( statements.empty() ) return {}; … … 285 303 pass_visitor_stats.avg->push(pass_visitor_stats.depth); 286 304 287 resultNstmt<container_t> new_kids;305 __pass::resultNstmt<container_t> new_kids; 288 306 for( auto value : enumerate( statements ) ) { 289 307 try { … … 327 345 } 328 346 329 template< typename core_t >330 347 template< template <class...> class container_t, typename node_t > 331 348 template< typename object_t, typename super_t, typename field_t > 332 void ast::Pass< core_t >::resultN<container_t, node_t>::apply(object_t * object, field_t super_t::* field) {349 void __pass::resultN<container_t, node_t>::apply(object_t * object, field_t super_t::* field) { 333 350 auto & container = object->*field; 334 351 __pedantic_pass_assert( container.size() == values.size() ); … … 346 363 template< typename core_t > 347 364 template< template <class...> class container_t, typename node_t > 348 ast::Pass< core_t >::resultN<container_t, node_t> ast::Pass< core_t >::call_accept( const container_t< ast::ptr<node_t> > & container ) {365 __pass::template resultN<container_t, node_t> ast::Pass< core_t >::call_accept( const container_t< 
ast::ptr<node_t> > & container ) { 349 366 __pedantic_pass_assert( __visit_children() ); 350 367 if( container.empty() ) return {}; … … 378 395 if ( ! errors.isEmpty() ) { throw errors; } 379 396 380 return ast:: Pass< core_t >::resultN<container_t, node_t>{ mutated,new_kids };397 return ast::__pass::resultN<container_t, node_t>{ mutated, new_kids }; 381 398 } 382 399 -
src/AST/Pass.proto.hpp
rf5a51db rcc7bbe6 23 23 class Pass; 24 24 25 structTranslationUnit;25 class TranslationUnit; 26 26 27 27 struct PureVisitor; … … 123 123 static constexpr bool value = std::is_void< ret_t >::value || 124 124 std::is_base_of<const node_t, typename std::remove_pointer<ret_t>::type >::value; 125 }; 126 127 /// The result is a single node. 128 template< typename node_t > 129 struct result1 { 130 bool differs; 131 const node_t * value; 132 133 template< typename object_t, typename super_t, typename field_t > 134 void apply( object_t *, field_t super_t::* field ); 135 }; 136 137 /// The result is a container of statements. 138 template< template<class...> class container_t > 139 struct resultNstmt { 140 /// The delta/change on a single node. 141 struct delta { 142 ptr<Stmt> new_val; 143 ssize_t old_idx; 144 bool is_old; 145 146 delta(const Stmt * s, ssize_t i, bool old) : 147 new_val(s), old_idx(i), is_old(old) {} 148 }; 149 150 bool differs; 151 container_t< delta > values; 152 153 template< typename object_t, typename super_t, typename field_t > 154 void apply( object_t *, field_t super_t::* field ); 155 156 template< template<class...> class incontainer_t > 157 void take_all( incontainer_t<ptr<Stmt>> * stmts ); 158 159 template< template<class...> class incontainer_t > 160 void take_all( incontainer_t<ptr<Decl>> * decls ); 161 }; 162 163 /// The result is a container of nodes. 164 template< template<class...> class container_t, typename node_t > 165 struct resultN { 166 bool differs; 167 container_t<ptr<node_t>> values; 168 169 template< typename object_t, typename super_t, typename field_t > 170 void apply( object_t *, field_t super_t::* field ); 125 171 }; 126 172 -
src/AST/TranslationUnit.hpp
rf5a51db rcc7bbe6 23 23 namespace ast { 24 24 25 struct TranslationUnit { 25 class TranslationUnit { 26 public: 26 27 std::list< ptr< Decl > > decls; 27 28 -
src/AST/module.mk
rf5a51db rcc7bbe6 16 16 17 17 SRC_AST = \ 18 AST/AssertAcyclic.cpp \19 AST/AssertAcyclic.hpp \20 18 AST/Attribute.cpp \ 21 19 AST/Attribute.hpp \ … … 64 62 AST/TypeSubstitution.cpp \ 65 63 AST/TypeSubstitution.hpp \ 64 AST/Util.cpp \ 65 AST/Util.hpp \ 66 66 AST/Visitor.hpp 67 67 -
src/CodeGen/FixNames.h
rf5a51db rcc7bbe6 20 20 class Declaration; 21 21 namespace ast { 22 structTranslationUnit;22 class TranslationUnit; 23 23 } 24 24 -
src/Common/CodeLocation.h
rf5a51db rcc7bbe6 33 33 34 34 CodeLocation( const CodeLocation& rhs ) = default; 35 CodeLocation( CodeLocation&& rhs ) = default; 36 CodeLocation& operator=( const CodeLocation & ) = default; 37 CodeLocation& operator=( CodeLocation && ) = default; 35 38 36 39 bool isSet () const { -
src/Common/CodeLocationTools.hpp
rf5a51db rcc7bbe6 17 17 18 18 namespace ast { 19 structTranslationUnit;19 class TranslationUnit; 20 20 } 21 21 -
src/Common/ResolvProtoDump.hpp
rf5a51db rcc7bbe6 17 17 18 18 namespace ast { 19 structTranslationUnit;19 class TranslationUnit; 20 20 } 21 21 -
src/Concurrency/Waitfor.cc
rf5a51db rcc7bbe6 372 372 ), 373 373 new ListInit( 374 map_range < std::list<Initializer*> > ( clause.target.arguments, [this](Expression * expr ){ 375 Expression * init = new CastExpr( 376 new UntypedExpr( 377 new NameExpr( "get_monitor" ), 378 { expr } 379 ), 380 new PointerType( 381 noQualifiers, 382 new StructInstType( 383 noQualifiers, 384 decl_monitor 385 ) 386 ), 387 false 388 ); 389 390 ResolvExpr::findSingleExpression( init, indexer ); 391 return new SingleInit( init ); 374 map_range < std::list<Initializer*> > ( clause.target.arguments, [](Expression * expr ){ 375 return new SingleInit( expr ); 392 376 }) 393 377 ) -
src/ControlStruct/MLEMutator.cc
rf5a51db rcc7bbe6 66 66 67 67 // break labels have to come after the statement they break out of, so mutate a statement, then if they inform us 68 // through the breakLabel field tha they need a place to jump to on a break statement, add the break label to the68 // through the breakLabel field that they need a place to jump to on a break statement, add the break label to the 69 69 // body of statements 70 70 void MultiLevelExitMutator::fixBlock( std::list< Statement * > &kids, bool caseClause ) { -
src/ControlStruct/MultiLevelExit.cpp
rf5a51db rcc7bbe6 176 176 auto mutStmt = mutate( stmt ); 177 177 // A child statement may set the break label. 178 mutStmt->kids = move( fixBlock( stmt->kids, false ));178 mutStmt->kids = fixBlock( stmt->kids, false ); 179 179 180 180 if ( isLabeled ) { -
src/InitTweak/FixInit.h
rf5a51db rcc7bbe6 21 21 class Declaration; 22 22 namespace ast { 23 structTranslationUnit;23 class TranslationUnit; 24 24 } 25 25 -
src/MakeLibCfa.h
rf5a51db rcc7bbe6 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // MakeLibCfa.h -- 7 // MakeLibCfa.h -- 8 8 // 9 9 // Author : Richard C. Bilson … … 20 20 class Declaration; 21 21 namespace ast { 22 structTranslationUnit;22 class TranslationUnit; 23 23 } 24 24 -
src/Makefile.am
rf5a51db rcc7bbe6 59 59 60 60 $(srcdir)/AST/Type.hpp : BasicTypes-gen.cc 61 ${AM_V_GEN}${CXXCOMPILE} $< -o BasicTypes-gen -Wall -Wextra 61 ${AM_V_GEN}${CXXCOMPILE} $< -o BasicTypes-gen -Wall -Wextra -Werror=return-type 62 62 @./BasicTypes-gen 63 63 @rm BasicTypes-gen … … 71 71 EXTRA_DIST = include/cassert include/optional BasicTypes-gen.cc 72 72 73 AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra - DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O3 -g -std=c++14 $(TCMALLOCFLAG)73 AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -Werror=return-type -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O3 -g -std=c++14 $(TCMALLOCFLAG) 74 74 AM_LDFLAGS = @HOST_FLAGS@ -Xlinker -export-dynamic 75 75 ARFLAGS = cr -
src/Parser/parser.yy
rf5a51db rcc7bbe6 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Tue Feb 1 11:06:13202213 // Update Count : 5 16712 // Last Modified On : Sat Feb 19 09:47:20 2022 13 // Update Count : 5218 14 14 // 15 15 … … 1052 1052 identifier_or_type_name ':' attribute_list_opt statement 1053 1053 { $$ = $4->add_label( $1, $3 ); } 1054 | identifier_or_type_name ':' attribute_list_opt error 1055 { SemanticError( yylloc, "previous label must be associated with a statement (where a declaration is not a statement). Move the label or terminate with a semi-colon." ); $$ = nullptr; } 1054 1056 ; 1055 1057 … … 1086 1088 | statement_list_nodecl statement 1087 1089 { assert( $1 ); $1->set_last( $2 ); $$ = $1; } 1090 | statement_list_nodecl error 1091 { SemanticError( yylloc, "declarations only allowed at the start of the switch body, i.e., after the '{'." ); $$ = nullptr; } 1088 1092 ; 1089 1093 … … 1093 1097 | MUTEX '(' ')' comma_expression ';' 1094 1098 { $$ = new StatementNode( build_mutex( nullptr, new StatementNode( build_expr( $4 ) ) ) ); } 1095 // { SemanticError( yylloc, "Mutex expression is currently unimplemented." ); $$ = nullptr; }1096 1099 ; 1097 1100 … … 1113 1116 $$ = $7 ? new StatementNode( build_compound( (StatementNode *)((new StatementNode( $7 ))->set_last( sw )) ) ) : sw; 1114 1117 } 1118 | SWITCH '(' comma_expression ')' '{' error '}' // CFA 1119 { SemanticError( yylloc, "only declarations can appear before the list of case clauses." ); $$ = nullptr; } 1115 1120 | CHOOSE '(' comma_expression ')' case_clause // CFA 1116 1121 { $$ = new StatementNode( build_switch( false, $3, $5 ) ); } … … 1120 1125 $$ = $7 ? new StatementNode( build_compound( (StatementNode *)((new StatementNode( $7 ))->set_last( sw )) ) ) : sw; 1121 1126 } 1127 | CHOOSE '(' comma_expression ')' '{' error '}' // CFA 1128 { SemanticError( yylloc, "only declarations can appear before the list of case clauses." 
); $$ = nullptr; } 1122 1129 ; 1123 1130 … … 1158 1165 1159 1166 case_label: // CFA 1160 CASE case_value_list ':' { $$ = $2; } 1167 CASE error 1168 { SemanticError( yylloc, "missing case list after case." ); $$ = nullptr; } 1169 | CASE case_value_list ':' { $$ = $2; } 1170 | CASE case_value_list error 1171 { SemanticError( yylloc, "missing colon after case list." ); $$ = nullptr; } 1161 1172 | DEFAULT ':' { $$ = new StatementNode( build_default() ); } 1162 1173 // A semantic check is required to ensure only one default clause per switch/choose statement. 1163 ; 1164 1165 //label_list_opt: 1166 // // empty 1167 // | identifier_or_type_name ':' 1168 // | label_list_opt identifier_or_type_name ':' 1169 // ; 1174 | DEFAULT error 1175 { SemanticError( yylloc, "missing colon after default." ); $$ = nullptr; } 1176 ; 1170 1177 1171 1178 case_label_list: // CFA … … 1197 1204 { $$ = new StatementNode( build_while( $3, maybe_build_compound( $5 ) ) ); } 1198 1205 | WHILE '(' conditional_declaration ')' statement ELSE statement // CFA 1199 // { SemanticError( yylloc, "Loop default block is currently unimplemented." ); $$ = nullptr; }1200 1206 { $$ = new StatementNode( build_while( $3, maybe_build_compound( $5 ), $7 ) ); } 1201 1207 | DO statement WHILE '(' ')' ';' // CFA => do while( 1 ) … … 1204 1210 { $$ = new StatementNode( build_do_while( $5, maybe_build_compound( $2 ) ) ); } 1205 1211 | DO statement WHILE '(' comma_expression ')' ELSE statement // CFA 1206 // { SemanticError( yylloc, "Loop default block is currently unimplemented." ); $$ = nullptr; }1207 1212 { $$ = new StatementNode( build_do_while( $5, maybe_build_compound( $2 ), $8 ) ); } 1208 1213 | FOR '(' ')' statement // CFA => for ( ;; ) … … 1211 1216 { $$ = new StatementNode( build_for( $3, maybe_build_compound( $5 ) ) ); } 1212 1217 | FOR '(' for_control_expression_list ')' statement ELSE statement // CFA 1213 // { SemanticError( yylloc, "Loop default block is currently unimplemented." 
); $$ = nullptr; }1214 1218 { $$ = new StatementNode( build_for( $3, maybe_build_compound( $5 ), $7 ) ); } 1215 1219 ; … … 2729 2733 | ASM '(' string_literal ')' ';' // GCC, global assembler statement 2730 2734 { $$ = DeclarationNode::newAsmStmt( new StatementNode( build_asm( false, $3, 0 ) ) ); } 2735 | EXTERN STRINGliteral 2736 { 2737 linkageStack.push( linkage ); // handle nested extern "C"/"Cforall" 2738 linkage = LinkageSpec::update( yylloc, linkage, $2 ); 2739 } 2740 up external_definition down 2741 { 2742 linkage = linkageStack.top(); 2743 linkageStack.pop(); 2744 $$ = $5; 2745 } 2731 2746 | EXTERN STRINGliteral // C++-style linkage specifier 2732 2747 { -
src/ResolvExpr/Resolver.cc
rf5a51db rcc7bbe6 1112 1112 } 1113 1113 1114 1114 1115 1115 } // anonymous namespace 1116 1116 /// Establish post-resolver invariants for expressions … … 1158 1158 1159 1159 namespace { 1160 1160 1161 1161 1162 1162 /// resolve `untyped` to the expression whose candidate satisfies `pred` with the … … 1905 1905 1906 1906 clause2.target.args.reserve( clause.target.args.size() ); 1907 const ast::StructDecl * decl_monitor = symtab.lookupStruct( "monitor$" ); 1907 1908 for ( auto arg : argsCandidates.front() ) { 1908 clause2.target.args.emplace_back( std::move( arg->expr ) ); 1909 const auto & loc = stmt->location; 1910 1911 ast::Expr * init = new ast::CastExpr( loc, 1912 new ast::UntypedExpr( loc, 1913 new ast::NameExpr( loc, "get_monitor" ), 1914 { arg->expr } 1915 ), 1916 new ast::PointerType( 1917 new ast::StructInstType( 1918 decl_monitor 1919 ) 1920 ) 1921 ); 1922 1923 clause2.target.args.emplace_back( findSingleExpression( init, symtab ) ); 1909 1924 } 1910 1925 … … 2077 2092 if (auto functionDecl = decl.as<ast::FunctionDecl>()) { 2078 2093 // xxx - can intrinsic gen ever fail? 2079 if (functionDecl->linkage == ast::Linkage::AutoGen) { 2094 if (functionDecl->linkage == ast::Linkage::AutoGen) { 2080 2095 auto mutDecl = mutate(functionDecl); 2081 2096 mutDecl->isDeleted = true; -
src/ResolvExpr/Resolver.h
rf5a51db rcc7bbe6 35 35 class StmtExpr; 36 36 class SymbolTable; 37 structTranslationUnit;37 class TranslationUnit; 38 38 class Type; 39 39 class TypeEnvironment; … … 72 72 ast::ptr< ast::Init > resolveCtorInit( 73 73 const ast::ConstructorInit * ctorInit, const ast::SymbolTable & symtab ); 74 /// Resolves a statement expression 74 /// Resolves a statement expression 75 75 const ast::Expr * resolveStmtExpr( 76 76 const ast::StmtExpr * stmtExpr, const ast::SymbolTable & symtab ); -
src/SymTab/Validate.cc
rf5a51db rcc7bbe6 194 194 }; 195 195 196 // These structs are the sub-sub-passes of ForallPointerDecay_old. 197 198 struct TraitExpander_old final { 199 void previsit( FunctionType * ); 200 void previsit( StructDecl * ); 201 void previsit( UnionDecl * ); 202 }; 203 204 struct AssertionFixer_old final { 205 void previsit( FunctionType * ); 206 void previsit( StructDecl * ); 207 void previsit( UnionDecl * ); 208 }; 209 210 struct CheckOperatorTypes_old final { 211 void previsit( ObjectDecl * ); 212 }; 213 214 struct FixUniqueIds_old final { 215 void previsit( DeclarationWithType * ); 216 }; 217 196 218 struct ReturnChecker : public WithGuards { 197 219 /// Checks that return statements return nothing if their return type is void … … 386 408 387 409 void validate_D( std::list< Declaration * > & translationUnit ) { 388 PassVisitor<ForallPointerDecay_old> fpd;389 410 { 390 411 Stats::Heap::newPass("validate-D"); … … 394 415 }); 395 416 Stats::Time::TimeBlock("Forall Pointer Decay", [&]() { 396 acceptAll( translationUnit, fpd); // must happen before autogenerateRoutines, after Concurrency::applyKeywords because uniqueIds must be set on declaration before resolution417 decayForallPointers( translationUnit ); // must happen before autogenerateRoutines, after Concurrency::applyKeywords because uniqueIds must be set on declaration before resolution 397 418 }); 398 419 Stats::Time::TimeBlock("Hoist Control Declarations", [&]() { … … 454 475 455 476 void decayForallPointers( std::list< Declaration * > & translationUnit ) { 456 PassVisitor<ForallPointerDecay_old> fpd; 457 acceptAll( translationUnit, fpd ); 477 PassVisitor<TraitExpander_old> te; 478 acceptAll( translationUnit, te ); 479 PassVisitor<AssertionFixer_old> af; 480 acceptAll( translationUnit, af ); 481 PassVisitor<CheckOperatorTypes_old> cot; 482 acceptAll( translationUnit, cot ); 483 PassVisitor<FixUniqueIds_old> fui; 484 acceptAll( translationUnit, fui ); 485 } 486 487 void decayForallPointersA( std::list< 
Declaration * > & translationUnit ) { 488 PassVisitor<TraitExpander_old> te; 489 acceptAll( translationUnit, te ); 490 } 491 void decayForallPointersB( std::list< Declaration * > & translationUnit ) { 492 PassVisitor<AssertionFixer_old> af; 493 acceptAll( translationUnit, af ); 494 } 495 void decayForallPointersC( std::list< Declaration * > & translationUnit ) { 496 PassVisitor<CheckOperatorTypes_old> cot; 497 acceptAll( translationUnit, cot ); 498 } 499 void decayForallPointersD( std::list< Declaration * > & translationUnit ) { 500 PassVisitor<FixUniqueIds_old> fui; 501 acceptAll( translationUnit, fui ); 458 502 } 459 503 … … 470 514 PassVisitor<EnumAndPointerDecay_old> epc; 471 515 PassVisitor<LinkReferenceToTypes_old> lrt( indexer ); 472 PassVisitor<ForallPointerDecay_old> fpd; 516 PassVisitor<TraitExpander_old> te; 517 PassVisitor<AssertionFixer_old> af; 518 PassVisitor<CheckOperatorTypes_old> cot; 519 PassVisitor<FixUniqueIds_old> fui; 473 520 type->accept( epc ); 474 521 type->accept( lrt ); 475 type->accept( fpd ); 522 type->accept( te ); 523 type->accept( af ); 524 type->accept( cot ); 525 type->accept( fui ); 476 526 } 477 527 … … 972 1022 } 973 1023 1024 /// Replace all traits in assertion lists with their assertions. 
1025 void expandTraits( std::list< TypeDecl * > & forall ) { 1026 for ( TypeDecl * type : forall ) { 1027 std::list< DeclarationWithType * > asserts; 1028 asserts.splice( asserts.end(), type->assertions ); 1029 // expand trait instances into their members 1030 for ( DeclarationWithType * assertion : asserts ) { 1031 if ( TraitInstType * traitInst = dynamic_cast< TraitInstType * >( assertion->get_type() ) ) { 1032 // expand trait instance into all of its members 1033 expandAssertions( traitInst, back_inserter( type->assertions ) ); 1034 delete traitInst; 1035 } else { 1036 // pass other assertions through 1037 type->assertions.push_back( assertion ); 1038 } // if 1039 } // for 1040 } 1041 } 1042 1043 /// Fix each function in the assertion list and check for invalid void type. 1044 void fixAssertions( 1045 std::list< TypeDecl * > & forall, BaseSyntaxNode * node ) { 1046 for ( TypeDecl * type : forall ) { 1047 for ( DeclarationWithType *& assertion : type->assertions ) { 1048 bool isVoid = fixFunction( assertion ); 1049 if ( isVoid ) { 1050 SemanticError( node, "invalid type void in assertion of function " ); 1051 } // if 1052 } // for 1053 } 1054 } 1055 974 1056 void ForallPointerDecay_old::previsit( ObjectDecl * object ) { 975 1057 // ensure that operator names only apply to functions or function pointers … … 994 1076 void ForallPointerDecay_old::previsit( UnionDecl * aggrDecl ) { 995 1077 forallFixer( aggrDecl->parameters, aggrDecl ); 1078 } 1079 1080 void TraitExpander_old::previsit( FunctionType * ftype ) { 1081 expandTraits( ftype->forall ); 1082 } 1083 1084 void TraitExpander_old::previsit( StructDecl * aggrDecl ) { 1085 expandTraits( aggrDecl->parameters ); 1086 } 1087 1088 void TraitExpander_old::previsit( UnionDecl * aggrDecl ) { 1089 expandTraits( aggrDecl->parameters ); 1090 } 1091 1092 void AssertionFixer_old::previsit( FunctionType * ftype ) { 1093 fixAssertions( ftype->forall, ftype ); 1094 } 1095 1096 void AssertionFixer_old::previsit( StructDecl * 
aggrDecl ) { 1097 fixAssertions( aggrDecl->parameters, aggrDecl ); 1098 } 1099 1100 void AssertionFixer_old::previsit( UnionDecl * aggrDecl ) { 1101 fixAssertions( aggrDecl->parameters, aggrDecl ); 1102 } 1103 1104 void CheckOperatorTypes_old::previsit( ObjectDecl * object ) { 1105 // ensure that operator names only apply to functions or function pointers 1106 if ( CodeGen::isOperator( object->name ) && ! dynamic_cast< FunctionType * >( object->type->stripDeclarator() ) ) { 1107 SemanticError( object->location, toCString( "operator ", object->name.c_str(), " is not a function or function pointer." ) ); 1108 } 1109 } 1110 1111 void FixUniqueIds_old::previsit( DeclarationWithType * decl ) { 1112 decl->fixUniqueId(); 996 1113 } 997 1114 -
src/SymTab/Validate.h
rf5a51db rcc7bbe6 43 43 void validate_F( std::list< Declaration * > &translationUnit ); 44 44 void decayForallPointers( std::list< Declaration * > & translationUnit ); 45 void decayForallPointersA( std::list< Declaration * > & translationUnit ); 46 void decayForallPointersB( std::list< Declaration * > & translationUnit ); 47 void decayForallPointersC( std::list< Declaration * > & translationUnit ); 48 void decayForallPointersD( std::list< Declaration * > & translationUnit ); 45 49 46 50 const ast::Type * validateType( -
src/Validate/module.mk
rf5a51db rcc7bbe6 20 20 Validate/CompoundLiteral.cpp \ 21 21 Validate/CompoundLiteral.hpp \ 22 Validate/ForallPointerDecay.cpp \ 23 Validate/ForallPointerDecay.hpp \ 22 24 Validate/HandleAttributes.cc \ 23 25 Validate/HandleAttributes.h \ -
src/main.cc
rf5a51db rcc7bbe6 32 32 33 33 #include "AST/Convert.hpp" 34 #include "AST/Print.hpp" 34 35 #include "CompilationState.h" 35 36 #include "../config.h" // for CFA_LIBDIR … … 76 77 #include "Validate/Autogen.hpp" // for autogenerateRoutines 77 78 #include "Validate/FindSpecialDecls.h" // for findGlobalDecls 79 #include "Validate/ForallPointerDecay.hpp" // for decayForallPointers 78 80 #include "Validate/CompoundLiteral.hpp" // for handleCompoundLiterals 79 81 #include "Validate/InitializerLength.hpp" // for setLengthFromInitializer … … 331 333 332 334 if( useNewAST ) { 333 PASS( "Apply Concurrent Keywords", Concurrency::applyKeywords( translationUnit ) ); 334 PASS( "Forall Pointer Decay", SymTab::decayForallPointers( translationUnit ) ); 335 PASS( "Implement Concurrent Keywords", Concurrency::applyKeywords( translationUnit ) ); 336 //PASS( "Forall Pointer Decay - A", SymTab::decayForallPointersA( translationUnit ) ); 337 //PASS( "Forall Pointer Decay - B", SymTab::decayForallPointersB( translationUnit ) ); 338 //PASS( "Forall Pointer Decay - C", SymTab::decayForallPointersC( translationUnit ) ); 339 //PASS( "Forall Pointer Decay - D", SymTab::decayForallPointersD( translationUnit ) ); 335 340 CodeTools::fillLocations( translationUnit ); 336 341 … … 342 347 343 348 forceFillCodeLocations( transUnit ); 349 350 // Must be after implement concurrent keywords; because uniqueIds 351 // must be set on declaration before resolution. 352 // Must happen before autogen routines are added. 353 PASS( "Forall Pointer Decay", Validate::decayForallPointers( transUnit ) ); 344 354 345 355 // Must happen before autogen routines are added. -
tests/.expect/declarationSpecifier.arm64.txt
rf5a51db rcc7bbe6 1132 1132 char **_X13cfa_args_argvPPc_1; 1133 1133 char **_X13cfa_args_envpPPc_1; 1134 signed int _X17cfa_main_returnedi_1 = ((signed int )0);1134 __attribute__ ((weak)) extern signed int _X17cfa_main_returnedi_1; 1135 1135 signed int main(signed int _X4argci_1, char **_X4argvPPc_1, char **_X4envpPPc_1){ 1136 1136 __attribute__ ((unused)) signed int _X12_retval_maini_1; … … 1149 1149 signed int _tmp_cp_ret6; 1150 1150 signed int _X3reti_2 = (((void)(_tmp_cp_ret6=invoke_main(_X4argci_1, _X4argvPPc_1, _X4envpPPc_1))) , _tmp_cp_ret6); 1151 { 1152 ((void)(_X17cfa_main_returnedi_1=((signed int )1))); 1151 if ( ((&_X17cfa_main_returnedi_1)!=((signed int *)0)) ) { 1152 { 1153 ((void)(_X17cfa_main_returnedi_1=((signed int )1))); 1154 } 1155 1153 1156 } 1154 1157 -
tests/.expect/gccExtensions.arm64.txt
rf5a51db rcc7bbe6 324 324 char **_X13cfa_args_argvPPc_1; 325 325 char **_X13cfa_args_envpPPc_1; 326 signed int _X17cfa_main_returnedi_1 = ((signed int )0);326 __attribute__ ((weak)) extern signed int _X17cfa_main_returnedi_1; 327 327 signed int main(signed int _X4argci_1, char **_X4argvPPc_1, char **_X4envpPPc_1){ 328 328 __attribute__ ((unused)) signed int _X12_retval_maini_1; … … 341 341 signed int _tmp_cp_ret6; 342 342 signed int _X3reti_2 = (((void)(_tmp_cp_ret6=invoke_main(_X4argci_1, _X4argvPPc_1, _X4envpPPc_1))) , _tmp_cp_ret6); 343 { 344 ((void)(_X17cfa_main_returnedi_1=((signed int )1))); 343 if ( ((&_X17cfa_main_returnedi_1)!=((signed int *)0)) ) { 344 { 345 ((void)(_X17cfa_main_returnedi_1=((signed int )1))); 346 } 347 345 348 } 346 349 -
tests/.expect/random.arm64.txt
rf5a51db rcc7bbe6 1 1 õ 2 2 = 3 V 3 K 4 4 -911259971 5 5 6 6 -4 6 11 7 7 1232105397 8 8 0 9 1 89 11 10 10 -914096085 11 11 1 12 15 12 20 13 13 2077092859 14 14 1 15 1 115 12 16 16 0.677254 17 17 0.678106775246139 -
tests/Makefile.am
rf5a51db rcc7bbe6 43 43 -g \ 44 44 -Wall \ 45 -Werror=return-type \ 45 46 -Wno-unused-function \ 46 47 -Wno-psabi \ -
tests/meta/dumpable.cfa
rf5a51db rcc7bbe6 72 72 } 73 73 74 if((buf.f_bsize * buf.f_bavail) < 536870912) { 75 serr | "Available diskspace is less than ~500Mb: " | (buf.f_bsize * buf.f_bavail); 74 uint64_t avail = buf.f_bavail; 75 avail *= buf.f_bsize; 76 if(avail < 536870912_l64u) { 77 serr | "Available diskspace is less than ~500Mb: " | avail; 76 78 } 77 79 -
tests/unified_locking/mutex_test.hfa
rf5a51db rcc7bbe6 54 54 } 55 55 56 inttest() {56 void test() { 57 57 uint32_t sum = -32; 58 58 mo.sum = -32; -
tools/Makefile.am
rf5a51db rcc7bbe6 19 19 20 20 noinst_PROGRAMS = busy catchsig repeat watchdog 21 AM_CFLAGS = -Wall -Wextra - O2 -g21 AM_CFLAGS = -Wall -Wextra -Werror=return-type -O2 -g 22 22 busy_LDFLAGS = -pthread 23 23 -
tools/prettyprinter/Makefile.am
rf5a51db rcc7bbe6 32 32 nodist_pretty_SOURCES = ${SRC} 33 33 pretty_LDADD = ${LEXLIB} -ldl # yywrap 34 pretty_CXXFLAGS = -Wno-deprecated -Wall - DYY_NO_INPUT -O2 -g -std=c++1434 pretty_CXXFLAGS = -Wno-deprecated -Wall -Wextra -Werror=return-type -DYY_NO_INPUT -O2 -g -std=c++14 35 35 36 36 MOSTLYCLEANFILES = parser.output
Note:
See TracChangeset
for help on using the changeset viewer.