Changes in / [bd72f517:7d02d35]


Ignore:
Files:
20 added
26 deleted
26 edited

Legend:

Unmodified
Added
Removed
  • Jenkins/FullBuild

    rbd72f517 r7d02d35  
    1818
    1919                                parallel (
    20                                         //gcc_9_x86_new: { trigger_build( 'gcc-9',   'x86', false ) },
    21                                         //gcc_10_x86_new: { trigger_build( 'gcc-10',  'x86', false ) },
    22                                         gcc_7_x64_new: { trigger_build( 'gcc-7',   'x64', false ) },
    23                                         gcc_8_x64_new: { trigger_build( 'gcc-8',   'x64', false ) },
    24                                         gcc_9_x64_new: { trigger_build( 'gcc-9',   'x64', false ) },
     20                                        gcc_07_x86_new: { trigger_build( 'gcc-9',   'x86', false ) },
     21                                        gcc_08_x86_new: { trigger_build( 'gcc-10',  'x86', false ) },
     22                                        gcc_07_x64_new: { trigger_build( 'gcc-7',   'x64', false ) },
     23                                        gcc_08_x64_new: { trigger_build( 'gcc-8',   'x64', false ) },
     24                                        gcc_09_x64_new: { trigger_build( 'gcc-9',   'x64', false ) },
    2525                                        gcc_10_x64_new: { trigger_build( 'gcc-10',  'x64', false ) },
    2626                                        gcc_11_x64_new: { trigger_build( 'gcc-11',  'x64', false ) },
    27                                         //gcc_10_arm64_new: { trigger_build( 'gcc-10', 'arm64', false ) },
    28                                         //gcc_11_arm64_new: { trigger_build( 'gcc-11', 'arm64', false ) },
    29                                         //gcc_12_arm64_new: { trigger_build( 'gcc-12', 'arm64', false ) },
     27                                        gcc_10_arm64_new: { trigger_build( 'gcc-10', 'arm64', false ) },
     28                                        gcc_11_arm64_new: { trigger_build( 'gcc-11', 'arm64', false ) },
     29                                        gcc_12_arm64_new: { trigger_build( 'gcc-12', 'arm64', false ) },
    3030                                        clang_x64_new:  { trigger_build( 'clang',   'x64', true  ) },
    3131                                )
  • Jenkinsfile

    rbd72f517 r7d02d35  
    290290        BuildSettings(java.util.Collections$UnmodifiableMap param, String branch) {
    291291                switch( param.Compiler ) {
    292                         // case 'gcc-4.9':
    293                         //      this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9', '-flto=auto')
     292                        case 'gcc-11':
     293                                this.Compiler = new CC_Desc('gcc-11', 'g++-11', 'gcc-11', '-flto=auto')
     294                        break
     295                        case 'gcc-10':
     296                                this.Compiler = new CC_Desc('gcc-10', 'g++-10', 'gcc-10', '-flto=auto')
     297                        break
     298                        case 'gcc-9':
     299                                this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto')
     300                        break
     301                        case 'gcc-8':
     302                                this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8', '-flto=auto')
     303                        break
     304                        case 'gcc-7':
     305                                this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7', '-flto=auto')
     306                        break
     307                        // case 'gcc-6':
     308                        //      this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6', '-flto=auto')
    294309                        // break
    295310                        // case 'gcc-5':
    296311                        //      this.Compiler = new CC_Desc('gcc-5', 'g++-5', 'gcc-5', '-flto=auto')
    297312                        // break
    298                         // case 'gcc-6':
    299                         //      this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6', '-flto=auto')
     313                        // case 'gcc-4.9':
     314                        //      this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9', '-flto=auto')
    300315                        // break
    301                         case 'gcc-7':
    302                                 this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7', '-flto=auto')
    303                         break
    304                         case 'gcc-8':
    305                                 this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8', '-flto=auto')
    306                         break
    307                         case 'gcc-9':
    308                                 this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto')
    309                         break
    310                         case 'gcc-10':
    311                                 this.Compiler = new CC_Desc('gcc-10', 'g++-10', 'gcc-10', '-flto=auto')
    312                         break
    313                         case 'gcc-11':
    314                                 this.Compiler = new CC_Desc('gcc-11', 'g++-11', 'gcc-11', '-flto=auto')
    315                         break
    316                         case 'gcc-12':
    317                                 this.Compiler = new CC_Desc('gcc-12', 'g++-12', 'gcc-12', '-flto=auto')
    318                         break
    319316                        case 'clang':
    320317                                this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-10', '-flto=thin -flto-jobs=0')
     
    328325                                this.Architecture = new Arch_Desc('x64', '--host=x86_64', 'x64')
    329326                        break
    330                         //case 'x86':
    331                         //      this.Architecture = new Arch_Desc('x86', '--host=i386', 'x86')
    332                         //break
    333                         // case 'arm64':
    334                         //      this.Architecture = new Arch_Desc('arm64', '--host=aarch64', 'arm64')
    335                         // break
     327                        case 'x86':
     328                                this.Architecture = new Arch_Desc('x86', '--host=i386', 'x86')
     329                        break
     330                        case 'arm64':
     331                                this.Architecture = new Arch_Desc('arm64', '--host=aarch64', 'arm64')
     332                        break
    336333                        default :
    337334                                error "Unhandled architecture : ${arch}"
     
    377374                                        description: 'Which compiler to use',                   \
    378375                                        name: 'Compiler',                                       \
    379                                         choices: 'gcc-7\ngcc-8\ngcc-9\ngcc-10\ngcc-11\ngcc-12\nclang',  \
     376                                        choices: 'gcc-11\ngcc-10\ngcc-9\ngcc-8\ngcc-7\nclang',  \
    380377                                        defaultValue: 'gcc-9',                                  \
    381378                                ],                                                              \
     
    413410                        ],
    414411                ]])
    415                                         // choices: 'gcc-4.9\ngcc-5\ngcc-6\ngcc-7\ngcc-8\ngcc-9\ngcc-10\ngcc-11\nclang',
     412                                        // choices: 'gcc-11\ngcc-10\ngcc-9\ngcc-8\ngcc-7\ngcc-6\ngcc-5\ngcc-4.9\nclang',
    416413                                        // defaultValue: 'gcc-8',
    417414
  • doc/LaTeXmacros/common.sty

    rbd72f517 r7d02d35  
    1111%% Created On       : Sat Apr  9 10:06:17 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Mon May  5 21:37:13 2025
    14 %% Update Count     : 666
     13%% Last Modified On : Wed Mar 19 21:22:28 2025
     14%% Update Count     : 664
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    16 
    17 % This latex idiom for checking empty optional parameters
    18 %    \ifx#1\@empty\else\if\relax\detokenize{#1}\relax
    19 % first checks if there is no optional parameter specified: \name{...} versus \name[]{...}
    20 % second checks if the optional parameter is specified but empty: \name[]{...} versus \name[...]{...}
    2116
    2217\setlength{\textheight}{9in}
     
    147142\newcommand{\Index}{\@ifstar\@sIndex\@Index}
    148143% inline text and as-in index: \Index[as-is index text]{inline text}
    149 \newcommand{\@Index}[2][\@empty]{\lowercase{\def\temp{#2}}#2\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{\temp}\else\index{#1@{\protect#2}}\fi\fi}
     144\newcommand{\@Index}[2][\@empty]{\lowercase{\def\temp{#2}}#2\ifx#1\@empty\index{\temp}\else\index{#1@{\protect#2}}\fi}
    150145% inline text but index with different as-is text: \Index[index text]{inline text}
    151 \newcommand{\@sIndex}[2][\@empty]{#2\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{#2}\else\index{#1@{\protect#2}}\fi\fi}
     146\newcommand{\@sIndex}[2][\@empty]{#2\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi}
    152147
    153148% inline text and code index (cannot use ©)
     
    161156\newcommand{\newtermFontInline}{\emph}
    162157\newcommand{\newterm}{\protect\@ifstar\@snewterm\@newterm}
    163 \newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{#2}\else\index{#1@{\protect#2}}\fi\fi}
    164 \newcommand{\@newterm}[2][\@empty]{\lowercase{\def\temp{#2}}{\newtermFontInline{#2}}\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{\temp}\else\index{#1@{\protect#2}}\fi\fi}
     158\newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi}
     159\newcommand{\@newterm}[2][\@empty]{\lowercase{\def\temp{#2}}{\newtermFontInline{#2}}\ifx#1\@empty\index{\temp}\else\index{#1@{\protect#2}}\fi}
    165160
    166161% \snake{<identifier>}
     
    259254\renewcommand{\reftextfaraway}[1]{\unskip, p.~\pageref{#1}}
    260255\renewcommand{\reftextpagerange}[2]{\unskip, pp.~\pageref{#1}--\pageref{#2}}
    261 \newcommand{\VRef}[2][Section]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\vref{#2}}
    262 \newcommand{\VRefrange}[3][Sections]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\vrefrange{#2}{#3}}
    263 \newcommand{\VPageref}[2][page]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\pageref{#2}}
    264 \newcommand{\VPagerefrange}[3][pages]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\pageref{#2}{#3}}
     256\newcommand{\VRef}[2][Section]{\ifx#1\@empty\else{#1}\nobreakspace\fi\vref{#2}}
     257\newcommand{\VRefrange}[3][Sections]{\ifx#1\@empty\else{#1}\nobreakspace\fi\vrefrange{#2}{#3}}
     258\newcommand{\VPageref}[2][page]{\ifx#1\@empty\else{#1}\nobreakspace\fi\pageref{#2}}
     259\newcommand{\VPagerefrange}[3][pages]{\ifx#1\@empty\else{#1}\nobreakspace\fi\pageref{#2}{#3}}
    265260
    266261\let\Oldthebibliography\thebibliography
     
    287282\setlength{\columnposn}{\gcolumnposn}
    288283\newcommand{\setgcolumn}[1]{\global\gcolumnposn=#1\global\columnposn=\gcolumnposn}
    289 \newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstCommentStyle{#2}}}
    290 \newcommand{\CD}[2][\@empty]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstBasicStyle{#2}}}
     284\newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstCommentStyle{#2}}}
     285\newcommand{\CD}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstBasicStyle{#2}}}
    291286\newcommand{\CRT}{\global\columnposn=\gcolumnposn}
    292287
  • doc/LaTeXmacros/common.tex

    rbd72f517 r7d02d35  
    1111%% Created On       : Sat Apr  9 10:06:17 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Mon May  5 21:34:53 2025
    14 %% Update Count     : 709
     13%% Last Modified On : Wed Mar 19 07:37:17 2025
     14%% Update Count     : 688
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    16 
    17 % This latex idiom for checking empty optional parameters
    18 %    \ifx#1\@empty\else\if\relax\detokenize{#1}\relax
    19 % first checks if there is no optional parameter specified: \name{...} versus \name[]{...}
    20 % second checks if the optional parameter is specified but empty: \name[]{...} versus \name[...]{...}
    2116
    2217\setlength{\textheight}{9in}
     
    148143\newcommand{\Index}{\@ifstar\@sIndex\@Index}
    149144% inline text and as-in index: \Index[as-is index text]{inline text}
    150 \newcommand{\@Index}[2][\@empty]{\lowercase{\def\temp{#2}}#2\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{\temp}\else\index{#1@{\protect#2}}\fi\fi}
     145\newcommand{\@Index}[2][\@empty]{\lowercase{\def\temp{#2}}#2\ifx#1\@empty\index{\temp}\else\index{#1@{\protect#2}}\fi}
    151146% inline text but index with different as-is text: \Index[index text]{inline text}
    152 \newcommand{\@sIndex}[2][\@empty]{#2\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{#2}\else\index{#1@{\protect#2}}\fi\fi}
     147\newcommand{\@sIndex}[2][\@empty]{#2\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi}
    153148
    154149% inline text and code index (cannot use ©)
     
    162157\newcommand{\newtermFontInline}{\emph}
    163158\newcommand{\newterm}{\protect\@ifstar\@snewterm\@newterm}
    164 \newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{#2}\else\index{#1@{\protect#2}}\fi\fi}
    165 \newcommand{\@newterm}[2][\@empty]{\lowercase{\def\temp{#2}}{\newtermFontInline{#2}}\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\index{\temp}\else\index{#1@{\protect#2}}\fi\fi}
     159\newcommand{\@snewterm}[2][\@empty]{{\newtermFontInline{#2}}\ifx#1\@empty\index{#2}\else\index{#1@{\protect#2}}\fi}
     160\newcommand{\@newterm}[2][\@empty]{\lowercase{\def\temp{#2}}{\newtermFontInline{#2}}\ifx#1\@empty\index{\temp}\else\index{#1@{\protect#2}}\fi}
    166161
    167162% \snake{<identifier>}
     
    261256\renewcommand{\reftextfaraway}[1]{\unskip, p.~\pageref{#1}}
    262257\renewcommand{\reftextpagerange}[2]{\unskip, pp.~\pageref{#1}--\pageref{#2}}
    263 \newcommand{\VRef}[2][Section]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\vref{#2}}
    264 \newcommand{\VRefrange}[3][Sections]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\vrefrange{#2}{#3}}
    265 \newcommand{\VPageref}[2][page]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\pageref{#2}}
    266 \newcommand{\VPagerefrange}[3][pages]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else{#1}\nobreakspace\fi\fi\pageref{#2}{#3}}
     258\newcommand{\VRef}[2][Section]{\ifx#1\@empty\else{#1}\nobreakspace\fi\vref{#2}}
     259\newcommand{\VRefrange}[3][Sections]{\ifx#1\@empty\else{#1}\nobreakspace\fi\vrefrange{#2}{#3}}
     260\newcommand{\VPageref}[2][page]{\ifx#1\@empty\else{#1}\nobreakspace\fi\pageref{#2}}
     261\newcommand{\VPagerefrange}[3][pages]{\ifx#1\@empty\else{#1}\nobreakspace\fi\pageref{#2}{#3}}
    267262
    268263\let\Oldthebibliography\thebibliography
     
    290285\setlength{\columnposn}{\gcolumnposn}
    291286\newcommand{\setgcolumn}[1]{\global\gcolumnposn=#1\global\columnposn=\gcolumnposn}
    292 \newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstCommentStyle{#2}}}
    293 \newcommand{\CD}[2][\@empty]{\ifx#1\@empty\else\if\relax\detokenize{#1}\relax\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstBasicStyle{#2}}}
     287\newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstCommentStyle{#2}}}
     288\newcommand{\CD}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\LstBasicStyle{#2}}}
    294289\newcommand{\CRT}{\global\columnposn=\gcolumnposn}
    295290
  • doc/bibliography/pl.bib

    rbd72f517 r7d02d35  
    52145214% M
    52155215
    5216 @misc{M4,
    5217     keywords    = {macros, preprocessor},
    5218     contributer = {pabuhr@plg},
    5219     author      = {Brian W. Kernighan and Dennis M. Ritchie},
    5220     title       = {The M4 Macro Processor},
    5221     year        = 1977,
    5222     howpublished= {\url{https://wolfram.schneider.org/bsd/7thEdManVol2/m4/m4.pdf}},
    5223     optnote     = {Accessed: 2016-09},
    5224 }
    5225 
    52265216@book{M68K,
    52275217    keywords    = {M680XX, Motorola},
     
    90919081}
    90929082
    9093 
    9094 @inproceedings{valgind,
    9095     keywords    = {Memcheck, Valgrind, dynamic binary analysis, dynamic binary instrumentation, shadow values},
    9096     contributer = {pabuhr@plg},
    9097     author      = {Nethercote, Nicholas and Seward, Julian},
    9098     title       = {{V}algrind: a framework for heavyweight dynamic binary instrumentation},
    9099     publisher   = {Association for Computing Machinery},
    9100     address     = {New York, NY, USA},
    9101     booktitle   = {Proceedings of the 28th ACM SIGPLAN Conference on Programming Language Design and Implementation},
    9102     pages       = {89-100},
    9103     location    = {San Diego, California, USA},
    9104     series      = {PLDI'07}
    9105     year        = {2007},
    9106 }
    9107 
    91089083@misc{Vala,
    91099084    keywords    = {GObject, Vala},
  • doc/theses/fangren_yu_MMath/background.tex

    rbd72f517 r7d02d35  
    2121Furthermore, Cyclone's polymorphic functions and types are restricted to abstraction over types with the same layout and calling convention as @void *@, \ie only pointer types and @int@.
    2222In \CFA terms, all Cyclone polymorphism must be dtype-static.
    23 While the Cyclone design provides the efficiency benefits discussed in~\VRef{s:GenericImplementation} for dtype-static polymorphism, it is more restrictive than \CFA's general model.
     23While the Cyclone design provides the efficiency benefits discussed in Section~\ref{sec:generic-apps} for dtype-static polymorphism, it is more restrictive than \CFA's general model.
    2424Smith and Volpano~\cite{Smith98} present Polymorphic C, an ML dialect with polymorphic functions, C-like syntax, and pointer types;
    2525it lacks many of C's features, most notably structure types, and hence, is not a practical C replacement.
  • doc/theses/fangren_yu_MMath/features.tex

    rbd72f517 r7d02d35  
    1313Here, manipulating the pointer address is the primary operation, while dereferencing the pointer to its value is the secondary operation.
    1414For example, \emph{within} a data structure, \eg stack or queue, all operations involve pointer addresses and the pointer may never be dereferenced because the referenced object is opaque.
    15 Alternatively, use a reference when its primary purpose is to alias a value, \eg a function parameter that does not copy the argument, for performance reasons.
     15Alternatively, use a reference when its primary purpose is to alias a value, \eg a function parameter that does not copy the argument (performance reason).
    1616Here, manipulating the value is the primary operation, while changing the pointer address is the secondary operation.
    1717Succinctly, if the address changes often, use a pointer;
     
    2323\CFA adopts a uniform policy between pointers and references where mutability is a separate property made at the declaration.
    2424
    25 The following examples show how pointers and references are treated uniformly in \CFA.
     25The following examples shows how pointers and references are treated uniformly in \CFA.
    2626\begin{cfa}[numbers=left,numberblanklines=false]
    2727int x = 1, y = 2, z = 3;$\label{p:refexamples}$
     
    3636@&@r3 = @&@y; @&&@r3 = @&&@r4;                          $\C{// change r1, r2}$
    3737\end{cfa}
    38 Like pointers, references can be cascaded, \ie a reference to a reference, \eg @&& r2@.\footnote{
     38Like pointers, reference can be cascaded, \ie a reference to a reference, \eg @&& r2@.\footnote{
    3939\CC uses \lstinline{&&} for rvalue reference, a feature for move semantics and handling the \lstinline{const} Hell problem.}
    4040Usage of a reference variable automatically performs the same number of dereferences as the number of references in its declaration, \eg @r2@ becomes @**r2@.
     
    6464The call applies an implicit dereference once to @x@ so the call is typed @f( int & )@ with @T = int@, rather than with @T = int &@.
    6565
    66 As with a pointer type, a reference type may have qualifiers, where @const@ is most common.
     66As for a pointer type, a reference type may have qualifiers, where @const@ is most common.
    6767\begin{cfa}
    6868int x = 3; $\C{// mutable}$
     
    101101Interestingly, C does not give a warning/error if a @const@ pointer is not initialized, while \CC does.
    102102Hence, type @& const@ is similar to a \CC reference, but \CFA does not preclude initialization with a non-variable address.
    103 For example, in systems programming, there are cases where an immutable address is initialized to a specific memory location.
     103For example, in system's programming, there are cases where an immutable address is initialized to a specific memory location.
    104104\begin{cfa}
    105105int & const mem_map = *0xe45bbc67@p@; $\C{// hardware mapped registers ('p' for pointer)}$
     
    122122\end{cfa}
    123123the call to @foo@ must pass @x@ by value, implying auto-dereference, while the call to @bar@ must pass @x@ by reference, implying no auto-dereference.
    124 
    125 \PAB{My analysis shows} without any restrictions, this ambiguity limits the behaviour of reference types in \CFA polymorphic functions, where a type @T@ can bind to a reference or non-reference type.
     124Without any restrictions, this ambiguity limits the behaviour of reference types in \CFA polymorphic functions, where a type @T@ can bind to a reference or non-reference type.
    126125This ambiguity prevents the type system treating reference types the same way as other types, even if type variables could be bound to reference types.
    127126The reason is that \CFA uses a common \emph{object trait}\label{p:objecttrait} (constructor, destructor and assignment operators) to handle passing dynamic concrete type arguments into polymorphic functions, and the reference types are handled differently in these contexts so they do not satisfy this common interface.
     
    158157\end{cfa}
    159158While it is possible to write a reference type as the argument to a generic type, it is disallowed in assertion checking, if the generic type requires the object trait \see{\VPageref{p:objecttrait}} for the type argument, a fairly common use case.
    160 Even if the object trait can be made optional, the current compiler implementation often misbehaves by adding undesirable auto-dereference on the referenced-to value rather than the reference variable itself, as intended.
     159Even if the object trait can be made optional, the current type system often misbehaves by adding undesirable auto-dereference on the referenced-to value rather than the reference variable itself, as intended.
    161160Some tweaks are necessary to accommodate reference types in polymorphic contexts and it is unclear what can or cannot be achieved.
    162161Currently, there are contexts where the \CFA programmer is forced to use a pointer type, giving up the benefits of auto-dereference operations and better syntax with reference types.
     
    189188@[x, y, z]@ = foo( 3, 4 );  // return 3 values into a tuple
    190189\end{cfa}
    191 Along with making returning multiple values a first-class feature, tuples were extended to simplify a number of other common contexts that normally require multiple statements and/or additional declarations.
     190Along with making returning multiple values a first-class feature, tuples were extended to simplify a number of other common context that normally require multiple statements and/or additional declarations, all of which reduces coding time and errors.
    192191\begin{cfa}
    193192[x, y, z] = 3; $\C[2in]{// x = 3; y = 3; z = 3, where types may be different}$
     
    206205Only when returning a tuple from a function is there the notion of a tuple value.
    207206
    208 Overloading in the \CFA type-system must support complex composition of tuples and C type conversions using a conversion cost scheme giving lower cost to widening conversions that do not truncate a value.
     207Overloading in the \CFA type-system must support complex composition of tuples and C type conversions using a costing scheme giving lower cost to widening conversions that do not truncate a value.
    209208\begin{cfa}
    210209[ int, int ] foo$\(_1\)$( int );                        $\C{// overloaded foo functions}$
     
    214213\end{cfa}
    215214The type resolver only has the tuple return types to resolve the call to @bar@ as the @foo@ parameters are identical.
    216 The resulution involves unifying the flattened @foo@ return values with @bar@'s parameter list.
     215The resultion involves unifying the flattened @foo@ return values with @bar@'s parameter list.
    217216However, no combination of @foo@s is an exact match with @bar@'s parameters;
    218217thus, the resolver applies C conversions to obtain a best match.
     
    224223bar( foo( 3 ) ) // only one tuple returning call
    225224\end{lstlisting}
    226 Hence, programmers cannot take advantage of the full power of tuples but type match is straightforward.
     225Hence, programers cannot take advantage of the full power of tuples but type match is straightforward.
    227226
    228227K-W C also supported tuple variables, but with a strong distinction between tuples and tuple values/variables.
     
    287286\end{figure}
    288287
    289 \PAB{I identified} the primary issues for tuples in the \CFA type system are polymorphism and conversions.
     288The primary issues for tuples in the \CFA type system are polymorphism and conversions.
    290289Specifically, does it make sense to have a generic (polymorphic) tuple type, as is possible for a structure?
    291290\begin{cfa}
     
    304303\section{Tuple Implementation}
    305304
    306 As noted, traditional languages manipulate multiple values by in/out parameters and/or structures.
     305As noted, tradition languages manipulate multiple values by in/out parameters and/or structures.
    307306K-W C adopted the structure for tuple values or variables, and as needed, the fields are extracted by field access operations.
    308307As well, for the tuple-assignment implementation, the left-hand tuple expression is expanded into assignments of each component, creating temporary variables to avoid unexpected side effects.
     
    357356\end{figure}
    358357
    359 Interestingly, in the third implementation of \CFA tuples by Robert Schluntz~\cite[\S~3]{Schluntz17}, the MVR functions revert back to structure based, and this remains in the current version of \CFA.
     358Interestingly, in the third implementation of \CFA tuples by Robert Schluntz~\cite[\S~3]{Schluntz17}, the MVR functions revert back to structure based, where it remains in the current version of \CFA.
    360359The reason for the reversion is a uniform approach for tuple values/variables making tuples first-class types in \CFA, \ie allow tuples with corresponding tuple variables.
    361360This reversion was possible, because in parallel with Schluntz's work, generic types were added independently by Moss~\cite{Moss19}, and the tuple variables leveraged the same implementation techniques as for generic variables~\cite[\S~3.7]{Schluntz17}.
     
    385384Scala, like \CC, provides tuple types through a library using this structural expansion, \eg Scala provides tuple sizes 1 through 22 via hand-coded generic data-structures.
    386385
    387 However, after experience gained building the \CFA runtime system, \PAB{I convinced them} making tuple-types first-class seems to add little benefit.
     386However, after experience gained building the \CFA runtime system, making tuple-types first-class seems to add little benefit.
    388387The main reason is that tuples usages are largely unstructured,
    389388\begin{cfa}
     
    513512looping is used to traverse the argument pack from left to right.
    514513The @va_list@ interface is walking up the stack (by address) looking at the arguments pushed by the caller.
    515 (Compiler-specific ABI knowledge is needed for arguments pushed using registers.)
     514(Magic knowledge is needed for arguments pushed using registers.)
    516515
    517516\begin{figure}
     
    572571
    573572Currently in \CFA, variadic polymorphic functions are the only place tuple types are used.
    574 \PAB{My analysis showed} many wrapper functions are generated to implement both user-defined generic-types and polymorphism with variadics, because \CFA compiles polymorphic functions versus template expansion.
     573And because \CFA compiles polymorphic functions versus template expansion, many wrapper functions are generated to implement both user-defined generic-types and polymorphism with variadics.
    575574Fortunately, the only permitted operations on polymorphic function parameters are given by the list of assertion (trait) functions.
    576575Nevertheless, this small set of functions eventually needs to be called with flattened tuple arguments.
    577576Unfortunately, packing the variadic arguments into a rigid @struct@ type and generating all the required wrapper functions is significant work and largely wasted because most are never called.
    578577Interested readers can refer to pages 77-80 of Robert Schluntz's thesis to see how verbose the translator output is to implement a simple variadic call with 3 arguments.
    579 As the number of arguments increases, \eg a call with 5 arguments, the translator generates concrete @struct@ types for a 4-tuple and a 3-tuple along with all the polymorphic type data for them.
     578As the number of arguments increases, \eg a call with 5 arguments, the translator generates a concrete @struct@ types for a 4-tuple and a 3-tuple along with all the polymorphic type data for them.
    580579An alternative approach is to put the variadic arguments into an array, along with an offset array to retrieve each individual argument.
    581580This method is similar to how the C @va_list@ object is used (and how \CFA accesses polymorphic fields in a generic type), but the \CFA variadics generate the required type information to guarantee type safety (like the @printf@ format string).
     
    684683
    685684Nested \emph{named} aggregates are allowed in C but there is no qualification operator, like the \CC type operator `@::@', to access an inner type.
    686 To compensate for the missing type operator, all named nested aggregates are hoisted to global scope, regardless of the nesting depth, and type usages within the nested type are replaced with global type name.
     685\emph{To compensate for the missing type operator, all named nested aggregates are hoisted to global scope, regardless of the nesting depth, and type usages within the nested type are replaced with global type name.}
    687686Hoisting nested types can result in name collisions among types at the global level, which defeats the purpose of nesting the type.
    688687\VRef[Figure]{f:NestedNamedAggregate} shows the nested type @T@ is hoisted to the global scope and the declaration rewrites within structure @S@.
     
    730729\end{figure}
    731730
    732 \CC chose to change this semantics:
     731For good reasons, \CC chose to change this semantics:
    733732\begin{cquote}
    734733\begin{description}[leftmargin=*,topsep=0pt,itemsep=0pt,parsep=0pt]
     
    770769Like an anonymous nested type, a named Plan-9 nested type has its field names hoisted into @struct S@, so there is direct access, \eg @s.x@ and @s.i@.
    771770Hence, the field names must be unique, unlike \CC nested types, but the type names are at a nested scope level, unlike type nesting in C.
    772 In addition, a pointer to a structure is automatically converted to a pointer to an anonymous field for assignments and function calls, providing containment inheritance with implicit subtyping, \ie @U@ $<:$ @S@ and @W@ $<:$ @S@, \eg:
     771In addition, a pointer to a structure is automatically converted to a pointer to an anonymous field for assignments and function calls, providing containment inheritance with implicit subtyping, \ie @U@ $\subset$ @S@ and @W@ $\subset$ @S@, \eg:
    773772\begin{cfa}
    774773void f( union U * u );
     
    782781Note, there is no value assignment, such as, @w = s@, to copy the @W@ field from @S@.
    783782
    784 Unfortunately, the Plan-9 designers did not look ahead to other useful features, specifically nested types.
     783Unfortunately, the Plan-9 designers did not lookahead to other useful features, specifically nested types.
    785784This nested type compiles in \CC and \CFA.
    786785\begin{cfa}
     
    809808In addition, a semi-non-compatible change is made so that Plan-9 syntax means a forward declaration in a nested type.
    810809Since the Plan-9 extension is not part of C and rarely used, this change has minimal impact.
    811 Hence, all Plan-9 semantics are denoted by the @inline@ qualifier, which clearly indicates the usage of Plan-9 definitions.
     810Hence, all Plan-9 semantics are denoted by the @inline@ qualifier, which is good ``eye-candy'' when reading a structure definition to spot Plan-9 definitions.
    812811Finally, the following code shows the value and pointer polymorphism.
    813812\begin{cfa}
     
    822821
    823822In general, non-standard C features (@gcc@) do not need any special treatment, as they are directly passed through to the C compiler.
    824 However, \PAB{I found} the Plan-9 semantics allow implicit conversions from the outer type to the inner type, which means the \CFA type resolver must take this information into account.
     823However, the Plan-9 semantics allow implicit conversions from the outer type to the inner type, which means the \CFA type resolver must take this information into account.
    825824Therefore, the \CFA resolver must implement the Plan-9 features and insert necessary type conversions into the translated code output.
    826825In the current version of \CFA, this is the only kind of implicit type conversion other than the standard C arithmetic conversions.
     
    848847\end{c++}
    849848and again the expression @d.x@ is ambiguous.
    850 While \CC has no direct syntax to disambiguate @x@, \eg @d.B.x@ or @d.C.x@, it is possible with casts, @((B)d).x@ or @((C)d).x@.
     849While \CC has no direct syntax to disambiguate @x@, \ie @d.B.x@ or @d.C.x@, it is possible with casts, @((B)d).x@ or @((C)d).x@.
    851850Like \CC, \CFA compiles the Plan-9 version and provides direct qualification and casts to disambiguate @x@.
    852 While ambiguous definitions are allowed, duplicate field names are poor practice and should be avoided if possible.
     851While ambiguous definitions are allowed, duplicate field names is poor practice and should be avoided if possible.
    853852However, when a programmer does not control all code, this problem can occur and a naming workaround must exist.
  • doc/theses/fangren_yu_MMath/future.tex

    rbd72f517 r7d02d35  
    44The following are feature requests related to type-system enhancements that have surfaced during the development of the \CFA language and library, but have not been implemented yet.
    55Currently, developers must work around these missing features, sometimes resulting in inefficiency.
    6 \PAB{The following sections discuss new features I am proposing to fix these problems.}
    76
    87
    98\section{Closed Trait Types}
    109
    11 Currently, \CFA does not have any closed types, as open types are the basis of its unique type-system, allowing new functions to be added at any time to override existing ones for trait satisfaction.
     10Currently, \CFA does not have any closed types, as open type are the basis of its unique type-system, allowing new functions to be added at any time to override existing ones for trait satisfaction.
    1211Locally-declared nested-functions,\footnote{
    1312Nested functions are not a feature in C but supported by \lstinline{gcc} for multiple decades and are used heavily in \CFA.}
     
    1817Library implementers normally do not want users to override certain operations and cause the behaviour of polymorphic invocations to change.
    1918\item
    20 Caching and reusing resolution results in the compiler is affected, as newly introduced declarations can participate in assertion resolution;
     19Caching and reusing resolution results in the compiler is effected, as newly introduced declarations can participate in assertion resolution;
    2120as a result, previously invalid subexpressions suddenly become valid, or alternatively cause ambiguity in assertions.
    2221\end{enumerate}
     
    7170\end{figure}
    7271
    73 A \CFA closed trait type is planned to be working similarly to a Haskell type class that requires an explicit instance declaration.
     72A \CFA closed trait type is similar to a Haskell type class requiring an explicit instance declaration.
    7473The syntax for the closed trait might look like:
    7574\begin{cfa}
     
    9291
    9392\section{Associated Types}
    94 \label{s:AssociatedTypes}
    9593
    96 The analysis presented in \VRef{s:AssertionSatisfaction} shows if all type parameters have to be bound before assertion resolution, the complexity of resolving assertions becomes much lower as every assertion parameter can be resolved independently.
     94The analysis presented in \VRef{s:AssertionSatisfaction} shows if all type parameters have to be bound before assertion resolution, the complexity of resolving assertions become much lower as every assertion parameter can be resolved independently.
    9795That is, by utilizing information from higher up the expression tree for return value overloading, most of the type bindings can be resolved.
    9896However, there are scenarios where some intermediate types need to be involved in certain operations, which are neither input nor output types.
     
    154152Note that the type @list *@ satisfies both @pointer_like( list *, int )@ and @pointer_like( list *,@ @list )@ (the latter by the built-in pointer dereference operator) and the expression @*it@ can be either a @struct list@ or an @int@.
    155153Requiring associated types to be unique makes the @pointer_like@ trait not applicable to @list *@, which is undesirable.
    156 I have not attempted to implement associated types in the \CFA compiler, but based on the above discussions, one option is to make associated type resolution and return type overloading coexist:
     154I have not attempted to implement associated types in \CFA compiler, but based on the above discussions, one option is to make associated type resolution and return type overloading coexist:
    157155when the associated type appears in returns, it is deduced from the context and then verify the trait with ordinary assertion resolution;
    158156when it does not appear in the returns, the type is required to be uniquely determined by the expression that defines the associated type.
     
    161159\section{User-defined Conversions}
    162160
    163 A missing type-system feature in \CFA is a scheme for user-defined conversions.
     161Missing type-system feature is a scheme for user-defined conversions.
    164162Conversion means one type goes through an arbitrary complex process of changing its value to some meaningful value in another type.
    165163Because the conversion process can be arbitrarily complex, it requires the power of a function.
  • doc/theses/fangren_yu_MMath/intro.tex

    rbd72f517 r7d02d35  
    3030
    3131\section{Overloading}
    32 \label{s:Overloading}
    33 
    34 \vspace*{-5pt}
     32
    3533\begin{quote}
    3634There are only two hard things in Computer Science: cache invalidation and \emph{naming things}. --- Phil Karlton
    3735\end{quote}
    38 \vspace*{-5pt}
    3936Overloading allows programmers to use the most meaningful names without fear of name clashes within a program or from external sources, like include files.
    40 Experience from \CC and \CFA developers shows the type system can implicitly and correctly disambiguate the majority of overloaded names, \ie it is rare to get an incorrect selection or ambiguity, even among hundreds of overloaded (variables and) functions.
     37Experience from \CC and \CFA developers shows the type system can implicitly and correctly disambiguates the majority of overloaded names, \ie it is rare to get an incorrect selection or ambiguity, even among hundreds of overloaded (variables and) functions.
    4138In many cases, a programmer is unaware of name clashes, as they are silently resolved, simplifying the development process.
    4239
    4340Disambiguating among overloads is implemented by examining each call site and selecting the best matching overloaded function based on criteria like the types and number of arguments and the return context.
    44 Since the hardware does not support mixed-mode operands, such as @2 + 3.5@, the type system must disallow it or (safely) convert the operands to a common type.
     41Since the hardware does not support mixed-mode operands, @2 + 3.5@, the type system must disallow it or (safely) convert the operands to a common type.
    4542Like overloading, the majority of mixed-mode conversions are silently resolved, simplifying the development process.
    4643This approach matches with programmer intuition and expectation, regardless of any \emph{safety} issues resulting from converted values.
     
    5249As well, many namespace systems provide a mechanism to open their scope returning to normal overloading, \ie no qualification.
    5350While namespace mechanisms are very important and provide a number of crucial program-development features, protection from overloading is overstated.
    54 Similarly, lexical nesting is another place where duplicate naming issues arise.
     51Similarly, lexical nesting is another place where overloading occurs.
    5552For example, in object-oriented programming, class member names \newterm{shadow} names within members.
    56 Some programmers qualify all member names with @class::@ or @this->@ to make them unique from names defined in members.
    57 Even nested lexical blocks result in shadowing, \eg multiple nested loop-indices called @i@, silently changing the meaning of @i@ at lower scope levels.
    58 Again, coding styles exist requiring all variables in nested block to be unique to prevent name shadowing problems.
     53Some programmers, qualify all member names with @class::@ or @this->@ to make them unique from names defined in members.
     54Even nested lexical blocks result in shadowing, \eg multiple nested loop-indices called @i@.
     55Again, coding styles exist requiring all variables in nested block to be unique to prevent name shadowing.
    5956Depending on the language, these possible ambiguities can be reported (as warnings or errors) and resolved explicitly using some form of qualification and/or cast.
    60 For example, if variables can be overloaded, shadowed variables of different type can produce ambiguities, indicating potential problems in lower scopes.
    61 
    62 Formally, overloading is defined by Strachey as one kind of \newterm{ad hoc polymorphism}:
    63 \vspace*{-5pt}
     57
     58Formally, overloading is defined by Strachey as \newterm{ad hoc polymorphism}:
    6459\begin{quote}
    6560In ad hoc polymorphism there is no single systematic way of determining the type of the result from the type of the arguments.
     
    6863It seems, moreover, that the automatic insertion of transfer functions by the compiling system is limited to this.~\cite[p.~37]{Strachey00}
    6964\end{quote}
    70 \vspace*{-5pt}
    7165where a \newterm{transfer function} is an implicit conversion to help find a matching overload:
    72 \vspace*{-5pt}
    7366\begin{quote}
    7467The problem of dealing with polymorphic operators is complicated by the fact that the range of types sometimes overlap.
     
    7669The functions which perform this operation are known as transfer functions and may either be used explicitly by the programmer, or, in some systems, inserted automatically by the compiling system.~\cite[p.~35]{Strachey00}
    7770\end{quote}
    78 \vspace*{-5pt}
    7971The differentiating characteristic between parametric polymorphism and overloading is often stated as: polymorphic functions use one algorithm to operate on arguments of many different types, whereas overloaded functions use a different algorithm for each type of argument.
    8072A similar differentiation is applicable for overloading and default parameters.
     
    136128\end{cfa}
    137129the overloaded names @S@ and @E@ are separated into the type and object domain, and C uses the type kinds @struct@ and @enum@ to disambiguate the names.
    138 In general, types are not overloaded because inferencing them is difficult to imagine in a statically typed programming language.
     130In general, types are not overloaded because inferencing them is difficult to imagine in a statically programming language.
    139131\begin{cquote}
    140132\setlength{\tabcolsep}{26pt}
     
    189181\noindent
    190182\newterm{General overloading} occurs when the type-system \emph{knows} a function's parameters and return types (or a variable's type for variable overloading).
    191 In functional programming-languages, there is always a return type.
     183In functional programming-languages, there is always a return type (except for a monad).
    192184If a return type is specified, the compiler does not have to inference the function body.
    193185For example, the compiler has complete knowledge about builtin types and their overloaded arithmetic operators.
     
    222214Hence, parametric overloading requires additional information about the universal types to make them useful.
    223215
    224 This additional information often comes as a set of operations that must be supply for a type, \eg \CFA/Rust/Go have traits, \CC template has concepts, Haskell has type-classes.
    225 These operations can then be used in the body of the function to manipulate the type's value.
    226 Here, a type binding to @T@ must have available a @++@ operation with the specified signature.
    227 \begin{cfa}
    228 forall( T | @T ?++( T, T )@ ) // trait
    229 T inc( T t ) { return t@++@; } // change type value
     216This additional information often comes as a set of operations a type must supply (@trait@/-@concept@) and these operations can then be used in the body of the function.
     217\begin{cfa}
     218forall( T | T ?@++@( T, T ) ) T inc( T t ) { return t@++@; }
    230219int i = 3
    231220i = inc( i )
     
    233222\end{cfa}
    234223Given a qualifying trait, are its elements inferred or declared?
    235 In the example, the type system infers @int@ for @T@, infers it needs an appropriately typed @++@ operator, and finds it in the enclosing environment, possibly in the language's prelude defining basic types and their operations.
     224In the above example, the type system infers @int@ for @T@, infers it needs a @++@ operator that takes an @int@ and returns an @int@, and finds this function in the enclosing environment (\eg standard prelude).
    236225This implicit inferencing is expensive if matched with implicit conversions when there is no exact match.
    237226Alternatively, types opt-in to traits via declarations.
     
    431420\subsection{Operator Overloading}
    432421
    433 Many programming languages provide general overloading of the arithmetic operators~\cite{OperOverloading} across the basic computational types using the number and type of parameters and returns.
     422Virtually all programming languages provide general overloading of the arithmetic operators across the basic computational types using the number and type of parameters and returns.
    434423However, in some languages, arithmetic operators may not be first class, and hence, cannot be overloaded.
    435424Like \CC, \CFA allows general operator overloading for user-defined types.
     
    449438\subsection{Function Overloading}
    450439
    451 Many programming languages provide general overloading for functions~\cite{FuncOverloading}, as long as their prototypes differ in the number and type of parameters.
    452 A few programming languages also use the return type for selecting overloaded functions \see{below}.
     440Both \CFA and \CC allow general overloading for functions, as long as their prototypes differ in the number and type of parameters and returns.
    453441\begin{cfa}
    454442void f( void );                 $\C[2in]{// (1): no parameter}$
     
    457445f( 'A' );                               $\C{// select (2)}\CRT$
    458446\end{cfa}
    459 The type system examines each call site and first looks for an exact match and then a best match using conversions.
     447The type system examines each call size and first looks for an exact match and then a best match using conversions.
    460448
    461449Ada, Scala, and \CFA type-systems also use the return type in resolving a call, to pinpoint the best overloaded name.
    462 Essentially, the return types are \emph{reversed curried} into output parameters of the function.
     450Essentailly, the return types are \emph{reversed curried} into output parameters of the function.
    463451For example, in many programming languages with overloading, the following functions are ambiguous without using the return type.
    464452\begin{cfa}
     
    490478\begin{cfa}
    491479void foo( double d );
    492 int v;                                  $\C[2in]{// (1)}$
     480int v;                              $\C[2in]{// (1)}$
    493481double v;                               $\C{// (2) variable overloading}$
    494482foo( v );                               $\C{// select (2)}$
     
    499487}
    500488\end{cfa}
    501 It is interesting that shadowing \see{namespace pollution in \VRef{s:Overloading}} is considered a normal programming-language feature with only slight software-engineering problems.
     489It is interesting that shadow overloading is considered a normal programming-language feature with only slight software-engineering problems.
    502490However, variable overloading within a scope is often considered dangerous, without any evidence to corroborate this claim.
    503491In contrast, function overloading in \CC occurs silently within the global scope from @#include@ files all the time without problems.
     
    566554The following covers these issues, and why this scheme is not amenable with the \CFA type system.
    567555
    568 One of the first and most powerful type-inferencing systems is Hindley--Milner~\cite{Damas82}.
     556One of the first and powerful type-inferencing system is Hindley--Milner~\cite{Damas82}.
    569557Here, the type resolver starts with the types of the program constants used for initialization and these constant types flow throughout the program, setting all variable and expression types.
    570558\begin{cfa}
     
    591579Note, return-type inferencing goes in the opposite direction to Hindley--Milner: knowing the type of the result and flowing back through an expression to help select the best possible overloads, and possibly converting the constants for a best match.
    592580
    593 There are multiple ways to indirectly specify a variable's type, \eg from a prior variable or expression.
     581In simpler type-inferencing systems, such as C/\CC/\CFA, there are more specific usages.
    594582\begin{cquote}
    595583\setlength{\tabcolsep}{10pt}
     
    618606\end{tabular}
    619607\end{cquote}
    620 Here, @type(expr)@ computes the same type as @auto@ righ-hand expression.
    621 The advantages are:
     608The two important capabilities are:
    622609\begin{itemize}[topsep=0pt]
    623610\item
     
    629616This issue is exaggerated with \CC templates, where type names are 100s of characters long, resulting in unreadable error messages.
    630617\item
    631 Ensuring the type of secondary variables match a primary variable.
     618Ensuring the type of secondary variables, match a primary variable.
    632619\begin{cfa}
    633620int x; $\C{// primary variable}$
     
    638625\end{itemize}
    639626Note, the use of @typeof@ is more restrictive, and possibly safer, than general type-inferencing.
    640 \begin{cquote}
    641 \setlength{\tabcolsep}{20pt}
    642 \begin{tabular}{@{}ll@{}}
    643627\begin{cfa}
    644628int x;
     
    646630type(x) z = ... // complex expression
    647631\end{cfa}
    648 &
    649 \begin{cfa}
    650 int x;
    651 auto y = ... // complex expression
    652 auto z = ... // complex expression
    653 \end{cfa}
    654 \end{tabular}
    655 \end{cquote}
    656 On the left, the types of @y@ and @z@ are fixed (branded), whereas on the right, the types of @y@ and @z@ can fluctuate.
     632Here, the types of @y@ and @z@ are fixed (branded), whereas with type inferencing, the types of @y@ and @z@ are potentially unknown.
    657633
    658634
    659635\subsection{Type-Inferencing Issues}
    660636
    661 Each kind of type-inferencing system has its own set of issues that affect the programmer in the form of convenience, restrictions, or confusions.
     637Each kind of type-inferencing system has its own set of issues that flow onto the programmer in the form of convenience, restrictions, or confusions.
    662638
    663639A convenience is having the compiler use its overarching program knowledge to select the best type for each variable based on some notion of \emph{best}, which simplifies the programming experience.
     
    667643For example, if a change is made in an initialization expression, it can cascade type changes producing many other changes and/or errors.
    668644At some point, a variable's type needs to remain constant and the initializing expression needs to be modified or be in error when it changes.
    669 Often type-inferencing systems allow restricting (\newterm{branding}) a variable or function type, so the compiler can report a mismatch with the constant initialization.
     645Often type-inferencing systems allow restricting (\newterm{branding}) a variable or function type, so the complier can report a mismatch with the constant initialization.
    670646\begin{cfa}
    671647void f( @int@ x, @int@ y ) {  // brand function prototype
     
    681657As a result, understanding and changing the code becomes almost impossible.
    682658Types provide important clues as to the behaviour of the code, and correspondingly to correctly change or add new code.
    683 In these cases, a programmer is forced to re-engineer types, which is fragile, or rely on an IDE that can re-engineer types for them.
     659In these cases, a programmer is forced to re-engineer types, which is fragile, or rely on a fancy IDE that can re-engineer types for them.
    684660For example, given:
    685661\begin{cfa}
     
    694670In this situation, having the type name or its short alias is essential.
    695671
    696 \CFA's type system tries to prevent type-resolution mistakes by relying heavily on the type of the left-hand side of assignment to pinpoint correct types within an expression.
     672\CFA's type system tries to prevent type-resolution mistakes by relying heavily on the type of the left-hand side of assignment to pinpoint the right types within an expression.
    697673Type inferencing defeats this goal because there is no left-hand type.
    698 Fundamentally, type inferencing tries to remove explicit typing from programming.
    699 However, writing down types is an important aspect of good programming, as it provides a check of the programmer's expected type and the actual type.
    700 Thinking carefully about types is similar to thinking carefully about date structures, often resulting in better performance and safety.
    701 Similarly, thinking carefully about storage management in unmanaged languages is an important aspect of good programming, versus implicit storage management (garbage collection) in managed language.\footnote{
    702 There are full-time Java consultants, who are hired to find memory-management problems in large Java programs, \eg Monika Beckworth.}
    703 Given @typedef@ and @typeof@, and the strong desire to use the left-hand type in resolution, no attempt has been made in \CFA to support implicit type-inferencing.
     674Fundamentally, type inferencing tries to magic away variable types from the programmer.
     675However, this results in lazy programming with the potential for poor performance and safety concerns.
     676Types are as important as control-flow in writing a good program, and should not be masked, even if it requires the programmer to think!
     677A similar issue is garbage collection, where storage management is magicked away, often resulting in poor program design and performance.\footnote{
     678There are full-time Java consultants, who are hired to find memory-management problems in large Java programs.}
     679The entire area of Computer-Science data-structures is obsessed with time and space, and that obsession should continue into regular programming.
     680Understanding space and time issues is an essential part of the programming craft.
     681Given @typedef@ and @typeof@ in \CFA, and the strong desire to use the left-hand type in resolution, the decision was made not to support implicit type-inferencing in the type system.
    704682Should a significant need arise, this decision can be revisited.
    705683
     
    724702int i, * ip = identity( &i );
    725703\end{cfa}
    726 Unlike \CC template functions, \CFA polymorphic functions are compatible with \emph{separate compilation}, preventing compilation and code bloat.
     704Unlike \CC template functions, \CFA polymorphic functions are compatible with C \emph{separate compilation}, preventing compilation and code bloat.
    727705
    728706To constrain polymorphic types, \CFA uses \newterm{type assertions}~\cite[pp.~37-44]{Alphard} to provide further type information, where type assertions may be variable or function declarations that depend on a polymorphic type variable.
     
    732710int val = twice( twice( 3 ) );  $\C{// val == 12}$
    733711\end{cfa}
    734 The closest approximation to parametric polymorphism and assertions in C is type-unsafe (@void *@) functions, like @qsort@ for sorting an array of unknown values.
     712Parametric polymorphism and assertions occur in existing type-unsafe (@void *@) C functions, like @qsort@ for sorting an array of unknown values.
    735713\begin{cfa}
    736714void qsort( void * base, size_t nmemb, size_t size, int (*cmp)( const void *, const void * ) );
     
    783761The @sized@ assertion passes size and alignment as a data object has no implicit assertions.
    784762Both assertions are used in @malloc@ via @sizeof@ and @_Alignof@.
    785 In practice, this polymorphic @malloc@ is unwrapped by the C compiler and the @if@ statement is elided producing a type-safe call to @malloc@ or @memalign@.
     763In practise, this polymorphic @malloc@ is unwrapped by the C compiler and the @if@ statement is elided producing a type-safe call to @malloc@ or @memalign@.
    786764
    787765This mechanism is used to construct type-safe wrapper-libraries condensing hundreds of existing C functions into tens of \CFA overloaded functions.
     
    809787forall( T @| sumable( T )@ )   // use trait
    810788T sum( T a[$\,$], size_t size ) {
    811         @T@ total = 0;            // initialize by 0 constructor
     789        @T@ total = 0;          // initialize by 0 constructor
    812790        for ( i; size )
    813                 total @+=@ a[i];        // select appropriate +
     791                total @+=@ a[i];    // select appropriate +
    814792        return total;
    815793}
     
    817795\end{tabular}
    818796\end{cquote}
    819 Traits are implemented by flattening them at use points, as if written in full by the programmer.
     797Traits are implemented by flatten them at use points, as if written in full by the programmer.
    820798Flattening often results in overlapping assertions, \eg operator @+@.
    821799Hence, trait names play no part in type equivalence.
     
    843821Write bespoke data structures for each context.
    844822While this approach is flexible and supports integration with the C type checker and tooling, it is tedious and error prone, especially for more complex data structures.
    845 
    846823\item
    847824Use @void *@-based polymorphism, \eg the C standard library functions @bsearch@ and @qsort@, which allow for the reuse of code with common functionality.
    848825However, this approach eliminates the type checker's ability to ensure argument types are properly matched, often requiring a number of extra function parameters, pointer indirection, and dynamic allocation that is otherwise unnecessary.
    849 
    850826\item
    851 Use an internal macro capability, like \CC @templates@, to generate code that is both generic and type checked, but errors may be difficult to interpret.
    852 Furthermore, writing complex template macros is difficult and complex.
    853 
    854 \item
    855 Use an external macro capability, like M4~\cite{M4}, to generate code that is generic code, but errors may be difficult to interpret.
    856 Like internal macros, writing and using external macros is equally difficult and complex.
     827Use preprocessor macros, similar to \CC @templates@, to generate code that is both generic and type checked, but errors may be difficult to interpret.
     828Furthermore, writing and using complex preprocessor macros is difficult and inflexible.
    857829\end{enumerate}
    858830
     
    885857\end{tabular}
    886858\end{cquote}
    887 \label{s:GenericImplementation}
    888859\CFA generic types are \newterm{fixed} or \newterm{dynamic} sized.
    889860Fixed-size types have a fixed memory layout regardless of type parameters, whereas dynamic types vary in memory layout depending on the type parameters.
     
    912883For software-engineering reasons, the set assertions would be refactored into a trait to allow alternative implementations, like a Java \lstinline[language=java]{interface}.
    913884
    914 In summation, the \CFA type system inherits \newterm{nominal typing} for concrete types from C;
    915 however, without inheritance in \CFA, nominal typing cannot be extended to polymorphic subtyping.
    916 Instead, \CFA adds \newterm{structural typing} and uses it to generate polymorphism.
    917 Here, traits are like interfaces in Java or abstract base-classes in \CC, but without the nominal inheritance relationships.
    918 Instead, each polymorphic function or generic type defines the structural requirements needed for its execution, which is fulfilled at each call site from the lexical environment, like Go~\cite{Go} or Rust~\cite{Rust} interfaces.
    919 Hence, lexical scopes and nested functions are used extensively to mimic subtypes, as in the @qsort@ example, without managing a nominal inheritance hierarchy.
     885In summation, the \CFA type system inherits \newterm{nominal typing} for concrete types from C, and adds \newterm{structural typing} for polymorphic types.
     886Traits are used like interfaces in Java or abstract base-classes in \CC, but without the nominal inheritance relationships.
     887Instead, each polymorphic function or generic type defines the structural type needed for its execution, which is fulfilled at each call site from the lexical environment, like Go~\cite{Go} or Rust~\cite{Rust} interfaces.
     888Hence, new lexical scopes and nested functions are used extensively to create local subtypes, as in the @qsort@ example, without having to manage a nominal inheritance hierarchy.
    920889
    921890
     
    933902general\footnote{overloadable entities: V $\Rightarrow$ variable, O $\Rightarrow$ operator, F $\Rightarrow$ function, M $\Rightarrow$ member}
    934903                                                & O\footnote{except assignment}/F       & O/F/M & V/O/F & M\footnote{not universal}     & O/M   & O/F/M & no    & no    \\
    935 general constraints\footnote{T $\Rightarrow$ parameter type, \# $\Rightarrow$ parameter count, N $\Rightarrow$ parameter name; R $\Rightarrow$ return type}
     904general constraints\footnote{T $\Rightarrow$ parameter type, \# $\Rightarrow$ parameter number, N $\Rightarrow$ parameter name; R $\Rightarrow$ return type}
    936905                                                & T/\#//R\footnote{parameter names can be used to disambiguate among overloads but not create overloads}
    937906                                                                        & T/\#  & T/\#/R        & T/\#  & T/\#/N/R      & T/\#/N/R      & T/\#/N        & T/R \\
     
    1030999
    10311000However, the parameter operations are severely restricted because universal types have few operations.
    1032 For example, Swift provides a @print@ operation for its universal type, and the Java @Object@ class provides general methods: @toString@, @hashCode@, @equals@, @finalize@, \etc.
     1001For example, swift provides a @print@ operation for its universal type, and the java @Object@ class provides general methods: @toString@, @hashCode@, @equals@, @finalize@, \etc.
    10331002This restricted mechanism still supports a few useful functions, where the parameters are abstract entities, \eg:
    10341003\begin{swift}
     
    10401009\end{swift}
    10411010To make a universal function useable, an abstract description is needed for the operations used on the parameters within the function body.
    1042 Type matching these operations can be done by using techniques like \CC template expansion, or explicit stating, \eg interfaces, subtyping (inheritance), assertions (traits), type classes, type bounds.
     1011Type matching these operations can occur by discover using techniques like \CC template expansion, or explicit stating, \eg interfaces, subtyping (inheritance), assertions (traits), type classes, type bounds.
    10431012The mechanism chosen can affect separate compilation or require runtime type information (RTTI).
    10441013\begin{description}
     
    10611030
    10621031\begin{figure}
    1063 \setlength{\tabcolsep}{12pt}
     1032\setlength{\tabcolsep}{15pt}
    10641033\begin{tabular}{@{}ll@{}}
    10651034\multicolumn{1}{c}{\textbf{\CFA}} & \multicolumn{1}{c}{\textbf{Haskell}} \\
     
    10671036forall( T ) trait sumable {
    10681037        void ?{}( T &, zero_t );
    1069         T ?+=?( T &, T );  };
     1038        T ?+=?( T &, T );
     1039};
    10701040forall( T | sumable( T ) )
    10711041T sum( T a[], size_t size ) {
    10721042        T total = 0;
    10731043        for ( i; size ) total += a[i];
    1074         return total;  }
     1044        return total;
     1045}
    10751046struct S { int i, j; };
    10761047void ?{}( S & s, zero_t ) { s.[i, j] = 0; }
     
    10781049void ?{}( S & s, int i, int j ) { s.[i, j] = [i, j]; }
    10791050S ?+=?( S & l, S r ) { l.[i, j] += r.[i, j]; }
    1080 int main() {            // trait inferencing
    1081         sout | sum( (int []){ 1, 2, 3 }, 3 );
    1082         sout | sum( (double []){ 1.5, 2.5, 3.5 }, 3 );
    1083         sout | sum( (S []){ {1,1}, {2,2}, {3,3} }, 3 ).[i, j];  }
    1084 
    1085 
    1086 
     1051
     1052int main() {
     1053        int ia[] = { 1, 2, 3 };
     1054        sout | sum( ia, 3 );        // trait inference
     1055        double da[] = { 1.5, 2.5, 3.5 };
     1056        sout | sum( da, 3 );        // trait inference
     1057        S sa[] = { {1, 1}, {2, 2}, {3, 3 } };
     1058        sout | sum( sa, 3 ).[i, j]; // trait inference
     1059}
    10871060\end{cfa}
    10881061&
     
    10911064        szero :: a
    10921065        sadd :: a -> a -> a
     1066
    10931067ssum ::  Sumable a $=>$ [a] -> a
    10941068ssum (x:xs) = sadd x (ssum xs)
    10951069ssum [] = szero
     1070
     1071
     1072
    10961073data S = S Int Int deriving Show
    10971074@instance Sumable Int@ where
     
    11001077@instance Sumable Float@ where
    11011078        szero = 0.0
    1102         sadd = (+)
     1079   sadd = (+)
    11031080@instance Sumable S@ where
    11041081        szero = S 0 0
     
    11101087\end{haskell}
    11111088\end{tabular}
    1112 \caption{Implicit/Explicit Trait Inferencing}
    1113 \label{f:ImplicitExplicitTraitInferencing}
     1089\caption{Implicitly/Explicitly Trait Inferencing}
     1090\label{f:ImplicitlyExplicitlyTraitInferencing}
    11141091\end{figure}
    11151092
    11161093One differentiating feature among these specialization techniques is the ability to implicitly or explicitly infer the trait information at a class site.
    1117 \VRef[Figure]{f:ImplicitExplicitTraitInferencing} compares the @sumable@ trait and polymorphic @sum@ function \see{\VRef{s:Traits}} for \CFA and Haskell.
     1094\VRef[Figure]{f:ImplicitlyExplicitlyTraitInferencing} compares the @sumable@ trait and polymorphic @sum@ function \see{\VRef{s:Traits}} for \CFA and Haskell.
    11181095Here, the \CFA type system inferences the trait functions at each call site, so no additional specification is necessary by the programmer.
    11191096The Haskell program requires the programmer to explicitly bind the trait and to each type that can be summed.
     
    11251102\end{ada}
    11261103Finally, there is a belief that certain type systems cannot support general overloading, \eg Haskell.
    1127 As \VRef[Table]{t:OverloadingFeatures} shows, there are multiple languages with both general and parametric overloading, so the decision to not support general overloading is based on design choices made by the language designers not any reason in type theory.
     1104As \VRef[Table]{t:OverloadingFeatures} shows, there are multiple languages with both general and parametric overloading, so the decision to not support general overloading is based on the opinion of the language designers and the type system they choose, not any reason in type theory.
    11281105
    11291106The fourth row classifies if conversions are attempted beyond exact match.
     
    11441121The details of compiler optimization work are covered in a previous technical report~\cite{Yu20}, which essentially forms part of this thesis.
    11451122\item
    1146 This thesis presents a systematic review of the new features added to the \CFA language and its type system.
     1123The thesis presents a systematic review of the new features added to the \CFA language and its type system.
    11471124Some of the more recent inclusions to \CFA, such as tuples and generic structure types, were not well tested during development due to the limitation of compiler performance.
    11481125Several issues coming from the interactions of various language features are identified and discussed in this thesis;
  • doc/theses/fangren_yu_MMath/resolution.tex

    rbd72f517 r7d02d35  
    22\label{c:content2}
    33
    4 Recapping, \CFA's type-system provides expressive polymorphism: variables can be overloaded, functions can be overloaded by argument and return types, tuple types, generic (polymorphic) functions and types (aggregates) can have multiple type parameters with assertion restrictions;
     4Recapping, the \CFA's type-system provides expressive polymorphism: variables can be overloaded, functions can be overloaded by argument and return types, tuple types, generic (polymorphic) functions and types (aggregates) can have multiple type parameters with assertion restrictions;
    55in addition, C's multiple implicit type-conversions must be respected.
    66This generality leads to internal complexity and correspondingly higher compilation cost directly related to type resolution.
     
    2424\end{enumerate}
    2525\VRef[Table]{t:SelectedFileByCompilerBuild} shows improvements for selected tests with accumulated reductions in compile time across each of the 5 fixes.
    26 The large reduction in compilation time significantly improves the development of the \CFA's runtime because of its frequent compilation cycles.
     26To this day, the large reduction in compilation time significantly improves the development of the \CFA's runtime because of its frequent compilation cycles.
    2727
    2828\begin{table}[htb]
     
    5454Some of those problems arise from the newly introduced language features described in the previous chapter.
    5555In addition, fixing unexpected interactions within the type system has presented challenges.
    56 This chapter describes in detail the type-resolution rules currently in use and some major problems \PAB{I} have identified.
     56This chapter describes in detail the type-resolution rules currently in use and some major problems that have been identified.
    5757Not all of those problems have immediate solutions, because fixing them may require redesigning parts of the \CFA type system at a larger scale, which correspondingly affects the language design.
    5858
     
    6969\begin{enumerate}[leftmargin=*]
    7070\item \textbf{Unsafe} cost representing a narrowing conversion of arithmetic types, \eg @int@ to @short@, and qualifier-dropping conversions for pointer and reference types.
    71 Narrowing conversions have the potential to lose (truncate) data.
     71Narrowing conversions have the potential to lose (truncation) data.
    7272A programmer must decide if the computed data-range can safely be shorted in the smaller storage.
    7373Warnings for unsafe conversions are helpful.
     
    8686
    8787\item \textbf{Safe} cost representing a widening conversion \eg @short@ to @int@, qualifier-adding conversions for pointer and reference types, and value conversion for enumeration constants.
    88 When all conversions are safe, closer conversions are ranked better, \eg @short@ to @int@ versus @short@ to @long int@.
     88Even when conversions are safe, the fewest conversions it ranked better, \eg @short@ to @int@ versus @short@ to @long int@.
    8989\begin{cfa}
    9090void f( long int p ); $\C[2.5in]{// 1}$
     
    103103
    104104\item \textbf{Specialization} cost counting the number of restrictions introduced by type assertions.
    105 Fewer restriction means fewer parametric variables passed at the function call giving better performance.
     105Fewer restriction means fews parametric variables passed at the function call giving better performance.
    106106\begin{cfa}
    107107forall( T | { T ?+?( T, T ) } ) void f( T ); $\C[3.25in]{// 1}$
     
    110110\end{cfa}
    111111\end{enumerate}
    112 Cost tuples are compared in lexicographical order, from unsafe (highest) to specialization (lowest), with ties moving to the next lowest item.
     112Cost tuples are compared by lexicographical order, from unsafe (highest) to specialization (lowest), with ties moving to the next lowest item.
    113113At a subexpression level, the lowest cost candidate for each result type is included as a possible interpretation of the expression;
    114114at the top level, all possible interpretations of different types are considered (generating a total ordering) and the overall lowest cost is selected as the final interpretation of the expression.
    115115Glen Ditchfield first proposed this costing model~\cite[\S~4.4.5]{Ditchfield92} to generate a resolution behaviour that is reasonable to C programmers based on existing conversions in the C programming language.
    116116This model carried over into the first implementation of the \CFA type-system by Richard Bilson~\cite[\S~2.2]{Bilson03}, and was extended but not redesigned by Aaron Moss~\cite[chap.~4]{Moss19}.
    117 Moss's work began to show problems with the underlying cost model;
     117Moss's work began to show problems with the underlying costing model;
    118118these design issues are part of this work.
    119119
     
    152152Therefore, at each resolution step, the arguments are already given unique interpretations, so the ordering only needs to compare different sets of conversion targets (function parameter types) on the same set of input.
    153153
    154 \PAB{My conclusion} is that trying to use such a system in \CFA is problematic because of the presence of return-type overloading of functions and variables.
     154In \CFA, trying to use such a system is problematic because of the presence of return-type overloading of functions and variable.
    155155Specifically, \CFA expression resolution considers multiple interpretations of argument subexpressions with different types, \eg:
    156156so it is possible that both the selected function and the set of arguments are different, and cannot be compared with a partial-ordering system.
     
    165165\end{quote}
    166166However, I was unable to generate any Ada example program that demonstrates this preference.
    167 In contrast, the \CFA overload resolution-system is at the other end of the spectrum, as it tries to order all legal interpretations of an expression and chooses the best one according to cost, occasionally giving unexpected results rather than an ambiguity.
     167In contrast, the \CFA overload resolution-system is at the other end of the spectrum, as it tries to order every legal interpretations of an expression and chooses the best one according to cost, occasionally giving unexpected results rather than an ambiguity.
    168168
    169169Interestingly, the \CFA cost-based model can sometimes make expression resolution too permissive because it always attempts to select the lowest cost option, and only when there are multiple options tied at the lowest cost does it report the expression is ambiguous.
     
    171171Other than the case of multiple exact matches, where all have cost zero, incomparable candidates under a partial ordering can often have different expression costs since different kinds of implicit conversions are involved, resulting in seemingly arbitrary overload selections.
    172172
    173 There are currently at least three different situations where the polymorphic cost element of the cost model does not yield a candidate selection that is justifiable, and one of them is clearly wrong.
     173There are currently at least three different situations where the polymorphic cost element of the cost model does not yield a candidate selection that is clearly justifiable, and one of them is straight up wrong.
    174174\begin{enumerate}[leftmargin=*]
    175175\item Polymorphic exact match versus non-polymorphic inexact match.
     
    193193\end{itemize}
    194194In this example, option 1 produces the prototype @void f( int )@, which gives an exact match and therefore takes priority.
    195 The \CC resolution rules effectively make option 2 a specialization that only applies to type @long@ exactly,\footnote{\CC does have explicit template specializations, however they do not participate directly in overload resolution and can sometimes lead to unintuitive results.} while the current \CFA rules make option 2 apply for all integral types ranked lower than @long@ as well.
     195The \CC resolution rules effectively makes option 2 a specialization that only applies to type @long@ exactly,\footnote{\CC does have explicit template specializations, however they do not participate directly in overload resolution and can sometimes lead to unintuitive results.} while the current \CFA rules make option 2 apply for all integral types below @long@.
    196196This difference could be explained as compensating for \CFA polymorphic functions being separately compiled versus template inlining;
    197197hence, calling them requires passing type information and assertions increasing the runtime cost.
     
    211211Although it is true that both the sequence 1, 2 and 1, 3, 4 are increasingly more constrained on the argument types, option 2 is not comparable to either of option 3 or 4;
    212212they actually describe independent constraints on the two arguments.
    213 Specifically, option 2 says the two arguments must have the same type, while option 3 states the second argument must have type @int@.
     213Specifically, option 2 says the two arguments must have the same type, while option 3 states the second argument must have type @int@,
    214214Because two constraints can independently be satisfied, neither should be considered a better match when trying to resolve a call to @f@ with argument types @(int, int)@;
    215215reporting such an expression as ambiguous is more appropriate.
     
    227227Passing a @pair@ variable to @f@
    228228\begin{cfa}
    229 pair(int, double) p;
     229pair p;
    230230f( p );
    231231\end{cfa}
    232232gives a cost of 1 poly, 2 variable for the @pair@ overload, versus a cost of 1 poly, 1 variable for the unconstrained overload.
    233233Programmer expectation is to select option 1 because of the exact match, but the cost model selects 2;
    234 it is not possible to write a specialization for @f@ that works on any pair type and gets selected by the type resolver as intended.
     234while either could work, the type system should select a call that meets expectation of say the call is ambiguous, forcing the programmer to mediate.
    235235As a result, simply counting the number of polymorphic type variables is no longer correct to order the function candidates as being more constrained.
    236236\end{enumerate}
    237237
    238 These inconsistencies are not easily solvable in the current cost-model, meaning that currently the \CFA codebase has to workaround these defects.
     238These inconsistencies are not easily solvable in the current cost-model, meaning the currently \CFA codebase has to workaround these defects.
    239239One potential solution is to mix the conversion cost and \CC-like partial ordering of specializations.
    240240For example, observe that the first three elements (unsafe, polymorphic and safe conversions) in the \CFA cost-tuple are related to the argument/parameter types, while the other two elements (polymorphic variable and assertion counts) are properties of the function declaration.
     
    352352Here, the unsafe cost of signed to unsigned is factored into the ranking, so the safe conversion is selected over an unsafe one.
    353353Furthermore, an integral option is taken before considering a floating option.
    354 This model locally matches the C approach, but provides an ordering when there are many overload alternatives.
     354This model locally matches the C approach, but provides an ordering when there are many overloaded alternative.
    355355However, as Moss pointed out overload resolution by total cost has problems, \eg handling cast expressions.
    356356\begin{cquote}
     
    379379if an expression has any legal interpretations as a C builtin operation, only the lowest cost one is kept, regardless of the result type.
    380380
    381 \VRef[Figure]{f:CFAArithmeticConversions} shows \PAB{my} alternative \CFA partial-order arithmetic-conversions graphically.
     381\VRef[Figure]{f:CFAArithmeticConversions} shows an alternative \CFA partial-order arithmetic-conversions graphically.
    382382The idea here is to first look for the best integral alternative because integral calculations are exact and cheap.
    383383If no integral solution is found, than there are different rules to select among floating-point alternatives.
     
    387387\section{Type Unification}
    388388
    389 Type unification is the algorithm that assigns values to each (free) type parameter such that the types of the provided arguments and function parameters match.
     389Type unification is the algorithm that assigns values to each (free) type parameters such that the types of the provided arguments and function parameters match.
    390390
    391391\CFA does not attempt to do any type \textit{inference} \see{\VRef{s:IntoTypeInferencing}}: it has no anonymous functions (\ie lambdas, commonly found in functional programming and also used in \CC and Java), and the variable types must all be explicitly defined (no auto typing).
     
    408408With the introduction of generic record types, the parameters must match exactly as well; currently there are no covariance or contravariance supported for the generics.
    409409
    410 \PAB{I made} one simplification to the \CFA language that makes modelling the type system easier: polymorphic function pointer types are no longer allowed.
     410One simplification was made to the \CFA language that makes modelling the type system easier: polymorphic function pointer types are no longer allowed.
    411411The polymorphic function declarations themselves are still treated as function pointer types internally, however the change means that formal parameter types can no longer be polymorphic.
    412412Previously it was possible to write function prototypes such as
     
    418418A function operates on the call-site arguments together with any local and global variables.
    419419When the function is polymorphic, the types are inferred at each call site.
    420 On each invocation, the types to be operated on are determined from the arguments provided, and therefore, there is no need to pass a polymorphic function pointer, which can take any type in principle.
     420On each invocation, the types to be operate on are determined from the arguments provided, and therefore, there is no need to pass a polymorphic function pointer, which can take any type in principle.
    421421For example, consider a polymorphic function that takes one argument of type @T@ and polymorphic function pointer.
    422422\begin{cfa}
     
    441441The assertion set that needs to be resolved is just the declarations on the function prototype, which also simplifies the assertion satisfaction algorithm, which is discussed further in the next section.
    442442
    443 \PAB{My} implementation sketch stores type unification results in a type-environment data-structure, which represents all the type variables currently in scope as equivalent classes, together with their bound types and information such as whether the bound type is allowed to be opaque (\ie a forward declaration without definition in scope) and whether the bounds are allowed to be widened.
     443An implementation sketch stores type unification results in a type-environment data-structure, which represents all the type variables currently in scope as equivalent classes, together with their bound types and information such as whether the bound type is allowed to be opaque (\ie a forward declaration without definition in scope) and whether the bounds are allowed to be widened.
    444444In the general approach commonly used in functional languages, the unification variables are given a lower bound and an upper bound to account for covariance and contravariance of types.
    445445\CFA does not implement any variance with its generic types and does not allow polymorphic function types, therefore no explicit upper bound is needed and one binding value for each equivalence class suffices.
     
    469469
    470470
    471 In previous versions of \CFA, this number was set at 4; as the compiler has become more optimized and capable of handling more complex expressions in a reasonable amount of time, I have increased the limit to 8 and it has not led to problems.
     471In previous versions of \CFA, this number was set at 4; as the compiler becomes more optimized and capable of handling more complex expressions in a reasonable amount of time, I have increased the limit to 8 and it does not lead to problems.
    472472Only rarely is there a case where the infinite recursion produces an exponentially growing assertion set, causing minutes of time wasted before the limit is reached.
    473473Fortunately, it is very hard to generate this situation with realistic \CFA code, and the ones that have occurred have clear characteristics, which can be prevented by alternative approaches.
     
    475475One example is analysed in this section.
    476476
    477 \PAB{My analysis shows that} while the assertion satisfaction problem in isolation looks like just another expression to resolve, its recursive nature makes some techniques for expression resolution no longer possible.
     477While the assertion satisfaction problem in isolation looks like just another expression to resolve, its recursive nature makes some techniques for expression resolution no longer possible.
    478478The most significant impact is that type unification has a side effect, namely editing the type environment (equivalence classes and bindings), which means if one expression has multiple associated assertions it is dependent, as the changes to the type environment must be compatible for all the assertions to be resolved.
    479479Particularly, if one assertion parameter can be resolved in multiple different ways, all of the results need to be checked to make sure the change to type variable bindings are compatible with other assertions to be resolved.
     
    481481In many cases, these problems can be avoided by examining other assertions that provide insight on the desired type binding: if one assertion parameter can only be matched by a unique option, the type bindings can be updated confidently without the need for backtracking.
    482482
    483 The Moss algorithm currently used in \CFA was developed using a simplified type system that captures most of \CFA's type system features.
     483The Moss algorithm currently used in \CFA was developed using a simplified type-simulator that capture most of \CFA type-system features.
    484484The simulation results were then ported back to the actual language.
    485485The simulator used a mix of breadth- and depth-first search in a staged approach.
     
    494494If any new assertions are introduced by the selected candidates, the algorithm is applied recursively, until there are none pending resolution or the recursion limit is reached, which results in a failure.
    495495
    496 However, \PAB{I identify that} in practice the efficiency of this algorithm can be sensitive to the order of resolving assertions.
     496However, in practice the efficiency of this algorithm can be sensitive to the order of resolving assertions.
    497497Suppose an unbound type variable @T@ appears in two assertions:
    498498\begin{cfa}
     
    517517A type variable introduced by the @forall@ clause of function declaration can appear in parameter types, return types and assertion variables.
    518518If it appears in parameter types, it can be bound when matching the arguments to parameters at the call site.
    519 If it only appears in the return type, it can be eventually determined from the call-site context.
     519If it only appears in the return type, it can be eventually be determined from the call-site context.
    520520Currently, type resolution cannot do enough return-type inferencing while performing eager assertion resolution: the return type information is unknown before the parent expression is resolved, unless the expression is an initialization context where the variable type is known.
    521521By delaying the assertion resolution until the return type becomes known, this problem can be circumvented.
    522 The truly problematic case occurs if a type variable does not appear in either of the parameter or return types and only appears in assertions or variables (\newterm{associate types}).
     522The truly problematic case occurs if a type variable does not appear in either of the parameter or return types and only appears in assertions or variables (associate types).
    523523\begin{cfa}
    524524forall( T | { void foo( @T@ ) } ) int f( float ) {
     
    526526}
    527527\end{cfa}
    528 This case is rare so forcing every type variable to appear at least once in parameter or return types does not limit the expressiveness of \CFA type system to a significant extent.
    529 \VRef{s:AssociatedTypes} presents a proposal for including type declarations in traits rather than having all type variables appear in the trait parameter list, which provides equivalent functionality to an unbound type parameter in assertion variables, and also addresses some of the variable cost issue discussed in \VRef{s:ExpressionCostModel}.
     528This case is rare so forcing every type variable to appear at least once in parameter or return types limits does not limit the expressiveness of \CFA type system to a significant extent.
     529The next section presents a proposal for including type declarations in traits rather than having all type variables appear in the trait parameter list, which is provides equivalent functionality to an unbound type parameter in assertion variables, and also addresses some of the variable cost issue discussed in \VRef{s:ExpressionCostModel}.
    530530
    531531
     
    535535Based on the experiment results, this approach can improve the performance of expression resolution in general, and sometimes allow difficult instances of assertion resolution problems to be solved that are otherwise infeasible, \eg when the resolution encounters an infinite loop.
    536536
    537 \PAB{I identify that} the tricky problem in implementing this approach is that the resolution algorithm has side effects, namely modifying the type bindings in the environment.
     537The tricky problem in implementing this approach is that the resolution algorithm has side effects, namely modifying the type bindings in the environment.
    538538If the modifications are cached, \ie the results that cause the type bindings to be modified, it is also necessary to store the changes to type bindings, too.
    539539Furthermore, in cases where multiple candidates can be used to satisfy one assertion parameter, all of them must be cached including those that are not eventually selected, since the side effect can produce different results depending on the context.
     
    583583However, the implementation of the type environment is simplified;
    584584it only stores a tentative type binding with a flag indicating whether \emph{widening} is possible for an equivalence class of type variables.
    585 Formally speaking, \PAB{I concluded} the type environment used in \CFA is only capable of representing \emph{lower-bound} constraints.
     585Formally speaking, this means the type environment used in \CFA is only capable of representing \emph{lower-bound} constraints.
    586586This simplification works most of the time, given the following properties of the existing \CFA type system and the resolution algorithms:
    587587\begin{enumerate}
     
    599599\end{enumerate}
    600600
    601 \CFA does attempt to incorporate upstream type information propagated from a variable declaration with initializer, since the type of the variable being initialized is known.
     601\CFA does attempt to incorporate upstream type information propagated from variable a declaration with initializer, since the type of the variable being initialized is known.
    602602However, the current type-environment representation is flawed in handling such type inferencing, when the return type in the initializer is polymorphic.
    603603Currently, an inefficient workaround is performed to create the necessary effect.
  • doc/theses/fangren_yu_MMath/uw-ethesis.bib

    rbd72f517 r7d02d35  
    22% For use with BibTeX
    33
    4 @misc{OperOverloading,
    5     contributer = {pabuhr@plg},
    6     key         = {Operator Overloading},
    7     title       = {Operator Overloading},
    8     author      = {{WikipediA}},
    9     howpublished= {\url{https://en.wikipedia.org/wiki/Operator_overloading}},
    10     year        = 2025,
    11 }
    12 
    13 @misc{FuncOverloading,
    14     contributer = {pabuhr@plg},
    15     key         = {Function Overloading},
    16     title       = {Function Overloading},
    17     author      = {{WikipediA}},
    18     howpublished= {\url{https://en.wikipedia.org/wiki/Function_overloading}},
    19     year        = 2025,
    20 }
  • doc/theses/fangren_yu_MMath/uw-ethesis.tex

    rbd72f517 r7d02d35  
    100100\lstnewenvironment{ada}[1][]{\lstset{language=Ada,escapechar=\$,moredelim=**[is][\color{red}]{@}{@},}\lstset{#1}}{}
    101101
    102 \newcommand{\PAB}[1]{{\color{magenta}#1}}
     102\newcommand{\PAB}[1]{{\color{red}PAB: #1}}
    103103\newcommand{\newtermFont}{\emph}
    104104\newcommand{\Newterm}[1]{\newtermFont{#1}}
  • doc/theses/mike_brooks_MMath/Makefile

    rbd72f517 r7d02d35  
    1313TeXSRC = ${wildcard *.tex}
    1414PicSRC = ${notdir ${wildcard ${Pictures}/*.png}} ${notdir ${wildcard ${Pictures}/*.fig}}
    15 PicSRC := ${PicSRC:.fig=.pdf}                   # substitute ".fig" with ".pdf"
     15PicSRC := ${PicSRC:.fig=.pdf}           # substitute ".fig" with ".pdf"
     16GraphSRC_OLD = ${notdir ${wildcard ${Pictures}/*.dat}}
     17GraphSRC_OLD := ${GraphSRC_OLD:.dat=.pdf}               # substitute ".dat" with ".pdf"
     18PlotINPUTS = ${wildcard ${Plots}/*.gp} ${wildcard ${Plots}/*.py}
     19PlotINPUTS := ${addsuffix .INPUTS,${PlotINPUTS}}
    1620PlotSRC = ${notdir ${wildcard ${Plots}/*.gp}}
    17 PlotSRC := ${addprefix ${Build}/plot-,${PlotSRC:.gp=.pdf}} # substitute ".gp" with ".pdf"
     21PlotSRC := ${addprefix ${Build}/plot-,${PlotSRC:.gp=.pdf}}              # substitute ".gp" with ".pdf"
    1822DemoPgmSRC = ${notdir ${wildcard ${Programs}/*-demo.cfa}}
    1923PgmSRC = ${notdir ${wildcard ${Programs}/*}}
     
    4549# Rules and Recipes
    4650
    47 .PHONY : all clean                              # not file names
     51.PHONY : all clean                      # not file names
    4852.SECONDARY:
    4953#.PRECIOUS : ${Build}/%                         # don't delete intermediates
     
    5761# File Dependencies
    5862
    59 ${DOCUMENT}: ${TeXSRC} $(RunPgmOut) ${DemoPgmOut} ${PlotSRC} ${PicSRC} ${BibSRC} ${BibRep}/pl.bib ${LaTMac}/common.tex Makefile | ${Build}
     63${DOCUMENT}: ${TeXSRC} $(RunPgmOut) ${DemoPgmOut} ${GraphSRC_OLD} ${PlotSRC} ${PicSRC} ${BibSRC} ${BibRep}/pl.bib ${LaTMac}/common.tex Makefile | ${Build}
    6064        echo ${PicSRC}
    6165        echo ${GraphSRC_OLD}
     
    7882        ${CFA} $< -o $@
    7983
    80 ${Build}/%: ${Programs}/%.run.cfa | ${Build}    # cfa cannot handle pipe
     84${Build}/%: ${Programs}/%.run.cfa | ${Build} # cfa cannot handle pipe
    8185        sed -f ${Programs}/sedcmd $< > ${Build}/tmp.cfa; ${CFA} ${Build}/tmp.cfa -o $@
    8286
     
    9094        $< > $@
    9195
     96string-graph-peq-sharing.pdf: string-graph-peq-sharing.dat plot-peq-sharing.gp | ${Build}
     97        gnuplot plot-peq-sharing.gp
     98
     99string-graph-pta-sharing.pdf: string-graph-pta-sharing.dat plot-pta-sharing.gp | ${Build}
     100        gnuplot plot-pta-sharing.gp
     101
     102string-graph-pbv.pdf: string-graph-pbv.dat plot-pbv.gp | ${Build}
     103        gnuplot plot-pbv.gp
     104
     105string-graph-allocn.pdf: string-graph-allocn.dat plot-allocn.gp | ${Build}
     106        gnuplot plot-allocn.gp
     107
    92108%.pdf: %.fig | ${Build}
    93109        fig2dev -L pdf $< > ${Build}/$@
    94110
    95 -include $(Plots)/*.d
     111-include $(Plots)/string-peq-cppemu.d
    96112
    97113${Build}/plot-%.dat: ${Plots}/%.py ${Plots}/%.py.INPUTS | ${Build}
     114        echo ${PlotINPUTS}     
    98115        python3 $< > $@
    99116
  • doc/theses/mike_brooks_MMath/array.tex

    rbd72f517 r7d02d35  
    1717though using a new style of generic parameter.
    1818\begin{cfa}
    19 @array( float, 99 )@ x;                                 $\C[2.5in]{// x contains 99 floats}$
    20 \end{cfa}
    21 Here, the arguments to the @array@ type are @float@ (element type) and @99@ (dimension).
    22 When this type is used as a function parameter, the type-system requires the argument is a perfect match.
     19@array( float, 99 )@ x;                                 $\C[2.75in]{// x contains 99 floats}$
     20\end{cfa}
     21Here, the arguments to the @array@ type are @float@ (element type) and @99@ (length).
     22When this type is used as a function parameter, the type-system requires that a call's argument is a perfect match.
    2323\begin{cfa}
    2424void f( @array( float, 42 )@ & p ) {}   $\C{// p accepts 42 floats}$
    2525f( x );                                                                 $\C{// statically rejected: type lengths are different, 99 != 42}$
     26
    2627test2.cfa:3:1 error: Invalid application of existing declaration(s) in expression.
    2728Applying untyped:  Name: f ... to:  Name: x
    2829\end{cfa}
    29 Function @f@'s parameter expects an array with dimension 42, but the argument dimension 99 does not match.
    30 
    31 A function can be polymorphic over @array@ arguments using the \CFA @forall@ declaration prefix.
     30Here, the function @f@'s parameter @p@ is declared with length 42.
     31However, the call @f( x )@ is invalid, because @x@'s length is @99@, which does not match @42@.
     32
     33A function declaration can be polymorphic over these @array@ arguments by using the \CFA @forall@ declaration prefix.
    3234\begin{cfa}
    3335forall( T, @[N]@ )
     
    3638}
    3739g( x, 0 );                                                              $\C{// T is float, N is 99, dynamic subscript check succeeds}$
    38 g( x, 1000 );                                                   $\C{// T is float, N is 99, dynamic subscript check fails}$
     40g( x, 1000 );                                                   $\C{// T is float, N is 99, dynamic subscript check fails}\CRT$
     41
    3942Cforall Runtime error: subscript 1000 exceeds dimension range [0,99) $for$ array 0x555555558020.
    4043\end{cfa}
    41 Function @g@ takes an arbitrary type parameter @T@ and an unsigned integer \emph{dimension} @N@.
    42 The dimension represents a to-be-determined number of elements, managed by the type system, where 0 represents an empty array.
    43 The type system implicitly infers @float@ for @T@ and @99@ for @N@.
    44 Furthermore, the runtime subscript @x[0]@ (parameter @i@ being @0@) in @g@ is valid because 0 is in the dimension range $[0,99)$ for argument @x@.
    45 The call @g( x, 1000 )@ is also accepted at compile time.
    46 However, the subscript, @x[1000]@, generates a runtime error, because @1000@ is outside the dimension range $[0,99)$ of argument @x@.
     44Function @g@ takes an arbitrary type parameter @T@ and a \emph{dimension parameter} @N@.
     45A dimension parameter represents a to-be-determined count of elements, managed by the type system.
     46The call @g( x, 0 )@ is valid because @g@ accepts any length of array, where the type system infers @float@ for @T@ and length @99@ for @N@.
     47Inferring values for @T@ and @N@ is implicit.
     48Furthermore, in this case, the runtime subscript @x[0]@ (parameter @i@ being @0@) in @g@ is valid because 0 is in the dimension range $[0,99)$ of argument @x@.
     49However, the call @g( x, 1000 )@ is also accepted through compile time;
     50however, this case's subscript, @x[1000]@, generates an error, because @1000@ is outside the dimension range $[0,99)$ of argument @x@.
    4751In general, the @forall( ..., [N] )@ participates in the user-relevant declaration of the name @N@, which becomes usable in parameter/return declarations and within a function.
    4852The syntactic form is chosen to parallel other @forall@ forms:
    4953\begin{cfa}
    50 forall( @[N]@ ) ...     $\C{// dimension}$
    51 forall( T ) ...         $\C{// value datatype}$
    52 forall( T & ) ...       $\C{// opaque datatype}$
     54forall( @[N]@ ) ...     $\C[1.5in]{// dimension}$
     55forall( T ) ...         $\C{// value datatype (formerly, "otype")}$
     56forall( T & ) ...       $\C{// opaque datatype (formerly, "dtype")}\CRT$
    5357\end{cfa}
    5458% The notation @array(thing, N)@ is a single-dimensional case, giving a generic type instance.
     
    5963\begin{cfa}
    6064forall( [N] )
    61 void f( ... ) {
    62         float x1[@N@];                                          $\C{// C array, no subscript checking}$
    63         array(float, N) x2;                                     $\C{// \CFA array, subscript checking}\CRT$
    64 }
    65 \end{cfa}
    66 Both of the stack declared array variables, @x1@ and @x2@, have 42 elements, each element being a @float@.
    67 The two variables have identical size and layout, with no additional ``bookkeeping'' allocations or headers.
    68 The C array, @x1@, has no subscript checking, while \CFA array, @x2@, does.
     65void declDemo( ... ) {
     66        float x1[N];                                            $\C{// built-in type ("C array")}$
     67        array(float, N) x2;                                     $\C{// type from library}$
     68}
     69\end{cfa}
     70Both of the locally-declared array variables, @x1@ and @x2@, have 42 elements, each element being a @float@.
     71The two variables have identical size and layout; they both encapsulate 42-float stack allocations, with no additional ``bookkeeping'' allocations or headers.
    6972Providing this explicit generic approach requires a significant extension to the \CFA type system to support a full-feature, safe, efficient (space and time) array-type, which forms the foundation for more complex array forms in \CFA.
    70 In all following discussion, ``C array'' means types like @x1@ and ``\CFA array'' means types like @x2@.
    71 
    72 A future goal is to provide the new @array@ features with syntax approaching C's (declaration style of @x1@).
     73In all following discussion, ``C array'' means the types like that of @x@ and ``\CFA array'' means the standard-library @array@ type (instantiations), like the type of @x2@.
     74
     75Admittedly, the @array@ library type for @x2@ is syntactically different from its C counterpart.
     76A future goal (TODO xref) is to provide the new @array@ features with syntax approaching C's (declaration style of @x1@).
    7377Then, the library @array@ type could be removed, giving \CFA a largely uniform array type.
    74 At present, the C-syntax @array@ is only partially supported, so the generic @array@ is used exclusively in the thesis.
     78At present, the C-syntax @array@ is only partially supported, so the generic @array@ is used exclusively in the thesis;
     79feature support and C compatibility are revisited in Section ? TODO.
    7580
    7681My contributions in this chapter are:
    77 \begin{enumerate}[leftmargin=*]
     82\begin{enumerate}
    7883\item A type system enhancement that lets polymorphic functions and generic types be parameterized by a numeric value: @forall( [N] )@.
    79 \item Provide a dimension/subscript-checked array-type in the \CFA standard library, where the array's length is statically managed and dynamically valued.
     84\item Provide a length-checked array-type in the \CFA standard library, where the array's length is statically managed and dynamically valued.
    8085\item Provide argument/parameter passing safety for arrays and subscript safety.
     86\item TODO: general parking...
    8187\item Identify the interesting specific abilities available by the new @array@ type.
    8288\item Where there is a gap concerning this feature's readiness for prime-time, identification of specific workable improvements that are likely to close the gap.
     
    8490
    8591
    86 \begin{comment}
    8792\section{Dependent Typing}
    8893
    89 General dependent typing allows a type system to encode arbitrary predicates, \eg behavioural specifications for functions, which is an anti-goal for my work.
     94General dependent typing allows the type system to encode arbitrary predicates (\eg behavioural specifications for functions),
     95which is an anti-goal for my work.
    9096Firstly, this application is strongly associated with pure functional languages,
    9197where a characterization of the return value (giving it a precise type, generally dependent upon the parameters)
     
    95101Secondly, TODO: bash Rust.
    96102TODO: cite the crap out of these claims.
    97 \end{comment}
    98103
    99104
    100105\section{Features Added}
    101106
    102 This section shows more about using the \CFA array and dimension parameters, demonstrating syntax and semantics by way of motivating examples.
     107This section shows more about using the \CFA array and dimension parameters, demonstrating their syntax and semantics by way of motivating examples.
    103108As stated, the core capability of the new array is tracking all dimensions within the type system, where dynamic dimensions are represented using type variables.
    104109
    105110By declaring type variables at the front of object declarations, an array dimension is lexically referenceable where it is needed.
    106 For example, a declaration can share one length, @N@, among a pair of parameters and return type, meaning the input arrays and return array are the same length.
     111For example, a declaration can share one length, @N@, among a pair of parameters and the return,
     112meaning that it requires both input arrays to be of the same length, and guarantees that the result is of that length as well.
    107113\lstinput{10-17}{hello-array.cfa}
    108114Function @f@ does a pointwise comparison of its two input arrays, checking if each pair of numbers is within half a percent of each other, returning the answers in a newly allocated @bool@ array.
    109 The dynamic allocation of the @ret@ array uses the library @alloc@ function,
     115The dynamic allocation of the @ret@ array, by the library @alloc@ function,
    110116\begin{cfa}
    111117forall( T & | sized(T) )
     
    114120}
    115121\end{cfa}
    116 which captures the parameterized dimension information implicitly within its @sizeof@ determination, and casts the return type.
    117 Note, @alloc@ only sees the whole type for its @T@, @array(bool, N)@, where this type's size is a computation based on @N@.
     122uses the parameterized dimension information implicitly within its @sizeof@ determination, and casts the return type.
     123Note that @alloc@ only sees one whole type for its @T@ (which is @f@'s @array(bool, N)@); this type's size is a computation based on @N@.
    118124This example illustrates how the new @array@ type plugs into existing \CFA behaviour by implementing necessary \emph{sized} assertions needed by other types.
    119125(\emph{sized} implies a concrete \vs abstract type with a runtime-available size, exposed as @sizeof@.)
     
    123129\lstinput{30-43}{hello-array.cfa}
    124130\lstinput{45-48}{hello-array.cfa}
    125 \caption{\lstinline{f} Example}
    126 \label{f:fExample}
     131\caption{\lstinline{f} Harness}
     132\label{f:fHarness}
    127133\end{figure}
    128134
    129 \VRef[Figure]{f:fExample} shows an example using function @f@, illustrating how dynamic values are fed into the @array@ type.
     135\VRef[Figure]{f:fHarness} shows a harness that uses function @f@, illustrating how dynamic values are fed into the @array@ type.
    130136Here, the dimension of arrays @x@, @y@, and @result@ is specified from a command-line value, @dim@, and these arrays are allocated on the stack.
    131137Then the @x@ array is initialized with decreasing values, and the @y@ array with amounts offset by constant @0.005@, giving relative differences within tolerance initially and diverging for later values.
     
    138144
    139145In summary:
    140 \begin{itemize}[leftmargin=*]
     146\begin{itemize}
    141147\item
    142148@[N]@ within a @forall@ declares the type variable @N@ to be a managed length.
    143149\item
    144 @N@ can be used in an expression with type @size_t@ within the function body.
     150@N@ can be used an expression of type @size_t@ within the declared function body.
    145151\item
    146152The value of an @N@-expression is the acquired length, derived from the usage site, \ie generic declaration or function call.
     
    152158\begin{enumerate}[leftmargin=*]
    153159\item
    154 The \CC template @N@ can only be a compile-time value, while the \CFA @N@ may be a runtime value.
     160The \CC template @N@ can only be compile-time value, while the \CFA @N@ may be a runtime value.
     161% agreed, though already said
    155162\item
    156163\CC does not allow a template function to be nested, while \CFA lets its polymorphic functions to be nested.
    157 Hence, \CC precludes a simple form of information hiding.
    158 \item
    159 \label{p:DimensionPassing}
     164% why is this important?
     165\item
    160166The \CC template @N@ must be passed explicitly at the call, unless @N@ has a default value, even when \CC can deduct the type of @T@.
    161167The \CFA @N@ is part of the array type and passed implicitly at the call.
     
    163169% mycode/arrr/thesis-examples/check-peter/cs-cpp.cpp, v2
    164170\item
    165 \CC cannot have an array of references, but can have an array of @const@ pointers.
     171\CC cannot have an array of references, but can have an array of pointers.
    166172\CC has a (mistaken) belief that references are not objects, but pointers are objects.
    167173In the \CC example, the arrays fall back on C arrays, which have a duality with references with respect to automatic dereferencing.
     
    172178% https://stackoverflow.com/questions/922360/why-cant-i-make-a-vector-of-references
    173179\item
    174 \label{p:ArrayCopy}
    175180C/\CC arrays cannot be copied, while \CFA arrays can be copied, making them a first-class object (although array copy is often avoided for efficiency).
    176181% fixed by comparing to std::array
    177182% mycode/arrr/thesis-examples/check-peter/cs-cpp.cpp, v10
    178183\end{enumerate}
    179 The \CC template @array@ type mitigates points \VRef[]{p:DimensionPassing} and \VRef[]{p:ArrayCopy}, but it is also trying to accomplish a similar mechanism to \CFA @array@.
     184TODO: settle Mike's concerns with this comparison (perhaps, remove)
    180185
    181186\begin{figure}
     
    221226Just as the first example in \VRef[Section]{s:ArrayIntro} shows a compile-time rejection of a length mismatch,
    222227so are length mismatches stopped when they involve dimension parameters.
    223 While \VRef[Figure]{f:fExample} shows successfully calling a function @f@ expecting two arrays of the same length,
     228While \VRef[Figure]{f:fHarness} shows successfully calling a function @f@ expecting two arrays of the same length,
    224229\begin{cfa}
    225230array( bool, N ) & f( array( float, N ) &, array( float, N ) & );
     
    241246The same argument safety and the associated implicit communication of array length occurs.
    242247Preexisting \CFA allowed aggregate types to be generalized with type parameters, enabling parameterizing of element types.
    243 This has been extended to allow parameterizing by dimension.
    244 Doing so gives a refinement of C's ``flexible array member''~\cite[\S~6.7.2.1.18]{C11}.
    245 \begin{cfa}
    246 struct S {
    247         ...
    248         double d []; // incomplete array type => flexible array member
    249 } * s = malloc( sizeof( struct S ) + sizeof( double [10] ) );
    250 \end{cfa}
    251 which creates a VLA of size 10 @double@s at the end of the structure.
    252 A C flexible array member can only occur at the end of a structure;
    253 \CFA allows length-parameterized array members to be nested at arbitrary locations, with intervening member declarations.
     248Now, \CFA also allows parameterizing them by length.
     249Doing so gives a refinement of C's ``flexible array member'' pattern[TODO: cite ARM 6.7.2.1 pp18]\cite{arr:gnu-flex-mbr}.
     250While a C flexible array member can only occur at the end of the enclosing structure,
     251\CFA allows length-parameterized array members to be nested at arbitrary locations.
     252This flexibility, in turn, allows for multiple array members.
    254253\lstinput{10-15}{hello-accordion.cfa}
    255254The structure has course- and student-level metatdata (their respective field names) and a position-based preferences' matrix.
    256255Its layout has the starting offset of @studentIds@ varying according to the generic parameter @C@, and the offset of @preferences@ varying according to both generic parameters.
    257256
    258 \VRef[Figure]{f:checkExample} shows a program main using @School@ and results with different array sizes.
     257\VRef[Figure]{f:checkHarness} shows a program main using @School@ and results with different array sizes.
    259258The @school@ variable holds many students' course-preference forms.
    260259It is on the stack and its initialization does not use any casting or size arithmetic.
     
    286285\end{cquote}
    287286
    288 \caption{\lstinline{School} Example, Input and Output}
    289 \label{f:checkExample}
     287\caption{\lstinline{School} harness, input and output}
     288\label{f:checkHarness}
    290289\end{figure}
    291290
    292291When a function operates on a @School@ structure, the type system handles its memory layout transparently.
    293292\lstinput{30-37}{hello-accordion.cfa}
    294 In the example, function @getPref@ returns, for the student at position @is@, what is the position of their @pref@\textsuperscript{th}-favoured class?
     293In the example, this @getPref@ function answers, for the student at position @is@, what is the position of its @pref@\textsuperscript{th}-favoured class?
    295294
    296295
     
    325324
    326325The repurposed heavy equipment is
    327 \begin{itemize}[leftmargin=*]
     326\begin{itemize}
    328327\item
    329328        Resolver provided values for a used declaration's type-system variables,
     
    349348int main() {
    350349        thing( @10@ ) x;  f( x );  $\C{// prints 10, [4]}$
    351         thing( @100@ ) y;  f( y );  $\C{// prints 100}$
     350        thing( 100 ) y;  f( y );  $\C{// prints 100}$
     351        return 0;
    352352}
    353353\end{cfa}
    354354This example has:
    355 \begin{enumerate}[leftmargin=*]
     355\begin{enumerate}
    356356\item
    357357        The symbol @N@ being declared as a type variable (a variable of the type system).
     
    369369Because the box pass handles a type's size as its main datum, the encoding is chosen to use it.
    370370The production and recovery are then straightforward.
    371 \begin{itemize}[leftmargin=*]
     371\begin{itemize}
    372372\item
    373373        The value $n$ is encoded as a type whose size is $n$.
    374374\item
    375         Given a dimension expression $e$, produce an internal type @char[@$e$@]@ to represent it.
     375        Given a dimension expression $e$, produce type @char[@$e$@]@ to represent it.
    376376        If $e$ evaluates to $n$ then the encoded type has size $n$.
    377377\item
     
    389389}
    390390int main() {
    391         thing( @char[10]@ ) x;  f( x );  $\C{// prints 10, [4]}$
    392         thing( @char[100]@ ) y;  f( y );  $\C{// prints 100}$
     391        thing( char[@10@] ) x;  f( x );  $\C{// prints 10, [4]}$
     392        thing( char[100] ) y;  f( y );  $\C{// prints 100}$
     393        return 0;
    393394}
    394395\end{cfa}
    395396Observe:
    396 \begin{enumerate}[leftmargin=*]
     397\begin{enumerate}
    397398\item
    398399        @N@ is now declared to be a type.
    399         It is declared to be \emph{sized} (by the @*@), meaning that the box pass shall do its @sizeof(N)@$\rightarrow$@__sizeof_N@ extra parameter and expression translation.
     400        It is declared to be \emph{sized} (by the @*@), meaning that the box pass shall do its @sizeof(N)@--@__sizeof_N@ extra parameter and expression translation.
    400401\item
    401402        @thing(N)@ is a type; the argument to the generic @thing@ is a type (type variable).
     
    403404        The @sout...@ expression (being an application of the @?|?@ operator) has a second argument that is an ordinary expression.
    404405\item
    405         The type of variable @x@ is another @thing(-)@ type; the argument to the generic @thing@ is a type (array type of bytes, @char[@$e$@]@).
     406        The type of variable @x@ is another @thing(-)@ type; the argument to the generic @thing@ is a type (array type of bytes, @char@).
    406407\end{enumerate}
    407408
     
    414415        struct __conc_thing_10 {} x;  f( @10@, &x );  $\C{// prints 10, [4]}$
    415416        struct __conc_thing_100 {} y;  f( @100@, &y );  $\C{// prints 100}$
     417        return 0;
    416418}
    417419\end{cfa}
    418420Observe:
    419 \begin{enumerate}[leftmargin=*]
     421\begin{enumerate}
    420422\item
    421423        The type parameter @N@ is gone.
     
    425427        The @sout...@ expression (being an application of the @?|?@ operator) has a regular variable (parameter) usage for its second argument.
    426428\item
    427         Information about the particular @thing@ instantiation (value 10) is moved, from the type, to a regular function-call argument.
     429        Information about the particular @thing@ instantiation (value 10) has moved, from the type, to a regular function-call argument.
    428430\end{enumerate}
    429431At the end of the desugaring and downstream processing, the original C idiom of ``pass both a length parameter and a pointer'' has been reconstructed.
     
    431433The compiler's action produces the more complex form, which if handwritten, would be error-prone.
    432434
    433 At the compiler front end, the parsing changes AST schema extensions and validation rules for enabling the sugared user input.
    434 \begin{itemize}[leftmargin=*]
     435Back at the compiler front end, the parsing changes AST schema extensions and validation rules for enabling the sugared user input.
     436\begin{itemize}
    435437\item
    436438        Recognize the form @[N]@ as a type-variable declaration within a @forall@.
     
    438440        Have the new brand of type-variable, \emph{Dimension}, in the AST form of a type-variable, to represent one parsed from @[-]@.
    439441\item
    440         Allow a type variable to occur in an expression.  Validate (after parsing) that only dimension-branded type-variables are used here.
     442        Allow a type variable to occur in an expression.  Validate (after parsing) that only dimension-branded type variables are used here.
    441443\item
    442444        Allow an expression to occur in type-argument position.  Brand the resulting type argument as a dimension.
     
    455457\label{s:ArrayTypingC}
    456458
    457 Essential in giving a guarantee of accurate length is the compiler's ability to reject a program that presumes to mishandle length.
    458 By contrast, most discussion so far deals with communicating length, from one party who knows it, to another willing to work with any given length.
    459 For scenarios where the concern is a mishandled length, the interaction is between two parties who both claim to know something about it.
    460 
    461 C and \CFA can check when working with two static values.
    462 \begin{cfa}
    463 enum { n = 42 };
    464 float x[@n@];   // or just 42
    465 float (*xp1)[@42@] = &x;    // accept
    466 float (*xp2)[@999@] = &x;   // reject
    467 warning: initialization of 'float (*)[999]' from incompatible pointer type 'float (*)[42]'
    468 \end{cfa}
    469 When a variable is involved, C and \CFA take two different approaches.
    470 Today's C compilers accept the following without warning.
    471 \begin{cfa}
    472 static const int n = 42;
    473 float x[@n@];
    474 float (* xp)[@999@] = &x; $\C{// should be static rejection here}$
     459Essential in giving a guarantee of accurate length is the compiler's ability
     460to reject a program that presumes to mishandle length.
     461By contrast, most discussion so far dealt with communicating length,
     462from one party who knows it, to another who is willing to work with any given length.
     463For scenarios where the concern is a mishandled length,
     464the interaction is between two parties who both claim to know something about it.
     465Such a scenario occurs in this pure C fragment, which today's C compilers accept:
     466\begin{cfa}
     467int n = @42@;
     468float x[n];
     469float (*xp)[@999@] = &x;
    475470(*xp)[@500@]; $\C{// in "bound"?}$
    476471\end{cfa}
    477472Here, the array @x@ has length 42, while a pointer to it (@xp@) claims length 999.
    478 So, while the subscript of @xp@ at position 500 is out of bound with its referent @x@,
     473So, while the subscript of @xp@ at position 500 is out of bound of its referent @x@,
    479474the access appears in-bound of the type information available on @xp@.
    480 In fact, length is being mishandled in the previous step, where the type-carried length information on @x@ is not compatible with that of @xp@.
    481 
    482 In \CFA, I choose to reject this C example at the point where the type-carried length information on @x@ is not compatible with that of @xp@, and correspondingly, its array counterpart at the same location:
    483 \begin{cfa}
    484 static const int n = 42;
    485 array( float, @n@ ) x;
    486 array( float, @999@ ) * xp = &x; $\C{// static rejection here}$
    487 (*xp)[@500@]; $\C{// runtime check passes}$
    488 \end{cfa}
    489 The way the \CFA array is implemented, the type analysis for this case reduces to a case similar to the earlier C version.
     475Truly, length is being mishandled in the previous step,
     476where the type-carried length information on @x@ is not compatible with that of @xp@.
     477
     478The \CFA new-array rejects the analogous case:
     479\begin{cfa}
     480int n = @42@;
     481array(float, n) x;
     482array(float, 999) * xp = x; $\C{// static rejection here}$
     483(*xp)[@500@]; $\C{// runtime check vs len 999}$
     484\end{cfa}
     485The way the \CFA array is implemented, the type analysis of this case reduces to a case similar to the earlier C version.
    490486The \CFA compiler's compatibility analysis proceeds as:
    491487\begin{itemize}[parsep=0pt]
    492488\item
    493         Is @array( float, 999 )@ type-compatible with @array( float, n )@?
    494 \item
    495         Is desugared @array( float, char[999] )@ type-compatible with desugared @array( float, char[n] )@?
    496 %               \footnote{
    497 %               Here, \lstinline{arrayX} represents the type that results from desugaring the \lstinline{array} type into a type whose generic parameters are all types.
    498 %               This presentation elides the noisy fact that \lstinline{array} is actually a macro for something bigger;
    499 %               the reduction to \lstinline{char [-]} still proceeds as sketched.}
    500 \item
    501         Is internal type @char[999]@ type-compatible with internal type @char[n]@?
     489        Is @array(float, 999)@ type-compatible with @array(float, n)@?
     490\item
     491        Is @arrayX(float, char[999])@ type-compatible with @arrayX(float, char[n])@?\footnote{
     492                Here, \lstinline{arrayX} represents the type that results
     493                from desugaring the \lstinline{array} type
     494                into a type whose generic parameters are all types.
     495                This presentation elides the noisy fact that
     496                \lstinline{array} is actually a macro for something bigger;
     497                the reduction to \lstinline{char[-]} still proceeds as sketched.}
     498\item
     499        Is @char[999]@ type-compatible with @char[n]@?
    502500\end{itemize}
    503 The answer is false because, in general, the value of @n@ is unknown at compile time, and hence, an error is raised.
    504 For safety, it makes sense to reject the corresponding C case, which is a non-backwards compatible change.
    505 There are two mitigations for this incompatibility.
    506 
    507 First, a simple recourse is available in a situation where @n@ is \emph{known} to be 999 by using a cast.
    508 \begin{cfa}
    509 float (* xp)[999] = @(float (*)[999])@&x;
    510 \end{cfa}
    511 The cast means the programmer has accepted blame.
    512 Moreover, the cast is ``eye candy'' marking where the unchecked length knowledge is used.
    513 Therefore, a program being onboarded to \CFA requires some upgrading to satisfy the \CFA rules (and arguably become clearer), without giving up its validity to a plain C compiler.
    514 Second, the incompatibility only affects types like pointer-to-array, which are infrequently used in C.
    515 The more common C idiom for aliasing an array is to use a pointer-to-first-element type, which does not participate in the \CFA array's length checking.\footnote{
     501To achieve the necessary \CFA rejections meant rejecting the corresponding C case, which is not backward compatible.
     502There are two complementary mitigations for this incompatibility.
     503
     504First, a simple recourse is available to a programmer who intends to proceed
     505with the statically unsound assignment.
     506This situation might arise if @n@ were known to be 999,
     507rather than 42, as in the introductory examples.
     508The programmer can add a cast in the \CFA code.
     509\begin{cfa}
     510xp = @(float (*)[999])@ &x;
     511\end{cfa}
     512This addition causes \CFA to accept, because now, the programmer has accepted blame.
     513This addition is benign in plain C, because the cast is valid, just unnecessary there.
     514Moreover, the addition can even be seen as appropriate ``eye candy,''
     515marking where the unchecked length knowledge is used.
     516Therefore, a program being onboarded to \CFA can receive a simple upgrade,
     517to satisfy the \CFA rules (and arguably become clearer),
     518without giving up its validity to a plain C compiler.
     519
     520Second, the incompatibility only affects types like pointer-to-array,
     521which are are infrequently used in C.
     522The more common C idiom for aliasing an array is to use a pointer-to-first-element type,
     523which does not participate in the \CFA array's length checking.\footnote{
    516524        Notably, the desugaring of the \lstinline{array} type avoids letting any \lstinline{-[-]} type decay,
    517525        in order to preserve the length information that powers runtime bound-checking.}
    518 Therefore, the need to upgrade legacy C code is low.
    519 Finally, if this incompatibility is a problem onboarding C programs to \CFA, it is should be possible to change the C type check to a warning rather than an error, acting as a \emph{lint} of the original code for a missing type annotation.
    520 
    521 To handle two occurrences of the same variable, more information is needed, \eg, this is fine,
    522 \begin{cfa}
    523 int n = 42;
    524 float x[@n@];
    525 float (*xp)[@n@] = x;   // accept
    526 \end{cfa}
    527 where @n@ remains fixed across a contiguous declaration context.
    528 However, intervening dynamic statement cause failures.
    529 \begin{cfa}
    530 int n = 42;
    531 float x[@n@];
    532 @n@ = 999; // dynamic change
    533 float (*xp)[@n@] = x;   // reject
    534 \end{cfa}
    535 However, side-effects can occur in a contiguous declaration context.
     526Therefore, the frequency of needing to upgrade legacy C code (as discussed in the first mitigation)
     527is anticipated to be low.
     528
     529Because the incompatibility represents a low cost to a \CFA onboarding effort
     530(with a plausible side benefit of linting the original code for a missing annotation),
     531no special measures were added to retain the compatibility.
     532It would be possible to flag occurrences of @-[-]@ types that come from @array@ desugaring,
     533treating those with stricter \CFA rules, while treating others with classic C rules.
     534If future lessons from C project onboarding warrant it,
     535this special compatibility measure can be added.
     536
     537Having allowed that both the initial C example's check
     538\begin{itemize}
     539        \item
     540                Is @float[999]@ type-compatible with @float[n]@?
     541\end{itemize}
     542and the second \CFA example's induced check
     543\begin{itemize}
     544        \item
     545                Is @char[999]@ type-compatible with @char[n]@?
     546\end{itemize}
     547shall have the same answer, (``no''),
     548discussion turns to how I got the \CFA compiler to produce this answer.
     549In its preexisting form, it produced a (buggy) approximation of the C rules.
     550To implement the new \CFA rules, I took the syntactic recursion a step further, obtaining,
     551in both cases:
     552\begin{itemize}
     553        \item
     554                Is @999@ compatible with @n@?
     555\end{itemize}
     556This compatibility question applies to a pair of expressions, where the earlier implementation were to types.
     557Such an expression-compatibility question is a new addition to the \CFA compiler.
     558Note, these questions only arise in the context of dimension expressions on (C) array types.
     559
     560TODO: ensure these compiler implementation matters are treated under \CFA compiler background:
     561type unification,
     562cost calculation,
     563GenPoly.
     564
     565The relevant technical component of the \CFA compiler is the type unification procedure within the type resolver.
     566I added rules for continuing this unification into expressions that occur within types.
     567It is still fundamentally doing \emph{type} unification
     568because it is participating in binding type variables,
     569and not participating in binding any variables that stand in for expression fragments
     570(for there is no such sort of variable in \CFA's analysis.)
     571An unfortunate fact about the \CFA compiler's preexisting implementation is that
     572type unification suffers from two forms of duplication.
     573
     574The first duplication has (many of) the unification rules stated twice.
     575As a result, my additions for dimension expressions are stated twice.
     576The extra statement of the rules occurs in the @GenPoly@ module,
     577where concrete types like @array(int, 5)@\footnote{
     578        Again, the presentation is simplified
     579        by leaving the \lstinline{array} macro unexpanded.}
     580are lowered into corresponding C types @struct __conc_array_1234@ (the suffix being a generated index).
     581In this case, the struct's definition contains fields that hardcode the argument values of @float@ and @5@.
     582The next time an @array(-,-)@ concrete instance is encountered, it checks if the previous @struct __conc_array_1234@ is suitable for it.
     583Yes, for another occurrence of @array(int, 5)@;
     584no, for either @array(rational(int), 5)@ or @array(int, 42)@.
     585By the last example, this phase must ``reject''
     586the hypothesis that it should reuse the dimension-5 instance's C-lowering for a dimension-42 instance.
     587
     588The second duplication has unification (proper) being invoked at two stages of expression resolution.
     589As a result, my added rule set needs to handle more cases than the preceding discussion motivates.
     590In the program
     591\begin{cfa}
     592void @f@( double );
     593forall( T & ) void @f@( T & );
     594void g( int n ) {
     595        array( float, n + 1 ) x;
     596        f(x);   // overloaded
     597}
     598\end{cfa}
     599when resolving the function call, @g@, the first unification stage
     600compares the type @T@ of the parameter with @array( float, n + 1 )@, of the argument.
     601TODO: finish.
     602
     603The actual rules for comparing two dimension expressions are conservative.
     604To answer, ``yes, consider this pair of expressions to be matching,''
     605is to imply, ``all else being equal, allow an array with length calculated by $e_1$
     606to be passed to a function expecting a length-$e_2$ array.''\footnote{
     607        TODO: Deal with directionality, that I'm doing exact-match, no ``at least as long as,'' no subtyping.
     608        Should it be an earlier scoping principle?  Feels like it should matter in more places than here.}
     609So, a ``yes'' answer must represent a guarantee that both expressions evaluate the
     610same result, while a ``no'' can tolerate ``they might, but we're not sure'',
     611provided that practical recourses are available
     612to let programmers express better knowledge.
     613The new rule-set in the current release is, in fact, extremely conservative.
     614I chose to keep things simple,
     615and allow future needs to drive adding additional complexity, within the new framework.
     616
     617For starters, the original motivating example's rejection
     618is not based on knowledge that
     619the @xp@ length of (the literal) 999 is value-unequal to
     620the (obvious) runtime value of the variable @n@, which is the @x@ length.
     621Rather, the analysis assumes a variable's value can be anything,
     622and so there can be no guarantee that its value is 999.
     623So, a variable and a literal can never match.
     624
     625Two occurrences of the same literal value are obviously a fine match.
     626For two occurrences of the same variable, more information is needed.
     627For example, this one is fine
     628\begin{cfa}
     629void f( const int n ) {
     630        float x[n];
     631        float (*xp)[n] = x;   // accept
     632}
     633\end{cfa}
     634while this one is not:
     635\begin{cfa}
     636void f() {
     637        int n = 42;
     638        float x[n];
     639        n = 999;
     640        float (*xp)[n] = x;   // reject
     641}
     642\end{cfa}
     643Furthermore, the fact that the first example sees @n@ as @const@
     644is not actually sufficient.
     645In this example, @f@'s length expression's declaration is as @const@ as it can be,
     646yet its value still changes between the two invocations:
    536647\begin{cquote}
    537 \setlength{\tabcolsep}{20pt}
     648\setlength{\tabcolsep}{15pt}
    538649\begin{tabular}{@{}ll@{}}
    539650\begin{cfa}
    540651// compile unit 1
    541 extern int @n@;
    542 extern float g();
    543 void f() {
    544         float x[@n@] = { g() };
    545         float (*xp)[@n@] = x;   // reject
     652void g();
     653void f( const int & const nr ) {
     654        float x[nr];
     655        g();    // change n
     656        @float (*xp)[nr] = x;@   // reject
    546657}
    547658\end{cfa}
     
    549660\begin{cfa}
    550661// compile unit 2
    551 int @n@ = 42;
     662static int n = 42;
    552663void g() {
    553         @n@ = 99;
    554 }
    555 
    556 
     664        n = 99;
     665}
     666
     667f( n );
    557668\end{cfa}
    558669\end{tabular}
     
    560671The issue here is that knowledge needed to make a correct decision is hidden by separate compilation.
    561672Even within a translation unit, static analysis might not be able to provide all the information.
    562 However, if the example uses @const@, the check is possible.
    563 \begin{cquote}
    564 \setlength{\tabcolsep}{20pt}
    565 \begin{tabular}{@{}ll@{}}
    566 \begin{cfa}
    567 // compile unit 1
    568 extern @const@ int n;
    569 extern float g();
    570 void f() {
    571         float x[n] = { g() };
    572         float (*xp)[n] = x;   // reject
    573 }
    574 \end{cfa}
    575 &
    576 \begin{cfa}
    577 // compile unit 2
    578 @const@ int n = 42;
    579 void g() {
    580         @n = 99@; // allowed
    581 }
    582 
    583 
    584 \end{cfa}
    585 \end{tabular}
    586 \end{cquote}
     673
     674My rule set also respects a traditional C feature: In spite of the several limitations of the C rules
     675accepting cases that produce different values, there are a few mismatches that C stops.
     676C is quite precise when working with two static values.
     677\begin{cfa}
     678enum { fortytwo = 42 };
     679float x[fortytwo];
     680float (*xp1)[42] = &x;    // accept
     681float (*xp2)[999] = &x;   // reject
     682\end{cfa}
     683My \CFA rules agree with C's on these cases.
    587684
    588685In summary, the new rules classify expressions into three groups:
    589686\begin{description}
    590687\item[Statically Evaluable]
    591         Expressions for which a specific value can be calculated (conservatively) at compile-time.
    592         A preexisting \CFA compiler module defines which literals, enumerators, and expressions qualify and evaluates them.
     688        Expressions for which a specific value can be calculated (conservatively)
     689        at compile-time.
     690        A preexisting \CFA compiler module defines which literals, enumerators, and expressions qualify,
     691        and evaluates them.
    593692\item[Dynamic but Stable]
    594693        The value of a variable declared as @const@, including a @const@ parameter.
    595694\item[Potentially Unstable]
    596695        The catch-all category.  Notable examples include:
    597         any function-call result, @float x[foo()]@, the particular function-call result that is a pointer dereference, @void f(const int * n)@ @{ float x[*n]; }@, and any use of a reference-typed variable.
     696        any function-call result, @float x[foo()];@,
     697        the particular function-call result that is a pointer dereference, @void f(const int * n)@ @{ float x[*n]; }@, and
     698        any use of a reference-typed variable.
    598699\end{description}
    599700Within these groups, my \CFA rules are:
    600 \begin{itemize}[leftmargin=*]
     701\begin{itemize}
    601702\item
    602703        Accept a Statically Evaluable pair, if both expressions have the same value.
     
    609710\end{itemize}
    610711The traditional C rules are:
    611 \begin{itemize}[leftmargin=*]
     712\begin{itemize}
    612713\item
    613714        Reject a Statically Evaluable pair, if the expressions have two different values.
     
    615716        Otherwise, accept.
    616717\end{itemize}
    617 \VRef[Figure]{f:DimexprRuleCompare} gives a case-by-case comparison of the consequences of these rule sets.
    618 It demonstrates that the \CFA false alarms occur in the same cases as C treats unsafe.
    619 It also shows that C-incompatibilities only occur in cases that C treats unsafe.
    620718
    621719\begin{figure}
     
    648746                where \lstinline{expr1} and \lstinline{expr2} are meta-variables varying according to the row's Case.
    649747                Each row's claim applies to other harnesses too, including,
    650                 \begin{itemize}[leftmargin=*]
     748                \begin{itemize}
    651749                \item
    652750                        calling a function with a parameter like \lstinline{x} and an argument of the \lstinline{xp} type,
     
    664762                The table treats symbolic identity (Same/Different on rows)
    665763                apart from value equality (Equal/Unequal on columns).
    666                 \begin{itemize}[leftmargin=*]
     764                \begin{itemize}
    667765                \item
    668766                        The expressions \lstinline{1}, \lstinline{0+1} and \lstinline{n}
     
    683781\end{figure}
    684782
    685 \begin{comment}
    686 Given that the above check
    687 \begin{itemize}
    688         \item
    689         Is internal type @char[999]@ type-compatible with internal type @char[n]@?
    690 \end{itemize}
    691 answers false, discussion turns to how I got the \CFA compiler to produce this answer.
    692 In its preexisting form, the type system had a buggy approximation of the C rules.
    693 To implement the new \CFA rules, I added one further step.
    694 \begin{itemize}
    695         \item
    696                 Is @999@ compatible with @n@?
    697 \end{itemize}
    698 This question applies to a pair of expressions, where the earlier question applies to types.
    699 An expression-compatibility question is a new addition to the \CFA compiler, and occurs in the context of dimension expressions, and possibly enumerations assigns, which must be unique.
    700 
    701 % TODO: ensure these compiler implementation matters are treated under \CFA compiler background: type unification, cost calculation, GenPoly.
    702 
    703 The relevant technical component of the \CFA compiler is the standard type-unification within the type resolver.
    704 \begin{cfa}
    705 example
    706 \end{cfa}
    707 I added rules for continuing this unification into expressions that occur within types.
    708 It is still fundamentally doing \emph{type} unification because it is participating in binding type variables, and not participating in binding any variables that stand in for expression fragments (for there is no such sort of variable in \CFA's analysis.)
    709 An unfortunate fact about the \CFA compiler's preexisting implementation is that type unification suffers from two forms of duplication.
    710 
    711 In detail, the first duplication has (many of) the unification rules stated twice.
    712 As a result, my additions for dimension expressions are stated twice.
    713 The extra statement of the rules occurs in the @GenPoly@ module, where concrete types like @array( int, 5 )@\footnote{
    714         Again, the presentation is simplified
    715         by leaving the \lstinline{array} macro unexpanded.}
    716 are lowered into corresponding C types @struct __conc_array_1234@ (the suffix being a generated index).
    717 In this case, the struct's definition contains fields that hardcode the argument values of @float@ and @5@.
    718 The next time an @array( -, - )@ concrete instance is encountered, it checks if the previous @struct __conc_array_1234@ is suitable for it.
    719 Yes, for another occurrence of @array( int, 5 )@;
    720 no, for examples like @array( int, 42 )@ or @array( rational(int), 5 )@.
    721 In the first example, it must reject the reuse hypothesis for a dimension-@5@ and a dimension-@42@ instance.
    722 
    723 The second duplication has unification (proper) being invoked at two stages of expression resolution.
    724 As a result, my added rule set needs to handle more cases than the preceding discussion motivates.
    725 In the program
    726 \begin{cfa}
    727 void @f@( double ); // overload
    728 forall( T & ) void @f@( T & ); // overload
    729 void g( int n ) {
    730         array( float, n + 1 ) x;
    731         f(x);   // overloaded
    732 }
    733 \end{cfa}
    734 when resolving a function call to @g@, the first unification stage compares the type @T@ of the parameter with @array( float, n + 1 )@, of the argument.
    735 \PAB{TODO: finish.}
    736 
    737 The actual rules for comparing two dimension expressions are conservative.
    738 To answer, ``yes, consider this pair of expressions to be matching,''
    739 is to imply, ``all else being equal, allow an array with length calculated by $e_1$
    740 to be passed to a function expecting a length-$e_2$ array.''\footnote{
    741         TODO: Deal with directionality, that I'm doing exact-match, no ``at least as long as,'' no subtyping.
    742         Should it be an earlier scoping principle?  Feels like it should matter in more places than here.}
    743 So, a ``yes'' answer must represent a guarantee that both expressions evaluate the
    744 same result, while a ``no'' can tolerate ``they might, but we're not sure'',
    745 provided that practical recourses are available
    746 to let programmers express better knowledge.
    747 The new rule-set in the current release is, in fact, extremely conservative.
    748 I chose to keep things simple,
    749 and allow future needs to drive adding additional complexity, within the new framework.
    750 
    751 For starters, the original motivating example's rejection is not based on knowledge that the @xp@ length of (the literal) 999 is value-unequal to the (obvious) runtime value of the variable @n@, which is the @x@ length.
    752 Rather, the analysis assumes a variable's value can be anything, and so there can be no guarantee that its value is 999.
    753 So, a variable and a literal can never match.
    754 
    755 TODO: Discuss the interaction of this dimension hoisting with the challenge of extra unification for cost calculation
    756 \end{comment}
    757 
    758 The conservatism of the new rule set can leave a programmer needing a recourse, when needing to use a dimension expression whose stability argument is more subtle than current-state analysis.
     783
     784\VRef[Figure]{f:DimexprRuleCompare} gives a case-by-case comparison of the consequences of these rule sets.
     785It demonstrates that the \CFA false alarms occur in the same cases as C treats unsafe.
     786It also shows that C-incompatibilities only occur in cases that C treats unsafe.
     787
     788
     789The conservatism of the new rule set can leave a programmer needing a recourse,
     790when needing to use a dimension expression whose stability argument
     791is more subtle than current-state analysis.
    759792This recourse is to declare an explicit constant for the dimension value.
    760 Consider these two dimension expressions, whose uses are rejected by the blunt current-state rules:
    761 \begin{cfa}
    762 void f( int @&@ nr, @const@ int nv ) {
    763         float x[@nr@];
    764         float (*xp)[@nr@] = &x;   // reject: nr varying (no references)
    765         float y[@nv + 1@];
    766         float (*yp)[@nv + 1@] = &y;   // reject: ?+? unpredictable (no functions)
     793Consider these two dimension expressions,
     794whose reuses are rejected by the blunt current-state rules:
     795\begin{cfa}
     796void f( int & nr, const int nv ) {
     797        float x[nr];
     798        float (*xp)[nr] = &x;   // reject: nr varying (no references)
     799        float y[nv + 1];
     800        float (*yp)[nv + 1] = &y;   // reject: ?+? unpredictable (no functions)
    767801}
    768802\end{cfa}
    769803Yet, both dimension expressions are reused safely.
    770 The @nr@ reference is never written, not volatile meaning no implicit code (load) between declarations, and control does not leave the function between the uses.
    771 As well, the build-in @?+?@ function is predictable.
    772 To make these cases work, the programmer must add the follow constant declarations (cast does not work):
     804The @nr@ reference is never written, not volatile
     805and control does not leave the function between the uses.
     806The name @?+?@ resolves to a function that is quite predictable.
     807Here, the programmer can add the constant declarations (cast does not work):
    773808\begin{cfa}
    774809void f( int & nr, const int nv ) {
     
    784819achieved by adding a superfluous ``snapshot it as of now'' directive.
    785820
    786 The snapshot trick is also used by the \CFA translation, though to achieve a different outcome.
     821The snapshotting trick is also used by the translation, though to achieve a different outcome.
    787822Rather obviously, every array must be subscriptable, even a bizarre one:
    788823\begin{cfa}
    789 array( float, @rand(10)@ ) x;
    790 x[@0@];  // 10% chance of bound-check failure
    791 \end{cfa}
    792 Less obvious is that the mechanism of subscripting is a function call, which must communicate length accurately.
    793 The bound-check above (callee logic) must use the actual allocated length of @x@, without mistakenly reevaluating the dimension expression, @rand(10)@.
     824array( float, rand(10) ) x;
     825x[0];  // 10% chance of bound-check failure
     826\end{cfa}
     827Less obvious is that the mechanism of subscripting is a function call,
     828which must communicate length accurately.
     829The bound-check above (callee logic) must use the actual allocated length of @x@,
     830without mistakenly reevaluating the dimension expression, @rand(10)@.
    794831Adjusting the example to make the function's use of length more explicit:
    795832\begin{cfa}
    796 forall( T * )
    797 void f( T * x ) { sout | sizeof( *x ); }
     833forall ( T * )
     834void f( T * x ) { sout | sizeof(*x); }
    798835float x[ rand(10) ];
    799836f( x );
     
    803840void f( size_t __sizeof_T, void * x ) { sout | __sizeof_T; }
    804841\end{cfa}
    805 the translation calls the dimension argument twice:
     842the translation must call the dimension argument twice:
    806843\begin{cfa}
    807844float x[ rand(10) ];
    808845f( rand(10), &x );
    809846\end{cfa}
    810 The correct form is:
     847Rather, the translation is:
    811848\begin{cfa}
    812849size_t __dim_x = rand(10);
     
    814851f( __dim_x, &x );
    815852\end{cfa}
    816 Dimension hoisting already existed in the \CFA compiler.
    817 But its was buggy, particularly with determining, ``Can hoisting the expression be skipped here?'', for skipping this hoisting is clearly desirable in some cases.
    818 For example, when a programmer has already hoisted to perform an optimiation to prelude duplicate code (expression) and/or expression evaluation.
     853The occurrence of this dimension hoisting during translation was in the preexisting \CFA compiler.
     854But its cases were buggy, particularly with determining, ``Can hoisting the expression be skipped here?'', for skipping this hoisting is clearly desirable in some cases.
     855For example, when the programmer has already done so manually. \PAB{I don't know what this means.}
    819856In the new implementation, these cases are correct, harmonized with the accept/reject criteria.
     857
     858TODO: Discuss the interaction of this dimension hoisting with the challenge of extra unification for cost calculation
    820859
    821860
     
    824863
    825864A multidimensional array implementation has three relevant levels of abstraction, from highest to lowest, where the array occupies \emph{contiguous memory}.
    826 \begin{enumerate}[leftmargin=*]
     865\begin{enumerate}
    827866\item
    828867Flexible-stride memory:
     
    10611100Preexisting \CFA mechanisms achieve this requirement, but with poor performance.
    10621101Furthermore, advanced array users need an exception to the basic mechanism, which does not occur with other aggregates.
    1063 Hence, arrays introduce subtleties in supporting an element's lifecycle.
     1102Hence, arrays introduce subleties in supporting an element's lifecycle.
    10641103
    10651104The preexisting \CFA support for contained-element lifecycle is based on recursive occurrences of the object-type (@otype@) pseudo-trait.
     
    12801319The @worker@ type is designed this way to work with the threading system.
    12811320A thread type forks a thread at the end of each constructor and joins with it at the start of each destructor.
    1282 But a @worker@ cannot begin its forked-thread work without knowing its @id@.
     1321But a @worker@ cannot begin its forked-thead work without knowing its @id@.
    12831322Therefore, there is a conflict between the implicit actions of the builtin @thread@ type and a user's desire to defer these actions.
    12841323
     
    14211460
    14221461The \CFA array and the field of ``array language'' comparators all leverage dependent types to improve on the expressiveness over C and Java, accommodating examples such as:
    1423 \begin{itemize}[leftmargin=*]
     1462\begin{itemize}
    14241463\item a \emph{zip}-style operation that consumes two arrays of equal length
    14251464\item a \emph{map}-style operation whose produced length matches the consumed length
     
    15051544The details in this presentation aren't meant to be taken too precisely as suggestions for how it should look in \CFA.
    15061545But the example shows these abilities:
    1507 \begin{itemize}[leftmargin=*]
     1546\begin{itemize}
    15081547\item a built-in way (the @is_enum@ trait) for a generic routine to require enumeration-like information about its instantiating type
    15091548\item an implicit implementation of the trait whenever a user-written enum occurs (@weekday@'s declaration implies @is_enum@)
  • doc/theses/mike_brooks_MMath/background.tex

    rbd72f517 r7d02d35  
    995995                Illustrated by pseudocode implementation of an STL-compatible API fragment using LQ as the underlying implementation.
    996996                The gap that makes it pseudocode is that
    997                 the LQ C macros do not expand to valid \CC when instantiated with template parameters---there is no \lstinline{struct El}.
     997                the LQ C macros do not expand to valid C++ when instantiated with template parameters---there is no \lstinline{struct El}.
    998998                When using a custom-patched version of LQ to work around this issue,
    999999                the programs of \VRef[Figure]{f:WrappedRef} and wrapped value work with this shim in place of real STL.
  • doc/theses/mike_brooks_MMath/benchmarks/string/result-append-pbv.csv

    rbd72f517 r7d02d35  
    1 perfexp-cfa-pta-ll-share-reuse,corpus-100-1-1.txt,100,100,1.000000,220120000,10.000178
    2 perfexp-cfa-pta-ll-share-reuse,corpus-100-10-1.txt,100,100,9.500000,177430000,10.000414
    3 perfexp-cfa-pta-ll-share-reuse,corpus-100-100-1.txt,100,100,106.370000,142410000,10.000162
    4 perfexp-cfa-pta-ll-share-reuse,corpus-100-2-1.txt,100,100,2.030000,195500000,10.000161
    5 perfexp-cfa-pta-ll-share-reuse,corpus-100-20-1.txt,100,100,22.960000,164560000,10.000548
    6 perfexp-cfa-pta-ll-share-reuse,corpus-100-200-1.txt,100,100,177.280000,122260000,10.000279
    7 perfexp-cfa-pta-ll-share-reuse,corpus-100-5-1.txt,100,100,5.270000,193960000,10.000071
    8 perfexp-cfa-pta-ll-share-reuse,corpus-100-50-1.txt,100,100,43.320000,163430000,10.000175
    9 perfexp-cfa-pta-ll-share-reuse,corpus-100-500-1.txt,100,100,557.260000,87960000,10.001073
    10 perfexp-cfa-pta-ll-share-reuse,corpus-1-1-1.txt,100,1,1.000000,224420000,10.000135
    11 perfexp-cfa-pta-ll-share-reuse,corpus-1-10-1.txt,100,1,10.000000,223740000,10.000014
    12 perfexp-cfa-pta-ll-share-reuse,corpus-1-100-1.txt,100,1,100.000000,153300000,10.000091
    13 perfexp-cfa-pta-ll-share-reuse,corpus-1-2-1.txt,100,1,2.000000,223430000,10.000120
    14 perfexp-cfa-pta-ll-share-reuse,corpus-1-20-1.txt,100,1,20.000000,210640000,10.000385
    15 perfexp-cfa-pta-ll-share-reuse,corpus-1-200-1.txt,100,1,200.000000,129790000,10.000596
    16 perfexp-cfa-pta-ll-share-reuse,corpus-1-5-1.txt,100,1,5.000000,222850000,10.000361
    17 perfexp-cfa-pta-ll-share-reuse,corpus-1-50-1.txt,100,1,50.000000,201700000,10.000220
    18 perfexp-cfa-pta-ll-share-reuse,corpus-1-500-1.txt,100,1,500.000000,110000000,10.000407
    19 perfexp-cfa-pta-ll-share-fresh,corpus-100-1-1.txt,100,100,1.000000,225030000,10.000360
    20 perfexp-cfa-pta-ll-share-fresh,corpus-100-10-1.txt,100,100,9.500000,192640000,10.000254
    21 perfexp-cfa-pta-ll-share-fresh,corpus-100-100-1.txt,100,100,106.370000,143960000,10.000633
    22 perfexp-cfa-pta-ll-share-fresh,corpus-100-2-1.txt,100,100,2.030000,204500000,10.000450
    23 perfexp-cfa-pta-ll-share-fresh,corpus-100-20-1.txt,100,100,22.960000,185400000,10.000274
    24 perfexp-cfa-pta-ll-share-fresh,corpus-100-200-1.txt,100,100,177.280000,126420000,10.000791
    25 perfexp-cfa-pta-ll-share-fresh,corpus-100-5-1.txt,100,100,5.270000,194450000,10.000396
    26 perfexp-cfa-pta-ll-share-fresh,corpus-100-50-1.txt,100,100,43.320000,173140000,10.000364
    27 perfexp-cfa-pta-ll-share-fresh,corpus-100-500-1.txt,100,100,557.260000,92390000,10.000098
    28 perfexp-cfa-pta-ll-share-fresh,corpus-1-1-1.txt,100,1,1.000000,222210000,10.000426
    29 perfexp-cfa-pta-ll-share-fresh,corpus-1-10-1.txt,100,1,10.000000,209110000,10.000235
    30 perfexp-cfa-pta-ll-share-fresh,corpus-1-100-1.txt,100,1,100.000000,154750000,10.000076
    31 perfexp-cfa-pta-ll-share-fresh,corpus-1-2-1.txt,100,1,2.000000,222030000,10.000114
    32 perfexp-cfa-pta-ll-share-fresh,corpus-1-20-1.txt,100,1,20.000000,208680000,10.000050
    33 perfexp-cfa-pta-ll-share-fresh,corpus-1-200-1.txt,100,1,200.000000,133490000,10.000231
    34 perfexp-cfa-pta-ll-share-fresh,corpus-1-5-1.txt,100,1,5.000000,217740000,10.000425
    35 perfexp-cfa-pta-ll-share-fresh,corpus-1-50-1.txt,100,1,50.000000,200340000,10.000126
    36 perfexp-cfa-pta-ll-share-fresh,corpus-1-500-1.txt,100,1,500.000000,109570000,10.000365
    37 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-1-1.txt,100,100,1.000000,146130000,10.000557
    38 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-10-1.txt,100,100,9.500000,110430000,10.000456
    39 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-100-1.txt,100,100,106.370000,17440000,10.003114
    40 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-2-1.txt,100,100,2.030000,139540000,10.000128
    41 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-20-1.txt,100,100,22.960000,70380000,10.000395
    42 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-200-1.txt,100,100,177.280000,8670000,10.001712
    43 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-5-1.txt,100,100,5.270000,127040000,10.000370
    44 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-50-1.txt,100,100,43.320000,44250000,10.002214
    45 perfexp-cfa-pta-ll-noshare-reuse,corpus-100-500-1.txt,100,100,557.260000,3290000,10.007370
    46 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-1-1.txt,100,1,1.000000,139870000,10.000356
    47 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-10-1.txt,100,1,10.000000,115500000,10.000281
    48 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-100-1.txt,100,1,100.000000,18830000,10.003277
    49 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-2-1.txt,100,1,2.000000,144880000,10.000426
    50 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-20-1.txt,100,1,20.000000,82050000,10.001071
    51 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-200-1.txt,100,1,200.000000,8870000,10.002904
    52 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-5-1.txt,100,1,5.000000,138400000,10.000130
    53 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-50-1.txt,100,1,50.000000,38130000,10.002351
    54 perfexp-cfa-pta-ll-noshare-reuse,corpus-1-500-1.txt,100,1,500.000000,3890000,10.003849
    55 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-1-1.txt,100,100,1.000000,143100000,10.000056
    56 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-10-1.txt,100,100,9.500000,97990000,10.000081
    57 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-100-1.txt,100,100,106.370000,16950000,10.004190
    58 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-2-1.txt,100,100,2.030000,135210000,10.000137
    59 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-20-1.txt,100,100,22.960000,69270000,10.000092
    60 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-200-1.txt,100,100,177.280000,8840000,10.000491
    61 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-5-1.txt,100,100,5.270000,112610000,10.000397
    62 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-50-1.txt,100,100,43.320000,42480000,10.001402
    63 perfexp-cfa-pta-ll-noshare-fresh,corpus-100-500-1.txt,100,100,557.260000,3250000,10.027871
    64 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-1-1.txt,100,1,1.000000,139830000,10.000681
    65 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-10-1.txt,100,1,10.000000,102320000,10.000624
    66 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-100-1.txt,100,1,100.000000,17610000,10.000917
    67 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-2-1.txt,100,1,2.000000,134520000,10.000287
    68 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-20-1.txt,100,1,20.000000,78150000,10.000982
    69 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-200-1.txt,100,1,200.000000,8930000,10.010066
    70 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-5-1.txt,100,1,5.000000,119920000,10.000537
    71 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-50-1.txt,100,1,50.000000,38540000,10.001545
    72 perfexp-cfa-pta-ll-noshare-fresh,corpus-1-500-1.txt,100,1,500.000000,3900000,10.024468
    73 perfexp-cfa-peq-ll-share-reuse,corpus-100-1-1.txt,100,100,1.000000,580710000,10.000065
    74 perfexp-cfa-peq-ll-share-reuse,corpus-100-10-1.txt,100,100,9.500000,430790000,10.000116
    75 perfexp-cfa-peq-ll-share-reuse,corpus-100-100-1.txt,100,100,106.370000,247640000,10.000266
    76 perfexp-cfa-peq-ll-share-reuse,corpus-100-2-1.txt,100,100,2.030000,464050000,10.000189
    77 perfexp-cfa-peq-ll-share-reuse,corpus-100-20-1.txt,100,100,22.960000,377820000,10.000065
    78 perfexp-cfa-peq-ll-share-reuse,corpus-100-200-1.txt,100,100,177.280000,195030000,10.000477
    79 perfexp-cfa-peq-ll-share-reuse,corpus-100-5-1.txt,100,100,5.270000,430190000,10.000121
    80 perfexp-cfa-peq-ll-share-reuse,corpus-100-50-1.txt,100,100,43.320000,331580000,10.000295
    81 perfexp-cfa-peq-ll-share-reuse,corpus-100-500-1.txt,100,100,557.260000,123230000,10.000186
    82 perfexp-cfa-peq-ll-share-reuse,corpus-1-1-1.txt,100,1,1.000000,572750000,10.000172
    83 perfexp-cfa-peq-ll-share-reuse,corpus-1-10-1.txt,100,1,10.000000,558790000,10.000101
    84 perfexp-cfa-peq-ll-share-reuse,corpus-1-100-1.txt,100,1,100.000000,291780000,10.000230
    85 perfexp-cfa-peq-ll-share-reuse,corpus-1-2-1.txt,100,1,2.000000,571220000,10.000023
    86 perfexp-cfa-peq-ll-share-reuse,corpus-1-20-1.txt,100,1,20.000000,461020000,10.000045
    87 perfexp-cfa-peq-ll-share-reuse,corpus-1-200-1.txt,100,1,200.000000,220880000,10.000260
    88 perfexp-cfa-peq-ll-share-reuse,corpus-1-5-1.txt,100,1,5.000000,555180000,10.000153
    89 perfexp-cfa-peq-ll-share-reuse,corpus-1-50-1.txt,100,1,50.000000,433290000,10.000123
    90 perfexp-cfa-peq-ll-share-reuse,corpus-1-500-1.txt,100,1,500.000000,165210000,10.000260
    91 perfexp-cfa-peq-ll-share-fresh,corpus-100-1-1.txt,100,100,1.000000,591360000,10.000013
    92 perfexp-cfa-peq-ll-share-fresh,corpus-100-10-1.txt,100,100,9.500000,432580000,10.000103
    93 perfexp-cfa-peq-ll-share-fresh,corpus-100-100-1.txt,100,100,106.370000,253100000,10.000162
    94 perfexp-cfa-peq-ll-share-fresh,corpus-100-2-1.txt,100,100,2.030000,470710000,10.000018
    95 perfexp-cfa-peq-ll-share-fresh,corpus-100-20-1.txt,100,100,22.960000,381580000,10.000172
    96 perfexp-cfa-peq-ll-share-fresh,corpus-100-200-1.txt,100,100,177.280000,197910000,10.000400
    97 perfexp-cfa-peq-ll-share-fresh,corpus-100-5-1.txt,100,100,5.270000,437470000,10.000123
    98 perfexp-cfa-peq-ll-share-fresh,corpus-100-50-1.txt,100,100,43.320000,337150000,10.000065
    99 perfexp-cfa-peq-ll-share-fresh,corpus-100-500-1.txt,100,100,557.260000,127310000,10.000685
    100 perfexp-cfa-peq-ll-share-fresh,corpus-1-1-1.txt,100,1,1.000000,581300000,10.000103
    101 perfexp-cfa-peq-ll-share-fresh,corpus-1-10-1.txt,100,1,10.000000,566650000,10.000166
    102 perfexp-cfa-peq-ll-share-fresh,corpus-1-100-1.txt,100,1,100.000000,295340000,10.000202
    103 perfexp-cfa-peq-ll-share-fresh,corpus-1-2-1.txt,100,1,2.000000,579220000,10.000012
    104 perfexp-cfa-peq-ll-share-fresh,corpus-1-20-1.txt,100,1,20.000000,470040000,10.000180
    105 perfexp-cfa-peq-ll-share-fresh,corpus-1-200-1.txt,100,1,200.000000,223060000,10.000188
    106 perfexp-cfa-peq-ll-share-fresh,corpus-1-5-1.txt,100,1,5.000000,563440000,10.000100
    107 perfexp-cfa-peq-ll-share-fresh,corpus-1-50-1.txt,100,1,50.000000,438260000,10.000200
    108 perfexp-cfa-peq-ll-share-fresh,corpus-1-500-1.txt,100,1,500.000000,166830000,10.000225
    109 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-1-1.txt,100,100,1.000000,603080000,10.000107
    110 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-10-1.txt,100,100,9.500000,439540000,10.000078
    111 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-100-1.txt,100,100,106.370000,279990000,10.000309
    112 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-2-1.txt,100,100,2.030000,509720000,10.000099
    113 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-20-1.txt,100,100,22.960000,405590000,10.000206
    114 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-200-1.txt,100,100,177.280000,230400000,10.000124
    115 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-5-1.txt,100,100,5.270000,454270000,10.000057
    116 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-50-1.txt,100,100,43.320000,375090000,10.000225
    117 perfexp-cfa-peq-ll-noshare-reuse,corpus-100-500-1.txt,100,100,557.260000,134440000,10.000290
    118 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-1-1.txt,100,1,1.000000,588100000,10.000124
    119 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-10-1.txt,100,1,10.000000,577110000,10.000002
    120 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-100-1.txt,100,1,100.000000,319990000,10.000151
    121 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-2-1.txt,100,1,2.000000,586540000,10.000010
    122 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-20-1.txt,100,1,20.000000,480940000,10.000047
    123 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-200-1.txt,100,1,200.000000,300590000,10.000162
    124 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-5-1.txt,100,1,5.000000,577530000,10.000120
    125 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-50-1.txt,100,1,50.000000,454950000,10.000114
    126 perfexp-cfa-peq-ll-noshare-reuse,corpus-1-500-1.txt,100,1,500.000000,186210000,10.000221
    127 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-1-1.txt,100,100,1.000000,546170000,10.000079
    128 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-10-1.txt,100,100,9.500000,403120000,10.000222
    129 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-100-1.txt,100,100,106.370000,214740000,10.000444
    130 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-2-1.txt,100,100,2.030000,449080000,10.000157
    131 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-20-1.txt,100,100,22.960000,351690000,10.000146
    132 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-200-1.txt,100,100,177.280000,174630000,10.000540
    133 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-5-1.txt,100,100,5.270000,419160000,10.000085
    134 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-50-1.txt,100,100,43.320000,296590000,10.000200
    135 perfexp-cfa-peq-ll-noshare-fresh,corpus-100-500-1.txt,100,100,557.260000,78000000,10.000539
    136 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-1-1.txt,100,1,1.000000,541890000,10.000021
    137 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-10-1.txt,100,1,10.000000,511140000,10.000142
    138 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-100-1.txt,100,1,100.000000,243680000,10.000252
    139 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-2-1.txt,100,1,2.000000,532730000,10.000135
    140 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-20-1.txt,100,1,20.000000,413610000,10.000113
    141 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-200-1.txt,100,1,200.000000,192770000,10.000185
    142 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-5-1.txt,100,1,5.000000,495980000,10.000162
    143 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-50-1.txt,100,1,50.000000,367590000,10.000269
    144 perfexp-cfa-peq-ll-noshare-fresh,corpus-1-500-1.txt,100,1,500.000000,111560000,10.000455
    145 perfexp-cfa-pbv-ll-share-na,corpus-100-1-1.txt,xxx,100,1.000000,638780000,10.000008
    146 perfexp-cfa-pbv-ll-share-na,corpus-100-10-1.txt,xxx,100,9.500000,637840000,10.000004
    147 perfexp-cfa-pbv-ll-share-na,corpus-100-100-1.txt,xxx,100,106.370000,635130000,10.000003
    148 perfexp-cfa-pbv-ll-share-na,corpus-100-2-1.txt,xxx,100,2.030000,639810000,10.000140
    149 perfexp-cfa-pbv-ll-share-na,corpus-100-20-1.txt,xxx,100,22.960000,552670000,10.000089
    150 perfexp-cfa-pbv-ll-share-na,corpus-100-200-1.txt,xxx,100,177.280000,639550000,10.000019
    151 perfexp-cfa-pbv-ll-share-na,corpus-100-5-1.txt,xxx,100,5.270000,636230000,10.000044
    152 perfexp-cfa-pbv-ll-share-na,corpus-100-50-1.txt,xxx,100,43.320000,631470000,10.000125
    153 perfexp-cfa-pbv-ll-share-na,corpus-100-500-1.txt,xxx,100,557.260000,628330000,10.000127
    154 perfexp-cfa-pbv-ll-share-na,corpus-1-1-1.txt,xxx,1,1.000000,589760000,10.000044
    155 perfexp-cfa-pbv-ll-share-na,corpus-1-10-1.txt,xxx,1,10.000000,589790000,10.000151
    156 perfexp-cfa-pbv-ll-share-na,corpus-1-100-1.txt,xxx,1,100.000000,587540000,10.000128
    157 perfexp-cfa-pbv-ll-share-na,corpus-1-2-1.txt,xxx,1,2.000000,580790000,10.000102
    158 perfexp-cfa-pbv-ll-share-na,corpus-1-20-1.txt,xxx,1,20.000000,586470000,10.000154
    159 perfexp-cfa-pbv-ll-share-na,corpus-1-200-1.txt,xxx,1,200.000000,587510000,10.000005
    160 perfexp-cfa-pbv-ll-share-na,corpus-1-5-1.txt,xxx,1,5.000000,582120000,10.000163
    161 perfexp-cfa-pbv-ll-share-na,corpus-1-50-1.txt,xxx,1,50.000000,587990000,10.000127
    162 perfexp-cfa-pbv-ll-share-na,corpus-1-500-1.txt,xxx,1,500.000000,587590000,10.000046
    163 perfexp-cfa-pbv-ll-noshare-na,corpus-100-1-1.txt,xxx,100,1.000000,218340000,10.000321
    164 perfexp-cfa-pbv-ll-noshare-na,corpus-100-10-1.txt,xxx,100,9.500000,189550000,10.000174
    165 perfexp-cfa-pbv-ll-noshare-na,corpus-100-100-1.txt,xxx,100,106.370000,169280000,10.000141
    166 perfexp-cfa-pbv-ll-noshare-na,corpus-100-2-1.txt,xxx,100,2.030000,197840000,10.000383
    167 perfexp-cfa-pbv-ll-noshare-na,corpus-100-20-1.txt,xxx,100,22.960000,182700000,10.000041
    168 perfexp-cfa-pbv-ll-noshare-na,corpus-100-200-1.txt,xxx,100,177.280000,157120000,10.000522
    169 perfexp-cfa-pbv-ll-noshare-na,corpus-100-5-1.txt,xxx,100,5.270000,155160000,10.000322
    170 perfexp-cfa-pbv-ll-noshare-na,corpus-100-50-1.txt,xxx,100,43.320000,179110000,10.000218
    171 perfexp-cfa-pbv-ll-noshare-na,corpus-100-500-1.txt,xxx,100,557.260000,113620000,10.000140
    172 perfexp-cfa-pbv-ll-noshare-na,corpus-1-1-1.txt,xxx,1,1.000000,216270000,10.000367
    173 perfexp-cfa-pbv-ll-noshare-na,corpus-1-10-1.txt,xxx,1,10.000000,214390000,10.000157
    174 perfexp-cfa-pbv-ll-noshare-na,corpus-1-100-1.txt,xxx,1,100.000000,165440000,10.000095
    175 perfexp-cfa-pbv-ll-noshare-na,corpus-1-2-1.txt,xxx,1,2.000000,217150000,10.000044
    176 perfexp-cfa-pbv-ll-noshare-na,corpus-1-20-1.txt,xxx,1,20.000000,216760000,10.000321
    177 perfexp-cfa-pbv-ll-noshare-na,corpus-1-200-1.txt,xxx,1,200.000000,176930000,10.000100
    178 perfexp-cfa-pbv-ll-noshare-na,corpus-1-5-1.txt,xxx,1,5.000000,200840000,10.000229
    179 perfexp-cfa-pbv-ll-noshare-na,corpus-1-50-1.txt,xxx,1,50.000000,212960000,10.000273
    180 perfexp-cfa-pbv-ll-noshare-na,corpus-1-500-1.txt,xxx,1,500.000000,163340000,10.000196
    181 perfexp-stl-pta-na-na-reuse,corpus-100-1-1.txt,100,100,1.000000,151210000,10.000032
    182 perfexp-stl-pta-na-na-reuse,corpus-100-10-1.txt,100,100,9.500000,92400000,10.000662
    183 perfexp-stl-pta-na-na-reuse,corpus-100-100-1.txt,100,100,106.370000,16700000,10.003595
    184 perfexp-stl-pta-na-na-reuse,corpus-100-2-1.txt,100,100,2.030000,132700000,10.000666
    185 perfexp-stl-pta-na-na-reuse,corpus-100-20-1.txt,100,100,22.960000,61670000,10.001135
    186 perfexp-stl-pta-na-na-reuse,corpus-100-200-1.txt,100,100,177.280000,8950000,10.005903
    187 perfexp-stl-pta-na-na-reuse,corpus-100-5-1.txt,100,100,5.270000,105760000,10.000126
    188 perfexp-stl-pta-na-na-reuse,corpus-100-50-1.txt,100,100,43.320000,38020000,10.001290
    189 perfexp-stl-pta-na-na-reuse,corpus-100-500-1.txt,100,100,557.260000,3080000,10.009583
    190 perfexp-stl-pta-na-na-reuse,corpus-1-1-1.txt,100,1,1.000000,150070000,10.000505
    191 perfexp-stl-pta-na-na-reuse,corpus-1-10-1.txt,100,1,10.000000,96240000,10.000747
    192 perfexp-stl-pta-na-na-reuse,corpus-1-100-1.txt,100,1,100.000000,17380000,10.005677
    193 perfexp-stl-pta-na-na-reuse,corpus-1-2-1.txt,100,1,2.000000,136340000,10.000556
    194 perfexp-stl-pta-na-na-reuse,corpus-1-20-1.txt,100,1,20.000000,69290000,10.000979
    195 perfexp-stl-pta-na-na-reuse,corpus-1-200-1.txt,100,1,200.000000,9140000,10.005445
    196 perfexp-stl-pta-na-na-reuse,corpus-1-5-1.txt,100,1,5.000000,114030000,10.000605
    197 perfexp-stl-pta-na-na-reuse,corpus-1-50-1.txt,100,1,50.000000,33470000,10.000871
    198 perfexp-stl-pta-na-na-reuse,corpus-1-500-1.txt,100,1,500.000000,3760000,10.021431
    199 perfexp-stl-pta-na-na-fresh,corpus-100-1-1.txt,100,100,1.000000,151890000,10.000693
    200 perfexp-stl-pta-na-na-fresh,corpus-100-10-1.txt,100,100,9.500000,97910000,10.000289
    201 perfexp-stl-pta-na-na-fresh,corpus-100-100-1.txt,100,100,106.370000,16740000,10.000756
    202 perfexp-stl-pta-na-na-fresh,corpus-100-2-1.txt,100,100,2.030000,134890000,10.000666
    203 perfexp-stl-pta-na-na-fresh,corpus-100-20-1.txt,100,100,22.960000,61040000,10.000514
    204 perfexp-stl-pta-na-na-fresh,corpus-100-200-1.txt,100,100,177.280000,8950000,10.004888
    205 perfexp-stl-pta-na-na-fresh,corpus-100-5-1.txt,100,100,5.270000,101780000,10.000043
    206 perfexp-stl-pta-na-na-fresh,corpus-100-50-1.txt,100,100,43.320000,38440000,10.000510
    207 perfexp-stl-pta-na-na-fresh,corpus-100-500-1.txt,100,100,557.260000,3060000,10.007733
    208 perfexp-stl-pta-na-na-fresh,corpus-1-1-1.txt,100,1,1.000000,149360000,10.000168
    209 perfexp-stl-pta-na-na-fresh,corpus-1-10-1.txt,100,1,10.000000,98400000,10.000118
    210 perfexp-stl-pta-na-na-fresh,corpus-1-100-1.txt,100,1,100.000000,17440000,10.004379
    211 perfexp-stl-pta-na-na-fresh,corpus-1-2-1.txt,100,1,2.000000,130340000,10.000520
    212 perfexp-stl-pta-na-na-fresh,corpus-1-20-1.txt,100,1,20.000000,69280000,10.001377
    213 perfexp-stl-pta-na-na-fresh,corpus-1-200-1.txt,100,1,200.000000,9070000,10.004963
    214 perfexp-stl-pta-na-na-fresh,corpus-1-5-1.txt,100,1,5.000000,114390000,10.000315
    215 perfexp-stl-pta-na-na-fresh,corpus-1-50-1.txt,100,1,50.000000,34350000,10.001033
    216 perfexp-stl-pta-na-na-fresh,corpus-1-500-1.txt,100,1,500.000000,3720000,10.009015
    217 perfexp-stl-peq-na-na-reuse,corpus-100-1-1.txt,100,100,1.000000,867730000,10.000040
    218 perfexp-stl-peq-na-na-reuse,corpus-100-10-1.txt,100,100,9.500000,470370000,10.000155
    219 perfexp-stl-peq-na-na-reuse,corpus-100-100-1.txt,100,100,106.370000,287440000,10.000190
    220 perfexp-stl-peq-na-na-reuse,corpus-100-2-1.txt,100,100,2.030000,667180000,10.000145
    221 perfexp-stl-peq-na-na-reuse,corpus-100-20-1.txt,100,100,22.960000,430260000,10.000102
    222 perfexp-stl-peq-na-na-reuse,corpus-100-200-1.txt,100,100,177.280000,232720000,10.000418
    223 perfexp-stl-peq-na-na-reuse,corpus-100-5-1.txt,100,100,5.270000,515130000,10.000118
    224 perfexp-stl-peq-na-na-reuse,corpus-100-50-1.txt,100,100,43.320000,401280000,10.000122
    225 perfexp-stl-peq-na-na-reuse,corpus-100-500-1.txt,100,100,557.260000,135350000,10.000692
    226 perfexp-stl-peq-na-na-reuse,corpus-1-1-1.txt,100,1,1.000000,847560000,10.000010
    227 perfexp-stl-peq-na-na-reuse,corpus-1-10-1.txt,100,1,10.000000,641250000,10.000095
    228 perfexp-stl-peq-na-na-reuse,corpus-1-100-1.txt,100,1,100.000000,300130000,10.000199
    229 perfexp-stl-peq-na-na-reuse,corpus-1-2-1.txt,100,1,2.000000,680950000,10.000050
    230 perfexp-stl-peq-na-na-reuse,corpus-1-20-1.txt,100,1,20.000000,515190000,10.000051
    231 perfexp-stl-peq-na-na-reuse,corpus-1-200-1.txt,100,1,200.000000,271800000,10.000194
    232 perfexp-stl-peq-na-na-reuse,corpus-1-5-1.txt,100,1,5.000000,611640000,10.000133
    233 perfexp-stl-peq-na-na-reuse,corpus-1-50-1.txt,100,1,50.000000,483780000,10.000084
    234 perfexp-stl-peq-na-na-reuse,corpus-1-500-1.txt,100,1,500.000000,191470000,10.000243
    235 perfexp-stl-peq-na-na-fresh,corpus-100-1-1.txt,100,100,1.000000,779650000,10.000085
    236 perfexp-stl-peq-na-na-fresh,corpus-100-10-1.txt,100,100,9.500000,419300000,10.000184
    237 perfexp-stl-peq-na-na-fresh,corpus-100-100-1.txt,100,100,106.370000,224270000,10.000410
    238 perfexp-stl-peq-na-na-fresh,corpus-100-2-1.txt,100,100,2.030000,545330000,10.000073
    239 perfexp-stl-peq-na-na-fresh,corpus-100-20-1.txt,100,100,22.960000,385000000,10.000210
    240 perfexp-stl-peq-na-na-fresh,corpus-100-200-1.txt,100,100,177.280000,174520000,10.000360
    241 perfexp-stl-peq-na-na-fresh,corpus-100-5-1.txt,100,100,5.270000,443460000,10.000165
    242 perfexp-stl-peq-na-na-fresh,corpus-100-50-1.txt,100,100,43.320000,310460000,10.000174
    243 perfexp-stl-peq-na-na-fresh,corpus-100-500-1.txt,100,100,557.260000,92820000,10.000352
    244 perfexp-stl-peq-na-na-fresh,corpus-1-1-1.txt,100,1,1.000000,774230000,10.000110
    245 perfexp-stl-peq-na-na-fresh,corpus-1-10-1.txt,100,1,10.000000,554850000,10.000064
    246 perfexp-stl-peq-na-na-fresh,corpus-1-100-1.txt,100,1,100.000000,227540000,10.000041
    247 perfexp-stl-peq-na-na-fresh,corpus-1-2-1.txt,100,1,2.000000,616830000,10.000134
    248 perfexp-stl-peq-na-na-fresh,corpus-1-20-1.txt,100,1,20.000000,436800000,10.000038
    249 perfexp-stl-peq-na-na-fresh,corpus-1-200-1.txt,100,1,200.000000,185050000,10.000439
    250 perfexp-stl-peq-na-na-fresh,corpus-1-5-1.txt,100,1,5.000000,569030000,10.000125
    251 perfexp-stl-peq-na-na-fresh,corpus-1-50-1.txt,100,1,50.000000,387710000,10.000249
    252 perfexp-stl-peq-na-na-fresh,corpus-1-500-1.txt,100,1,500.000000,113890000,10.000075
    253 perfexp-stl-pbv-na-na-na,corpus-100-1-1.txt,xxx,100,1.000000,1267570000,10.000072
    254 perfexp-stl-pbv-na-na-na,corpus-100-10-1.txt,xxx,100,9.500000,476260000,10.000192
    255 perfexp-stl-pbv-na-na-na,corpus-100-100-1.txt,xxx,100,106.370000,271870000,10.000171
    256 perfexp-stl-pbv-na-na-na,corpus-100-2-1.txt,xxx,100,2.030000,807830000,10.000110
    257 perfexp-stl-pbv-na-na-na,corpus-100-20-1.txt,xxx,100,22.960000,373160000,10.000221
    258 perfexp-stl-pbv-na-na-na,corpus-100-200-1.txt,xxx,100,177.280000,233700000,10.000081
    259 perfexp-stl-pbv-na-na-na,corpus-100-5-1.txt,xxx,100,5.270000,536240000,10.000165
    260 perfexp-stl-pbv-na-na-na,corpus-100-50-1.txt,xxx,100,43.320000,297400000,10.000317
    261 perfexp-stl-pbv-na-na-na,corpus-100-500-1.txt,xxx,100,557.260000,159500000,10.000290
    262 perfexp-stl-pbv-na-na-na,corpus-1-1-1.txt,xxx,1,1.000000,1089370000,10.000024
    263 perfexp-stl-pbv-na-na-na,corpus-1-10-1.txt,xxx,1,10.000000,722490000,10.000040
    264 perfexp-stl-pbv-na-na-na,corpus-1-100-1.txt,xxx,1,100.000000,311250000,10.000116
    265 perfexp-stl-pbv-na-na-na,corpus-1-2-1.txt,xxx,1,2.000000,747630000,10.000103
    266 perfexp-stl-pbv-na-na-na,corpus-1-20-1.txt,xxx,1,20.000000,348820000,10.000149
    267 perfexp-stl-pbv-na-na-na,corpus-1-200-1.txt,xxx,1,200.000000,302220000,10.000223
    268 perfexp-stl-pbv-na-na-na,corpus-1-5-1.txt,xxx,1,5.000000,725430000,10.000110
    269 perfexp-stl-pbv-na-na-na,corpus-1-50-1.txt,xxx,1,50.000000,335730000,10.000280
    270 perfexp-stl-pbv-na-na-na,corpus-1-500-1.txt,xxx,1,500.000000,258380000,10.000052
     1perfexp-cfa-pta-ll-share-reuse,corpus-100-1-1.txt,100,100,1.000000,219460000,10.000260
     2perfexp-cfa-pta-ll-share-reuse,corpus-100-10-1.txt,100,100,9.500000,180250000,10.000486
     3perfexp-cfa-pta-ll-share-reuse,corpus-100-100-1.txt,100,100,106.370000,152790000,10.000441
     4perfexp-cfa-pta-ll-share-reuse,corpus-100-2-1.txt,100,100,2.030000,206090000,10.000311
     5perfexp-cfa-pta-ll-share-reuse,corpus-100-20-1.txt,100,100,22.960000,184330000,10.000328
     6perfexp-cfa-pta-ll-share-reuse,corpus-100-200-1.txt,100,100,177.280000,125090000,10.000138
     7perfexp-cfa-pta-ll-share-reuse,corpus-100-5-1.txt,100,100,5.270000,199130000,10.000180
     8perfexp-cfa-pta-ll-share-reuse,corpus-100-50-1.txt,100,100,43.320000,167720000,10.000327
     9perfexp-cfa-pta-ll-share-reuse,corpus-100-500-1.txt,100,100,557.260000,93560000,10.001058
     10perfexp-cfa-pta-ll-share-fresh,corpus-100-1-1.txt,100,100,1.000000,225090000,10.000393
     11perfexp-cfa-pta-ll-share-fresh,corpus-100-10-1.txt,100,100,9.500000,196300000,10.000221
     12perfexp-cfa-pta-ll-share-fresh,corpus-100-100-1.txt,100,100,106.370000,150670000,10.000337
     13perfexp-cfa-pta-ll-share-fresh,corpus-100-2-1.txt,100,100,2.030000,206600000,10.000182
     14perfexp-cfa-pta-ll-share-fresh,corpus-100-20-1.txt,100,100,22.960000,188400000,10.000199
     15perfexp-cfa-pta-ll-share-fresh,corpus-100-200-1.txt,100,100,177.280000,125880000,10.000489
     16perfexp-cfa-pta-ll-share-fresh,corpus-100-5-1.txt,100,100,5.270000,185930000,10.000231
     17perfexp-cfa-pta-ll-share-fresh,corpus-100-50-1.txt,100,100,43.320000,170660000,10.000491
     18perfexp-cfa-pta-ll-share-fresh,corpus-100-500-1.txt,100,100,557.260000,91520000,10.000640
     19perfexp-cfa-pta-ll-noshare-reuse,corpus-100-1-1.txt,100,100,1.000000,146200000,10.000520
     20perfexp-cfa-pta-ll-noshare-reuse,corpus-100-10-1.txt,100,100,9.500000,114140000,10.000734
     21perfexp-cfa-pta-ll-noshare-reuse,corpus-100-100-1.txt,100,100,106.370000,17630000,10.000889
     22perfexp-cfa-pta-ll-noshare-reuse,corpus-100-2-1.txt,100,100,2.030000,139700000,10.000460
     23perfexp-cfa-pta-ll-noshare-reuse,corpus-100-20-1.txt,100,100,22.960000,71910000,10.000768
     24perfexp-cfa-pta-ll-noshare-reuse,corpus-100-200-1.txt,100,100,177.280000,8540000,10.009186
     25perfexp-cfa-pta-ll-noshare-reuse,corpus-100-5-1.txt,100,100,5.270000,129810000,10.000379
     26perfexp-cfa-pta-ll-noshare-reuse,corpus-100-50-1.txt,100,100,43.320000,45280000,10.000006
     27perfexp-cfa-pta-ll-noshare-reuse,corpus-100-500-1.txt,100,100,557.260000,3300000,10.021088
     28perfexp-cfa-pta-ll-noshare-fresh,corpus-100-1-1.txt,100,100,1.000000,146050000,10.000551
     29perfexp-cfa-pta-ll-noshare-fresh,corpus-100-10-1.txt,100,100,9.500000,102800000,10.000490
     30perfexp-cfa-pta-ll-noshare-fresh,corpus-100-100-1.txt,100,100,106.370000,17060000,10.001677
     31perfexp-cfa-pta-ll-noshare-fresh,corpus-100-2-1.txt,100,100,2.030000,137470000,10.000361
     32perfexp-cfa-pta-ll-noshare-fresh,corpus-100-20-1.txt,100,100,22.960000,69520000,10.001142
     33perfexp-cfa-pta-ll-noshare-fresh,corpus-100-200-1.txt,100,100,177.280000,8830000,10.010528
     34perfexp-cfa-pta-ll-noshare-fresh,corpus-100-5-1.txt,100,100,5.270000,117120000,10.000681
     35perfexp-cfa-pta-ll-noshare-fresh,corpus-100-50-1.txt,100,100,43.320000,42960000,10.001950
     36perfexp-cfa-pta-ll-noshare-fresh,corpus-100-500-1.txt,100,100,557.260000,3220000,10.010203
     37perfexp-cfa-peq-ll-share-reuse,corpus-100-1-1.txt,100,100,1.000000,583560000,10.000070
     38perfexp-cfa-peq-ll-share-reuse,corpus-100-10-1.txt,100,100,9.500000,451400000,10.000013
     39perfexp-cfa-peq-ll-share-reuse,corpus-100-100-1.txt,100,100,106.370000,253260000,10.000275
     40perfexp-cfa-peq-ll-share-reuse,corpus-100-2-1.txt,100,100,2.030000,483580000,10.000140
     41perfexp-cfa-peq-ll-share-reuse,corpus-100-20-1.txt,100,100,22.960000,396550000,10.000060
     42perfexp-cfa-peq-ll-share-reuse,corpus-100-200-1.txt,100,100,177.280000,199760000,10.000416
     43perfexp-cfa-peq-ll-share-reuse,corpus-100-5-1.txt,100,100,5.270000,454790000,10.000069
     44perfexp-cfa-peq-ll-share-reuse,corpus-100-50-1.txt,100,100,43.320000,339690000,10.000243
     45perfexp-cfa-peq-ll-share-reuse,corpus-100-500-1.txt,100,100,557.260000,123840000,10.000724
     46perfexp-cfa-peq-ll-share-fresh,corpus-100-1-1.txt,100,100,1.000000,577650000,10.000157
     47perfexp-cfa-peq-ll-share-fresh,corpus-100-10-1.txt,100,100,9.500000,445260000,10.000186
     48perfexp-cfa-peq-ll-share-fresh,corpus-100-100-1.txt,100,100,106.370000,259650000,10.000273
     49perfexp-cfa-peq-ll-share-fresh,corpus-100-2-1.txt,100,100,2.030000,485650000,10.000026
     50perfexp-cfa-peq-ll-share-fresh,corpus-100-20-1.txt,100,100,22.960000,386150000,10.000120
     51perfexp-cfa-peq-ll-share-fresh,corpus-100-200-1.txt,100,100,177.280000,197690000,10.000077
     52perfexp-cfa-peq-ll-share-fresh,corpus-100-5-1.txt,100,100,5.270000,443650000,10.000006
     53perfexp-cfa-peq-ll-share-fresh,corpus-100-50-1.txt,100,100,43.320000,339190000,10.000037
     54perfexp-cfa-peq-ll-share-fresh,corpus-100-500-1.txt,100,100,557.260000,122740000,10.000753
     55perfexp-cfa-peq-ll-noshare-reuse,corpus-100-1-1.txt,100,100,1.000000,595700000,10.000119
     56perfexp-cfa-peq-ll-noshare-reuse,corpus-100-10-1.txt,100,100,9.500000,452000000,10.000055
     57perfexp-cfa-peq-ll-noshare-reuse,corpus-100-100-1.txt,100,100,106.370000,280570000,10.000281
     58perfexp-cfa-peq-ll-noshare-reuse,corpus-100-2-1.txt,100,100,2.030000,501040000,10.000073
     59perfexp-cfa-peq-ll-noshare-reuse,corpus-100-20-1.txt,100,100,22.960000,422280000,10.000131
     60perfexp-cfa-peq-ll-noshare-reuse,corpus-100-200-1.txt,100,100,177.280000,235640000,10.000126
     61perfexp-cfa-peq-ll-noshare-reuse,corpus-100-5-1.txt,100,100,5.270000,461250000,10.000197
     62perfexp-cfa-peq-ll-noshare-reuse,corpus-100-50-1.txt,100,100,43.320000,369020000,10.000057
     63perfexp-cfa-peq-ll-noshare-reuse,corpus-100-500-1.txt,100,100,557.260000,135050000,10.000682
     64perfexp-cfa-peq-ll-noshare-fresh,corpus-100-1-1.txt,100,100,1.000000,529900000,10.000150
     65perfexp-cfa-peq-ll-noshare-fresh,corpus-100-10-1.txt,100,100,9.500000,408530000,10.000108
     66perfexp-cfa-peq-ll-noshare-fresh,corpus-100-100-1.txt,100,100,106.370000,217530000,10.000334
     67perfexp-cfa-peq-ll-noshare-fresh,corpus-100-2-1.txt,100,100,2.030000,463860000,10.000166
     68perfexp-cfa-peq-ll-noshare-fresh,corpus-100-20-1.txt,100,100,22.960000,360110000,10.000008
     69perfexp-cfa-peq-ll-noshare-fresh,corpus-100-200-1.txt,100,100,177.280000,176490000,10.000131
     70perfexp-cfa-peq-ll-noshare-fresh,corpus-100-5-1.txt,100,100,5.270000,424710000,10.000106
     71perfexp-cfa-peq-ll-noshare-fresh,corpus-100-50-1.txt,100,100,43.320000,290930000,10.000172
     72perfexp-cfa-peq-ll-noshare-fresh,corpus-100-500-1.txt,100,100,557.260000,90430000,10.000065
     73perfexp-cfa-pbv-ll-share-na,corpus-100-1-1.txt,xxx,100,1.000000,578040000,10.000159
     74perfexp-cfa-pbv-ll-share-na,corpus-100-10-1.txt,xxx,100,9.500000,573200000,10.000098
     75perfexp-cfa-pbv-ll-share-na,corpus-100-100-1.txt,xxx,100,106.370000,575160000,10.000149
     76perfexp-cfa-pbv-ll-share-na,corpus-100-2-1.txt,xxx,100,2.030000,573780000,10.000134
     77perfexp-cfa-pbv-ll-share-na,corpus-100-20-1.txt,xxx,100,22.960000,574500000,10.000156
     78perfexp-cfa-pbv-ll-share-na,corpus-100-200-1.txt,xxx,100,177.280000,577170000,10.000125
     79perfexp-cfa-pbv-ll-share-na,corpus-100-5-1.txt,xxx,100,5.270000,577820000,10.000046
     80perfexp-cfa-pbv-ll-share-na,corpus-100-50-1.txt,xxx,100,43.320000,578770000,10.000033
     81perfexp-cfa-pbv-ll-share-na,corpus-100-500-1.txt,xxx,100,557.260000,579540000,10.000128
     82perfexp-cfa-pbv-ll-noshare-na,corpus-100-1-1.txt,xxx,100,1.000000,191420000,10.000232
     83perfexp-cfa-pbv-ll-noshare-na,corpus-100-10-1.txt,xxx,100,9.500000,186330000,10.000046
     84perfexp-cfa-pbv-ll-noshare-na,corpus-100-100-1.txt,xxx,100,106.370000,164610000,10.000463
     85perfexp-cfa-pbv-ll-noshare-na,corpus-100-2-1.txt,xxx,100,2.030000,182390000,10.000409
     86perfexp-cfa-pbv-ll-noshare-na,corpus-100-20-1.txt,xxx,100,22.960000,182280000,10.000252
     87perfexp-cfa-pbv-ll-noshare-na,corpus-100-200-1.txt,xxx,100,177.280000,149840000,10.000281
     88perfexp-cfa-pbv-ll-noshare-na,corpus-100-5-1.txt,xxx,100,5.270000,152370000,10.000284
     89perfexp-cfa-pbv-ll-noshare-na,corpus-100-50-1.txt,xxx,100,43.320000,177430000,10.000397
     90perfexp-cfa-pbv-ll-noshare-na,corpus-100-500-1.txt,xxx,100,557.260000,113440000,10.000150
     91perfexp-stl-pta-na-na-reuse,corpus-100-1-1.txt,100,100,1.000000,152870000,10.000280
     92perfexp-stl-pta-na-na-reuse,corpus-100-10-1.txt,100,100,9.500000,98530000,10.000299
     93perfexp-stl-pta-na-na-reuse,corpus-100-100-1.txt,100,100,106.370000,16690000,10.005783
     94perfexp-stl-pta-na-na-reuse,corpus-100-2-1.txt,100,100,2.030000,136230000,10.000196
     95perfexp-stl-pta-na-na-reuse,corpus-100-20-1.txt,100,100,22.960000,62110000,10.001423
     96perfexp-stl-pta-na-na-reuse,corpus-100-200-1.txt,100,100,177.280000,8960000,10.005548
     97perfexp-stl-pta-na-na-reuse,corpus-100-5-1.txt,100,100,5.270000,104790000,10.000889
     98perfexp-stl-pta-na-na-reuse,corpus-100-50-1.txt,100,100,43.320000,39170000,10.000011
     99perfexp-stl-pta-na-na-reuse,corpus-100-500-1.txt,100,100,557.260000,3100000,10.015093
     100perfexp-stl-pta-na-na-fresh,corpus-100-1-1.txt,100,100,1.000000,154450000,10.000054
     101perfexp-stl-pta-na-na-fresh,corpus-100-10-1.txt,100,100,9.500000,96570000,10.000834
     102perfexp-stl-pta-na-na-fresh,corpus-100-100-1.txt,100,100,106.370000,16400000,10.000697
     103perfexp-stl-pta-na-na-fresh,corpus-100-2-1.txt,100,100,2.030000,133450000,10.000440
     104perfexp-stl-pta-na-na-fresh,corpus-100-20-1.txt,100,100,22.960000,62540000,10.001476
     105perfexp-stl-pta-na-na-fresh,corpus-100-200-1.txt,100,100,177.280000,8960000,10.006817
     106perfexp-stl-pta-na-na-fresh,corpus-100-5-1.txt,100,100,5.270000,106470000,10.000109
     107perfexp-stl-pta-na-na-fresh,corpus-100-50-1.txt,100,100,43.320000,37460000,10.000100
     108perfexp-stl-pta-na-na-fresh,corpus-100-500-1.txt,100,100,557.260000,3090000,10.000541
     109perfexp-stl-peq-na-na-reuse,corpus-100-1-1.txt,100,100,1.000000,863350000,10.000092
     110perfexp-stl-peq-na-na-reuse,corpus-100-10-1.txt,100,100,9.500000,471070000,10.000189
     111perfexp-stl-peq-na-na-reuse,corpus-100-100-1.txt,100,100,106.370000,287660000,10.000105
     112perfexp-stl-peq-na-na-reuse,corpus-100-2-1.txt,100,100,2.030000,669380000,10.000082
     113perfexp-stl-peq-na-na-reuse,corpus-100-20-1.txt,100,100,22.960000,432290000,10.000131
     114perfexp-stl-peq-na-na-reuse,corpus-100-200-1.txt,100,100,177.280000,241690000,10.000290
     115perfexp-stl-peq-na-na-reuse,corpus-100-5-1.txt,100,100,5.270000,510990000,10.000082
     116perfexp-stl-peq-na-na-reuse,corpus-100-50-1.txt,100,100,43.320000,396380000,10.000235
     117perfexp-stl-peq-na-na-reuse,corpus-100-500-1.txt,100,100,557.260000,135830000,10.000603
     118perfexp-stl-peq-na-na-fresh,corpus-100-1-1.txt,100,100,1.000000,785420000,10.000062
     119perfexp-stl-peq-na-na-fresh,corpus-100-10-1.txt,100,100,9.500000,418030000,10.000094
     120perfexp-stl-peq-na-na-fresh,corpus-100-100-1.txt,100,100,106.370000,225290000,10.000237
     121perfexp-stl-peq-na-na-fresh,corpus-100-2-1.txt,100,100,2.030000,550120000,10.000151
     122perfexp-stl-peq-na-na-fresh,corpus-100-20-1.txt,100,100,22.960000,386080000,10.000206
     123perfexp-stl-peq-na-na-fresh,corpus-100-200-1.txt,100,100,177.280000,176890000,10.000155
     124perfexp-stl-peq-na-na-fresh,corpus-100-5-1.txt,100,100,5.270000,441830000,10.000135
     125perfexp-stl-peq-na-na-fresh,corpus-100-50-1.txt,100,100,43.320000,310200000,10.000299
     126perfexp-stl-peq-na-na-fresh,corpus-100-500-1.txt,100,100,557.260000,90360000,10.000474
     127perfexp-stl-pbv-na-na-na,corpus-100-1-1.txt,xxx,100,1.000000,1267670000,10.000039
     128perfexp-stl-pbv-na-na-na,corpus-100-10-1.txt,xxx,100,9.500000,482210000,10.000013
     129perfexp-stl-pbv-na-na-na,corpus-100-100-1.txt,xxx,100,106.370000,268680000,10.000097
     130perfexp-stl-pbv-na-na-na,corpus-100-2-1.txt,xxx,100,2.030000,806650000,10.000104
     131perfexp-stl-pbv-na-na-na,corpus-100-20-1.txt,xxx,100,22.960000,369490000,10.000159
     132perfexp-stl-pbv-na-na-na,corpus-100-200-1.txt,xxx,100,177.280000,227020000,10.000244
     133perfexp-stl-pbv-na-na-na,corpus-100-5-1.txt,xxx,100,5.270000,534150000,10.000061
     134perfexp-stl-pbv-na-na-na,corpus-100-50-1.txt,xxx,100,43.320000,298950000,10.000190
     135perfexp-stl-pbv-na-na-na,corpus-100-500-1.txt,xxx,100,557.260000,158310000,10.000104
  • doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.gp

    rbd72f517 r7d02d35  
    1313set xtics (1,2,5,10,20,50,100,200,500)
    1414set logscale x
    15 #set logscale y
    16 set yrange [0:115]
     15set logscale y
     16set yrange [10:200]
    1717set xlabel "String Length being appended (mean, geo. dist.), log scale" offset 2,0
    1818set ylabel "Time per append (ns, mean)"
  • doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.py

    rbd72f517 r7d02d35  
    1212import pandas as pd
    1313import numpy as np
    14 import sys
    1514import os
    1615
    17 sys.path.insert(0, os.path.dirname(__file__))
    18 from common import *
     16infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/result-append-pbv.csv'
    1917
    2018prettyFieldNames = {
     
    2523}
    2624
    27 timings = loadParseTimingData('result-append-pbv.csv')
     25timings = pd.read_csv(
     26    infile,
     27    names=['test', 'corpus', 'concatsPerReset', 'corpusItemCount', 'corpusMeanLenChars', 'concatDoneActualCount', 'execTimeActualSec'],
     28    dtype={'test':                  str,
     29           'corpus':                str,
     30           'concatsPerReset':       'Int64', # allows missing; https://stackoverflow.com/a/70626154
     31           'corpusItemCount':       np.int64,
     32           'corpusMeanLenChars':    np.float64,
     33           'concatDoneActualCount': np.int64,
     34           'execTimeActualSec':     np.float64},
     35    na_values=['xxx'],
     36)
     37# print(timings.head())
    2838
    29 # Filter operation=peq, corpus=100-*-1
    3039
    31 timings = timings.groupby('operation').get_group('peq')
    32 timings = timings.groupby('corpus-nstrs').get_group(100)
    33 timings = timings.groupby('corpus-runid').get_group(1)
     40# project: parse executable and corpus names
     41
     42timings[['test-slug',
     43     'sut-platform',
     44     'operation',
     45     'sut-cfa-level',
     46     'sut-cfa-sharing',
     47     'op-alloc']] = timings['test'].str.strip().str.split('-', expand=True)
     48timings['sut'] = timings[['sut-platform',
     49                    'sut-cfa-level',
     50                    'sut-cfa-sharing',
     51                    'op-alloc']].agg('-'.join, axis=1)
     52
     53timings[['corpus-basename',
     54     'corpus-ext']] = timings['corpus'].str.strip().str.split('.', expand=True)
     55timings[['corpus-slug',
     56     'corpus-nstrs',
     57     'corpus-meanlen',
     58     'corpus-runid']] = timings['corpus-basename'].str.strip().str.split('-', expand=True)
     59timings["corpus-nstrs"] = pd.to_numeric(timings["corpus-nstrs"])
     60timings["corpus-meanlen"] = pd.to_numeric(timings["corpus-meanlen"])
     61timings["corpus-runid"] = pd.to_numeric(timings["corpus-runid"])
     62
     63
     64# project: calculate fact
     65
     66timings['op-duration-s'] = timings['execTimeActualSec'] / timings['concatDoneActualCount']
     67timings['op-duration-ns'] = timings['op-duration-s'] * 1000 * 1000 * 1000
     68
     69
     70# Filter operation=peq
     71
     72groupedOp = timings.groupby('operation')
     73tgtOpTimings = groupedOp.get_group('peq')
     74
    3475
    3576# Emit in groups
    3677
    37 groupedSut = timings.groupby('sut')
     78groupedSut = tgtOpTimings.groupby('sut')
    3879
    3980for sut, sgroup in groupedSut:
  • doc/theses/mike_brooks_MMath/programs/bkgd-cfa-arrayinteract.cfa

    rbd72f517 r7d02d35  
    33
    44struct tm { int x; };
    5 forall( T * ) T * alloc();
     5forall (T*) T* alloc();
    66
    77int main () {
  • doc/theses/mike_brooks_MMath/programs/hello-accordion.cfa

    rbd72f517 r7d02d35  
    3131int getPref( @School( C, S ) & school@, int is, int pref ) {
    3232        for ( ic; C ) {
    33                 if ( pref == @school.preferences@[ic][is]; ) return ic; $\C{// offset calculation implicit}$
     33                int curPref = @school.preferences@[ic][is];   $\C{// offset calculation implicit}$
     34                if ( curPref == pref ) return ic;
    3435        }
    3536        assert( false );
    3637}
    37 
    3838
    3939
     
    9191                sout | school.student_ids[is] | ": " | nonl;
    9292                for ( pref; 1 ~= nc ) {
    93                         int ic = getPref( school, is, pref );
     93                        int ic = getPref(school, is, pref);
    9494                        sout | school.course_codes[ ic ] | nonl;
    9595                }
  • doc/theses/mike_brooks_MMath/string.tex

    rbd72f517 r7d02d35  
    1717\begin{cquote}
    1818\begin{tabular}{@{}l|l|l|l@{}}
    19 C @char [ ]@                    &  \CC @string@                 & Java @String@ & \CFA @string@ \\
     19C @char [ ]@                    &  \CC @string@                 & Java @String@     & \CFA @string@     \\
    2020\hline
    21 @strcpy@, @strncpy@             & @=@                                   & @=@                   & @=@   \\
    22 @strcat@, @strncat@             & @+@, @+=@                             & @+@, @+=@             & @+@, @+=@     \\
     21@strcpy@, @strncpy@             & @=@                                   & @=@               & @=@       \\
     22@strcat@, @strncat@             & @+@, @+=@                             & @+@, @+=@         & @+@, @+=@ \\
    2323@strcmp@, @strncmp@             & @==@, @!=@, @<@, @<=@, @>@, @>=@
    24                                                                                                 & @equals@, @compareTo@
    25                                                                                                                                 & @==@, @!=@, @<@, @<=@, @>@, @>=@ \\
    26 @strlen@                                & @length@, @size@              & @length@              & @size@        \\
    27 @[ ]@                                   & @[ ]@                                 & @charAt@              & @[ ]@ \\
    28 @strncpy@                               & @substr@                              & @substring@   & @( )@, on RHS of @=@  \\
    29 @strncpy@                               & @replace@                             & @replace@             & @( )@, on LHS of @=@ \\
    30 @strstr@                                & @find@                                & @indexOf@             & @find@ \\
    31 @strcspn@                               & @find_first_of@               & @matches@             & @include@ \\
    32 @strspn@                                & @find_first_not_of@   & @matches@             & @exclude@ \\
    33 N/A                                             & @c_str@, @data@               & N/A                   & @strcpy@, @strncpy@ \\
     24                                                & @equals@, @compareTo@
     25                                                                                                                                        & @==@, @!=@, @<@, @<=@, @>@, @>=@ \\
     26@strlen@                                & @length@, @size@              & @length@                      & @size@        \\
     27@[ ]@                                   & @[ ]@                                 & @charAt@          & @[ ]@     \\
     28@strncpy@                               & @substr@                              & @substring@       & @( )@, on RHS of @=@      \\
     29@strncpy@                               & @replace@                             & @replace@         & @( )@, on LHS of @=@ \\
     30@strstr@                                & @find@                                & @indexOf@         & @find@ \\
     31@strcspn@                               & @find_first_of@               & @matches@         & @include@ \\
     32@strspn@                                & @find_first_not_of@   & @matches@         & @exclude@ \\
     33n/a                                             & @c_str@, @data@               & n/a               & @strcpy@, @strncpy@ \\
    3434\end{tabular}
    3535\end{cquote}
     
    5353
    5454
    55 \section{\CFA \lstinline{string} Type}
     55\section{\CFA \lstinline{string} type}
    5656\label{s:stringType}
    5757
     
    272272ch = ch + 'b'; $\C[2in]{// LHS disambiguate, add character values}$
    273273s = 'a' + 'b'; $\C{// LHS disambiguate, concatenate characters}$
    274 printf( "%c\n", @'a' + 'b'@ ); $\C{// no LHS information, ambiguous}$
    275 printf( "%c\n", @(return char)@('a' + 'b') ); $\C{// disambiguate with ascription cast}\CRT$
     274printf( "%c\n", @'a' + 'b'@ ); $\C[2in]{// no LHS information, ambiguous}$
     275printf( "%c\n", @(return char)@('a' + 'b') ); $\C{// disambiguate with ascription cast}$
    276276\end{cfa}
    277277The ascription cast, @(return T)@, disambiguates by stating a (LHS) type to use during expression resolution (not a conversion).
     
    319319ch = ch * 3; $\C[2in]{// LHS disambiguate, multiply character values}$
    320320s = 'a' * 3; $\C{// LHS disambiguate, concatenate characters}$
    321 printf( "%c\n", @'a' * 3@ ); $\C{// no LHS information, ambiguous}$
    322 printf( "%c\n", @(return char)@('a' * 3) ); $\C{// disambiguate with ascription cast}\CRT$
     321printf( "%c\n", @'a' * 3@ ); $\C[2in]{// no LHS information, ambiguous}$
     322printf( "%c\n", @(return char)@('a' * 3) ); $\C{// disambiguate with ascription cast}$
    323323\end{cfa}
    324324Fortunately, character multiplication without LHS information is even rarer than addition, so repurposing the operator @*@ for @string@ types is not a problem.
     
    600600&
    601601\begin{cfa}
    602 for () {
     602for ( ;; ) {
    603603        size_t posn = exclude( line, alpha );
    604604  if ( posn == len( line ) ) break;
     
    722722\end{tabular}
    723723\end{cquote}
    724 Input text can be \emph{gulped}, including whitespace, from the current point to an arbitrary delimiter character using @getline@.
     724Input text can be gulped, including whitespace, from the current point to an arbitrary delimiter character using @getline@.
    725725
    726726The \CFA philosophy for input is that, for every constant type in C, these constants should be usable as input.
     
    760760\end{tabular}
    761761\end{cquote}
    762 Note, the ability to read in quoted strings with whitespace to match with program string constants.
     762Note, the ability to read in quoted strings to match with program string constants.
    763763The @nl@ at the end of an input ignores the rest of the line.
    764764
     
    845845                                        & Laxed: The target's type is anything string-like; it may have a different status concerning ownership.
    846846                                                                & Strict: The target's type is the same as the source; both strings are equivalent peers concerning ownership.
    847                                                                                         & N/A           & no    & yes   & yes \\
     847                                                                                        & n/a           & no    & yes   & yes \\
    848848\hline
    849849Referent
     
    863863        The C ``string'' is @char *@, under the conventions of @<string.h>@. Because this type does not manage a text allocation, symmetry does not apply.
    864864\item
    865         The Java @String@ class is analyzed; its @StringBuffer@ class behaves similarly to \CC.
     865        The Java @String@ class is analyzed; its @StringBuffer@ class behaves similarly to @C++@.
    866866\end{itemize}
    867867\caption{Comparison of languages' strings, storage management perspective.}
     
    869869\end{figure}
    870870
    871 In C, these declarations are very different.
     871In C, these declarations give very different things.
    872872\begin{cfa}
    873873char x[$\,$] = "abcde";
    874874char * y = "abcde";
    875875\end{cfa}
    876 Both associate the declared name with the fixed, six contiguous bytes: @{'a', 'b', 'c', 'd', 'e', 0}@.
    877 But @x@ is allocated on the stack (with values filled at the declaration), while @y@ refers to the executable's read-only data-section.
     876Both associate the declared name with fixed-six contiguous bytes, filled as @{'a', 'b', 'c', 'd', 'e', 0}@.
     877But @x@ gets them allocated in the active stack frame (with values filled in as control passes the declaration), while @y@ refers into the executable's read-only data section.
    878878With @x@ representing an allocation, it offers information in @sizeof(x)@ that @y@ does not.
    879 But this extra information is second-class, as it can only be used in the immediate lexical context, \ie it cannot be passed to string operations or user functions.
     879But this extra information is second-class, as it can only be used in the immediate lexical context, \ie it cannot be passed on to string operations or user functions.
    880880Only pointers to text buffers are first-class, and discussed further.
     881
    881882\begin{cfa}
    882883char * s = "abcde";
    883 char * s1 = s;  $\C[2.25in]{// alias state, N/A symmetry, variable-constrained referent}$
    884 char * s2 = &s[1];  $\C{// alias state, N/A symmetry, variable-constrained referent}$
    885 char * s3 = &s2[1];  $\C{// alias state, N/A symmetry, variable-constrained referent}\CRT$
     884char * s1 = s;  $\C{// alias state, n/a symmetry, variable-constrained referent}$
     885char * s2 = &s[1];  $\C{// alias state, n/a symmetry, variable-constrained referent}\CRT$
     886char * s3 = &s2[1];  $\C{// alias state, n/a symmetry, variable-constrained referent}
    886887printf( "%s %s %s %s\n", s, s1, s2, s3 );
    887888$\texttt{\small abcde abcde bcde cde}$
     
    903904string & s5 = s.substr(2,4);  $\C{// error: cannot point to temporary}\CRT$
    904905\end{cfa}
    905 The @s1@ lax symmetry reflects how its validity depends on the lifetime of @s@.
     906The @s1@ lax symmetry reflects how its validity of depends on the lifetime of @s@.
    906907It is common practice in \CC to use the @s1@-style for a by-reference function parameter.
    907908Doing so assumes that the callee only uses the referenced string for the duration of the call, \ie no storing the parameter (as a reference) for later.
    908909So, when the called function is a constructor, its definition typically uses an @s2@-style copy-initialization.
    909910Exceptions to this pattern are possible, but require the programmer to assure safety where the type system does not.
    910 The @s3@ initialization must copy the substring to support a subsequent @c_str@ call, which provides null-termination, generally at a different position than the source string's.
     911The @s3@ initialization must copy the substring because it must support a subsequent @c_str@ call, which provides a null-termination, generally at a different position than the source string's.
    911912@s2@ assignment could be made fast, by reference-counting the text area and using copy-on-write, but would require an implementation upgrade.
    912913
     
    928929With @s2@, the case for fast-copy is more subtle.
    929930Certainly, its value is not pointer-equal to @s@, implying at least a further allocation.
    930 But because Java is \emph{not} constrained to use a null-terminated representation, a standard-library implementation is free to refer to the source characters in-place.
     931But because Java is not constrained to use a null-terminated representation, a standard-library implementation is free to refer to the source characters in-place.
    931932Java does not meet the aliasing requirement because immutability makes it impossible to modify.
    932933Java's @StringBuffer@ provides aliasing (see @replace@ example on \VPageref{p:JavaReplace}), though without supporting symmetric treatment of a fragment referent, \eg @substring@ of a @StringBuffer@ is a @String@;
     
    959960
    960961
    961 \subsection{Logical Overlap}
     962
     963\subsection{Logical overlap}
    962964
    963965It may be unfamiliar to combine \VRef[Figure]{f:StrSemanticCompare}'s alias state and fragment referent in one API, or at the same time.
    964966This section shows the capability in action.
    965967
    966 \begin{comment}
    967 The metaphor of a GUI text-editor is used to illustrate combining these features.
    968 Most editors allow selecting a consecutive block of text (highlighted) to define an aliased substring within a document.
    969 Typing in this area overwrites the prior text, replacing the selected text with less, same, or more text.
    970 Importantly, the document also changes size, not just the selection.
    971 %This alias model is assigning to an aliased substring for two strings overlapping by total containment: one is the selected string, the other is the document.
    972 Extend the metaphor to two selected areas, where one area can be drag-and-dropped into another, changing the text in the drop area and correspondingly changing the document.
    973 When the selected areas are indenpendent, the semantics of the drag-and-drop are straightforward.
    974 However, for overlapping selections, either partial or full, there are multiple useful semantics.
    975 For example, two areas overlap at the top, or bottom, or a block at a corner, where one areas is dropped into the other.
    976 For selecting a smaller area within a larger, and dropping the smaller area into the larger to replace it.
    977 In both cases, meaningful semantics must be constructed or the operation precluded.
    978 However, without this advanced capability, certain operations become multi-step, possible requiring explicit temporaries.
    979 \end{comment}
    980 
    981 A GUI text-editor provides a metaphor.
    982 Selecting a block of text using the mouse defines an aliased substring within a document.
    983 Typing in this area overwrites what was there, replacing the originally selected text with more or less text.
    984 But the \emph{containing document} also grows or shrinks, not just the selection.
    985 This action models assigning to an aliased substring when one string is completely contained in the other.
    986 
    987 Extend the metaphor to a multi-user editor.
    988 If Alice selects a range of text at the bottom, while Bob is rewriting a paragraph at the top, Alice's selection holds onto the characters initially selected, unaffected by Bob making the document grow/shrink even though Alice's start index in the document is changing.
    989 This action models assigning to an aliased substring when the two strings do not overlap.
    990 
    991 Logically, Alice's and Bob's actions on the whole document are like two single-user-edit cases, giving the semantics of the individual edits flowing into a whole.
    992 But, there is no need to have two separate document strings.
    993 Even if a third selection removes all the text, both Alice's and Bob's strings remain.
    994 The independence of their selections assumes that the editor API does not allow the selection to be enlarged, \ie adding text from the containing environment, which may have disappeared.
    995 
    996 This leaves the ``Venn-diagram overlap'' cases, where Alice's and Bob's selections overlap at the top, bottom, or corner.
    997 In this case, the selection areas are dependent, and so, changes in content and size in one may have an affect in the other.
    998 There are multiple possible semantics for this case.
    999 The remainder of this section shows the chosen semantics for all of the cases.
    1000 
    1001 String sharing is expressed using the @`share@ marker to indicate aliasing (mutations shared) \vs snapshot (not quite an immutable result, but one with subsequent mutations isolated).
     968In summary, the metaphor of a GUI text editor is intended.
     969Selecting a consecutive block of text using the mouse defines an aliased substring within the file.
     970Typing in this state overwrites what was there before, replacing the originally selected text with more or less text.
     971But the \emph{whole file} grows or shrinks as a result, not just the selection.
     972This action models assigning to an aliased substring when the two strings overlap by total containment: one string is the selection, the other is the whole file.
     973
     974Now extend the metaphor to a multi-user online editor.
     975If Alice selects a range of text at the bottom of the file, wile Bob is rewriting a paragraph at the top, Alice's selection holds onto the logical characters initially selected, unaffected by Bob making the total file grow/shrink, and unaffectd by Bob causing the start index of Alice's selction to vary.
     976This action models assigning to an aliased substring when the two strings do not overlap at all: one string is Alice's selection, the other is Bob's.
     977
     978If a third office worker were also watching Alice's and Bob's actions on the whole file (a string with ``all the text'' is kept around), then two further single-user-edit cases give the semantics of the individual edits flowing into the whole.
     979But, departing from the document analogy, it is not necessary to keep a such a third string:
     980no one has to resource-manage ``the document.''
     981When an original string, from which both the Alice- and Bob-parts came, ceases to exist, Alice and Bob are left with two independent strings.
     982They are independent because Alice and Bob have no API for growing the bounds of a string to subsume text that may once have been around it.
     983
     984Edge cases, notably ``Venn-diagram overlap,'' had to have handlings chosen.
     985The intent in fleshing out these details was to achieve the above story, with a single API, while keeping the rest as simple as possible.
     986The remainder of this section shows the resulting decisions, played out at the API level.
     987
     988\CFA uses the marker @`share@ as a dynamic mechanism to indicate alias (mutations shared) \vs snapshot (not quite an immutable result, but one with subsequent mutations isolated).
    1002989This aliasing relationship is a sticky property established at initialization.
    1003990For example, here strings @s1@ and @s1a@ are in an aliasing relationship, while @s2@ is in a copy relationship.
    1004991\input{sharing1.tex}
    1005992Here, the aliasing (@`share@) causes partial changes (subscripting) to flow in both directions.
    1006 (In the following examples, note how @s1@ and @s1a@ change together, and @s2@ is independent.)
     993(In the following examples, watch how @s1@ and @s1a@ change together, and @s2@ is independent.)
    1007994\input{sharing2.tex}
    1008995Similarly for complete changes.
     
    1012999\input{sharing4.tex}
    10131000
    1014 Now, consider string @s1_mid@ being an alias in the middle of @s1@, along with @s2@, made by a copy from the middle of @s1@.
     1001Now, consider string @s1_mid@ being an alias in the middle of @s1@, along with @s2@, made by a simple copy from the middle of @s1@.
    10151002\input{sharing5.tex}
    10161003Again, @`share@ passes changes in both directions; copy does not.
     
    10331020When @s1_bgn@'s size increases by 3, @s1_mid@'s starting location moves from 1 to 4 and @s1_end@'s from 3 to 6,
    10341021
    1035 When changes happen on an aliasing substring that overlap.
     1022When changes happens on an aliasing substring that overlap.
    10361023\input{sharing10.tex}
    1037 Strings @s1_crs@ and @s1_mid@ overlap at character 4, @j@, because the substrings are 3,2 and 4,2.
     1024Strings @s1_crs@ and @s1_mid@ overlap at character 4, @j@ because the substrings are 3,2 and 4,2.
    10381025When @s1_crs@'s size increases by 1, @s1_mid@'s starting location moves from 4 to 5, but the overlapping character remains, changing to @'+'@.
    10391026
     
    10921079
    10931080
    1094 \section{Storage Management}
     1081
     1082\section{Storage management}
    10951083
    10961084This section discusses issues related to storage management of strings.
     
    11111099const string s1 = "abc";
    11121100\end{cfa}
    1113 @const@ applies to the @s1@ pointer to @"abc"@, and @"abc"@ is an immutable constant that is \emph{copied} into the string's storage.
    1114 Hence, @s1@ is not pointing at an immutable constant and its underlying string is mutable, unless some other designation is specified, such as Java's global immutable rule.
    1115 
    1116 
    1117 \subsection{General Implementation}
     1101the @const@ applies to the @s1@ pointer to @"abc"@, and @"abc"@ is an immutable constant that is \emph{copied} into the string's storage.
     1102Hence, @s1@ is not pointing at an immutable constant, meaning its underlying string can be mutable, unless some other designation is specified, such as Java's global immutable rule.
     1103
     1104
     1105
     1106\subsection{General implementation}
    11181107\label{string-general-impl}
    11191108
     
    11241113A string is a smart pointer into this buffer.
    11251114
    1126 This cycle of frequent cheap allocations, interspersed with infrequent expensive compactions, has obvious similarities to a general-purpose memory-manager based on garbage collection (GC).
     1115This cycle of frequent cheap allocations, interspersed with infrequent expensive compactions, has obvious similarities to a general-purpose memory manager based on garbage collection (GC).
    11271116A few differences are noteworthy.
    11281117First, in a general purpose manager, the allocated objects may contain pointers to other objects, making the transitive reachability of these objects a crucial property.
    11291118Here, the allocations are text, so one allocation never keeps another alive.
    11301119Second, in a general purpose manager, the handle that keeps an allocation alive is a bare pointer.
    1131 For strings, a fatter representation is acceptable because this pseudo-pointer is only used for entry into the string-heap, not for general data-substructure linking around the general heap.
     1120For strings, a fatter representation is acceptable because this pseudo-pointer is only used for enty into the string-heap, not for general data-sub-structure linking around the general heap.
    11321121
    11331122\begin{figure}
     
    11391128\VRef[Figure]{f:memmgr-basic} shows the representation.
    11401129The heap header and text buffer define a sharing context.
    1141 Normally, one global context is appropriate for an entire program;
    1142 concurrency is discussed in \VRef{s:ControllingImplicitSharing}.
    1143 A string is a handle to a node in a linked list containing a information about a string text in the buffer.
     1130Normally, one global sharing context is appropriate for an entire program;
     1131concurrent exceptions are discussed in \VRef{s:ControllingImplicitSharing}.
     1132A string is a handle into the buffer and node within a linked list.
    11441133The list is doubly linked for $O(1)$ insertion and removal at any location.
    11451134Strings are ordered in the list by text start address.
    1146 The heap header maintains a next-allocation pointer, @alloc@, pointing to the last live allocation in the buffer.
     1135The header maintains a next-allocation pointer, @alloc@, pointing to the last live allocation in the buffer.
    11471136No external references point into the buffer and the management procedure relocates the text allocations as needed.
    11481137A string handle references a containing string, while its string is contiguous and not null terminated.
     
    11501139String handles can be allocated in the stack or heap, and represent the string variables in a program.
    11511140Normal C life-time rules apply to guarantee correctness of the string linked-list.
    1152 The text buffer is large enough with good management so that often only one dynamic allocation is necessary during program execution, but not so large as to cause program bloat.
     1141The text buffer is large enough with good management so that often only one dynamic allocation is necessary during program execution.
    11531142% During this period, strings can vary in size dynamically.
    11541143
    11551144When the text buffer fills, \ie the next new string allocation causes @alloc@ to point beyond the end of the buffer, the strings are compacted.
    11561145The linked handles define all live strings in the buffer, which indirectly defines the allocated and free space in the buffer.
    1157 The string handles are maintained in sorted order, so the handle list can be traversed, copying the first live text to the start of the buffer, and subsequent strings after each other.
    1158 After compaction, if free storage is still be less than the new string allocation, a larger text buffer is heap-allocated, the current buffer is copied into the new buffer, and the original buffer is freed.
     1146Since the string handles are in sorted order, the handle list can be traversed, copying the first live text to the start of the buffer, and subsequent strings after each other.
     1147If, upon compaction, the amount of free storage would still be less than the new string allocation, a larger text buffer is heap-allocated, the current buffer is copied into the new buffer, and the original buffer is freed.
    11591148Note, the list of string handles is structurally unaffected during a compaction;
    11601149only the text pointers in the handles are modified to new buffer locations.
     
    11681157Both string initialization styles preserve the string module's internal invariant that the linked-list order matches the buffer order.
    11691158For string destruction, handles are removed from the list.
    1170 Once the last handle using a run of buffer characters is destroyed, that buffer space is excluded from use until the next compaction.
    1171 
    1172 Certain string operations result in a substring of another string.
    1173 The resulting handle is then placed in the correct sorted position in the list, possible requiring a short linear search to locate the position.
     1159As a result, once a last handle using a run of buffer characters is destroyed, that buffer space gets excluded from the next compaction, making its character-count available in the compacted buffer.
     1160
     1161Certain string operations can result in a substring of another string.
     1162The resulting handle is then placed in the correct sorted position in the list, possible with a short linear search to locate the position.
    11741163For string operations resulting in a new string, that string is allocated at the end of the buffer.
    11751164For shared-edit strings, handles that originally referenced containing locations need to see the new value at the new buffer location.
     
    11861175
    11871176
    1188 \subsection{RAII Limitations}
     1177\subsection{RAII limitations}
    11891178\label{string-raii-limit}
    11901179
    11911180Earlier work on \CFA~\cite[ch.~2]{Schluntz17} implemented object constructors and destructors for all types (basic and user defined).
    1192 A constructor is a user-defined function run implicitly \emph{after} an object's storage is allocated, and a destructor is a user-defined function run \emph{before} an object's storage is deallocated.
     1181A constructor is a user-defined function run implicitly \emph{after} an object's storage is allocated, and a destructor is a user-defined function run \emph{before} an object's storage is deallcated.
    11931182This feature, called Resource Acquisition Is Initialization (RAII)~\cite[p.~389]{Stroustrup94}, helps guarantee invariants for users before accessing an object and for the programming environment after an object terminates.
    11941183
     
    12241213\end{cfa}
    12251214A module providing the @T@ type can traverse @all_T@ at relevant times, to keep the objects ``good.''
    1226 Hence, declaring a @T@ not only ensures that it begins with an initially ``good'' value, but it also provides an implicit subscription to a service that keeps the value ``good'' during its lifetime.
     1215Hence, declaring a @T@ not only ensures that it begins with an initially ``good'' value, but it also provides an implicit subscription to a service that keeps the value ``good'' in the future.
    12271216Again, both \CFA and \CC support this usage style.
    12281217
    12291218A third capability concerns \emph{implicitly} requested copies.
    12301219When stack-allocated objects are used as parameter and return values, a sender's version exists in one stack frame and a receiver's version exists in another.
    1231 In the parameter direction, the language's function-call handling must arrange for a copy-constructor call to happen, at a time near the control transfer into the callee. %, with the source as the caller's (sender's) version and the target as the callee's (receiver's) version.
    1232 In the return direction, the roles are reversed and the copy-constructor call happens near the return of control.
    1233 \CC supports this capability.% without qualification.
    1234 \CFA offers limited support;
    1235 simple examples work, but implicit copying does not combine successfully with the other RAII capabilities discussed.
    1236 
    1237 \CC also offers move constructors and return-value optimization~\cite{RVO20}.
    1238 These features help reduce unhelpful copy-constructor calls, which, for types like the @S@ example, would lead to extra memory allocations.
    1239 \CFA does not currently have these features; adding similarly-intended features to \CFA is desirable.
    1240 However, this section is about a problem in the realization of features that \CFA already supports.
    1241 Hence, the comparison continues with the classic version of \CC that treated such copy-constructor calls as necessary.
     1220In the parameter direction, the language's function-call handling must arrange for a copy-constructor call to happen\footnote{
     1221        \CC also offers move constructors and return-value optimization~\cite{RVO20}.
     1222        These features help reduce unhelpful copy-constructor calls, which, for types like the example \lstinline{S}, would lead to extra memory allocations.
     1223        \CFA does not currently have these features; adding similarly-intended features to \CFA is desirable.
     1224        However, this section is about a problem in the realization of features that \CFA already supports.
     1225        To understand the problem presented, the appropriate comparison is with classic versions of \CC that treated such copy-constructor calls as necessary.}
     1226at a time near the control transfer into the callee, with the source as the caller's (sender's) version and the target as the callee's (receiver's) version.
     1227(In the return direction, the roles are reversed and the copy-constructor call happens near the return of control.)
     1228\CC supports this capability without qualification.
     1229\CFA offers limited support here; simple examples work, but implicit copying does not combine successfully with the other RAII capabilities discussed.
    12421230
    12431231To summarize the unsupported combinations, the relevant features are:
     
    12551243At that time, adhering to a principal of minimal intervention, this code could always be treated as passthrough:
    12561244\begin{cfa}
    1257 struct U { ... };
     1245struct U {...};
    12581246// RAII to go here
    12591247void f( U u ) { F_BODY(u) }
     
    12611249f( x );
    12621250\end{cfa}
    1263 But adding custom RAII (at ``...go here'') changes things.
    1264 The common \CC lowering~\cite[Sec. 3.1.2.3]{cxx:raii-abi} proceeds differently than the present \CFA lowering.
    1265 \begin{cquote}
    1266 \setlength{\tabcolsep}{15pt}
    1267 \begin{tabular}{@{}l|l@{}}
    1268 \begin{cfa}
    1269 $\C[0.0in]{// \CC, \CFA future}\CRT$
     1251But adding custom RAII (at ``...here'') changes things.
     1252The common C++ lowering~\cite[Sec. 3.1.2.3]{cxx:raii-abi} proceeds differently than the present CFA lowering.
     1253
     1254\noindent
     1255\begin{tabular}{l|l}
     1256\begin{cfa}
     1257// C++, likely CFA to be
    12701258struct U {...};
    12711259// RAII elided
    12721260void f( U * __u_orig ) {
    12731261        U u = * __u_orig;  // call copy ctor
    1274         F_BODY( u );
     1262        F_BODY(u)
    12751263        // call dtor, u
    12761264}
    12771265U x; // call default ctor
    1278 
    1279 
    1280 f( &x ) ;
    1281 
    1282 
     1266f( & x ) ;
    12831267// call dtor, x
    12841268\end{cfa}
    12851269&
    12861270\begin{cfa}
    1287 $\C[0.0in]{// \CFA today}\CRT$
     1271// CFA today
    12881272struct U {...};
    12891273// RAII elided
    12901274void f( U u ) {
    1291 
    1292         F_BODY( u );
    1293 
     1275        F_BODY(u)
    12941276}
    12951277U x; // call default ctor
     
    13021284\end{cfa}
    13031285\end{tabular}
    1304 \end{cquote}
    1305 The current \CFA scheme is still using a by-value C call.
    1306 C does a @memcpy@ on structures passed by value.
    1307 And so, @F_BODY@ sees the bits of @__u_for_f@ occurring at an address that has never been presented to the @U@ lifecycle functions.
    1308 If @U@ is trying to have a style-\#2 invariant, it shows up broken in @F_BODY@: references supposedly to @u@ are actually to @__u_for_f@.
    1309 The \CC scheme does not have this problem because it constructs the for @f@ copy in the correct location within @f@.
    1310 
    1311 Yet, the current \CFA scheme is sufficient to deliver style-\#1 invariants (in this style-\#3 use case) because this scheme still does the correct number of lifecycle calls, using correct values, at correct times.
    1312 So, reference-counting or simple ownership applications get their invariants respected under call/return-by-value.
     1286
     1287In the CFA-today scheme, the lowered form is still using a by-value C call.
     1288C does a @memcpy@ on structs passed by value.
     1289And so, @F_BDY@ sees the bits of @__u_for_f@ occurring at an address that has never been presented to the @U@ lifecycle functions.
     1290If @U@ is trying to have a style-\#2 invariant, it shows up broken in @F_BDY@: references that are supposed to be to @u@ are actually to the different location @__u_for_f@.
     1291The \CC scheme does not have this problem because it constructs the for-@f@ copy in the correct location.
     1292
     1293Yet, the \CFA-today scheme is sufficient to deliver style-\#1 invariants (in this style-\#3 use case) because this scheme still does the correct number of lifecycle calls, using correct values, at correct times.  So, reference-counting or simple ownership applications get their invariants respected under call/return-by-value.
    13131294
    13141295% [Mike is not currently seeing how distinguishing initialization from assignment is relevant]
     
    13441325% The following discusses the consequences of this semantics with respect to lifetime management of \CFA strings.
    13451326
    1346 The string API offers style \#3's pass-by-value in, \eg in the return of @"a" + "b"@.
     1327
     1328The string API offers style \#3's pass-by-value in, for example, in the return of @"a" + "b"@.
    13471329Its implementation uses the style-\#2 invariant of the string handles being linked to each other, helping to achieve high performance.
    1348 Since these two RAII styles cannot coexist, a workaround splits the API into two layers: one that provides pass-by-value, built upon the other with inter-linked handles.
     1330Since these two RAII styles cannont coexist, a workaround splits the API into two layers: one that provides pass-by-value, built upon the other with inter-linked handles.
    13491331The layer with pass-by-value incurs a performance penalty, while the layer without delivers the desired runtime performance.
    1350 The slower, friendlier High Level API (HL, type @string@) wraps the faster, more primitive Low Level API (LL, type @string_res@, abbreviating ``resource'').
     1332The slower, friendlier High Level API (HL, type @string@) wrapps the faster, more primitive Low Level API (LL, type @string_res@, abbreviating ``resource'').
    13511333Both APIs present the same features, up to return-by-value operations being unavailable in LL and implemented via the workaround in HL.
    13521334The intention is for most future code to target HL.
    1353 When the RAII issue is fixed, the full HL feature set will be achievable using the LL-style lifetime management.
    1354 Then, HL will be removed;
    1355 LL's type will be renamed @string@ and programs written for current HL will run faster.
     1335When the RAII issue is fixed, the full HL feature set will be acheivable using the LL-style lifetime management.
     1336So then, there will be no need for two API levels; HL will be removed; LL's type will be renamed to @string@; programs written for current HL will run faster.
    13561337In the meantime, performance-critical sections of applications must use LL.
    13571338Subsequent performance experiments \see{\VRef{s:PerformanceAssessment}} use the LL API when comparing \CFA to other languages.
    13581339This measurement gives a fair estimate of the goal state for \CFA.
    13591340A separate measure of the HL overhead is also included.
    1360 hence, \VRef[Section]{string-general-impl} us describing the goal state for \CFA.
    1361 In present state, the type @string_res@ replaces its mention of @string@ as inter-linked handle.
    1362 
    1363 To use LL, a programmer rewrites invocations using pass-by-value APIs into invocations where resourcing is more explicit.
    1364 Many invocations are unaffected, notably assignment and comparison.
    1365 Of the capabilities listed in \VRef[Figure]{f:StrApiCompare}, only the following three cases need revisions.
    1366 \begin{cquote}
    1367 \setlength{\tabcolsep}{15pt}
     1341
     1342\VRef[Section]{string-general-impl} described the goal state for \CFA.  In present state, the type @string_res@ replaces its mention of @string@ as inter-linked handle.
     1343
     1344To use LL, a programmer rewrites invocations that used pass-by-value APIs into invocations where the resourcing is more explicit.
     1345Many invocations are unaffected, notably including assignment and comparison.
     1346Of the capabilities listed in \VRef[Figure]{f:StrApiCompare}, only the following three cases have revisions.
     1347
     1348\noindent
    13681349\begin{tabular}{ll}
    13691350HL & LL \\
    13701351\hline
    13711352\begin{cfa}
    1372 
    13731353string s = "a" + "b";
    13741354\end{cfa}
     
    13831363string s = "abcde";
    13841364string s2 = s(2, 3); // s2 == "cde"
    1385 
    13861365s(2,3) = "x"; // s == "abx" && s2 == "cde"
    13871366\end{cfa}
     
    13971376\begin{cfa}
    13981377string s = "abcde";
    1399 
    14001378s[2] = "xxx";  // s == "abxxxde"
    14011379\end{cfa}
     
    14071385\end{cfa}
    14081386\end{tabular}
    1409 \end{cquote}
     1387
    14101388The actual HL workaround is having @string@ wrap a pointer to a uniquely owned, heap-allocated @string_res@.  This arrangement has @string@ being style-\#1 RAII, which is compatible with pass-by-value.
    14111389
    14121390
    1413 \subsection{Sharing Implementation}
     1391
     1392\subsection{Sharing implementation}
    14141393\label{sharing-impl}
    14151394
    1416 The \CFA string module has two mechanisms to deal with string handles sharing text.
     1395The \CFA string module has two mechanisms to handle the case when string handles share a run of text.
     1396
    14171397In the first type of sharing, the user requests that both string handles be views of the same logical, modifiable string.
    14181398This state is typically produced by the substring operation.
     
    14241404$\texttt{\small axcde xc}$
    14251405\end{cfa}
    1426 Here, the source string-handle is referencing an entire string, and the resulting, newly made, string handle is referencing a contained portion of the original.
    1427 In this state, a modification made in the overlapping area is visible in both strings.
     1406In a typical substring call, the source string-handle is referencing an entire string, and the resulting, newly made, string handle is referencing a portion of the original.
     1407In this state, a subsequent modification made by either is visible in both.
    14281408
    14291409The second type of sharing happens when the system implicitly delays the physical execution of a logical \emph{copy} operation, as part of its copy-on-write optimization.
     
    14381418In this state, a subsequent modification done on one handle triggers the deferred copy action, leaving the handles referencing different text within the buffer, holding distinct values.
    14391419
    1440 A further abstraction helps distinguish the two senses of sharing.
     1420A further abstraction, in the string module's implementation, helps distinguish the two senses of sharing.
    14411421A share-edit set (SES) is an equivalence class over string handles, being the reflexive, symmetric and transitive closure of the relationship of one string being constructed from another, with the ``share'' option given.
    14421422The SES is represented by a second linked list among the handles.
     
    14471427
    14481428
    1449 \subsection{Controlling Implicit Sharing}
     1429\subsection{Controlling implicit sharing}
    14501430\label{s:ControllingImplicitSharing}
    14511431
     
    14541434In detail, string sharing has inter-linked string handles, so managing one string is also managing the neighbouring strings, and from there, a data structure of the ``set of all strings.''
    14551435Therefore, it is useful to toggle this capability on or off when it is not providing any application benefit.
     1436
     1437\begin{figure}
     1438    \begin{tabular}{ll}
     1439        \lstinputlisting[language=CFA, firstline=10, lastline=55]{sharectx.run.cfa}
     1440        &
     1441        \raisebox{-0.17\totalheight}{\includegraphics{string-sharectx.pdf}} % lower
     1442    \end{tabular}
     1443        \caption{Controlling copying vs sharing of strings using \lstinline{string_sharectx}.}
     1444        \label{fig:string-sharectx}
     1445\end{figure}
    14561446
    14571447The \CFA string library provides the type @string_sharectx@ to control an ambient sharing context.
     
    14661456Executing the example does not produce an interesting outcome, but the comments in the picture indicate when the logical copy operation runs with
    14671457\begin{description}
    1468         \item[share:] the copy being deferred, as described through the rest of this section (fast), or
    1469         \item[copy:] the copy performed eagerly (slow).
     1458    \item[share:] the copy being deferred, as described through the rest of this section (fast), or
     1459    \item[copy:] the copy performed eagerly (slow).
    14701460\end{description}
    14711461Only eager copies can cross @string_sharectx@ boundaries.
    14721462The intended use is with stack-managed lifetimes, in which the established context lasts until the current function returns, and affects all functions called that do not create their own contexts.
    14731463
    1474 \begin{figure}
    1475         \begin{tabular}{ll}
    1476                 \lstinputlisting[language=CFA, firstline=10, lastline=55]{sharectx.run.cfa}
    1477                 &
    1478                 \raisebox{-0.17\totalheight}{\includegraphics{string-sharectx.pdf}} % lower
    1479         \end{tabular}
    1480         \caption{Controlling copying vs sharing of strings using \lstinline{string_sharectx}.}
    1481         \label{fig:string-sharectx}
    1482 \end{figure}
    1483 
    1484 
    1485 \subsection{Sharing and Threading}
     1464[ TODO: true up with ``is thread local'' (implement that and expand this discussion to give a concurrent example, or adjust this wording) ]
     1465
     1466
     1467\subsection{Sharing and threading}
    14861468
    14871469The \CFA string library provides no thread safety, the same as \CC string, providing similar performance goals.
     
    14921474
    14931475
    1494 \subsection{Short-String Optimization}
    1495 
    1496 \CC implements a short-string ($\le$16) optimization (SSO).
    1497 As a string header contains a pointer to the string text, this pointer can be tagged and used to contain a short string, removing a dynamic memory allocation/deallocation.
     1476\subsection{Future work}
     1477
     1478Implementing the small-string optimization is straightforward, as a string header contains a pointer to the string text in the buffer.
     1479This pointer could be marked with a flag and contain a small string.
     1480However, there is now a conditional check required on the fast-path to switch between small and large string operations.
     1481
     1482It might be possible to pack 16- or 32-bit Unicode characters within the same string buffer as 8-bit characters.
     1483Again, locations for identification flags must be found and checked along the fast path to select the correct actions.
     1484Handling utf8 (variable length), is more problematic because simple pointer arithmetic cannot be used to stride through the variable-length characters.
     1485Trying to use a secondary array of fixed-sized pointers/offsets to the characters is possible, but raises the question of storage management for the utf8 characters themselves.
     1486
     1487
     1488\section{Performance assessment}
     1489\label{s:PerformanceAssessment}
     1490
     1491I assessed the \CFA string library's speed and memory usage against strings in \CC STL.
     1492
     1493Overall, this analysis shows that adding support for the features shown earlier in the chapter comes at no substantial cost in the performance of featrues common to both APIs.
     1494
     1495Moreover, the results support the \CFA string's position as a high-level enabler of simplified text processing.
     1496STL makes its user think about memory management.
     1497When the user does, and is successful, STL's performance can be very good.
     1498But when the user fails to think through the consequences of the STL representation, performance becomes poor.
     1499The \CFA string lets the user work at the level of just putting the right text into right variables, with corresponding performance degradations reduced or eliminated.
     1500
     1501% The final test shows the overall win of the \CFA text-sharing mechanism.
     1502% It exercises several operations together, showing \CFA enabling clean user code to achieve performance that STL requires less-clean user code to achieve.
     1503
     1504
     1505\subsection{Methodology}
     1506
     1507These tests use a \emph{corpus} of strings.
     1508Their lengths are important; the specific characters occurring in them are immaterial.
     1509In a result graph, a corpus's mean string length is often the independent variable shown on the X axis.
     1510
     1511When a corpus contains strings of different lenghths, the lengths are drawn from a geometric distribution.
     1512Therefore, strings much longer than the mean occur nontrivially and strings slightly shorter than the mean occur most often.
     1513A corpus's string sizes are one of:
     1514\begin{description}
     1515        \item [Fixed-size] all string lengths are of the stated size.
     1516        \item [Varying 1 and up] the string lengths are drawn from the geometric distribution with a stated mean and all lengths occur.
     1517        \item [Varying 16 and up] string lengths are drawn from the geometric distribution with the stated mean, but only lengths 16 and above occur; thus, the stated mean is above 16.  \PAB{Is this one unused?  May have just been for ``normalize.''}
     1518\end{description}
     1519The special treatment of length 16 deals with the short-string optimization (SSO) in STL @string@, currently not implemented in \CFA, though a fine future improvement to \CFA.
     1520In the general case, an STL string handle is a pointer (to separately allocated text) and a length.
     1521But when the text is shorter than this representation, the optimization repurposes the handle's storage to eliminate using the heap.
    14981522\begin{c++}
    14991523class string {
     
    15051529                char sstr[sizeof(lstr)]; $\C{// short string <16 characters, text in situ}$
    15061530        };
    1507         // some tagging for short or long strings
     1531        $\C{// tagging for kind (short or long) elided}$
    15081532};
    15091533\end{c++}
    1510 However, there is now a conditional check required on the fast-path to switch between short and long string operations.
    1511 
    1512 It might be possible to pack 16- or 32-bit Unicode characters within the same string buffer as 8-bit characters.
    1513 Again, locations for identification flags must be found and checked along the fast path to select the correct actions.
    1514 Handling utf8 (variable length), is more problematic because simple pointer arithmetic cannot be used to stride through the variable-length characters.
    1515 Trying to use a secondary array of fixed-sized pointers/offsets to the characters is possible, but raises the question of storage management for the utf8 characters themselves.
    1516 
    1517 
    1518 \section{Performance Assessment}
    1519 \label{s:PerformanceAssessment}
    1520 
    1521 I assessed the \CFA string library's speed and memory usage against strings in \CC STL.
    1522 Overall, this analysis shows that adding support for the features shown earlier in the chapter comes at no substantial cost in the performance of features common to both APIs.
    1523 
    1524 Moreover, the results support the \CFA string's position as a high-level enabler of simplified text processing.
    1525 STL makes its user think about memory management.
    1526 When the user does, and is successful, STL's performance can be very good.
    1527 But when the user fails to think through the consequences of the STL representation, performance becomes poor.
    1528 The \CFA string lets the user work at the level of just putting the right text into the right variables, with corresponding performance degradations reduced or eliminated.
    1529 
    1530 % The final test shows the overall win of the \CFA text-sharing mechanism.
    1531 % It exercises several operations together, showing \CFA enabling clean user code to achieve performance that STL requires less-clean user code to achieve.
    1532 
    1533 
    1534 \subsection{Methodology}
    1535 
    1536 These tests use a \emph{corpus} of strings.
    1537 Their lengths are important; the specific characters occurring in them are immaterial.
    1538 In a result graph, a corpus's mean string length is often the independent variable shown on the X axis.
    1539 
    1540 When a corpus contains strings of different lengths, the lengths are drawn from a geometric distribution.
    1541 Therefore, strings much longer than the mean occur less often and strings slightly shorter than the mean occur most often.
    1542 A corpus's string sizes are one of:
    1543 \begin{description}
    1544         \item [Fixed-size] all string lengths are of the stated size.
    1545         \item [Varying 1 and up] the string lengths are drawn from the geometric distribution with a stated mean and all lengths occur.
    1546         \item [Varying 16 and up] string lengths are drawn from the geometric distribution with the stated mean, but only lengths 16 and above occur; thus, the stated mean is above 16.
    1547 \end{description}
    1548 The special treatment of length 16 deals with the SSO in STL @string@, currently not implemented in \CFA.
     1534
    15491535A fixed-size or from-16 distribution ensures that \CC's extra-optimized cases are isolated within, or removed from, the comparison.
     1536
    15501537In all experiments that use a corpus, its text is generated and loaded into the system under test before the timed phase begins.
     1538
    15511539To ensure comparable results, a common memory allocator is used for \CFA and \CC.
    1552 \CFA runs the llheap allocator~\cite{Zulfiqar22}, which is also plugged into \CC.
     1540\CFA runs the llheap allocator~\cite{Zulfiqar22}; the test rig plugs this same allocator into \CC.
    15531541
    15541542The operations being measured take dozens of nanoseconds, so a succession of many invocations is run and timed as a group.
    1555 The experiments run for a fixed duration (5 seconds), as determined by re-checking @clock()@ every 10,000 invocations, which is never more often than once per 80 ms.
    1556 Timing outcomes report mean nanoseconds per invocation, which includes harness overhead and the targeted string API execution.
     1543The experiments run with fixed duration (targeting approximately 5 seconds), stopping upon passing a goal time, as determined by re-checking @clock()@ every 10,000 invocations, which is never more often than once per 80 ms.
     1544Timing outcomes reprt mean nanoseconds per invocation, which includes harness overhead and the targeted string API execution.
    15571545
    15581546\PAB{To discuss: hardware and such}
    15591547
    1560 As discussed in \VRef[Section]{string-raii-limit}, general performance comparisons are made using \CFA's faster, low-level string API, named @string_res@.
    1561 \VRef{s:ControllingImplicitSharing} presents an operational mode where \CFA string sharing is turned off.
    1562 In this mode, the \CFA string operates similarly to \CC's, by using a heap allocation for string text.
    1563 Some experiments include measurements in this mode for baselining purposes, called ``\CC emulation mode'' or ``nosharing''.
     1548As discussed in \VRef[Section]{string-raii-limit}, general performance comparisons are made using \CFA's faster, low-level string API, whose string type is named @string_res@.
     1549
     1550\VRef{s:ControllingImplicitSharing} presents an operational mode where \CFA string sharing is turned off.  In this mode, the \CFA string operates similarly to \CC's, by using a distinct heap allocation for each string's text.
     1551Some experiments include measurements in this mode for baselining purposes.
     1552It is called ``\CC emulation mode'' or ``nosharing'' here.
     1553
    15641554
    15651555
    15661556\subsection{Test: Append}
    15671557
    1568 These tests measure the speed of appending strings from the corpus onto a larger, growing string.
    1569 They show \CFA performing comparably to \CC overall, though with penalties for simple API misuses.
     1558These tests measure the speed of appending strings from the corpus onto a larger, growing string.  They show \CFA performing comparably to \CC overall, though with reduced penalties for simple API misuses for which \CC programmers may not know to watch out.
     1559
    15701560The basic harness is:
    1571 \begin{cfa}
    1572 // set alarm duration
    1573 for ( ... ) { $\C[1.5in]{// loop for duration}$
    1574         for ( i; N ) { $\C{// perform multiple appends (concatenations)}$
    1575                 accum += corpus[ f( i ) ];
     1561\begin{cquote}
     1562\setlength{\tabcolsep}{20pt}
     1563\begin{cfa}
     1564START_TIMER
     1565for ( ... ) {
     1566        string_res accum;
     1567        for ( i; 100 ) {
     1568                accum += corpus[ f(i) ]; // importing from char * here
     1569                COUNT_ONE_OP_DONE
    15761570        }
    1577         count += N; $\C{// count number of appends}\CRT$
    1578 }
    1579 \end{cfa}
    1580 The harness's outer loop executes for the experiment duration.
    1581 The string is reset to empty before appending (not shown).
    1582 The inner loop builds up a growing-length string with successive appends.
    1583 Each run targets a specific (mean) corpus string length and produces one data point on the result graph.
     1571}
     1572STOP_TIMER
     1573\end{cfa}
     1574\end{cquote}
     1575The harness's outer loop executes until a sample-worthy amount of execution has happened.
     1576The inner loop builds up the desired-length string with successive appends, before the outer makes it start over from a blank accumulator.
     1577Each harness run targets a specific (mean) corpus string length and produces one data point on the result graph.
     1578
    15841579Three specific comparisons are made with this harness.
    15851580Each picks its own independent-variable basis of comparison.
    1586 All three comparisons use the varying-from-1 corpus construction, \ie they allow the STL to show its advantage for SSO.
     1581
     1582All three comparisons use the varying-from-1 corpus construction, \ie they allow the STL to show its advantage from small-string optimization.
    15871583
    15881584
    15891585\subsubsection{Fresh vs Reuse in \CC, Emulation Baseline}
    15901586
    1591 The first experiment compares \CFA with \CC, with \CFA operating in nosharing mode and \CC having no other mode, hence both string package are using @malloc@/@free@.
    1592 % This experiment establishes a baseline for other experiments.
    1593 This experiment also introduces the first \CC coding pitfall, which the next experiment shows is helped by turning on \CFA sharing.
    1594 % This pitfall shows, a \CC programmer must pay attention to string variable reuse.
    1595 In the following, both programs are doing the same thing: start with @accum@ empty and build it up by appending @N@ strings (type @string@ in \CC and the faster @string_res@ in \CFA).
    1596 \begin{cquote}
    1597 \setlength{\tabcolsep}{40pt}
     1587The first experiment compares \CFA with \CC, with \CFA operating in nosharing mode (and \CC having no other mode).
     1588This experiment simply baselines how \CFA modestly lags \CC's optimization/tuning level generally, yet reproduces a coarser phenomenon.
     1589
     1590This experiment also introduces the first \CC coding pitfall, which the next experiment will show is helped by turning on \CFA sharing.  By this pitfall, a \CC programmer must pay attention to string variable reuse.
     1591
     1592\begin{cquote}
     1593\setlength{\tabcolsep}{20pt}
    15981594\begin{tabular}{@{}ll@{}}
    15991595% \multicolumn{1}{c}{\textbf{fresh}} & \multicolumn{1}{c}{\textbf{reuse}} \\
     
    16011597
    16021598for ( ... ) {
    1603         @string_res accum;@     $\C[1.5in]{// fresh}$
    1604         for ( N )
    1605                 accum @+=@ ...  $\C{// append}\CRT$
     1599        @string_res accum;@       // fresh
     1600        for ( ... )
     1601                accum @+=@ ...
    16061602}
    16071603\end{cfa}
     
    16101606string_res accum;
    16111607for ( ... ) {
    1612         @accum = "";@  $\C[1.5in]{// reuse}$
    1613         for ( N )
    1614                 accum @+=@ ...  $\C{// append}\CRT$
    1615 }
    1616 \end{cfa}
    1617 \end{tabular}
    1618 \end{cquote}
    1619 The difference is creating a new or reusing an existing string variable.
    1620 The pitfall is that most programmers do not consider this difference.
    1621 However, creating a new variable implies deallocating the previous string storage and allocating new empty storage.
    1622 As the string grows, further deallocations/allocations are required to release the previous and extend the current string storage.
    1623 So, the fresh version is constantly restarting with zero string storage, while the reuse version benefits from having its prior large storage from the last append sequence.
     1608        @accum = "";@  $\C[1in]{// reuse\CRT}$
     1609        for ( ... )
     1610                accum @+=@ ...
     1611}
     1612\end{cfa}
     1613\end{tabular}
     1614\end{cquote}
     1615
     1616Both programs are doing the same thing: start with @x@ empty and build it up by appending the same chunks.
     1617A programmer should not have to consider this difference.
     1618But from under the covers, each string being an individual allocation leaks through.
     1619While the inner loop is appending text to an @x@ that had not yet grown to have a large capacity, the program is, naturally, paying to extend the variable-length allocation, occasionally.
     1620This capacity stretching is a sticky property that survives assigning a (short, empty-string) value into an existing initialization.
     1621So, the ``reuse'' version benefits from not growing the allocation on subsequent runs of the inner loop.
     1622Yet, the ``fresh'' version is constantly restarting from a small buffer.
    16241623
    16251624\begin{figure}
     
    16271626        \includegraphics{plot-string-peq-cppemu.pdf}
    16281627%       \includegraphics[width=\textwidth]{string-graph-peq-cppemu.png}
    1629         \caption{Fresh vs Reuse in \CC, Emulation Baseline.
    1630         Average time per iteration with one \lstinline{x += y} invocation (lower is better).
    1631         Comparing \CFA's STL emulation mode with STL implementations, and comparing the fresh with reused reset styles.}
     1628        \caption{Fresh vs Reuse in \CC, Emulation Baseline.  Average time per iteration with one \lstinline{x += y} invocation (lower is better).  Comparing \CFA's STL emulation mode with STL implementations, and comparing the ``fresh'' with ``reused'' reset styles.}
    16321629        \label{fig:string-graph-peq-cppemu}
    1633         \bigskip
    1634         \bigskip
    1635         \includegraphics{plot-string-peq-sharing.pdf}
     1630\end{figure}
     1631
     1632\VRef[Figure]{fig:string-graph-peq-cppemu} shows the resulting performance.
     1633The fresh \vs reuse penalty is the dominant difference.
     1634The cost is 40\% averaged over the cases shown and minimally 24\%.
     1635It shows up consistently on both the \CFA and STL implementations, and this cost is more prominent with larger strings.
     1636
     1637The lesser \CFA \vs STL difference shows \CFA reproducing STL's performance, up to a 15\% penalty averaged over the cases shown, diminishing with larger strings, and 50\% in the worst case.
     1638This penalty characterizes implementation fine tuning done with STL and not done yet done with \CFA.
     1639
     1640
     1641\subsubsection{\CFA's Fresh-Reuse Compromise}
     1642
     1643This comparison has the same setup as the last one, except that the \CFA implementation is switched to use its sharing mode.  The outcome is that the fresh/reuse difference vanishes in \CFA, with \CFA consistently delivering performance that compromises between the two \CC cases.
     1644
     1645\begin{figure}
     1646\centering
     1647        \includegraphics{string-graph-peq-sharing.pdf}
    16361648%       \includegraphics[width=\textwidth]{string-graph-peq-sharing.png}
    1637         \caption{\CFA Compromise for Fresh \vs Reuse.
    1638         Average time per iteration with one \lstinline{x += y} invocation (lower is better).
    1639         Comparing \CFA's sharing mode with STL, and comparing the fresh with reused reset styles.
    1640         The \CC results are repeated from \VRef[Figure]{fig:string-graph-peq-cppemu}.}
     1649        \caption{\CFA Compromise for Fresh \vs Reuse.  Average time per iteration with one \lstinline{x += y} invocation (lower is better).  Comparing \CFA's sharing mode with STL, and comparing the ``fresh'' with ``reused'' reset styles.  The \CC results are repeated from \ref{fig:string-graph-peq-cppemu}.}
    16411650        \label{fig:string-graph-peq-sharing}
    16421651\end{figure}
    16431652
    1644 \VRef[Figure]{fig:string-graph-peq-cppemu} shows the resulting performance.
    1645 The two fresh (solid) lines and the two reuse (dash) lines are identical, except for lengths $\le$10, where the \CC SSO has a 40\% average and minimally 24\% advantage.
    1646 The gap between the fresh and reuse lines is the removal of the dynamic memory allocates and reuse of prior storage, \eg 100M allocations for fresh \vs 100 allocations for reuse across all experiments.
    1647 While allocation reduction is huge, data copying dominates the cost, so the lines are still reasonably close together.
    1648 
    1649 
    1650 \subsubsection{\CFA's Sharing Mode}
    1651 
    1652 This comparison is the same as the last one, except the \CFA implementation is using sharing mode.
    1653 Hence, both \CFA's fresh and reuse versions have no memory allocations, and as before, only for reuse does \CC have no memory allocations.
    1654 \VRef[Figure]{fig:string-graph-peq-sharing} shows the resulting performance.
    1655 For fresh at append lengths 5 and above, \CFA is now closer to the \CC reuse performance, because of removing the dynamic allocations.
    1656 However, for reuse, \CFA has slowed down slightly, to performance matching the new fresh version, as the two versions are now implemented virtually the same.
    1657 The reason for the \CFA reuse slow-down is the overhead of managing the sharing scheme (primarily maintaining the list of handles), without gaining any benefit.
    1658 
    1659 \begin{comment}
    1660 FIND A HOME!!!
    1661 The potential benefits of the sharing scheme do not give \CFA an edge over \CC when appending onto a reused string, though the first one helps \CFA win at going onto a fresh string.  These abilities are:
    1662 \begin{itemize}
    1663 \item
    1664 To grow a text allocation repeatedly without copying it elsewhere.
    1665 This ability is enabled by \CFA's most-recently modified string being located immediately before the text buffer's \emph{shared} bump-pointer area, \ie often a very large greenfield, relative to the \emph{individual} string being grown.
    1666 With \CC-reuse, this benefit is already reaped by the user's reuse of a pre-stretched allocation.
    1667 Yet \CC-fresh pays the higher cost because its room to grow for free is at most a constant times the original string's length.
    1668 \item
    1669 To share an individual text allocation across multiple related strings.
    1670 This ability is not applicable to appending with @+=@.
    1671 It in play in [xref sub-experiment pta] and [xref experiment pbv].
    1672 \item
    1673 To share a text arena across unrelated strings, sourcing disparate allocations from a common place.
    1674 That is, always allocating from a bump pointer, and never maintaining free lists.
    1675 This ability is not relevant to running any append scenario on \CFA with sharing, because appending modifies an existing allocation and is not driving several allocations.
    1676 This ability is assessed in [xref experiment allocn].
    1677 \end{itemize}
    1678 This cost, of slowing down append-with-reuse, is \CFA paying the piper for other scenarios done well.
    1679 \CFA prioritizes the fresh use case because it is more natural.
    1680 The \emph{user-invoked} reuse scheme is an unnatural programming act because it deliberately misuses lexical scope: a variable (@accum@) gets its lifetime extended beyond the scope in which it is used.
    1681 
    1682 A \CFA user needing the best performance on an append scenario can still access the \CC-like speed by invoking noshare.
    1683 This (indirect) resource management is memory-safe, as compared to that required in \CC to use @string&@, where knowledge of another string's lifetime comes into play.
    1684 This abstraction opt-out is also different from invoking the LL API-level option.
    1685 In fact, these considerations are orthogonal.
    1686 But the key difference is that invoking the LL API would be a temporary measure, to use a workaround of a known \CFA language issue; choosing to exempt a string from sharing is a permanent act of program tuning.
    1687 Beyond these comparisons, opting for noshare actually provides program ``eye candy,'' indicating that under-the-hood thinking is becoming relevant here.
    1688 \end{comment}
    1689 
    1690 
    1691 \subsubsection{Misusing Concatenation}
    1692 
    1693 A further pitfall occurs writing the apparently equivalent @x = x + y@ \vs @x += y@.
     1653\VRef[Figure]{fig:string-graph-peq-sharing} has the result.
     1654At append lengths 5 and above, \CFA not only splits the two STL cases, but its slowdown of 16\% over STL with user-managed reuse is close to the baseline \CFA-v-STL implementation difference seen with \CFA in STL-emulation mode.
     1655
     1656
     1657\subsubsection{\CFA's low overhead for misusing \lstinline{+}}
     1658
     1659A further pitfall occurs when the user writes @x = x + y@, rather than @x += y@.  Again, they are logically equivalent.
    16941660For numeric types, the generated code is equivalent, giving identical performance.
    16951661However, for string types there can be a significant difference.
    1696 This pitfall is a particularly likely for beginners.
     1662This pitfall is a particularly likely hazard for beginners.
    16971663
    16981664In earlier experiments, the choice of \CFA API among HL and LL had no impact on the functionality being tested.
     
    17421708\end{tabular}
    17431709\end{cquote}
    1744 Note, the goal code functions today in HL but with slower performance.
     1710Note that this ``Goal'' code functions today in HL.
    17451711
    17461712\begin{figure}
    17471713\centering
    1748         \includegraphics{plot-string-pta-sharing.pdf}
     1714        \includegraphics{string-graph-pta-sharing.pdf}
    17491715%       \includegraphics[width=\textwidth]{string-graph-pta-sharing.png}
    1750         \caption{CFA's low overhead for misusing concatenation.  Average time per iteration with one \lstinline{x += y} invocation (lower is better). Comparing \CFA (having implicit sharing activated) with STL, and comparing the \lstinline{+}-then-\lstinline{=} with the \lstinline{+=} append styles.  The \lstinline{+=} results are repeated from \VRef[Figure]{fig:string-graph-peq-sharing}.}
     1716        \caption{CFA's low overhead for misusing \lstinline{+}.  Average time per iteration with one \lstinline{x += y} invocation (lower is better). Comparing \CFA (having implicit sharing activated) with STL, and comparing the \lstinline{+}-then-\lstinline{=} with the \lstinline{+=} append styles.  The \lstinline{+=} results are repeated from \VRef[Figure]{fig:string-graph-peq-sharing}.}
    17511717        \label{fig:string-graph-pta-sharing}
    17521718\end{figure}
    17531719
    1754 \VRef[Figure]{fig:string-graph-pta-sharing} gives the outcome, where the Y-axis is log scale because of the large differences.
    1755 The STL's penalty is $8 \times$ while \CFA's is only $2 \times$, averaged across the cases shown here.
     1720\VRef[Figure]{fig:string-graph-pta-sharing} gives the outcome.  The STL's penalty is $8 \times$ while \CFA's is only $2 \times$, averaged across the cases shown here.
    17561721Moreover, the STL's gap increases with string size, while \CFA's converges.
    17571722So again, \CFA helps users who just want to treat strings as values, and not think about the resource management under the covers.
    17581723
    1759 While not a design goal, and not graphed, \CFA in STL-emulation mode outperformed STL in this case.
    1760 User-managed allocation reuse did not affect either implementation in this case; only ``fresh'' results are shown.
    1761 
    1762 
    1763 \subsection{Test: Pass Argument}
     1724While not a design goal, and not graphed out, \CFA in STL-emulation mode heppened to outperform STL in this case.  User-managed allocation reuse did not affect either implementation in this case; only ``fresh'' results are shown.
     1725
     1726
     1727\subsection{Test: Pass argument}
    17641728
    17651729STL has a penalty for passing a string by value, which forces users to think about memory management when communicating values with a function.
    1766 The key \CFA value-add is that a user can think of a string simply as a value; this test shows that \CC charges a stiff penalty for thinking this way, while \CFA does not.
     1730The key \CFA value-add is that a user can think of a string simply as a value; this test shows that \CC charges a stiff penalty for thining this way, while \CFA does not.
    17671731This test illustrates a main advantage of the \CFA sharing algorithm (in one case).
    1768 It shows STL's SSO providing a successful mitigation (in the other case).
     1732It shows STL's small-string optimization providing a successful mitigation (in the other case).
    17691733
    17701734The basic operation considered is:
     
    17851749
    17861750}
    1787 for ( ... ) { // loop for duration
    1788         helper( corpus[ f( i ) ] );
    1789         count += 1;
    1790 }
     1751START_TIMER
     1752for ( i; ... ) {
     1753        helper( corpus[ f(i) ] ); // imported from char * previously
     1754        COUNT_ONE_OP_DONE
     1755}
     1756STOP_TIMER
    17911757\end{cfa}
    17921758&
     
    17951761        string_res q = { qref, COPY_VALUE };
    17961762}
    1797 
    1798 
    1799 
    1800 
    1801 \end{cfa}
    1802 \end{tabular}
    1803 \end{cquote}
    1804 The goal (HL) version gives the modified test harness, with a single loop.
    1805 Each iteration uses a corpus item as the argument to the function call.
     1763// rest same, elided
     1764
     1765
     1766
     1767
     1768
     1769\end{cfa}
     1770\end{tabular}
     1771\end{cquote}
     1772The Goal (HL) version gives the simplest sketch of the test harness.
     1773It uses a single level of looping.
     1774Each iteration uses a corpus item as the argument to a function call.
    18061775These corpus items were imported to the string heap before beginning the timed run.
     1776
    18071777
    18081778\begin{figure}
    18091779\centering
    1810         \includegraphics{plot-string-pbv.pdf}
     1780        \includegraphics{string-graph-pbv.pdf}
    18111781%       \includegraphics[width=\textwidth]{string-graph-pbv.png}
    1812         \begin{tabularx}{\linewidth}{>{\centering\arraybackslash}X >{\centering\arraybackslash}X} (a) & (b) \end{tabularx}
    18131782        \caption{Average time per iteration (lower is better) with one call to a function that takes a by-value string argument, comparing \CFA (having implicit sharing activated) with STL.
    1814 (a) With \emph{Varying-from-1} corpus construction, in which the STL-only benefit of SSO optimization occurs, in varying degrees, at all string sizes.
    1815 (b) With \emph{Fixed-size} corpus construction, in which this benefit applies exactly to strings with length below 16.}
     1783(a) With \emph{Varying-from-1} corpus construction, in which the STL-only benefit of small-string optimization occurs, in varying degrees, at all string sizes.
     1784(b) With \emph{Fixed-size} corpus construction, in which this benefit applies exactly to strings with length below 16.
     1785[TODO: show version (b)]}
    18161786        \label{fig:string-graph-pbv}
    18171787\end{figure}
    18181788
     1789
    18191790\VRef[Figure]{fig:string-graph-pbv} shows the costs for calling a function that receives a string argument by value.
    1820 STL's performance worsens uniformly as string length increases, while \CFA has the same performance at all sizes.
    1821 Although the STL is better than \CFA until string length 10 because of the SSO.
     1791STL's performance worsens as string length increases, while \CFA has the same performance at all sizes.
     1792
    18221793While improved, the \CFA cost to pass a string is still nontrivial.
    18231794The contributor is adding and removing the callee's string handle from the global list.
    1824 This cost is $1.5 \times$ to $2 \times$ over STL's when SSO applies, but is avoidable once \CFA realizes this optimization.
    1825 At the larger sizes, the STL runs more than $3 \times$ slower, because it has to allocation/deallocate storage for the parameter and copy the argument string to the parameter.
    1826 If the \CC string is passed by reference, the results are better and flat across string lengths like \CFA.
     1795This cost is $1.5 \times$ to $2 \times$ over STL's when small-string optimization applies, though this cost should be avoidable in the same case, upon a \CFA realization of this optimization.
     1796At the larger sizes, when STL has to manage storage for the string, STL runs more than $3 \times$ slower, mainly due to time spent in the general-purpose memory allocator.
     1797\PAB{Need to check that.  Expecting copying to dominate.}
    18271798
    18281799
     
    18341805
    18351806A garbage collector, afforded the freedom of managed memory (where it knows about all the pointers and is allowed to modify them), often runs faster than malloc-free in an amortized analysis, even though it must occasionally stop to collect.
    1836 The speedup happens because GC is able to use its collection time to move objects.
    1837 (In the case of the mini-allocator powering the \CFA string library, objects are runs of text.)
    1838 Moving objects lets fresh allocations consume from a large contiguous store of available memory; the ``bump pointer'' bookkeeping for such a scheme is very light.
     1807The sppedup happens because GC is able to use its collection time to move objects.
     1808(In the case of the mini-allocator powering the \CFA string library, objects are runs of text.)  Moving objects lets fresh allocations consume from a large contiguous store of available memory; the ``bump pointer'' bookkeeping for such a scheme is very light.
    18391809A malloc-free implementation without the freedom to move objects must, in the general case, allocate in the spaces between existing objects; doing so entails the heavier bookkeeping of maintaining a linked structure of freed allocations and/or coalescing freed allocations.
    18401810
     
    18921862\begin{figure}
    18931863\centering
    1894   \includegraphics{plot-string-allocn.pdf}
     1864  \includegraphics{string-graph-allocn.pdf}
    18951865% \includegraphics[width=\textwidth]{string-graph-allocn.png}
    18961866  \caption{Space and time performance, under varying fraction-live targets, for the five string lengths shown, at \emph{Fixed-size} corpus construction.
  • libcfa/prelude/prelude-gen.cc

    rbd72f517 r7d02d35  
    149149}
    150150
    151 template<typename... T>
    152 using make_array_t = std::array<std::decay_t<std::common_type_t<T...>>, sizeof...(T)>;
    153 
    154 template<typename... T>
    155 constexpr make_array_t<T...> make_array(T&&... values) {
    156         return make_array_t<T...>{{std::forward<T>(values)...}};
     151template <typename... T>
     152constexpr auto make_array(T&&... values) ->
     153    std::array<
     154        typename std::decay<typename std::common_type<T...>::type>::type,
     155        sizeof...(T)>
     156{
     157    return std::array<
     158        typename std::decay<
     159            typename std::common_type<T...>::type>::type,
     160        sizeof...(T)>{{std::forward<T>(values)...}};
    157161}
    158162
  • src/InitTweak/InitTweak.cpp

    rbd72f517 r7d02d35  
    6868        };
    6969
    70         struct InitDepthChecker : public ast::WithShortCircuiting {
     70        struct InitDepthChecker {
    7171                bool result = true;
    7272                const ast::Type * type;
     
    8686                void postvisit( ast::ListInit const * ) {
    8787                        curDepth--;
    88                 }
    89                 void previsit( ast::SingleInit const * ) {
    90                         // We don't want to visit the value field.
    91                         visit_children = false;
    9288                }
    9389        };
  • src/Parser/TypeData.cpp

    rbd72f517 r7d02d35  
    737737                        location,
    738738                        "?=?",
     739                        {}, // forall
     740                        {}, // assertions
    739741                        {
    740742                                new ast::ObjectDecl(
     
    777779                        location,
    778780                        "?{}",
     781                        {}, // forall
     782                        {}, // assertions
    779783                        {
    780784                                new ast::ObjectDecl(
     
    798802                        location,
    799803                        "?{}",
     804                        {}, // forall
     805                        {}, // assertions
    800806                        {
    801807                                new ast::ObjectDecl(
     
    828834                        location,
    829835                        "^?{}",
     836                        {}, // forall
     837                        {}, // assertions
    830838                        {
    831839                                new ast::ObjectDecl(
     
    874882                        location,
    875883                        "?=?",
     884                        {}, // forall
     885                        {}, // assertions
    876886                        {
    877887                                new ast::ObjectDecl(
     
    914924                        location,
    915925                        "?{}",
     926                        {}, // forall
     927                        {}, // assertions
    916928                        {
    917929                                new ast::ObjectDecl(
     
    936948                        location,
    937949                        "?{}",
     950                        {}, // forall
     951                        {}, // assertions
    938952                        {
    939953                                new ast::ObjectDecl(
     
    967981                        location,
    968982                        "^?{}",
     983                        {}, // forall
     984                        {}, // assertions
    969985                        {
    970986                                new ast::ObjectDecl(
Note: See TracChangeset for help on using the changeset viewer.