Changes in / [673cd63:ffe2fad]
- Files:
-
- 25 edited
-
configure (modified) (4 diffs)
-
configure.ac (modified) (2 diffs)
-
doc/bibliography/pl.bib (modified) (12 diffs)
-
doc/theses/aaron_moss_PhD/phd/background.tex (modified) (15 diffs)
-
doc/theses/aaron_moss_PhD/phd/conclusion.tex (modified) (1 diff)
-
doc/theses/aaron_moss_PhD/phd/evaluation/algo-summary.dat (modified) (1 diff)
-
doc/theses/aaron_moss_PhD/phd/evaluation/algo-summary.gp (modified) (1 diff)
-
doc/theses/aaron_moss_PhD/phd/evaluation/cfa-plots.gp (modified) (2 diffs)
-
doc/theses/aaron_moss_PhD/phd/evaluation/per-prob-scatter.gp (modified) (1 diff)
-
doc/theses/aaron_moss_PhD/phd/experiments.tex (modified) (12 diffs)
-
doc/theses/aaron_moss_PhD/phd/figures/union-find-with-classes.eps (modified) ( previous)
-
doc/theses/aaron_moss_PhD/phd/figures/union-find-with-classes.odg (modified) ( previous)
-
doc/theses/aaron_moss_PhD/phd/frontpgs.tex (modified) (3 diffs)
-
doc/theses/aaron_moss_PhD/phd/generic-bench.tex (modified) (1 diff)
-
doc/theses/aaron_moss_PhD/phd/generic-types.tex (modified) (17 diffs)
-
doc/theses/aaron_moss_PhD/phd/introduction.tex (modified) (4 diffs)
-
doc/theses/aaron_moss_PhD/phd/macros.tex (modified) (1 diff)
-
doc/theses/aaron_moss_PhD/phd/resolution-heuristics.tex (modified) (17 diffs)
-
doc/theses/aaron_moss_PhD/phd/thesis.tex (modified) (1 diff)
-
doc/theses/aaron_moss_PhD/phd/type-environment.tex (modified) (6 diffs)
-
libcfa/configure (modified) (1 diff)
-
libcfa/configure.ac (modified) (1 diff)
-
libcfa/src/stdlib.hfa (modified) (4 diffs)
-
src/Makefile.am (modified) (1 diff)
-
src/Makefile.in (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
configure
r673cd63 rffe2fad 637 637 LIBOBJS 638 638 CFA_BACKEND_CC 639 WITH_LIBTCMALLOC_FALSE640 WITH_LIBTCMALLOC_TRUE641 WITH_LIBPROFILER_FALSE642 WITH_LIBPROFILER_TRUE643 639 WITH_LIBFIBRE_FALSE 644 640 WITH_LIBFIBRE_TRUE … … 3402 3398 "debug") ;; 3403 3399 "nolib") ;; 3404 "profile") ;;3405 3400 *) 3406 3401 >&2 echo "Configuration must be 'debug', 'nodebug' or 'nolib'" … … 16662 16657 16663 16658 16664 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ProfilingIsEnabledForAllThreads in -lprofiler" >&516665 $as_echo_n "checking for ProfilingIsEnabledForAllThreads in -lprofiler... " >&6; }16666 if ${ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads+:} false; then :16667 $as_echo_n "(cached) " >&616668 else16669 ac_check_lib_save_LIBS=$LIBS16670 LIBS="-lprofiler $LIBS"16671 cat confdefs.h - <<_ACEOF >conftest.$ac_ext16672 /* end confdefs.h. */16673 16674 /* Override any GCC internal prototype to avoid an error.16675 Use char because int might match the return type of a GCC16676 builtin and then its argument prototype would still apply. */16677 #ifdef __cplusplus16678 extern "C"16679 #endif16680 char ProfilingIsEnabledForAllThreads ();16681 int16682 main ()16683 {16684 return ProfilingIsEnabledForAllThreads ();16685 ;16686 return 0;16687 }16688 _ACEOF16689 if ac_fn_c_try_link "$LINENO"; then :16690 ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads=yes16691 else16692 ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads=no16693 fi16694 rm -f core conftest.err conftest.$ac_objext \16695 conftest$ac_exeext conftest.$ac_ext16696 LIBS=$ac_check_lib_save_LIBS16697 fi16698 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" >&516699 $as_echo "$ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" >&6; }16700 if test "x$ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" = xyes; then :16701 HAVE_LIBPROFILER=116702 else16703 HAVE_LIBPROFILER=016704 fi16705 16706 if test "$HAVE_LIBPROFILER" -eq 1; then16707 WITH_LIBPROFILER_TRUE=16708 WITH_LIBPROFILER_FALSE='#'16709 else16710 WITH_LIBPROFILER_TRUE='#'16711 WITH_LIBPROFILER_FALSE=16712 fi16713 16714 16715 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for malloc in -ltcmalloc" >&516716 $as_echo_n "checking for malloc in -ltcmalloc... " >&6; }16717 if ${ac_cv_lib_tcmalloc_malloc+:} false; then :16718 $as_echo_n "(cached) " >&616719 else16720 ac_check_lib_save_LIBS=$LIBS16721 LIBS="-ltcmalloc $LIBS"16722 cat confdefs.h - <<_ACEOF >conftest.$ac_ext16723 /* end confdefs.h. */16724 16725 /* Override any GCC internal prototype to avoid an error.16726 Use char because int might match the return type of a GCC16727 builtin and then its argument prototype would still apply. */16728 #ifdef __cplusplus16729 extern "C"16730 #endif16731 char malloc ();16732 int16733 main ()16734 {16735 return malloc ();16736 ;16737 return 0;16738 }16739 _ACEOF16740 if ac_fn_c_try_link "$LINENO"; then :16741 ac_cv_lib_tcmalloc_malloc=yes16742 else16743 ac_cv_lib_tcmalloc_malloc=no16744 fi16745 rm -f core conftest.err conftest.$ac_objext \16746 conftest$ac_exeext conftest.$ac_ext16747 LIBS=$ac_check_lib_save_LIBS16748 fi16749 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tcmalloc_malloc" >&516750 $as_echo "$ac_cv_lib_tcmalloc_malloc" >&6; }16751 if test "x$ac_cv_lib_tcmalloc_malloc" = xyes; then :16752 HAVE_LIBTCMALLOC=116753 else16754 HAVE_LIBTCMALLOC=016755 fi16756 16757 if test "$HAVE_LIBTCMALLOC" -eq 1; then16758 WITH_LIBTCMALLOC_TRUE=16759 WITH_LIBTCMALLOC_FALSE='#'16760 else16761 WITH_LIBTCMALLOC_TRUE='#'16762 WITH_LIBTCMALLOC_FALSE=16763 fi16764 16765 16766 16659 # Checks for header files. 16767 16660 for ac_header in libintl.h malloc.h unistd.h … … 16996 16889 if test -z "${WITH_LIBFIBRE_TRUE}" && test -z "${WITH_LIBFIBRE_FALSE}"; then 16997 16890 as_fn_error $? "conditional \"WITH_LIBFIBRE\" was never defined. 16998 Usually this means the macro was only invoked conditionally." "$LINENO" 516999 fi17000 if test -z "${WITH_LIBPROFILER_TRUE}" && test -z "${WITH_LIBPROFILER_FALSE}"; then17001 as_fn_error $? "conditional \"WITH_LIBPROFILER\" was never defined.17002 Usually this means the macro was only invoked conditionally." "$LINENO" 517003 fi17004 if test -z "${WITH_LIBTCMALLOC_TRUE}" && test -z "${WITH_LIBTCMALLOC_FALSE}"; then17005 as_fn_error $? "conditional \"WITH_LIBTCMALLOC\" was never defined.17006 16891 Usually this means the macro was only invoked conditionally." "$LINENO" 5 17007 16892 fi -
configure.ac
r673cd63 rffe2fad 139 139 "debug") ;; 140 140 "nolib") ;; 141 "profile") ;;142 141 *) 143 142 >&2 echo "Configuration must be 'debug', 'nodebug' or 'nolib'" … … 191 190 AM_CONDITIONAL([WITH_LIBFIBRE], [test "$HAVE_LIBFIBRE" -eq 1]) 192 191 193 AC_CHECK_LIB([profiler], [ProfilingIsEnabledForAllThreads], [HAVE_LIBPROFILER=1], [HAVE_LIBPROFILER=0])194 AM_CONDITIONAL([WITH_LIBPROFILER], [test "$HAVE_LIBPROFILER" -eq 1])195 196 AC_CHECK_LIB([tcmalloc], [malloc], [HAVE_LIBTCMALLOC=1], [HAVE_LIBTCMALLOC=0])197 AM_CONDITIONAL([WITH_LIBTCMALLOC], [test "$HAVE_LIBTCMALLOC" -eq 1])198 199 192 # Checks for header files. 200 193 AC_CHECK_HEADERS([libintl.h malloc.h unistd.h], [], [echo "Error: Missing required header"; exit 1]) -
doc/bibliography/pl.bib
r673cd63 rffe2fad 973 973 contributer = {pabuhr@plg}, 974 974 author = {Aaron Moss and Robert Schluntz and Peter A. Buhr}, 975 title = {\textsf{C}$\mathbf{\forall}$ : Adding Modern Programming Language Features to {C}},975 title = {\textsf{C}$\mathbf{\forall}$ : Adding Modern Programming Language Features to C}, 976 976 journal = spe, 977 977 volume = 48, … … 1093 1093 } 1094 1094 1095 @techreport{Prokopec11,1096 keywords = {ctrie, concurrent map},1097 contributer = {a3moss@uwaterloo.ca},1098 title={Cache-aware lock-free concurrent hash tries},1099 author={Prokopec, Aleksandar and Bagwell, Phil and Odersky, Martin},1100 institution={EPFL},1101 year={2011}1102 }1103 1104 1095 @article{Buhr85, 1105 1096 keywords = {goto, multi-exit loop}, … … 1148 1139 year = 1998, 1149 1140 note = {{\small\textsf{ftp://\-plg.uwaterloo.ca/\-pub/\-Cforall/\-refrat.ps.gz}}}, 1150 }1151 1152 @phdthesis{Norrish98,1153 title={C formalised in HOL},1154 author={Norrish, Michael},1155 year={1998},1156 school={University of Cambridge}1157 1141 } 1158 1142 … … 1174 1158 contributer = {a3moss@uwaterloo.ca}, 1175 1159 title = {Clang: a {C} language family frontend for {LLVM}}, 1176 howpublished= {\href{https://clang.llvm.org/}{https://\-clang.llvm.org/}} 1160 howpublished= {\href{https://clang.llvm.org/}{https://\-clang.llvm.org/}}, 1161 note = {Accessed 2019-02-22} 1177 1162 } 1178 1163 … … 1275 1260 number = 11, 1276 1261 pages = {853-860}, 1277 }1278 1279 @inproceedings{Odersky01,1280 keywords = {Scala},1281 contributer = {a3moss@uwaterloo.ca},1282 author = {Odersky, Martin and Zenger, Christoph and Zenger, Matthias},1283 title = {Colored Local Type Inference},1284 booktitle = {Proceedings of the 28th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},1285 series = {POPL '01},1286 year = {2001},1287 isbn = {1-58113-336-7},1288 location = {London, United Kingdom},1289 pages = {41--53},1290 numpages = {13},1291 url = {http://doi.acm.org/10.1145/360204.360207},1292 doi = {10.1145/360204.360207},1293 acmid = {360207},1294 publisher = {ACM},1295 address = {New York, NY, USA},1296 1262 } 1297 1263 … … 1685 1651 } 1686 1652 1687 @inproceedings{Prokopec12,1688 keywords={ctrie, hash trie, concurrent map},1689 contributer={a3moss@uwaterloo.ca},1690 title={Concurrent tries with efficient non-blocking snapshots},1691 author={Prokopec, Aleksandar and Bronson, Nathan Grasso and Bagwell, Phil and Odersky, Martin},1692 booktitle={ACM SIGPLAN Notices},1693 volume={47},1694 number={8},1695 pages={151--160},1696 year={2012},1697 organization={ACM}1698 }1699 1700 1653 @article{Buhr05a, 1701 1654 keywords = {concurrency, myths}, … … 3179 3132 year = 2018, 3180 3133 } 3181 3182 @article{Leroy09,3183 keywords = {C formalization},3184 contributer = {a3moss@uwaterloo.ca},3185 author = {Leroy, Xavier},3186 title = {Formal Verification of a Realistic Compiler},3187 journal = {Commun. ACM},3188 issue_date = {July 2009},3189 volume = {52},3190 number = {7},3191 month = jul,3192 year = {2009},3193 issn = {0001-0782},3194 pages = {107--115},3195 numpages = {9},3196 url = {http://doi.acm.org/10.1145/1538788.1538814},3197 doi = {10.1145/1538788.1538814},3198 acmid = {1538814},3199 publisher = {ACM},3200 address = {New York, NY, USA},3201 }3202 3134 3203 3135 @manual{Fortran95, … … 4123 4055 number = 1, 4124 4056 pages = {1-15}, 4125 }4126 4127 @article{Morgado13,4128 keywords = {expression resolution},4129 contributer = {a3moss@uwaterloo.ca},4130 title={Iterative and core-guided {MaxSAT} solving: A survey and assessment},4131 author={Morgado, Antonio and Heras, Federico and Liffiton, Mark and Planes, Jordi and Marques-Silva, Joao},4132 journal={Constraints},4133 volume={18},4134 number={4},4135 pages={478--534},4136 year={2013},4137 publisher={Springer}4138 4057 } 4139 4058 … … 4381 4300 } 4382 4301 4383 @article{Pierce00,4384 keywords = {Scala},4385 contributer = {a3moss@uwaterloo.ca},4386 author = {Pierce, Benjamin C. and Turner, David N.},4387 title = {Local Type Inference},4388 journal = {ACM Trans. Program. Lang. Syst.},4389 issue_date = {Jan. 2000},4390 volume = {22},4391 number = {1},4392 month = jan,4393 year = {2000},4394 issn = {0164-0925},4395 pages = {1--44},4396 numpages = {44},4397 url = {http://doi.acm.org/10.1145/345099.345100},4398 doi = {10.1145/345099.345100},4399 acmid = {345100},4400 publisher = {ACM},4401 address = {New York, NY, USA},4402 keywords = {polymorphism, subtyping, type inference},4403 }4404 4405 4302 @article{Sundell08, 4406 4303 keywords = {lock free, deque}, … … 5373 5270 note = {\href{https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf}{https://\-www.openmp.org/\-wp-content/\-uploads/\-openmp-4.5.pdf}}, 5374 5271 } 5375 5376 @inproceedings{Krebbers14,5377 keywords = {c formalization},5378 contributer = {a3moss@uwaterloo.ca},5379 author = {Krebbers, Robbert},5380 title = {An Operational and Axiomatic Semantics for Non-determinism and Sequence Points in C},5381 booktitle = {Proceedings of the 41st ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},5382 series = {POPL '14},5383 year = {2014},5384 isbn = {978-1-4503-2544-8},5385 location = {San Diego, California, USA},5386 pages = {101--112},5387 numpages = {12},5388 url = {http://doi.acm.org/10.1145/2535838.2535878},5389 doi = {10.1145/2535838.2535878},5390 acmid = {2535878},5391 publisher = {ACM},5392 address = {New York, NY, USA},5393 }5394 5272 5395 5273 @book{Deitel04, … … 7456 7334 } 7457 7335 7458 @article{SysVABI,7459 keywords = {System V ABI},7460 contributer = {a3moss@uwaterloo.ca},7461 title={System {V} application binary interface},7462 author={Matz, Michael and Hubicka, Jan and Jaeger, Andreas and Mitchell, Mark},7463 journal={AMD64 Architecture Processor Supplement, Draft v0},7464 volume={99},7465 year={2013}7466 }7467 7468 7336 % T 7469 7337 … … 7687 7555 Argues against declaring exceptions on routine definitions. 7688 7556 }, 7689 }7690 7691 @techreport{Black90,7692 title={Typechecking polymorphism in {Emerald}},7693 author={Black, Andrew P and Hutchinson, Norman C},7694 year={1990},7695 institution={Cambridge Research Laboratory, Digital Equipment Corporation}7696 7557 } 7697 7558 -
doc/theses/aaron_moss_PhD/phd/background.tex
r673cd63 rffe2fad 5 5 To provide background for the contributions in subsequent chapters, this chapter provides a summary of the features of \CFA{} at the time this work was conducted. 6 6 7 Glen Ditchfield laid out the core design of \CFA{} in his 1992 PhD thesis, \emph{Contextual Polymorphism} \cite{Ditchfield92}; in that thesis, Ditchfield presents the theoretical underpinnings of the \CFA{} polymorphism model.7 The core design of \CFA{} is laid out in Glen Ditchfield's 1992 PhD thesis, \emph{Contextual Polymorphism} \cite{Ditchfield92}; in that thesis, Ditchfield presents the theoretical underpinnings of the \CFA{} polymorphism model. 8 8 Building on Ditchfield's design for contextual polymorphism as well as KW-C \cite{Buhr94a}, an earlier set of (largely syntactic) extensions to C, Richard Bilson \cite{Bilson03} built the first version of the \CFA{} compiler, \CFACC{}, in the early 2000's. 9 9 This early \CFACC{} provided basic functionality, but incorporated a number of algorithmic choices that have failed to scale as \CFA{} has developed, lacking the runtime performance for practical use; this thesis is substantially concerned with rectifying those deficits. … … 13 13 Notable features added during this period include generic types (Chapter~\ref{generic-chap}), constructors and destructors \cite{Schluntz17}, improved support for tuples \cite{Schluntz17}, reference types \cite{Moss18}, first-class concurrent and parallel programming support \cite{Delisle18}, as well as numerous pieces of syntactic sugar and the start of an idiomatic standard library \cite{Moss18}. 14 14 15 This thesis is primarily concerned with the \emph{expression resolution} portion of \CFA{} type-checking; resolution is discussed in more detail in Chapter~\ref{resolution-chap}, but is essentially determining which declarations the identifiers in each expression correspond to. 16 In C, no simultaneously-visible declarations share identifiers, hence expression resolution in C is not difficult. 17 In \CFA{}, multiple added features make the resolution process significantly more complex. 18 Due to this complexity, the expression-resolution pass in \CFACC{} requires 95\% of compiler runtime on some source files, making a new, more efficient procedure for expression resolution a requirement for a performant \CFA{} compiler. 19 20 The features presented in this chapter are chosen to elucidate the design constraints of the work presented in this thesis. 15 The selection of features presented in this chapter are chosen to elucidate the design constraints of the work presented in this thesis. 21 16 In some cases the interactions of multiple features make this design a significantly more complex problem than any individual feature; in other cases a feature that does not by itself add any complexity to expression resolution triggers previously rare edge cases more frequently. 22 17 … … 32 27 33 28 Another aspect of \CFA{}'s procedural paradigm is that it retains C's translation-unit-based encapsulation model, rather than class-based encapsulation such as in \CC{}. 34 As such, any language feature that requires code to be exposed in header files (\eg{} \CC{} templates) also eliminates encapsulation in \CFA{}. 35 Given this constraint, \CFA{} is carefully designed to allow separate compilation for its added language features under the existing C usage patterns. 29 This design choice implies that separate compilation must be maintained to allow headers to act as an encapsulation boundary, rather than the header-only libraries used by \CC{} templates. 36 30 37 31 \section{Name Overloading} \label{overloading-sec} … … 94 88 The addition of !one_t! allows generic algorithms to handle the unit value uniformly for types where it is meaningful; a simple example of this is that polymorphic functions\footnote{discussed in Section~\ref{poly-func-sec}} in the \CFA{} prelude define !++x! and !x++! in terms of !x += 1!, allowing users to idiomatically define all forms of increment for a type !T! by defining the single function !T& ?+=?(T&, one_t)!; analogous overloads for the decrement operators are also present, and programmers can override any of these functions for a particular type if desired. 95 89 96 \CFA{} previously allowed !0! and !1! to be the names of polymorphic variables, with separate overloads for !int 0!, !int 1!, and the polymorphic variable!forall(dtype T) T* 0!.90 \CFA{} previously allowed !0! and !1! to be the names of polymorphic variables, with separate overloads for !int 0!, !int 1!, and !forall(dtype T) T* 0!. 97 91 While designing \CFA{} generic types (see Chapter~\ref{generic-chap}), it was discovered that the parametric polymorphic zero variable is not generalizable to other types; though all null pointers have the same in-memory representation, the same cannot be said of the zero values of arbitrary types. 98 As such, polymorphic variables, and in particular variables for !0! and !1!,were phased out in favour of functions that could generate those values for a given type as appropriate.92 As such, variables that could represent !0! and !1! were phased out in favour of functions that could generate those values for a given type as appropriate. 99 93 100 94 \section{Polymorphic Functions} \label{poly-func-sec} … … 111 105 The type variable !T! is transformed into a set of additional implicit parameters to !identity!, which encode sufficient information about !T! to create and return a variable of that type. 112 106 \CFA{} passes the size and alignment of the type represented by an !otype! parameter, as well as a default constructor, copy constructor, assignment operator, and destructor. 113 Types thatdo not have one or more of these operators visible cannot be bound to !otype! parameters, but may be bound to un-constrained !dtype! (``data type'') type variables.107 Types which do not have one or more of these operators visible cannot be bound to !otype! parameters, but may be bound to un-constrained !dtype! (``data type'') type variables. 114 108 In this design, the runtime cost of polymorphism is spread over each polymorphic call, due to passing more arguments to polymorphic functions; the experiments in Chapter~\ref{generic-chap} indicate that this overhead is comparable to that of \CC{} virtual function calls. 115 109 % \TODO{rerun experiments, possibly look at vtable variant} … … 121 115 \subsection{Type Assertions} 122 116 123 Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type\footnote{ This example introduces a convention used throughout this thesis of disambiguating overloaded names with subscripts; the subscripts do not appear in \CFA{} source code.}:117 Since bare polymorphic types do not provide a great range of available operations, \CFA{} provides a \emph{type assertion} mechanism to provide further information about a type\footnote{Subscript not included in source code.\label{sub-foot}}: 124 118 125 119 \begin{cfa} … … 135 129 136 130 Monomorphic specializations of polymorphic functions can themselves be used to satisfy type assertions. 137 For instance, !twice! could have been defined like this :131 For instance, !twice! could have been defined like this\footref{sub-foot}: 138 132 139 133 \begin{cfa} … … 142 136 \end{cfa} 143 137 144 Specializing this polymorphic function with !S = double! produces a monomorphic function which can be used to satisfy the type assertion on !four_times!.138 Specializing this polymorphic function with !S = double! produces a monomorphic function which can be used to satisfy the type assertion on !four_times!. 145 139 \CFACC{} accomplishes this by creating a wrapper function calling !twice!$_2$ with !S! bound to !double!, then providing this wrapper function to !four_times!\footnote{\lstinline{twice}$_2$ could also have had a type parameter named \lstinline{T}; \CFA{} specifies renaming of the type parameters, which would avoid the name conflict with the type variable \lstinline{T} of \lstinline{four_times}}. 146 140 However, !twice!$_2$ also works for any type !S! that has an addition operator defined for it. 147 141 148 Finding appropriate functions to satisfy type assertions is essentially a recursive case of expression resolution, as it takes a name (that of the type assertion) and attempts to match it to a suitable declaration in the current scope \footnote{\CFACC{} actually performs a type-unification computation for assertion satisfaction rather than an expression resolution computation; see Section~\ref{assn-sat-sec} for details.}.142 Finding appropriate functions to satisfy type assertions is essentially a recursive case of expression resolution, as it takes a name (that of the type assertion) and attempts to match it to a suitable declaration in the current scope. 149 143 If a polymorphic function can be used to satisfy one of its own type assertions, this recursion may not terminate, as it is possible that that function is examined as a candidate for its own assertion unboundedly repeatedly. 150 144 To avoid such infinite loops, \CFACC{} imposes a fixed limit on the possible depth of recursion, similar to that employed by most \CC{} compilers for template expansion; this restriction means that there are some otherwise semantically well-typed expressions that cannot be resolved by \CFACC{}. 145 146 % \subsection{Deleted Declarations} 147 148 % Particular type combinations can be exempted from matching a given polymorphic function through use of a \emph{deleted function declaration}: 149 150 % \begin{cfa} 151 % int somefn(char) = void; 152 % \end{cfa} 153 154 % This feature is based on a \CCeleven{} feature typically used to make a type non-copyable by deleting its copy constructor and assignment operator\footnote{In previous versions of \CC{}, a type could be made non-copyable by declaring a private copy constructor and assignment operator, but not defining either. This idiom is well-known, but depends on some rather subtle and \CC{}-specific rules about private members and implicitly-generated functions; the deleted function form is both clearer and less verbose.} or forbidding some interpretations of a polymorphic function by specifically deleting the forbidden overloads\footnote{Specific polymorphic function overloads can also be forbidden in previous \CC{} versions through use of template metaprogramming techniques, though this advanced usage is beyond the skills of many programmers. A similar effect can be produced on an ad-hoc basis at the appropriate call sites through use of casts to determine the function type. In both cases, the deleted-function form is clearer and more concise.}. 155 % Deleted function declarations are implemented in \CFACC{} by adding them to the symbol table as usual, but with a flag set that indicates that the function is deleted. 156 % If this deleted declaration is selected as the unique minimal-cost interpretation of an expression than an error is produced. 151 157 152 158 \subsection{Traits} … … 184 190 185 191 Traits, however, are significantly more powerful than nominal-inheritance interfaces; firstly, due to the scoping rules of the declarations that satisfy a trait's type assertions, a type may not satisfy a trait everywhere that that type is declared, as with !char! and the !nominal! trait above. 186 Secondly, because \CFA{} is not object-oriented and types do not have a closed set of methods, existing C library types can be extended to implement a trait simply by writing the requisite functions\footnote{\CC{} only allows partial extension of C types, because constructors, destructors, conversions, and the assignment, indexing, and function-call operators may only be defined in a class; \CFA{} does not have any of these restrictions.}. 192 Secondly, because \CFA{} is not object-oriented and types do not have a closed set of methods, existing C library types can be extended to implement a trait simply by writing the requisite functions\footnote{\CC{} only allows partial extension of C types, because constructors, destructors, conversions, and the assignment, indexing, and function-call operators may only be defined in a class; \CFA{} does not have any of these restrictions.}. 187 193 Finally, traits may be used to declare a relationship among multiple types, a property that may be difficult or impossible to represent in nominal-inheritance type systems\footnote{This example uses \CFA{}'s reference types, described in Section~\ref{type-features-sec}.}: 188 194 … … 206 212 In this example above, !(list_iterator, int)! satisfies !pointer_like! by the user-defined dereference function, and !(list_iterator, list)! also satisfies !pointer_like! by the built-in dereference operator for pointers. 207 213 Given a declaration !list_iterator it!, !*it! can be either an !int! or a !list!, with the meaning disambiguated by context (\eg{} !int x = *it;! interprets !*it! as !int!, while !(*it).value = 42;! interprets !*it! as !list!). 208 While a nominal-inheritance system with associated types could model one of those two relationships by making !El! an associated type of !Ptr! in the !pointer_like! implementation, 209 I am unaware of any nominal-inheritance system that can model both relationships simultaneously. 210 Further comparison of \CFA{} polymorphism with other languages can be found in Section~\ref{generic-related-sec}. 214 While a nominal-inheritance system with associated types could model one of those two relationships by making !El! an associated type of !Ptr! in the !pointer_like! implementation, few such systems could model both relationships simultaneously. 211 215 212 216 The flexibility of \CFA{}'s implicit trait-satisfaction mechanism provides programmers with a great deal of power, but also blocks some optimization approaches for expression resolution. 213 The ability of types to begin or cease to satisfy traits when declarations go into or out of scope makes caching of trait satisfaction judgments difficult, and the ability of traits to take multiple type parameters can lead to a combinatorial explosion of work in any attempt to pre-compute trait satisfaction relationships. 214 215 \subsection{Deleted Declarations} 216 217 Particular type combinations can be exempted from matching a given polymorphic function through the use of a \emph{deleted function declaration}: 218 219 \begin{cfa} 220 int somefn(char) = void; 221 \end{cfa} 222 223 This feature is based on a \CCeleven{} feature typically used to make a type non-copyable by deleting its copy constructor and assignment operator\footnote{In previous versions of \CC{}, a type could be made non-copyable by declaring a private copy constructor and assignment operator, but not defining either. This idiom is well-known, but depends on some rather subtle and \CC{}-specific rules about private members and implicitly-generated functions; the deleted function form is both clearer and less verbose.} or forbidding some interpretations of a polymorphic function by specifically deleting the forbidden overloads\footnote{Specific polymorphic function overloads can also be forbidden in previous \CC{} versions through use of template metaprogramming techniques, though this advanced usage is beyond the skills of many programmers.}. 224 Deleted function declarations are implemented in \CFACC{} by adding them to the symbol table as usual, but with a flag set that indicates that the function is deleted. 225 If this deleted declaration is selected as the unique minimal-cost interpretation of an expression then an error is produced, allowing \CFA{} programmers to guide the expression resolver away from undesirable solutions. 217 The ability of types to begin or cease to satisfy traits when declarations go into or out of scope makes caching of trait satisfaction judgments difficult, and the ability of traits to take multiple type parameters can lead to a combinatorial explosion of work in any attempt to pre-compute trait satisfaction relationships. 226 218 227 219 \section{Implicit Conversions} \label{implicit-conv-sec} … … 232 224 One contribution of this thesis, discussed in Section~\ref{conv-cost-sec}, is a number of refinements to this cost model to more efficiently resolve polymorphic function calls. 233 225 234 In the context of these implicit conversions, the expression resolution problem can be restated as findingthe unique minimal-cost interpretation of each expression in the program, where all identifiers must be matched to a declaration, and implicit conversions or polymorphic bindings of the result of an expression may increase the cost of the expression.226 The expression resolution problem, which is the focus of Chapter~\ref{resolution-chap}, is to find the unique minimal-cost interpretation of each expression in the program, where all identifiers must be matched to a declaration, and implicit conversions or polymorphic bindings of the result of an expression may increase the cost of the expression. 235 227 While semantically valid \CFA{} code always has such a unique minimal-cost interpretation, \CFACC{} must also be able to detect and report as errors expressions that have either no interpretation or multiple ambiguous minimal-cost interpretations. 236 228 Note that which subexpression interpretation is minimal-cost may require contextual information to disambiguate. … … 249 241 Given some type !T!, a !T&! (``reference to !T!'') is essentially an automatically dereferenced pointer. 250 242 These types allow seamless pass-by-reference for function parameters, without the extraneous dereferencing syntax present in C; they also allow easy aliasing of nested values with a similarly convenient syntax. 251 The addition of reference types also eliminated two syntactic special-cases present in previous versions of \CFA{}. 252 Consider the a call !a += b! to a compound assignment operator. 253 The previous declaration for that operator is !lvalue int ?+=?(int*, int)!. 254 To mutate the left argument, the built-in operators were special-cased to implicitly take the address of that argument, while the special !lvalue! syntax was used to mark the return type of a function as a mutable reference. 255 With references, this declaration is re-written as !int& ?+=?(int&, int)!. 256 The reference semantics generalize the implicit address-of on the left argument and allow it to be used in user-declared functions, while also subsuming the (now removed) !lvalue! syntax for function return types. 243 A particular improvement is removing syntactic special cases for operators that take or return mutable values; for example, the use !a += b! of a compound assignment operator now matches its signature, !int& ?+=?(int&, int)!, as opposed to the syntactic special cases in earlier versions of \CFA{} to automatically take the address of the first argument to !+=! and to mark its return value as mutable. 257 244 258 245 The C standard makes heavy use of the concept of \emph{lvalue}, an expression with a memory address; its complement, \emph{rvalue} (a non-addressable expression) is not explicitly named in the standard. 259 246 In \CFA{}, the distinction between lvalue and rvalue can be re-framed in terms of reference and non-reference types, with the benefit of being able to express the difference in user code. 260 \CFA{} references preserve the existing qualifier-dropping implicit lvalue-to-rvalue conversion from C (\eg{} a !const volatile int&! can be implicitly copied to a bare !int!) .247 \CFA{} references preserve the existing qualifier-dropping implicit lvalue-to-rvalue conversion from C (\eg{} a !const volatile int&! can be implicitly copied to a bare !int!) 261 248 To make reference types more easily usable in legacy pass-by-value code, \CFA{} also adds an implicit rvalue-to-lvalue conversion, implemented by storing the value in a compiler-generated temporary variable and passing a reference to that temporary. 262 249 To mitigate the ``!const! hell'' problem present in \CC{}, there is also a qualifier-dropping lvalue-to-lvalue conversion implemented by copying into a temporary: … … 264 251 \begin{cfa} 265 252 const int magic = 42; 253 266 254 void inc_print( int& x ) { printf("%d\n", ++x); } 267 255 268 inc_print( magic ); $\C{// legal; implicitly generated code in red below:}$256 print_inc( magic ); $\C{// legal; implicitly generated code in red below:}$ 269 257 270 258 `int tmp = magic;` $\C{// to safely strip const-qualifier}$ 271 ` inc_print( tmp );` $\C{// tmp is incremented, magic is unchanged}$259 `print_inc( tmp );` $\C{// tmp is incremented, magic is unchanged}$ 272 260 \end{cfa} 273 261 … … 277 265 \CFA{} supports all of these use cases without further added syntax. 278 266 The key to this syntax-free feature support is an observation made by the author that the address of a reference is a lvalue. 279 In C, the address-of operator !&x! can only be applied to lvalue expressions, and always produces an immutable rvalue; \CFA{} supports reference re-binding by assignment to the address of a reference \footnote{The syntactic difference between reference initialization and reference assignment is unfortunate, but preserves the ability to pass function arguments by reference (a reference initialization context) without added syntax.}, and pointers to references by repeating the address-of operator:267 In C, the address-of operator !&x! can only be applied to lvalue expressions, and always produces an immutable rvalue; \CFA{} supports reference re-binding by assignment to the address of a reference, and pointers to references by repeating the address-of operator: 280 268 281 269 \begin{cfa} … … 286 274 \end{cfa} 287 275 288 For better compatibility with C, the \CFA{} team has chosen not to differentiate function overloads based on top-level reference types, and as such their contribution to the difficulty of \CFA{} expression resolution is largely restricted to the implementation details of matching reference to non-reference types during type-checking.289 290 \subsection{Resource Management} \label{ctor-sec}276 For better compatibility with C, the \CFA{} team has chosen not to differentiate function overloads based on top-level reference types, and as such their contribution to the difficulty of \CFA{} expression resolution is largely restricted to the implementation details of normalization conversions and adapters. 277 278 \subsection{Resource Management} 291 279 292 280 \CFA{} also supports the RAII (``Resource Acquisition is Initialization'') idiom originated by \CC{}, thanks to the object lifetime work of Robert Schluntz \cite{Schluntz17}. -
doc/theses/aaron_moss_PhD/phd/conclusion.tex
r673cd63 rffe2fad 7 7 The combination of these practical improvements and added features significantly improve the viability of \CFA{} as a practical programming language. 8 8 9 Further improvements to the \CFA{} type system are still possible, however.9 Further improvements to the \CFA{} type-system are still possible, however. 10 10 One area suggested by this work is development of a scheme for user-defined conversions; to integrate properly with the \CFA{} conversion model, there would need to be a distinction between safe and unsafe conversions, and possibly a way to denote conversions as explicit-only or non-chainable. 11 Another place for ongoing effort is improvement of compilation performance; I believe the most promising direction for that effortis rebuilding the \CFA{} compiler on a different framework than Bilson's \CFACC{}.11 Another place for ongoing effort is improvement of compilation performance; I believe the most promising direction for that is rebuilding the \CFA{} compiler on a different framework than Bilson's \CFACC{}. 12 12 The resolver prototype presented in this work has good performance and already has the basics of \CFA{} semantics implemented, as well as many of the necessary core data structures, and would be a viable candidate for a new compiler architecture. 13 13 An alternate approach would be to fork an existing C compiler such as Clang~\cite{Clang}, which would need to be modified to use one of the resolution algorithms discussed here, as well as various other features introduced by Bilson~\cite{Bilson03}. 14 15 More generally, the algorithmic techniques described in this thesis may be useful to implementors of other programming languages.16 In particular, the demonstration of practical performance for polymorphic return-type inference suggests the possibility of eliding return-type-only template parameters in \CC{} function calls, though integrating such an extension into \CC{} expression resolution in a backwards-compatible manner may be challenging.17 The \CFA{} expression resolution problem also bears some similarity to the \emph{local type inference} model put forward by Pierce \& Turner \cite{Pierce00} and Odersky \etal{} \cite{Odersky01}; compiler implementors for languages like Scala \cite{Scala}, which performs type inference based on this model, may be able to profitably adapt the algorithms and data structures presented in this thesis. -
doc/theses/aaron_moss_PhD/phd/evaluation/algo-summary.dat
r673cd63 rffe2fad 18 18 "\\textsc{co-dca-inc}" 101 50 2824 2 19 19 "\\textsc{co-dca-per}" 103 12 1339 2 20 "\\textsc{td-def-bas}" 67 18 189 3 21 "\\textsc{td-def-inc}" 66 40 263 3 20 22 "\\textsc{td-imm-bas}" 64 125 2620 3 21 23 "\\textsc{td-imm-inc}" 62 322 2717 3 22 "\\textsc{td-def-bas}" 67 18 189 323 "\\textsc{td-def-inc}" 66 40 263 324 24 "\\textsc{td-dca-bas}" 67 18 191 3 25 25 "\\textsc{td-dca-inc}" 66 40 258 3 -
doc/theses/aaron_moss_PhD/phd/evaluation/algo-summary.gp
r673cd63 rffe2fad 48 48 set yrange [0:80] 49 49 50 set label "125" at 18.25,8251 set label "322" at 19.75,8250 set label "125" at 20.25,82 51 set label "322" at 21.75,82 52 52 plot 'evaluation/algo-summary.dat' using ($5 == 1 ? $3 : 1/0):xtic(1) with boxes, \ 53 53 '' using ($5 == 2 ? $3 : 1/0):xtic(1) with boxes, \ -
doc/theses/aaron_moss_PhD/phd/evaluation/cfa-plots.gp
r673cd63 rffe2fad 6 6 set output BUILD."cfa-time.tex" 7 7 8 set xlabel "\\textsc{cfa-co} runtime ( s)"9 set ylabel "runtime ( s)"8 set xlabel "\\textsc{cfa-co} runtime (ms)" 9 set ylabel "runtime (ms)" 10 10 set logscale xy 11 11 set key outside 12 12 13 plot for [i=2: 5] 'evaluation/cfa-time.tsv' using 2:i title columnheader13 plot for [i=2:6] 'evaluation/cfa-time.tsv' using 2:i title columnheader 14 14 15 15 # MEMORY # … … 18 18 set ylabel "peak memory (MB)" 19 19 20 plot for [i=2:5] 'evaluation/cfa-mem-by-time.tsv' using 7:(column(i)/1000) title columnheader 21 22 # SPEEDUP # 23 set output BUILD."cfa-speedup.tex" 24 25 set ylabel "speedup w.r.t. baseline" 26 unset logscale y 27 28 plot 'evaluation/cfa-time.tsv' using 2:(column(2)/column(2)) title 'baseline', \ 29 '' using 2:(column(2)/column(3)) title columnheader, \ 30 '' using 2:(column(3)/column(4)) title 'inter-round', \ 31 '' using 2:(column(4)/column(5)) title columnheader 20 plot for [i=2:6] 'evaluation/cfa-mem-by-time.tsv' using 7:(column(i)/1000) title columnheader 32 21 33 22 # # RUNTIME SPEEDUP # -
doc/theses/aaron_moss_PhD/phd/evaluation/per-prob-scatter.gp
r673cd63 rffe2fad 42 42 set output BUILD."per-prob-assns.tex" 43 43 44 set xlabel "assertions satisfied"44 set xlabel "assertions resolved" 45 45 46 46 plot for [i=1:words(tests)] 'evaluation/per_prob/'.word(tests, i).'-per-prob.csv' using 6:(column(7)/1000) title word(tests, i) -
doc/theses/aaron_moss_PhD/phd/experiments.tex
r673cd63 rffe2fad 7 7 8 8 \CFACC{} can generate realistic test inputs for the resolver prototype from equivalent \CFA{} code; 9 the generated test inputs currently comprise all \CFA{} code currently in existence\footnote{Though \CFA{} is backwards-compatible with C, the lack of \lstinline{forall} functions and name overloading in C mean that the larger corpus of C code does not provide challenging test instances for \CFACC{}.}, 9,000 lines drawn primarily from the standard library and compiler test suite. 10 This code includes a substantial degree of name overloading for common library functions and a number of fundamental polymorphic abstractions, including iterators and streaming input/output. 9 the generated test inputs currently comprise all \CFA{} code currently extant, $9,000$ lines drawn primarily from the standard library and compiler test suite. 11 10 \CFACC{} is also instrumented to produce a number of code metrics. 12 11 These metrics were used to construct synthetic test inputs during development of the resolver prototype; these synthetic inputs provided useful design guidance, but the performance results presented in this chapter are based on the more realistic directly-generated inputs. 12 13 % There are three sources of problem instances for the resolver prototype. 14 % The first is small, hand-written tests designed to test the expressive power and correctness of the prototype. 15 % These tests are valuable for regression testing, but not time-consuming enough to be useful performance tests. 16 % The second sort of problem instances are procedurally generated according to a set of parameters (distributions of polymorphic versus monomorphic functions, number of function arguments, number of types, \etc{}); the procedural problem generator can be used to explore the behaviour of an algorithm with respect to certain sorts of problem instances by varying the input parameters. 17 % I have implemented a flagged \CFACC{} pass which outputs information which can be used to initialize the procedural generator's parameters to realistic values. 18 % The final sort of problem instances are derived from actual \CFA{} code. 19 % The prototype has a rich enough representation of \CFA{} that actual instances of expression resolution can be expressed with good fidelity, and I have implemented a compiler pass for \CFACC{} which can generate instances from \CFA{} code. 20 % Since at this juncture all development in \CFA{} is done by our research team, I have tested the prototype system on all \CFA{} code currently extant, primarily the standard library and compiler test suite. 13 21 14 22 \section{Resolver Prototype Features} \label{rp-features-sec} … … 28 36 Generic named types are used to represent the built-in parameterized types of \CFA{} as well; !T*! is encoded as \texttt{\#\$ptr<T>}. 29 37 \CFA{} arrays are also represented as pointers, to simulate array-to-pointer decay, while top-level reference types are replaced by their referent to simulate the variety of reference conversions. 30 \emph{Function types} have first-class representation in the prototype as well; \CFA{} function pointers are represented as variables with the appropriate function type, though \CFA{} polymorphic function pointers cannot be represented, as the prototype system stores information about type assertions in function declarations rather than in the function type.38 \emph{Function types} have first-class representation in the prototype as well; \CFA{} function function pointers are represented as variables with the appropriate function type, though \CFA{} polymorphic function pointers cannot be represented, as the prototype system function type does not store information about type assertions. 31 39 \emph{Void} and \emph{tuple types} are also supported in the prototype, to express the multiple-return-value functions in \CFA{}, though varargs functions and !ttype! tuple-typed type variables are absent from the prototype system. 32 40 The prototype system also does not represent type qualifiers (\eg{} !const!, !volatile!), so all such qualifiers are stripped during conversion to the prototype system. … … 40 48 The main area for future expansion in the design of the resolver prototype is conversions. 41 49 Cast expressions are implemented in the output language of the resolver, but cannot be expressed in the input. 42 The only implicit conversions supported are among the arithmetic-like concrete types, which capture most, but not all, of \CFA{}'s built-in implicit conversions\footnote{Notable absences include \lstinline{void*} to other pointer types, or \lstinline{0} to pointer types.}.50 The only implicit conversions supported are among the arithmetic-like concrete types, which captures most, but not all, of \CFA{}'s built-in implicit conversions\footnote{Notable absences include \lstinline{void*} to other pointer types, or \lstinline{0} to pointer types.}. 43 51 Future work should include a way to express implicit (and possibly explicit) conversions in the input language, with an investigation of the most efficient way to handle implicit conversions, and potentially a design for user-defined conversions. 44 52 … … 51 59 The primary architectural difference between the resolver prototype and \CFACC{} is that the prototype system uses a simple mark-and-sweep garbage collector for memory management, while \CFACC{} uses a manual memory-management approach. 52 60 This architectural difference affects the mutation patterns used by both systems: \CFACC{} frequently makes deep clones of multi-node object graphs to ensure that there is a single ``owner'' for each object which can safely delete it later; the prototype system, by contrast, relies on its garbage collector to handle ownership, and can often copy pointers rather than cloning objects. 53 The resolver prototype thus only needs to clone nodes thatit modifies, and can share un-modified children between clones; the tree mutator abstraction in the prototype is designed to take advantage of this property.61 The resolver prototype thus only needs to clone nodes which it modifies, and can share un-modified children between clones; the tree mutator abstraction in the prototype is designed to take advantage of this property. 54 62 The key design decision enabling this is that all child nodes are held by !const! pointer, and thus cannot be mutated once they have been stored in a parent node. 55 63 With minimal programming discipline, it can thus be ensured that any expression is either mutable or shared, but never both; the Dotty research compiler for Scala takes a similar architectural approach \cite{Dotty-github}. … … 90 98 \item[Basic] (\textsc{bas}) Bilson-style type environment with hash-based equivalence class storage, as discussed in Section~\ref{naive-env-sec}. 91 99 \item[Incremental Inheritance] (\textsc{inc}) Incremental-inheritance variant sharing unmodified common parent information among environments, as discussed in Section~\ref{inc-env-sec}. 92 \item[Persistent union-find] (\textsc{per}) Union-find-based environment, using the persistent variant discussed in Section~\ref{env-persistent-union-find} for backtracking and combination. This variant requires that all pairs of type arguments used as arguments to $combine$ descen dfrom a common root environment; this requirement is incompatible with the caching used in the top-down traversal direction, and thus no \textsc{td-*-per} algorithms are tested.100 \item[Persistent union-find] (\textsc{per}) Union-find-based environment, using the persistent variant discussed in Section~\ref{env-persistent-union-find} for backtracking and combination. This variant requires that all pairs of type arguments used as arguments to $combine$ descent from a common root environment; this requirement is incompatible with the caching used in the top-down traversal direction, and thus no \textsc{td-*-per} algorithms are tested. 93 101 \end{description} 94 102 \end{description} … … 108 116 As a matter of experimental practicality, test runs that exceeded 8~GB of peak resident memory usage are excluded from the data set. 109 117 This restriction is justifiable by real-world use, as a compiler that is merely slow may be accommodated with patience, but one that uses in excess of 8~GB of RAM may be impossible to run on many currently deployed computer systems. 110 8~GB of RAM is not typical of the memory usage of the best-pe rforming two variants, \textsc{bu-dca-bas} and \textsc{bu-dca-per}, which were able to run all 131 test inputs to completionwith maximum memory usage of 70~MB and 78~MB, respectively.118 8~GB of RAM is not typical of the memory usage of the best-peforming two variants, \textsc{bu-dca-bas} and \textsc{bu-dca-per}, which were able to run all 131 test inputs to completion with maximum memory usage of 70~MB and 78~MB, respectively. 111 119 However, this threshold did eliminate a significant number of algorithm-test variants, with the worst-performing variant, \textsc{td-imm-inc}, only completing 62 test inputs within the memory bound. 112 120 Full results for tests completed by algorithm variant are presented in Figure~\ref{tests-completed-fig}. … … 143 151 % \end{figure} 144 152 145 It can be seen from these results that th e top-down, immediate assertion-satisfaction (\textsc{td-imm-*}) variants are particularly inefficient, as they check a significant number of assertions without filtering to determine if the arguments can be made to fit.153 It can be seen from these results that that the top-down, immediate assertion-satisfaction (\textsc{td-imm-*}) variants are particularly inefficient, as they check a significant number of assertions without filtering to determine if the arguments can be made to fit. 146 154 It is also clear that the bottom-up (\textsc{bu}) traversal order is better than both top-down (\textsc{td}) and the Bilson-style bottom-up-combined (\textsc{co}) orders. 147 155 While the advantage of \textsc{bu} over \textsc{co} is clear, in that it performs less redundant work if a prefix of a combination fails, the advantage of \textsc{bu} over \textsc{td} provides an answer for an open question from Baker \cite{Baker82}. … … 162 170 This instrumented resolver is then run on a set of difficult test instances; to limit the data collection task, these runs are restricted to the best-performing \textsc{bu-dca-per} algorithm and test inputs taking more than 1~s to complete. 163 171 164 The 13 test inputs thus selected contain 20 ,632 top-level expressions among them, which are separated into order-of-magnitude bins by runtime in Figure~\ref{per-prob-histo-fig}.172 The 13 test inputs thus selected contain 20632 top-level expressions among them, which are separated into order-of-magnitude bins by runtime in Figure~\ref{per-prob-histo-fig}. 165 173 As can be seen from this figure, overall runtime is dominated by a few particularly difficult problem instances --- the 60\% of expressions that resolve in under 0.1~ms collectively take less time to resolve than any of the 0.2\% of expressions that take at least 100~ms to resolve. 166 174 On the other hand, the 46 expressions in that 0.2\% take 38\% of the overall time in this difficult test suite, while the 201 expressions that take between 10 and 100~ms to resolve consume another 30\%. … … 202 210 \section{\CFA{} Results} \label{cfa-results-sec} 203 211 204 I have integrated a numberof the algorithmic techniques discussed in this chapter into \CFACC{}.212 I have integrated most of the algorithmic techniques discussed in this chapter into \CFACC{}. 205 213 This integration took place over a period of months while \CFACC{} was under active development on a number of other fronts, so it is not possible to completely isolate the effects of the algorithmic changes, but I believe the algorithmic changes to have had the most-significant effects on performance over the study period. 206 214 To generate this data, representative commits from the \texttt{git} history of the project were checked out and compiled, then run on the same machine used for the resolver prototype experiments discussed in Section~\ref{proto-exp-sec}. … … 209 217 I performed two rounds of modification to \CFACC{}; the first round moved from Bilson's original combined-bottom-up algorithm to an un-combined bottom-up algorithm, denoted \textsc{cfa-co} and \textsc{cfa-bu}, respectively. 210 218 A top-down algorithm was not attempted in \CFACC{} due to its poor performance in the prototype. 211 The second round of modifications addressed assertion satisfaction, taking Bilson's original \textsc{cfa-imm} algorithm and modifying it to use the deferred approach \textsc{cfa-def}. 212 Due to time constraints, a deferred-cached assertion satisfaction algorithm for \CFACC{} could not be completed, but both preliminary results from this effort and the averaged prototype results from Section~\ref{proto-exp-sec} indicate that assertion satisfaction caching is not likely to be a fruitful optimization for \CFACC{}. 219 The second round of modifications addressed assertion satisfaction, taking Bilson's original \textsc{cfa-imm} algorithm, and iteratively modifying it, first to use the deferred approach \textsc{cfa-def}, then caching those results in the \textsc{cfa-dca} algorithm. 213 220 The new environment data structures discussed in Section~\ref{proto-exp-sec} have not been successfully merged into \CFACC{} due to their dependencies on the garbage-collection framework in the prototype; I spent several months modifying \CFACC{} to use similar garbage collection, but due to \CFACC{} not being designed to use such memory management the performance of the modified compiler was non-viable. 214 221 It is possible that the persistent union-find environment could be modified to use a reference-counted pointer internally without changing the entire memory-management framework of \CFACC{}, but such an attempt is left to future work. 215 222 216 As can be seen in Figures~\ref{cfa-time-fig}--\ref{cfa-mem-fig}, the time and peak memory results for these five versions of \CFACC{} show that assertion resolution dominates total resolution cost, with the \textsc{cfa-def} variant running consistently faster than the others on more expensive test cases, and the speedup from the deferred approach increasing with the difficulty of the test case. 217 The results from \CFACC{} for \textsc{cfa-co} \vs{} \textsc{cfa-bu} do not mirror those from the prototype; I conjecture this is mostly due to the different memory-management schemes and sorts of data required to run type unification and assertion satisfaction calculations, as \CFACC{} performance has proven to be particularly sensitive to the amount of heap allocation performed. 218 This data also shows a noticeable regression in compiler performance in the eleven months between \textsc{cfa-bu} and \textsc{cfa-imm}, which use the same resolution algorithms; this approximate doubling in runtime is not due to expression resolution, as no integration work happened in this time, but I am unable to ascertain its actual cause. 219 To isolate the effects of the algorithmic changes from this unrelated performance regression, the speedup results in Figure~\ref{cfa-speedup-fig} are shown with respect to the start of each modification round, \textsc{cfa-bu} \vs{} \textsc{cfa-co} and \textsc{cfa-def} \vs{} \textsc{cfa-imm}. 223 As can be seen in Figures~\ref{cfa-time-fig} and~\ref{cfa-mem-fig}, the time and peak memory results for these five versions of \CFACC{} show that assertion resolution dominates total resolution cost, with the \textsc{cfa-def} and \textsc{cfa-dca} variants running consistently faster than the others on more expensive test cases; no difference can be seen between these two algorithms' performance, but which agrees with the prototype experiments in Section~\ref{proto-exp-sec}. 224 The results from \CFACC{} for \textsc{cfa-co} \textit{vs.}\ \textsc{cfa-bu} do not mirror those from the prototype; I conjecture this is mostly due to the different memory-management schemes and sorts of data required to run type unification and assertion satisfaction calculations, as \CFACC{} performance has proven to be particularly sensitive to the amount of heap allocation performed. 225 This data also shows a noticeable regression in compiler performance in the eleven months between \textsc{cfa-bu} and \textsc{cfa-imm}, which use the same resolution algorithms; this regression is not due to expression resolution, as no integration work happened in this time, but I am unable to ascertain its actual cause. 220 226 It should also be noted with regard to the peak memory results in Figure~\ref{cfa-mem-fig} that the peak memory usage does not always occur during the resolution phase of the compiler. 221 227 … … 224 230 \input{cfa-time} 225 231 \caption[\CFACC{} runtime against \textsc{cfa-co} baseline.]{\CFACC{} runtime against \textsc{cfa-co} baseline. Note log scales on both axes.} \label{cfa-time-fig} 226 \end{figure}227 228 \begin{figure}229 \centering230 \input{cfa-speedup}231 \caption[\CFACC{} speedup.]{\CFACC{} speedup against against \textsc{cfa-co} baseline runtime. To isolate the effect of algorithmic changes, \textsc{cfa-bu} speedup is \vs{} \textsc{cfa-co} while \textsc{cfa-def} speedup is \vs{} \textsc{cfa-imm}. The `inter-round' series shows slowdown between \textsc{cfa-bu} and \textsc{cfa-imm}.} \label{cfa-speedup-fig}232 232 \end{figure} 233 233 … … 246 246 % some data you collected personally for imm vs. def vs. dca variants in resolv-proto/logs/rp-bu-tec_vs_cfacc.ods 247 247 248 % look back at Resolution Algorithms section for threads to tie up "does the algorithm look like this?" 249 248 250 \section{Conclusion} 249 251 250 The dominant factor in the cost of \CFA{} expression resolution is assertion satisfaction.252 As can be seen from the prototype results, per-expression benchmarks, and \CFACC{}, the dominant factor in the cost of \CFA{} expression resolution is assertion satisfaction. 251 253 Reducing the total number of assertions satisfied, as in the deferred satisfaction algorithm, is consistently effective at reducing runtime, and caching results of these satisfaction problem instances has shown promise in the prototype system. 252 254 The results presented here also demonstrate that a bottom-up approach to expression resolution is superior to top-down, settling an open question from Baker~\cite{Baker82}. -
doc/theses/aaron_moss_PhD/phd/frontpgs.tex
r673cd63 rffe2fad 67 67 External Examiner: \> Doug Lea \\ 68 68 \> Professor, Computer Science Department, \\ 69 \> State Unive rsity of New York at Oswego \\69 \> State Univesity of New York at Oswego \\ 70 70 \end{tabbing} 71 71 \bigskip … … 141 141 The compilation performance improvements have all been experimentally validated with a new prototype system that encapsulates the key aspects of the \CFA{} language; this prototype is a promising basis for future research and a technical contribution of this work. 142 142 143 \CFA{}, extended and refined in this thesis, presents both an independently interesting combination of language features and a comprehensive approach to the modernization of C.144 This work demonstrates the hitherto unproven compiler-implementation viability of the \CFA{} language design, and provides a number of useful tools to implementors of other languages.145 146 143 \cleardoublepage 147 144 … … 154 151 This thesis would not exist in the form it does without the mentorship of my advisor, Peter Buhr, who has ably led the \CFA{} team while giving me both the advantage of his decades of experience and the freedom to follow my own interests. 155 152 156 My work on \CFA{} does not exist in a vac uum, and it has been a pleasure and a privilege to collaborate with the members of the \CFA{} team: Andrew Beach, Richard Bilson, Michael Brooks, Bryan Chan, Thierry Delisle, Glen Ditchfield, Brice Dobry, Rob Schluntz, and others.153 My work on \CFA{} does not exist in a vaccuum, and it has been a pleasure and a privilege to collaborate with the members of the \CFA{} team: Andrew Beach, Richard Bilson, Michael Brooks, Bryan Chan, Thierry Delisle, Glen Ditchfield, Brice Dobry, Rob Schluntz, and others. 157 154 I gratefully acknowledge the financial support of the National Science and Engineering Council of Canada and Huawei Ltd.\ for this project. 158 155 I would also like to thank of my thesis committee, Werner Dietl, Doug Lea, Ond\v{r}ej Lhot\a'ak, and Gregor Richards, for the time and effort they have invested in providing constructive feedback to refine this work. 159 156 I am indebted to Peter van Beek and Ian Munro for their algorithmic expertise and willingness to share their time with me. 160 I have far too many colleagues in the Programming Languages Group and School of Computer Science to name, but I deeply appreciate their camarad erie; specifically with regard to the production of this thesis, I would like to thank Nathan Fish for recommending my writing soundtrack, and Sharon Choy for her unfailing supply of encouraging rabbit animations.157 I have far too many colleagues in the Programming Languages Group and School of Computer Science to name, but I deeply appreciate their camaradarie; specifically with regard to the production of this thesis, I would like to thank Nathan Fish for recommending my writing soundtrack, and Sharon Choy for her unfailing supply of encouraging rabbit animations. 161 158 162 159 Finally, to all my friends and family who have supported me and made Kitchener-Waterloo home these past seven years, thank you, I could not have done it without you; most especially, Christina Moss, you are the best of wives and best of women, your support has kept me going through the ups and downs of research, and your partnership is key to all my successes. -
doc/theses/aaron_moss_PhD/phd/generic-bench.tex
r673cd63 rffe2fad 1 1 \chapter{Generic Stack Benchmarks} \label{generic-bench-app} 2 2 3 This appendix includes the generic stack code for all four language variants discussed in Section~\ref{generic-performance-sec}. Throughout, !/***/! designates a counted redundant type annotation; these include !sizeof! on a known type, repetition of a type name in initialization or return statements, and type-specific helper functions. 4 The code is reformatted slightly for brevity. 3 Throughout, !/***/! designates a counted redundant type annotation; code reformatted slightly for brevity. 5 4 6 5 \section{C} -
doc/theses/aaron_moss_PhD/phd/generic-types.tex
r673cd63 rffe2fad 27 27 int int_list_head( const struct int_list* ls ) { return ls->value; } 28 28 29 // all code must be duplicated for every generic instantiation29 $\C[\textwidth]{// all code must be duplicated for every generic instantiation}$ 30 30 31 31 struct string_list { const char* value; struct string_list* next; }; … … 40 40 { return ls->value; } 41 41 42 // use is efficient and idiomatic42 $\C[\textwidth]{// use is efficient and idiomatic}$ 43 43 44 44 int main() { … … 65 65 struct list { void* value; struct list* next; }; 66 66 67 // internal memory management requires helper functions67 $\C[\textwidth]{// internal memory management requires helper functions}$ 68 68 69 69 void list_insert( struct list** ls, void* x, void* (*copy)(void*) ) { … … 75 75 void* list_head( const struct list* ls ) { return ls->value; } 76 76 77 // helpers duplicated per type77 $\C[\textwidth]{// helpers duplicated per type}$ 78 78 79 79 void* int_copy(void* x) { … … 105 105 #include <stdio.h> $\C{// for printf}$ 106 106 107 // code is nested in macros107 $\C[\textwidth]{// code is nested in macros}$ 108 108 109 109 #define list(N) N ## _list … … 127 127 define_list(string, const char*); $\C[3in]{// defines string\_list}$ 128 128 129 // use is efficient, but syntactically idiosyncratic129 $\C[\textwidth]{// use is efficient, but syntactically idiosyncratic}$ 130 130 131 131 int main() { … … 143 143 \end{figure} 144 144 145 \CC{}, Java, and other languages use \emph{generic types} (or \emph{parameterized types})to produce type-safe abstract data types.146 Design and implementation of generic types for \CFA{} is the first major contribution of this thesis, a summary of which is published in \cite{Moss18} and onwhich this chapter is closely based.145 \CC{}, Java, and other languages use \emph{generic types} to produce type-safe abstract data types. 146 Design and implementation of generic types for \CFA{} is the first major contribution of this thesis, a summary of which is published in \cite{Moss18} and from which this chapter is closely based. 147 147 \CFA{} generic types integrate efficiently and naturally with the existing polymorphic functions in \CFA{}, while retaining backward compatibility with C in layout and support for separate compilation. 148 148 A generic type can be declared in \CFA{} by placing a !forall! specifier on a !struct! or !union! declaration, and instantiated using a parenthesized list of types after the generic name. … … 156 156 forall(otype T) struct list { T value; list(T)* next; }; 157 157 158 // single polymorphic implementation of each function159 // overloading reduces need for namespace prefixes158 $\C[\textwidth]{// single polymorphic implementation of each function}$ 159 $\C[\textwidth]{// overloading reduces need for namespace prefixes}$ 160 160 161 161 forall(otype T) void insert( list(T)** ls, T x ) { … … 167 167 forall(otype T) T head( const list(T)* ls ) { return ls->value; } 168 168 169 // use is clear and efficient169 $\C[\textwidth]{// use is clear and efficient}$ 170 170 171 171 int main() { … … 189 189 A key insight for this design is that C already possesses a handful of built-in generic types (\emph{derived types} in the language of the standard \cite[\S{}6.2.5]{C11}), notably pointer (!T*!) and array (!T[]!), and that user-definable generics should act similarly. 190 190 191 \subsection{Related Work} \label{generic-related-sec}191 \subsection{Related Work} 192 192 193 193 One approach to the design of generic types is that taken by \CC{} templates \cite{C++}. … … 202 202 Java \cite{Java8} has another prominent implementation for generic types, introduced in Java~5 and based on a significantly different approach than \CC{}. 203 203 The Java approach has much more in common with the !void*!-polymorphism shown in Figure~\ref{void-generic-fig}; since in Java nearly all data is stored by reference, the Java approach to polymorphic data is to store pointers to arbitrary data and insert type-checked implicit casts at compile-time. 204 This process of \emph{type erasure} has the benefit of allowing a single instantiation of polymorphic code, but relies heavily on Java's object model .204 This process of \emph{type erasure} has the benefit of allowing a single instantiation of polymorphic code, but relies heavily on Java's object model and garbage collector. 205 205 To use this model, a more C-like language such as \CFA{} would be required to dynamically allocate internal storage for variables, track their lifetime, and properly clean them up afterward. 206 206 … … 277 277 A \emph{dtype-static} type has polymorphic parameters but is still concrete. 278 278 Polymorphic pointers are an example of dtype-static types; given some type variable !T!, !T! is a polymorphic type, but !T*! has a fixed size and can therefore be represented by a !void*! in code generation. 279 In particular, generic types where all parameters are un-!sized! (\ie{} they do not conform to the built-in !sized! trait , which is satisfied by all types the compiler knows the size and alignment of) are always concrete, as there is no possibility for their layout to vary based on type parameters of unknown size and alignment.279 In particular, generic types where all parameters are un-!sized! (\ie{} they do not conform to the built-in !sized! trait because the compiler does not know their size and alignment) are always concrete, as there is no possibility for their layout to vary based on type parameters of unknown size and alignment. 280 280 More precisely, a type is concrete if and only if all of its !sized! type parameters are concrete, and a concrete type is dtype-static if any of its type parameters are (possibly recursively) polymorphic. 281 281 To illustrate, the following code using the !pair! type from above has each use of !pair! commented with its class: … … 333 333 \end{cfa} 334 334 335 Here, !_assign_T! is passed in as an implicit parameter from !otype T! and takes two !T*! (!void*! in the generated code \footnote{A GCC extension allows arithmetic on \lstinline{void*}, calculated as if \lstinline{sizeof(void) == 1}.}), a destination and a source, and !_retval! is the pointer to a caller-allocated buffer for the return value, the usual \CFA{} method to handle dynamically-sized return types.335 Here, !_assign_T! is passed in as an implicit parameter from !otype T! and takes two !T*! (!void*! in the generated code), a destination and a source, and !_retval! is the pointer to a caller-allocated buffer for the return value, the usual \CFA{} method to handle dynamically-sized return types. 336 336 !_offsetof_pair! is the offset array passed into !value!; this array is statically generated at the call site as: 337 337 … … 350 350 Similarly, the layout function can only safely be called from a context where the generic type definition is visible, because otherwise the caller does not know how large to allocate the array of member offsets. 351 351 352 The C standard does not specify a memory layout for structs, but the System V ABI \cite{SysVABI} does; compatibility with this standard is sufficient for \CFA{}'s currently-supported architectures, though future ports may require different layout-function generation algorithms.352 The C standard does not specify a memory layout for structs, but the POSIX ABI for x86 \cite{POSIX08} does; this memory layout is common for C implementations, but is a platform-specific issue for porting \CFA{}. 353 353 This algorithm, sketched below in pseudo-\CFA{}, is a straightforward mapping of consecutive fields into the first properly-aligned offset in the !struct! layout; layout functions for !union! types omit the offset array and simply calculate the maximum size and alignment over all union variants. 354 354 Since \CFACC{} generates a distinct layout function for each type, constant-folding and loop unrolling are applied. … … 423 423 424 424 To validate the practicality of this generic type design, microbenchmark-based tests were conducted against a number of comparable code designs in C and \CC{}, first published in \cite{Moss18}. 425 Since these languages are all C-based and compiled with the same compiler backend, maximal-performance benchmarks should show little runtime variance, differing only in length and clarity of source code.425 Since all these languages are all C-based and compiled with the same compiler backend, maximal-performance benchmarks should show little runtime variance, differing only in length and clarity of source code. 426 426 A more illustrative comparison measures the costs of idiomatic usage of each language's features. 427 The code below shows the \CFA{} benchmark tests for a generic stack based on a singly-linked list; the test suite is equivalent for the other languages, code for which is included in Appendix~\ref{generic-bench-app}.428 The experiment uses element types !int! and !pair(short, char)! and pushes $N = 4 M$ elements on a generic stack, copies the stack, clears one of the stacks, and finds the maximum value in the other stack.427 The code below shows the \CFA{} benchmark tests for a generic stack based on a singly-linked list; the test suite is equivalent for the other other languages, code for which is included in Appendix~\ref{generic-bench-app}. 428 The experiment uses element types !int! and !pair(short, char)! and pushes $N = 40M$ elements on a generic stack, copies the stack, clears one of the stacks, and finds the maximum value in the other stack. 429 429 430 430 \begin{cfa} … … 451 451 452 452 The four versions of the benchmark implemented are C with !void*!-based polymorphism, \CFA{} with parametric polymorphism, \CC{} with templates, and \CC{} using only class inheritance for polymorphism, denoted \CCV{}. 453 The \CCV{} variant illustrates an alternative object-oriented idiom where all objects inherit from a base !object! class, a language design similar to Java 4; in particular, runtime checks are necessary to safely downcast objects.453 The \CCV{} variant illustrates an alternative object-oriented idiom where all objects inherit from a base !object! class, mimicking a Java-like interface; in particular, runtime checks are necessary to safely downcast objects. 454 454 The most notable difference among the implementations is the memory layout of generic types: \CFA{} and \CC{} inline the stack and pair elements into corresponding list and pair nodes, while C and \CCV{} lack such capability and, instead, must store generic objects via pointers to separately allocated objects. 455 455 Note that the C benchmark uses unchecked casts as C has no runtime mechanism to perform such checks, whereas \CFA{} and \CC{} provide type safety statically. … … 481 481 482 482 The C and \CCV{} variants are generally the slowest and have the largest memory footprint, due to their less-efficient memory layout and the pointer indirection necessary to implement generic types in those languages; this inefficiency is exacerbated by the second level of generic types in the pair benchmarks. 483 By contrast, the \CFA{} and \CC{} variants run in noticeably lesstime for both the integer and pair because of the equivalent storage layout, with the inlined libraries (\ie{} no separate compilation) and greater maturity of the \CC{} compiler contributing to its lead.483 By contrast, the \CFA{} and \CC{} variants run in roughly equivalent time for both the integer and pair because of the equivalent storage layout, with the inlined libraries (\ie{} no separate compilation) and greater maturity of the \CC{} compiler contributing to its lead. 484 484 \CCV{} is slower than C largely due to the cost of runtime type checking of downcasts (implemented with !dynamic_cast!); the outlier for \CFA{}, pop !pair!, results from the complexity of the generated-C polymorphic code. 485 485 The gcc compiler is unable to optimize some dead code and condense nested calls; a compiler designed for \CFA{} could more easily perform these optimizations. 486 Finally, the binary size for \CFA{} is larger because of static linking with the \CFA{} prelude library, which includes function definitions for all the built-in operators.486 Finally, the binary size for \CFA{} is larger because of static linking with \CFA{} libraries. 487 487 488 488 \CFA{} is also competitive in terms of source code size, measured as a proxy for programmer effort. -
doc/theses/aaron_moss_PhD/phd/introduction.tex
r673cd63 rffe2fad 1 1 \chapter{Introduction} 2 2 3 The C programming language ~\cite{C11}has had a wide-ranging impact on the design of software and programming languages.4 In the 30 years since its first standardization, it has consistently been one of the most popular programming languages, with billions of lines of C code still in active use, and tens of thousands of trained programmers producing it. The TIOBE index ~\cite{TIOBE} tracks popularity of programming languages over time, and C has never dropped below second place:3 The C programming language has had a wide-ranging impact on the design of software and programming languages. 4 In the 30 years since its first standardization, it has consistently been one of the most popular programming languages, with billions of lines of C code still in active use, and tens of thousands of trained programmers producing it. The TIOBE index\cite{TIOBE} tracks popularity of programming languages over time, and C has never dropped below second place: 5 5 6 6 \begin{table}[h] … … 19 19 20 20 The impact of C on programming language design is also obvious from Table~\ref{tiobe-table}; with the exception of Python, all of the top five languages use C-like syntax and control structures. 21 \CC{} ~\cite{C++}is even a largely backwards-compatible extension of C.21 \CC{} is even a largely backwards-compatible extension of C. 22 22 Though its lasting popularity and wide impact on programming language design point to the continued relevance of C, there is also widespread desire of programmers for languages with more expressive power and programmer-friendly features; accommodating both maintenance of legacy C code and development of the software of the future is a difficult task for a single programming language. 23 23 … … 26 26 The new features make \CFA{} more powerful and expressive than C, while maintaining strong backward-compatibility with both C code and the procedural paradigm expected by C programmers. 27 27 Unlike other popular C extensions like \CC{} and Objective-C, \CFA{} adds modern features to C without imposing an object-oriented paradigm to use them. 28 However, these new features do impose a compile-time cost, particularly in the expression resolver, which must evaluate the typing rules of a significantly more complex type system.28 However, these new features do impose a compile-time cost, particularly in the expression resolver, which must evaluate the typing rules of a significantly more complex type-system. 29 29 30 30 This thesis is focused on making \CFA{} a more powerful and expressive language, both by adding new features to the \CFA{} type system and ensuring that both added and existing features can be efficiently implemented in \CFACC{}, the \CFA{} reference compiler. … … 37 37 \end{itemize} 38 38 39 The prototype system, which implements the algorithmic contributions of this thesis, is the first performant type-checker implementation for a \CFA{}-style type system. 40 As the existence of an efficient compiler is necessary for the practical viability of a programming language, the contributions of this thesis comprise a validation of the \CFA{} language design that was previously lacking. 41 42 Though the direction and experimental validation of this work is fairly narrowly focused on the \CFA{} programming language, the tools used and results obtained should be of interest to a wider compiler and programming language design community. 43 In particular, with the addition of \emph{concepts} in \CCtwenty{}~\cite{C++Concepts}, conforming \CC{} compilers must support a model of type assertions very similar to that in \CFA{}, and the algorithmic techniques used here may prove useful. 44 Much of the difficulty of type-checking \CFA{} stems from the language design choice to allow overload selection from the context of a function call based on function return type in addition to the type of the arguments to the call; this feature allows the programmer to specify fewer redundant type annotations on functions that are polymorphic in their return type. 45 As an example in \CC{}: 46 \begin{C++} 47 template<typename T> T* zero() { return new T( 0 ); } 48 49 int* z = zero<int>(); $\C{// must specify int twice}$ 50 \end{C++} 51 52 \CFA{} allows !int* z = zero()!, which elides the second !int!. 53 While the !auto! keyword in \CCeleven{} supports similar inference in a limited set of contexts (\eg{} !auto z = zero<int>()!), the demonstration of the richer inference in \CFA{} raises possibilities for similar features in future versions of \CC{}. 54 By contrast to \CC{}, Java~8~\cite{Java8} and Scala~\cite{Scala} use comparably powerful forms of type inference to \CFA{}, so the algorithmic techniques in this thesis may be effective for those languages' compiler implementors. 55 Type environments are also widely modelled in compiler implementations, particularly for functional languages, though also increasingly commonly for other languages (such as Rust~\cite{Rust}) that perform type inference; the type environment presented here may be useful to those language implementors. 56 57 One area of inquiry that is outside the scope of this thesis is formalization of the \CFA{} type system. 58 Ditchfield~\cite{Ditchfield92} defined the $F_\omega^\ni$ polymorphic lambda calculus, which is the theoretical basis for the \CFA{} type system. 59 Ditchfield did not, however, prove any soundness or completeness properties for $F_\omega^\ni$; such proofs remain future work. 60 A number of formalisms other than $F_\omega^\ni$ could potentially be adapted to model \CFA{}. 61 One promising candidate is \emph{local type inference} \cite{Pierce00,Odersky01}, which describes similar contextual propagation of type information; another is the polymorphic conformity-based model of the Emerald~\cite{Black90} programming language, which defines a subtyping relation on types that is conceptually similar to \CFA{} traits. 62 These modelling approaches could potentially be used to extend an existing formal semantics for C such as Cholera \cite{Norrish98}, CompCert \cite{Leroy09}, or Formalin \cite{Krebbers14}. 39 Though the direction and validation of this work is fairly narrowly focused on the \CFA{} programming language, the tools used and results obtained should be of interest to a wider compiler and programming language design community. 40 In particular, with the addition of \emph{concepts} in \CCtwenty{} \cite{C++Concepts}, conforming \CC{} compilers must support a model of type assertions very similar to that in \CFA{}, and the algorithmic techniques used here may prove useful. 41 Type environments are also widely modelled in compiler implementations, particularly for functional languages, though also increasingly commonly for other languages (such as Rust) which perform type inference; the type environment presented here may be useful to those language implementers. -
doc/theses/aaron_moss_PhD/phd/macros.tex
r673cd63 rffe2fad 19 19 \newcommand{\etc}{\textit{etc.}\@} 20 20 \newcommand{\etal}{\textit{et~al.}\@} 21 \newcommand{\vs}{\textit{vs.}\@}22 21 23 22 \newcommand{\myset}[1]{\left\{#1\right\}} -
doc/theses/aaron_moss_PhD/phd/resolution-heuristics.tex
r673cd63 rffe2fad 2 2 \label{resolution-chap} 3 3 4 The main task of the \CFACC{} type-checker is \emph{expression resolution}: determining which declarations the identifiers in each expression correspond to. 5 Resolution is a straightforward task in C, as no simultaneously-visible declarations share identifiers, but in \CFA{}, the name overloading features discussed in Section~\ref{overloading-sec} generate multiple candidate declarations for each identifier. 4 % consider using "satisfaction" throughout when talking about assertions 5 % "valid" instead of "feasible" interpretations 6 7 The main task of the \CFACC{} type-checker is \emph{expression resolution}, determining which declarations the identifiers in each expression correspond to. 8 Resolution is a straightforward task in C, as no declarations share identifiers, but in \CFA{} the name overloading features discussed in Section~\ref{overloading-sec} generate multiple candidate declarations for each identifier. 6 9 A given matching between identifiers and declarations in an expression is an \emph{interpretation}; an interpretation also includes information about polymorphic type bindings and implicit casts to support the \CFA{} features discussed in Sections~\ref{poly-func-sec} and~\ref{implicit-conv-sec}, each of which increase the number of valid candidate interpretations. 7 10 To choose among valid interpretations, a \emph{conversion cost} is used to rank interpretations. 8 This conversion cost is summed over all subexpression interpretations in the interpretation of a top-level expression. 9 Hence, the expression resolution problem is to find the unique minimal-cost interpretation for an expression, reporting an error if no such unique interpretation exists. 11 Hence, the expression resolution problem is to find the unique minimal-cost interpretation for an expression, reporting an error if no such interpretation exists. 10 12 11 13 \section{Expression Resolution} 12 13 The expression resolution pass in \CFACC{} must traverse an input expression, match identifiers to available declarations, rank candidate interpretations according to their conversion cost, and check type assertion satisfaction for these candidates.14 Once the set of valid interpretations for the top-level expression is found, the expression resolver selects the unique minimal-cost candidate or reports an error.15 16 The expression resolution problem in \CFA{} is more difficult than the analogous problems in C or \CC{}.17 As mentioned above, the lack of name overloading in C (except for built-in operators) makes its resolution problem substantially easier.18 A comparison of the richer type systems in \CFA{} and \CC{} highlights some of the challenges in \CFA{} expression resolution.19 The key distinction between \CFA{} and \CC{} resolution is that \CC{} uses a greedy algorithm for selection of candidate functions given their argument interpretations, whereas \CFA{} allows contextual information from superexpressions to influence the choice among candidate functions.20 One key use of this contextual information is for type inference of polymorphic return types; \CC{} requires explicit specification of template parameters that only occur in a function's return type, while \CFA{} allows the instantiation of these type parameters to be inferred from context (and in fact does not allow explicit specification of type parameters to a function), as in the following example:21 22 \begin{cfa}23 forall(dtype T) T& deref(T*); $\C{// dereferences pointer}$24 forall(otype T) T* def(); $\C{// new heap-allocated default-initialized value}$25 26 int& i = deref( def() );27 \end{cfa}28 29 In this example, the \CFA{} compiler infers the type arguments of !deref! and !def! from the !int&! type of !i!; \CC{}, by contrast, requires a type parameter on !def!\footnote{The type parameter of \lstinline{deref} can be inferred from its argument.}, \ie{} !deref( def<int>() )!.30 Similarly, while both \CFA{} and \CC{} rank candidate functions based on a cost metric for implicit conversions, \CFA{} allows a suboptimal subexpression interpretation to be selected if it allows a lower-cost overall interpretation, while \CC{} requires that each subexpression interpretation have minimal cost.31 Because of this use of contextual information, the \CFA{} expression resolver must consider multiple interpretations of each function argument, while the \CC{} compiler has only a single interpretation for each argument\footnote{With the exception of address-of operations on functions.}.32 Additionally, until the introduction of concepts in \CCtwenty{} \cite{C++Concepts}, \CC{} expression resolution has no analogue to \CFA{} assertion satisfaction checking, a further complication for a \CFA{} compiler.33 The precise definition of \CFA{} expression resolution in this section further expands on the challenges of this problem.34 14 35 15 \subsection{Type Unification} … … 43 23 \subsection{Conversion Cost} \label{conv-cost-sec} 44 24 45 \CFA{}, like C, allows inexact matches between the type of function parameters and function call arguments. 46 Both languages insert \emph{implicit conversions} in these situations to produce an exact type match, and \CFA{} also uses the relative \emph{cost} of different conversions to select among overloaded function candidates. 47 C does not have an explicit cost model for implicit conversions, but the ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11} used to decide which arithmetic operators to apply define one implicitly. 25 C does not have an explicit cost model for implicit conversions, but the ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11} used to decide which arithmetic operators to use define one implicitly. 48 26 The only context in which C has name overloading is the arithmetic operators, and the usual arithmetic conversions define a \emph{common type} for mixed-type arguments to binary arithmetic operators. 49 27 Since for backward-compatibility purposes the conversion costs of \CFA{} must produce an equivalent result to these common type rules, it is appropriate to summarize \cite[\S{}6.3.1.8]{C11} here: 50 28 51 29 \begin{itemize} 52 \item If either operand is a floating-point type, the common type is the size of the largest floating-point type. If either operand is !_Complex!, the common type is also \linebreak!_Complex!.30 \item If either operand is a floating-point type, the common type is the size of the largest floating-point type. If either operand is !_Complex!, the common type is also !_Complex!. 53 31 \item If both operands are of integral type, the common type has the same size\footnote{Technically, the C standard defines a notion of \emph{rank} in \cite[\S{}6.3.1.1]{C11}, a distinct value for each \lstinline{signed} and \lstinline{unsigned} pair; integral types of the same size thus may have distinct ranks. For instance, though \lstinline{int} and \lstinline{long} may have the same size, \lstinline{long} always has greater rank. The standard-defined types are declared to have greater rank than any types of the same size added as compiler extensions.} as the larger type. 54 32 \item If the operands have opposite signedness, the common type is !signed! if the !signed! operand is strictly larger, or !unsigned! otherwise. If the operands have the same signedness, the common type shares it. … … 59 37 With more specificity, the cost is a lexicographically-ordered tuple, where each element corresponds to a particular kind of conversion. 60 38 In Bilson's design, conversion cost is a 3-tuple, $(unsafe, poly, safe)$, where $unsafe$ is the count of unsafe (narrowing) conversions, $poly$ is the count of polymorphic type bindings, and $safe$ is the sum of the degree of safe (widening) conversions. 61 Degree of safe conversion is calculated as path weight in a directed graph of safe conversions between types; Bilson's version of this graph is in Figure~\ref{bilson-conv-fig}.39 Degree of safe conversion is calculated as path weight in a directed graph of safe conversions between types; Bilson's version and the current version of this graph are in Figures~\ref{bilson-conv-fig} and~\ref{extended-conv-fig}, respectively. 62 40 The safe conversion graph is designed such that the common type $c$ of two types $u$ and $v$ is compatible with the C standard definitions from \cite[\S{}6.3.1.8]{C11} and can be calculated as the unique type minimizing the sum of the path weights of $\overrightarrow{uc}$ and $\overrightarrow{vc}$. 63 The following example lists the cost in the Bilson model of calling each of the following functions with two !int! parameters , where the interpretation with the minimum total cost will be selected:41 The following example lists the cost in the Bilson model of calling each of the following functions with two !int! parameters: 64 42 65 43 \begin{cfa} … … 95 73 96 74 \begin{cfa} 97 forall(dtype T | { T& ++?(T&); }) T& advance$\( _1\)$(T& i, int n);98 forall(dtype T | { T& ++?(T&); T& ?+=?(T&, int)}) T& advance$\( _2\)$(T& i, int n);75 forall(dtype T | { T& ++?(T&); }) T& advance$\(1\)$(T& i, int n); 76 forall(dtype T | { T& ++?(T&); T& ?+=?(T&, int)}) T& advance$\(2\)$(T& i, int n); 99 77 \end{cfa} 100 78 … … 110 88 111 89 \begin{cfa} 112 forall(otype T, otype U) void f$\(_1\)$(T, U); $\C[3. 125in]{// polymorphic}$113 forall(otype T) void f$\(_2\)$(T, T); $\C[3. 125in]{// less polymorphic}$114 forall(otype T) void f$\(_3\)$(T, int); $\C[3. 125in]{// even less polymorphic}$115 forall(otype T) void f$\(_4\)$(T*, int); $\C[3. 125in]{// least polymorphic}$90 forall(otype T, otype U) void f$\(_1\)$(T, U); $\C[3.25in]{// polymorphic}$ 91 forall(otype T) void f$\(_2\)$(T, T); $\C[3.25in]{// less polymorphic}$ 92 forall(otype T) void f$\(_3\)$(T, int); $\C[3.25in]{// even less polymorphic}$ 93 forall(otype T) void f$\(_4\)$(T*, int); $\C[3.25in]{// least polymorphic}$ 116 94 \end{cfa} 117 95 118 96 The new cost model accounts for the fact that functions with more polymorphic variables are less constrained by introducing a $var$ cost element that counts the number of type variables on a candidate function. 119 97 In the example above, !f!$_1$ has $var = 2$, while the others have $var = 1$. 120 121 98 The new cost model also accounts for a nuance unhandled by Ditchfield or Bilson, in that it makes the more specific !f!$_4$ cheaper than the more generic !f!$_3$; !f!$_4$ is presumably somewhat optimized for handling pointers, but the prior \CFA{} cost model could not account for the more specific binding, as it simply counted the number of polymorphic unifications. 99 122 100 In the modified model, each level of constraint on a polymorphic type in the parameter list results in a decrement of the $specialization$ cost element, which is shared with the count of assertions due to their common nature as constraints on polymorphic type bindings. 123 101 Thus, all else equal, if both a binding to !T! and a binding to !T*! are available, the model chooses the more specific !T*! binding with $specialization = -1$. … … 126 104 For multi-argument generic types, the least-specialized polymorphic parameter sets the specialization cost, \eg{} the specialization cost of !pair(T, S*)! is $-1$ (from !T!) rather than $-2$ (from !S!). 127 105 Specialization cost is not counted on the return type list; since $specialization$ is a property of the function declaration, a lower specialization cost prioritizes one declaration over another. 128 User programmers can choose between functions with varying parameter lists by adjusting the arguments, but the same is not true in general of varying return types\footnote{In particular, as described in Section~\ref{expr-cost-sec}, cast expressions take the cheapest valid and convert ible interpretation of the argument expression, and expressions are resolved as a cast to \lstinline{void}. As a result of this, including return types in the $specialization$ cost means that a function with return type \lstinline{T*} for some polymorphic type \lstinline{T} would \emph{always} be chosen over a function with the same parameter types returning \lstinline{void}, even for \lstinline{void} contexts, an unacceptably counter-intuitive result.}, so the return types are omitted from the $specialization$ element.106 User programmers can choose between functions with varying parameter lists by adjusting the arguments, but the same is not true in general of varying return types\footnote{In particular, as described in Section~\ref{expr-cost-sec}, cast expressions take the cheapest valid and convertable interpretation of the argument expression, and expressions are resolved as a cast to \lstinline{void}. As a result of this, including return types in the $specialization$ cost means that a function with return type \lstinline{T*} for some polymorphic type \lstinline{T} would \emph{always} be chosen over a function with the same parameter types returning \lstinline{void}, even for \lstinline{void} contexts, an unacceptably counter-intuitive result.}, so the return types are omitted from the $specialization$ element. 129 107 Since both $vars$ and $specialization$ are properties of the declaration rather than any particular interpretation, they are prioritized less than the interpretation-specific conversion costs from Bilson's original 3-tuple. 130 108 … … 135 113 In the redesign, for consistency with the approach of the usual arithmetic conversions, which select a common type primarily based on size, but secondarily on sign, arcs in the new graph are annotated with whether they represent a sign change, and such sign changes are summed in a new $sign$ cost element that lexicographically succeeds $safe$. 136 114 This means that sign conversions are approximately the same cost as widening conversions, but slightly more expensive (as opposed to less expensive in Bilson's graph), so maintaining the same signedness is consistently favoured. 137 This refined conversion graph is shown in Figure~\ref{extended-conv-fig}.138 115 139 116 With these modifications, the current \CFA{} cost tuple is as follows: … … 145 122 \subsection{Expression Cost} \label{expr-cost-sec} 146 123 147 The mapping from \CFA{} expressions to cost tuples is described by Bilson in \cite{Bilson03}, and remains effectively unchanged with the exception ofthe refinements to the cost tuple described above.124 The mapping from \CFA{} expressions to cost tuples is described by Bilson in \cite{Bilson03}, and remains effectively unchanged modulo the refinements to the cost tuple described above. 148 125 Nonetheless, some salient details are repeated here for the sake of completeness. 149 126 … … 152 129 In terms of the core argument-parameter matching algorithm, overloaded variables are handled the same as zero-argument function calls, aside from a different pool of candidate declarations and setup for different code generation. 153 130 Similarly, an aggregate member expression !a.m! can be modelled as a unary function !m! that takes one argument of the aggregate type. 154 Literals do not require sophisticated resolution, as in C the syntactic form of each implies their result types : !42! is !int!, !"hello"! is !char*!, \etc{}\footnote{Struct literals (\eg{} \lstinline|(S)\{ 1, 2, 3 \}| for some struct \lstinline{S}) are a somewhat special case, as they are known to be of type \lstinline{S}, but require resolution of the implied constructor call described in Section~\ref{ctor-sec}.}.131 Literals do not require sophisticated resolution, as in C the syntactic form of each implies their result types (!42! is !int!, !"hello"! is !char*!, \etc{}), though struct literals (\eg{} !(S){ 1, 2, 3 }! for some struct !S!) require resolution of the implied constructor call. 155 132 156 133 Since most expressions can be treated as function calls, nested function calls are the primary component of complexity in expression resolution. … … 170 147 \end{cfa} 171 148 172 Considered independently, !g!$_1$!(42)! is the cheapest interpretation of !g(42)!, with cost $(0,0,0,0,0,0 ,0)$ since the argument type is an exact match.173 However, in context, an unsafe conversion is required to downcast the return type of !g!$_1$ to an !int! suitable for !f!, for a total cost of $(1,0,0,0,0,0 ,0)$ for !f( g!$_1$!(42) )!.174 If !g!$_2$ is chosen, on the other hand, there is a safe upcast from the !int! type of !42! to !long!, but no cast on the return of !g!$_2$, for a total cost of $(0,0,1,0,0,0 ,0)$ for !f( g!$_2$!(42) )!; as this is cheaper, !g!$_2$ is chosen.175 Due to this design, all valid interpretations of subexpressions must in general be propagated to the top of the expression tree before any can be eliminated, a lazy form of expression resolution, as opposed to the eager expression resolution allowed by C or \CC{}, where each expression can be resolved given only the resolution of its immediate subexpressions.149 Considered independently, !g!$_1$!(42)! is the cheapest interpretation of !g(42)!, with cost $(0,0,0,0,0,0)$ since the argument type is an exact match. 150 However, in context, an unsafe conversion is required to downcast the return type of !g!$_1$ to an !int! suitable for !f!, for a total cost of $(1,0,0,0,0,0)$ for !f( g!$_1$!(42) )!. 151 If !g!$_2$ is chosen, on the other hand, there is a safe upcast from the !int! type of !42! to !long!, but no cast on the return of !g!$_2$, for a total cost of $(0,0,1,0,0,0)$ for !f( g!$_2$!(42) )!; as this is cheaper, !g!$_2$ is chosen. 152 Due to this design, all valid interpretations of subexpressions must in general be propagated to the top of the expression tree before any can be eliminated, a lazy form of expression resolution, as opposed to the eager expression resolution allowed by C, where each expression can be resolved given only the resolution of its immediate subexpressions. 176 153 177 154 If there are no valid interpretations of the top-level expression, expression resolution fails and must produce an appropriate error message. … … 200 177 \end{cfa} 201 178 202 In C semantics, this example is unambiguously upcasting !32! to !unsigned long long!, performing the shift, then downcasting the result to !unsigned!, at cost $(1,0,3,1,0,0,0)$.179 In C semantics, this example is unambiguously upcasting !32! to !unsigned long long!, performing the shift, then downcasting the result to !unsigned!, at total cost $(1,0,3,1,0,0,0)$. 203 180 If ascription were allowed to be a first-class interpretation of a cast expression, it would be cheaper to select the !unsigned! interpretation of !?>>?! by downcasting !x! to !unsigned! and upcasting !32! to !unsigned!, at a total cost of $(1,0,1,1,0,0,0)$. 204 However, this break from C semantics is not backwards compatib le, so to maintain C compatibility, the \CFA{} resolver selects the lowest-cost interpretation of the cast argument for which a conversion or coercion to the target type exists (upcasting to !unsigned long long! in the example above, due to the lack of unsafe downcasts), using the cost of the conversion itself only as a tie-breaker.181 However, this break from C semantics is not backwards compatibile, so to maintain C compatibility, the \CFA{} resolver selects the lowest-cost interpretation of the cast argument for which a conversion or coercion to the target type exists (upcasting to !unsigned long long! in the example above, due to the lack of unsafe downcasts), using the cost of the conversion itself only as a tie-breaker. 205 182 For example, in !int x; double x; (int)x;!, both declarations have zero-cost interpretations as !x!, but the !int x! interpretation is cheaper to cast to !int!, and is thus selected. 206 183 Thus, in contrast to the lazy resolution of nested function-call expressions discussed above, where final interpretations for each subexpression are not chosen until the top-level expression is reached, cast expressions introduce eager resolution of their argument subexpressions, as if that argument was itself a top-level expression. … … 269 246 270 247 Pruning possible interpretations as early as possible is one way to reduce the real-world cost of expression resolution, provided that a sufficient proportion of interpretations are pruned to pay for the cost of the pruning algorithm. 271 One opportunity for interpretation pruning is by the argument-parameter type matching, but the literature \cite{Baker82,Bilson03,Cormack81,Ganzinger80,Pennello80,PW:overload}provides no clear answers on whether candidate functions should be chosen according to their available arguments, or whether argument resolution should be driven by the available function candidates.248 One opportunity for interpretation pruning is by the argument-parameter type matching, but the literature provides no clear answers on whether candidate functions should be chosen according to their available arguments, or whether argument resolution should be driven by the available function candidates. 272 249 For programming languages without implicit conversions, argument-parameter matching is essentially the entirety of the expression resolution problem, and is generally referred to as ``overload resolution'' in the literature. 273 250 All expression-resolution algorithms form a DAG of interpretations, some explicitly, some implicitly; in this DAG, arcs point from function-call interpretations to argument interpretations, as in Figure~\ref{res-dag-fig} … … 310 287 This approach of filtering out invalid types is unsuited to \CFA{} expression resolution, however, due to the presence of polymorphic functions and implicit conversions. 311 288 312 Some other language designs solve the matching problem by forcing a bottom-up order.313 \CC{}, for instance, defines its overload-selection algorithm in terms of a partial order between function overloads given a fixed list of argument candidates, implying that the arguments must be selected before the function.314 This design choice improves worst-case expression resolution time by only propagating a single candidate for each subexpression, but type annotations must be provided for any function call that is polymorphic in its return type, and these annotations are often redundant:315 316 \begin{C++}317 template<typename T> T* malloc() { /* ... */ }318 319 int* p = malloc<int>(); $\C{// T = int must be explicitly supplied}$320 \end{C++}321 322 \CFA{} saves programmers from redundant annotations with its richer inference:323 324 \begin{cfa}325 forall(dtype T | sized(T)) T* malloc();326 327 int* p = malloc(); $\C{// Infers T = int from left-hand side}$328 \end{cfa}329 330 289 Baker~\cite{Baker82} left empirical comparison of different overload resolution algorithms to future work; Bilson~\cite{Bilson03} described an extension of Baker's algorithm to handle implicit conversions and polymorphism, but did not further explore the space of algorithmic approaches to handle both overloaded names and implicit conversions. 331 290 This thesis closes that gap in the literature by performing performance comparisons of both top-down and bottom-up expression resolution algorithms, with results reported in Chapter~\ref{expr-chap}. … … 335 294 The assertion satisfaction algorithm designed by Bilson~\cite{Bilson03} for the original \CFACC{} is the most-relevant prior work to this project. 336 295 Before accepting any subexpression candidate, Bilson first checks that that candidate's assertions can all be resolved; this is necessary due to Bilson's addition of assertion satisfaction costs to candidate costs (discussed in Section~\ref{conv-cost-sec}). 337 If this subexpression interpretation ends up not being used in the final resolution, th en the (sometimes substantial) work of checking its assertions ends up wasted.296 If this subexpression interpretation ends up not being used in the final resolution, than the (sometimes substantial) work of checking its assertions ends up wasted. 338 297 Bilson's assertion checking function recurses on two lists, !need! and !newNeed!, the current declaration's assertion set and those implied by the assertion-satisfying declarations, respectively, as detailed in the pseudo-code below (ancillary aspects of the algorithm are omitted for clarity): 339 298 … … 401 360 During the course of checking the assertions of a single top-level expression, the results are cached for each assertion checked. 402 361 The search key for this cache is the assertion declaration with its type variables substituted according to the type environment to distinguish satisfaction of the same assertion for different types. 403 This adjusted assertion declaration is then run through the \CFA{} name-mangling algorithm to produce an equivalent string-type key. 404 405 One superficially-promising optimization, which I did not pursue, is caching assertion-satisfaction judgments among top-level expressions. 406 This approach would be difficult to correctly implement in a \CFA{} compiler, due to the lack of a closed set of operations for a given type. 407 New declarations related to a particular type can be introduced in any lexical scope in \CFA{}, and these added declarations may cause an assertion that was previously satisfiable to fail due to an introduced ambiguity. 408 Furthermore, given the recursive nature of assertion satisfaction and the possibility of this satisfaction judgment depending on an inferred type, an added declaration may break satisfaction of an assertion with a different name and that operates on different types. 409 Given these concerns, correctly invalidating a cross-expression assertion satisfaction cache for \CFA{} is a non-trivial problem, and the overhead of such an approach may possibly outweigh any benefits from such caching. 362 This adjusted assertion declaration is then run through the \CFA{} name-mangling algorithm to produce an equivalent string-type key. 410 363 411 364 The assertion satisfaction aspect of \CFA{} expression resolution bears some similarity to satisfiability problems from logic, and as such other languages with similar trait and assertion mechanisms make use of logic-program solvers in their compilers. … … 413 366 The combination of the assertion satisfaction elements of the problem with the conversion-cost model of \CFA{} makes this logic-solver approach difficult to apply in \CFACC{}, however. 414 367 Expressing assertion resolution as a satisfiability problem ignores the cost optimization aspect, which is necessary to decide among what are often many possible satisfying assignments of declarations to assertions. 415 (MaxSAT solvers \cite{Morgado13}, which allow weights on solutions to satisfiability problems, may be a productive avenue for future investigation.)416 368 On the other hand, the deeply-recursive nature of the satisfiability problem makes it difficult to adapt to optimizing solver approaches such as linear programming. 417 369 To maintain a well-defined programming language, any optimization algorithm used must provide an exact (rather than approximate) solution; this constraint also rules out a whole class of approximately-optimal generalized solvers. … … 429 381 The main challenge to implement this approach in \CFACC{} is applying the implicit conversions generated by the resolution process in the code-generation for the thunk functions that \CFACC{} uses to pass type assertions to their requesting functions with the proper signatures. 430 382 431 One \CFA{} feature that could be added to improve the ergonomics of overload selection is an \emph{ascription cast}; as discussed in Section~\ref{expr-cost-sec}, the semantics of the C cast operator are to choose the cheapest argument interpretation which is convertible to the target type, using the conversion cost as a tie-breaker. 432 An ascription cast would reverse these priorities, choosing the argument interpretation with the cheapest conversion to the target type, only using interpretation cost to break ties\footnote{A possible stricter semantics would be to select the cheapest interpretation with a zero-cost conversion to the target type, reporting a compiler error otherwise.}. 433 This would allow ascription casts to the desired return type to be used for overload selection: 434 435 \begin{cfa} 436 int f$\(_1\)$(int); 437 int f$\(_2\)$(double); 438 int g$\(_1\)$(int); 439 double g$\(_2\)$(long); 440 441 f((double)42); $\C[4.5in]{// select f\(_2\) by argument cast}$ 442 (as double)g(42); $\C[4.5in]{// select g\(_2\) by return ascription cast}$ 443 (double)g(42); $\C[4.5in]{// select g\(_1\) NOT g\(_2\) because of parameter conversion cost}$ 444 \end{cfa} 445 446 Though performance of the existing resolution algorithms is promising, some further optimizations do present themselves. 383 Though performance of the existing algorithms is promising, some further optimizations do present themselves. 447 384 The refined cost model discussed in Section~\ref{conv-cost-sec} is more expressive, but requires more than twice as many fields; it may be fruitful to investigate more tightly-packed in-memory representations of the cost-tuple, as well as comparison operations that require fewer instructions than a full lexicographic comparison. 448 385 Integer or vector operations on a more-packed representation may prove effective, though dealing with the negative-valued $specialization$ field may require some effort. -
doc/theses/aaron_moss_PhD/phd/thesis.tex
r673cd63 rffe2fad 37 37 % Use the "hyperref" package 38 38 % N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE 39 %\usepackage[pdftex,pagebackref=false]{hyperref} % with basic options \40 \usepackage[ breaklinks,pagebackref=false]{hyperref}39 %\usepackage[pdftex,pagebackref=false]{hyperref} % with basic options 40 \usepackage[pagebackref=false]{hyperref} 41 41 % N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing. 42 42 -
doc/theses/aaron_moss_PhD/phd/type-environment.tex
r673cd63 rffe2fad 12 12 For purposes of this chapter, a \emph{type environment} $T$ is a set of \emph{type classes} $\myset{T_1, T_2, \cdots, T_{|T|}}$. 13 13 Each type class $T_i$ contains a set of \emph{type variables} $\myset{v_{i,1}, v_{i,2}, \cdots, v_{i,|T_i|}}$. 14 Since the type classes represent an equivalence relation over the type variables the sets of variables contained in two distinct classes in the same environment must be \emph{disjoint}. 15 Each individual type class $T_i$ may also be associated with a \emph{bound}, $b_i$; this bound contains the \emph{bound type} that the variables in the type class are replaced with, but also includes other information in \CFACC{}, including whether type conversions are permissible on the bound type and what sort of type variables are contained in the class (data types, function types, or variadic tuples). 16 17 The following example demonstrates the use of a type environment for unification: 18 19 \begin{cfa} 20 forall(otype F) F f(F, F); 21 forall(otype G) G g(G); 22 23 f( g(10), g(20) ); 24 \end{cfa} 25 26 Expression resolution starts from an empty type environment; from this empty environment, the calls to !g! can be independently resolved. 27 These resolutions result in two new type environments, $T = \{ \myset{\mathsf{G}_1} \rightarrow$ !int!$\}$ and $T' = \{ \myset{\mathsf{G}_2} \rightarrow$ !int!$\}$; the calls to !g! have generated distinct type variables !G!$_1$ and !G!$_2$, each bound to !int! by unification with the type of its argument (!10! and !20!, both !int!). 28 To complete resolution of the call to !f!, both environments must be combined; resolving the first argument to !f! produces a new type environment $T'' = \{ \myset{\mathsf{G}_1, \mathsf{F}_1} \rightarrow$ !int!$\}$: the new type variable !F!$_1$ has been introduced and unified with !G!$_1$ (the return type of !g(10)!), and consequently bound to !int!. 29 To resolve the second argument to !f!, $T''$ must be checked for compatibility with $T'$; since !F!$_1$ unifies with !G!$_2$, their type classes must be merged. 30 Since both !F!$_1$ and !G!$_2$ are bound to !int!, this merge succeeds, producing the final environment $T'' = \{ \myset{\mathsf{G}_1, \mathsf{F}_1, \mathsf{G}_2} \rightarrow$ !int!$\}$. 14 Since the type classes represent an equivalence relation over the type variables the sets of variables contained in two distinct classes in the same environment must be disjoint. 15 Each individual type class $T_i$ may also be associated with a \emph{bound}, $b_i$; this bound contains the \emph{bound type} that the variables in the type class are replaced with, but also includes other information in \CFACC{}, including whether type conversions are permissible on the bound type and what sort of type variables are contained in the class (data types, function types, or variadic tuples). 31 16 32 17 \begin{table} … … 52 37 \end{table} 53 38 54 Type environments in \CFACC{} need to support eleven basic operations, summarized in Table~\ref{env-op-table}.39 Given this basic structure, type environments in \CFACC{} need to support eleven basic operations, summarized in Table~\ref{env-op-table}. 55 40 The first six operations are straightforward queries and updates on these data structures: 56 41 The lookup operation $find(T, v_{i,j})$ produces $T_i$, the type class in $T$ that contains variable $v_{i,j}$, or an invalid sentinel value for no such class. 57 42 The other two query operations act on type classes, where $report(T_i)$ produces the set $\myset{v_{i,1}, v_{i,2}, \cdots, v_{i,|T_i|}}$ of all type variables in a class $T_i$ and $bound(T_i)$ produces the bound $b_i$ of that class, or a sentinel indicating no bound is set. 58 43 59 The update operation $insert(T, v_{i,1})$ creates a new type class $T_i$ in $T$ that contains only the variable $v_{i,1}$ and no bound; due to the disjointness property, $v_{i,1}$ mustnot belong to any other type class in $T$.44 The update operation $insert(T, v_{i,1})$ creates a new type class $T_i$ in $T$ that contains only the variable $v_{i,1}$ and no bound; due to the disjointness property, $v_{i,1}$ cannot belong to any other type class in $T$. 60 45 The $add(T_i, v_{i,j})$ operation adds a new type variable $v_{i,j}$ to class $T_i$; again, $v_{i,j}$ cannot exist elsewhere in $T$. 61 46 $bind(T_i, b_i)$ mutates the bound for a type class, setting or updating the current bound. … … 90 75 \subsection{Na\"{\i}ve} \label{naive-env-sec} 91 76 92 The type environment data structure used in Bilson's~\cite{Bilson03} original implementation of \CFACC{} is a s impletranslation of the definitions in Section~\ref{env-defn-sec} to \CC{} code; a !TypeEnvironment! contains a list of !EqvClass! type equivalence classes, each of which contains the type bound information and a tree-based sorted set of type variables.77 The type environment data structure used in Bilson's~\cite{Bilson03} original implementation of \CFACC{} is a straightforward translation of the definitions in Section~\ref{env-defn-sec} to \CC{} code; a !TypeEnvironment! contains a list of !EqvClass! type equivalence classes, each of which contains the type bound information and a tree-based sorted set of type variables. 93 78 This approach has the benefit of being easy to understand and not imposing life-cycle or inheritance constraints on its use, but, as can be seen in Table~\ref{env-bounds-table}, does not support many of the desired operations with any particular efficiency. 94 79 Some variations on this structure may improve performance somewhat; for instance, replacing the !EqvClass! variable storage with a hash-based set reduces search and update times from $O(\log n)$ to amortized $O(1)$, while adding an index for the type variables in the entire environment removes the need to check each type class individually to maintain the disjointness property. … … 121 106 In particular, the type-class bound cannot be easily included in the union-find data structure, as the requirement to make it the class representative breaks the balancing properties of $union$, and requires too-close integration of the type environment $unifyBound$ internal operation. 122 107 This issue can be solved by including a side map from class representatives to the type-class bound. 123 If placeholder values are inserted in this map for type classes without bounds th en this also has the useful property that the key set of the map provides an easily obtainable list of all the class representatives, a list which cannot be derived from the union-find data structure without a linear search for class representatives through all elements.108 If placeholder values are inserted in this map for type classes without bounds than this also has the useful property that the key set of the map provides an easily obtainable list of all the class representatives, a list which cannot be derived from the union-find data structure without a linear search for class representatives through all elements. 124 109 125 110 \subsection{Union-Find with Classes} \label{env-union-find-classes-approach} … … 259 244 The na\"{\i}ve $combine$ operation must traverse each of the classes of one environment, merging in any class of the other environment that shares a type variable. 260 245 Since there are at most $n$ classes to unify, the unification cost is $O(nm + nu(n))$, while traversal and $find$ costs to locate classes to merge total $O(n^2m)$, for an overall cost of $O(n^2m + nu(n))$. 261 The incremental $combine$ operation works similarly, but only needs to consider classes modified in either environment with respect to the common ancestor of both environments, allowing the $n$ cost terms to be substituted for $p$, for an overall cost of $O(p^ 2m + pu(n))$.246 The incremental $combine$ operation works similarly, but only needs to consider classes modified in either environment with respect to the common ancestor of both environments, allowing the $n$ cost terms to be substituted for $p$, for an overall cost of $O(p^m + pu(n))$. 262 247 Neither variant supports the $split$ operation to undo a $unify$. 263 248 … … 297 282 This persistent union-find data structure is efficient, but not thread-safe; as suggested in Section~\ref{resn-conclusion-sec}, it may be valuable to parallelize the \CFA{} expression resolver. 298 283 However, allowing multiple threads concurrent access to the persistent data structure is likely to result in ``reroot thrashing'', as different threads reroot the data structure to their own versions of interest. 299 This contention could be mitigated by partitioning the data structure into separate subtrees for each thread, with each subtree having its own root node, and the boundaries among them implemented with a lock-equipped !ThreadBoundary! edit node. 300 Alternatively, the concurrent hash trie of Prokopec \etal{} \cite{Prokopec11,Prokopec12} may be a useful hash-table replacement. 284 This contention could be mitigated by partitioning the data structure into separate subtrees for each thread, with each subtree having its own root node, and the boundaries among them implemented with a lock-equipped !ThreadBoundary! edit node. -
libcfa/configure
r673cd63 rffe2fad 2959 2959 case $CONFIGURATION in 2960 2960 "debug" ) 2961 CONFIG_CFLAGS="-O g-g"2961 CONFIG_CFLAGS="-O0 -g" 2962 2962 CONFIG_CFAFLAGS="-debug" 2963 2963 CONFIG_BUILDLIB="yes" 2964 2964 ;; 2965 2965 "nodebug" ) 2966 CONFIG_CFLAGS="-O 3-s"2966 CONFIG_CFLAGS="-O2 -s" 2967 2967 CONFIG_CFAFLAGS="-nodebug" 2968 2968 CONFIG_BUILDLIB="yes" 2969 2969 ;; 2970 2970 "nolib" ) 2971 CONFIG_CFLAGS="-O 3-s"2971 CONFIG_CFLAGS="-O2 -s" 2972 2972 CONFIG_CFAFLAGS="-nolib" 2973 2973 CONFIG_BUILDLIB="no" 2974 ;;2975 "profile" )2976 CONFIG_CFLAGS="-O3 -g -fno-omit-frame-pointer"2977 CONFIG_CFAFLAGS="-nodebug"2978 CONFIG_BUILDLIB="yes"2979 2974 ;; 2980 2975 *) -
libcfa/configure.ac
r673cd63 rffe2fad 45 45 case $CONFIGURATION in 46 46 "debug" ) 47 CONFIG_CFLAGS="-O g-g"47 CONFIG_CFLAGS="-O0 -g" 48 48 CONFIG_CFAFLAGS="-debug" 49 49 CONFIG_BUILDLIB="yes" 50 50 ;; 51 51 "nodebug" ) 52 CONFIG_CFLAGS="-O 3-s"52 CONFIG_CFLAGS="-O2 -s" 53 53 CONFIG_CFAFLAGS="-nodebug" 54 54 CONFIG_BUILDLIB="yes" 55 55 ;; 56 56 "nolib" ) 57 CONFIG_CFLAGS="-O 3-s"57 CONFIG_CFLAGS="-O2 -s" 58 58 CONFIG_CFAFLAGS="-nolib" 59 59 CONFIG_BUILDLIB="no" 60 ;;61 "profile" )62 CONFIG_CFLAGS="-O3 -g -fno-omit-frame-pointer"63 CONFIG_CFAFLAGS="-nodebug"64 CONFIG_BUILDLIB="yes"65 60 ;; 66 61 *) -
libcfa/src/stdlib.hfa
r673cd63 rffe2fad 10 10 // Created On : Thu Jan 28 17:12:35 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Apr 24 17:35:43 201913 // Update Count : 3 5212 // Last Modified On : Mon Dec 17 15:37:45 2018 13 // Update Count : 346 14 14 // 15 15 … … 40 40 } // malloc 41 41 42 // T & malloc( void ) { 43 // int & p = *(T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc 44 // printf( "& malloc %p\n", &p ); 45 // return p; 46 // // return (T &)*(T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc 47 // } // malloc 48 42 49 T * calloc( size_t dim ) { 43 50 return (T *)(void *)calloc( dim, sizeof(T) ); // C calloc … … 69 76 T * alloc( char fill ) { 70 77 T * ptr = (T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc 71 return (T *)memset( ptr, (int)fill, sizeof(T) ); // initializewith fill value78 return (T *)memset( ptr, (int)fill, sizeof(T) ); // initial with fill value 72 79 } // alloc 73 80 … … 77 84 78 85 T * alloc( size_t dim, char fill ) { 79 T * ptr = (T *)(void *)malloc( dim * (size_t)sizeof(T) ); // C calloc80 return (T *)memset( ptr, (int)fill, dim * sizeof(T) ); // initializewith fill value86 T * ptr = (T *)(void *)malloc( dim * (size_t)sizeof(T) ); // C malloc 87 return (T *)memset( ptr, (int)fill, dim * sizeof(T) ); // initial with fill value 81 88 } // alloc 82 89 -
src/Makefile.am
r673cd63 rffe2fad 56 56 ___driver_cfa_cpp_LDADD = -ldl # yywrap 57 57 58 if WITH_LIBPROFILER 59 ___driver_cfa_cpp_LDADD += -lprofiler 60 endif 61 62 if WITH_LIBTCMALLOC 63 ___driver_cfa_cpp_LDADD += -ltcmalloc 64 endif 65 66 AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O3 -g -std=c++14 58 AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O2 -g -std=c++14 67 59 AM_LDFLAGS = @HOST_FLAGS@ -Xlinker -export-dynamic 68 60 ARFLAGS = cr -
src/Makefile.in
r673cd63 rffe2fad 651 651 ___driver_cfa_cpp_SOURCES = $(SRC) 652 652 ___driver_cfa_cpp_LDADD = -ldl # yywrap 653 AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O 3-g -std=c++14653 AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O2 -g -std=c++14 654 654 AM_LDFLAGS = @HOST_FLAGS@ -Xlinker -export-dynamic 655 655 ARFLAGS = cr … … 1526 1526 @rm BasicTypes-gen 1527 1527 1528 @WITH_LIBPROFILER_TRUE@ ___driver_cfa_cpp_LDADD += -lprofiler1529 1530 @WITH_LIBTCMALLOC_TRUE@ ___driver_cfa_cpp_LDADD += -ltcmalloc1531 1532 1528 # Tell versions [3.59,3.63) of GNU make to not export all variables. 1533 1529 # Otherwise a system limit (for SysV at least) may be exceeded.
Note:
See TracChangeset
for help on using the changeset viewer.