# Changeset 673cd63

Timestamp:
Apr 26, 2019, 4:59:48 PM
Branches:
arm-eh, cleanup-dtors, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr
Children:
5b11c25
Parents:
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Message:

Merge branch 'master' into ctxswitch

Files:
25 edited

• ## configure

 rffe2fad

    LIBOBJS CFA_BACKEND_CC WITH_LIBTCMALLOC_FALSE WITH_LIBTCMALLOC_TRUE
    WITH_LIBPROFILER_FALSE WITH_LIBPROFILER_TRUE WITH_LIBFIBRE_FALSE WITH_LIBFIBRE_TRUE

    "debug") ;;
    "nolib") ;;
    "profile") ;;
    *) >&2 echo "Configuration must be 'debug', 'nodebug' or 'nolib'"

    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ProfilingIsEnabledForAllThreads in -lprofiler" >&5
    $as_echo_n "checking for ProfilingIsEnabledForAllThreads in -lprofiler... " >&6; }
    if ${ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads+:} false; then :
      $as_echo_n "(cached) " >&6
    else
      ac_check_lib_save_LIBS=$LIBS
      LIBS="-lprofiler $LIBS"
      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    /* end confdefs.h.  */

    /* Override any GCC internal prototype to avoid an error.
       Use char because int might match the return type of a GCC
       builtin and then its argument prototype would still apply.  */
    #ifdef __cplusplus
    extern "C"
    #endif
    char ProfilingIsEnabledForAllThreads ();
    int main () {
      return ProfilingIsEnabledForAllThreads ();
      ;
      return 0;
    }
    _ACEOF
    if ac_fn_c_try_link "$LINENO"; then :
      ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads=yes
    else
      ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads=no
    fi
    rm -f core conftest.err conftest.$ac_objext \
        conftest$ac_exeext conftest.$ac_ext
    LIBS=$ac_check_lib_save_LIBS
    fi
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" >&5
    $as_echo "$ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" >&6; }
    if test "x$ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" = xyes; then :
      HAVE_LIBPROFILER=1
    else
      HAVE_LIBPROFILER=0
    fi
    if test "$HAVE_LIBPROFILER" -eq 1; then
      WITH_LIBPROFILER_TRUE=
      WITH_LIBPROFILER_FALSE='#'
    else
      WITH_LIBPROFILER_TRUE='#'
      WITH_LIBPROFILER_FALSE=
    fi

    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for malloc in -ltcmalloc" >&5
    $as_echo_n "checking for malloc in -ltcmalloc... " >&6; }
    if ${ac_cv_lib_tcmalloc_malloc+:} false; then :
      $as_echo_n "(cached) " >&6
    else
      ac_check_lib_save_LIBS=$LIBS
      LIBS="-ltcmalloc $LIBS"
      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    /* end confdefs.h.  */

    /* Override any GCC internal prototype to avoid an error.
       Use char because int might match the return type of a GCC
       builtin and then its argument prototype would still apply.  */
    #ifdef __cplusplus
    extern "C"
    #endif
    char malloc ();
    int main () {
      return malloc ();
      ;
      return 0;
    }
    _ACEOF
    if ac_fn_c_try_link "$LINENO"; then :
      ac_cv_lib_tcmalloc_malloc=yes
    else
      ac_cv_lib_tcmalloc_malloc=no
    fi
    rm -f core conftest.err conftest.$ac_objext \
        conftest$ac_exeext conftest.$ac_ext
    LIBS=$ac_check_lib_save_LIBS
    fi
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tcmalloc_malloc" >&5
    $as_echo "$ac_cv_lib_tcmalloc_malloc" >&6; }
    if test "x$ac_cv_lib_tcmalloc_malloc" = xyes; then :
      HAVE_LIBTCMALLOC=1
    else
      HAVE_LIBTCMALLOC=0
    fi
    if test "$HAVE_LIBTCMALLOC" -eq 1; then
      WITH_LIBTCMALLOC_TRUE=
      WITH_LIBTCMALLOC_FALSE='#'
    else
      WITH_LIBTCMALLOC_TRUE='#'
      WITH_LIBTCMALLOC_FALSE=
    fi

    # Checks for header files.
    for ac_header in libintl.h malloc.h unistd.h

    if test -z "${WITH_LIBFIBRE_TRUE}" && test -z "${WITH_LIBFIBRE_FALSE}"; then
      as_fn_error $? "conditional \"WITH_LIBFIBRE\" was never defined.
    Usually this means the macro was only invoked conditionally." "$LINENO" 5
    fi
    if test -z "${WITH_LIBPROFILER_TRUE}" && test -z "${WITH_LIBPROFILER_FALSE}"; then
      as_fn_error $? "conditional \"WITH_LIBPROFILER\" was never defined.
    Usually this means the macro was only invoked conditionally." "$LINENO" 5
    fi
    if test -z "${WITH_LIBTCMALLOC_TRUE}" && test -z "${WITH_LIBTCMALLOC_FALSE}"; then
      as_fn_error $? "conditional \"WITH_LIBTCMALLOC\" was never defined.
    Usually this means the macro was only invoked conditionally." "$LINENO" 5
    fi

• ## doc/theses/aaron_moss_PhD/phd/generic-bench.tex

 rffe2fad \chapter{Generic Stack Benchmarks} \label{generic-bench-app}

This appendix includes the generic stack code for all four language variants discussed in Section~\ref{generic-performance-sec}. Throughout, !/***/! designates a counted redundant type annotation; these include !sizeof! on a known type, repetition of a type name in initialization or return statements, and type-specific helper functions. The code is reformatted slightly for brevity.

\section{C}
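To make the marker concrete, a small illustrative fragment (not taken from the appendix itself; !new_node! is a hypothetical type-specific helper):

    struct node * n = malloc( sizeof(struct node) /***/ );  // sizeof on a known type
    struct node * m = new_node( n->value ) /***/;           // type-specific helper function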
• ## doc/theses/aaron_moss_PhD/phd/generic-types.tex

 rffe2fad Figure fragments (the C !void*!-based and macro-based generic lists):

    int int_list_head( const struct int_list* ls ) { return ls->value; }
    // all code must be duplicated for every generic instantiation
    struct string_list { const char* value; struct string_list* next; };
    { return ls->value; }
    // use is efficient and idiomatic
    int main() {

    struct list { void* value; struct list* next; };
    // internal memory management requires helper functions
    void list_insert( struct list** ls, void* x, void* (*copy)(void*) ) {
    void* list_head( const struct list* ls ) { return ls->value; }
    // helpers duplicated per type
    void* int_copy(void* x) {

    #include <stdio.h>  $\C{// for printf}$
    // code is nested in macros
    #define list(N) N ## _list
    define_list(string, const char*); $\C[3in]{// defines string\_list}$
    // use is efficient, but syntactically idiosyncratic
    int main() {
    \end{figure}

\CC{}, Java, and other languages use \emph{generic types} (or \emph{parameterized types}) to produce type-safe abstract data types. Design and implementation of generic types for \CFA{} is the first major contribution of this thesis, a summary of which is published in \cite{Moss18} and on which this chapter is closely based. \CFA{} generic types integrate efficiently and naturally with the existing polymorphic functions in \CFA{}, while retaining backward compatibility with C in layout and support for separate compilation. A generic type can be declared in \CFA{} by placing a !forall! specifier on a !struct! or !union! declaration, and instantiated using a parenthesized list of types after the generic name:

    forall(otype T) struct list { T value; list(T)* next; };
    // single polymorphic implementation of each function
    // overloading reduces need for namespace prefixes
    forall(otype T) void insert( list(T)** ls, T x ) {
    forall(otype T) T head( const list(T)* ls ) { return ls->value; }
    // use is clear and efficient
    int main() {

A key insight for this design is that C already possesses a handful of built-in generic types (\emph{derived types} in the language of the standard \cite[\S{}6.2.5]{C11}), notably pointer (!T*!) and array (!T[]!), and that user-definable generics should act similarly.
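A brief usage sketch of the generic !list! declared above (illustrative only, assuming the !insert! and !head! signatures shown):

    list(int) * li = 0;
    insert( &li, 42 );       // T inferred as int from the argument
    int x = head( li );      // the single polymorphic head is reused here
    list(double) * ld = 0;
    insert( &ld, 3.14 );     // instantiated again at double, with no code duplication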
\subsection{Related Work} \label{generic-related-sec}

One approach to the design of generic types is that taken by \CC{} templates \cite{C++}. Java \cite{Java8} has another prominent implementation for generic types, introduced in Java~5 and based on a significantly different approach than \CC{}. The Java approach has much more in common with the !void*!-polymorphism shown in Figure~\ref{void-generic-fig}; since in Java nearly all data is stored by reference, the Java approach to polymorphic data is to store pointers to arbitrary data and insert type-checked implicit casts at compile-time. This process of \emph{type erasure} has the benefit of allowing a single instantiation of polymorphic code, but relies heavily on Java's object model. To use this model, a more C-like language such as \CFA{} would be required to dynamically allocate internal storage for variables, track their lifetime, and properly clean them up afterward.

A \emph{dtype-static} type has polymorphic parameters but is still concrete. Polymorphic pointers are an example of dtype-static types; given some type variable !T!, !T! is a polymorphic type, but !T*! has a fixed size and can therefore be represented by a !void*! in code generation. In particular, generic types where all parameters are un-!sized! (\ie{} they do not conform to the built-in !sized! trait, which is satisfied by all types the compiler knows the size and alignment of) are always concrete, as there is no possibility for their layout to vary based on type parameters of unknown size and alignment. More precisely, a type is concrete if and only if all of its !sized! type parameters are concrete, and a concrete type is dtype-static if any of its type parameters are (possibly recursively) polymorphic. To illustrate, the following code using the !pair! type from above has each use of !pair! commented with its class.

Here, !_assign_T! is passed in as an implicit parameter from !otype T! and takes two !T*! (!void*! in the generated code\footnote{A GCC extension allows arithmetic on \lstinline{void*}, calculated as if \lstinline{sizeof(void) == 1}.}), a destination and a source, and !_retval! is the pointer to a caller-allocated buffer for the return value, the usual \CFA{} method to handle dynamically-sized return types. !_offsetof_pair! is the offset array passed into !value!; this array is statically generated at the call site. Similarly, the layout function can only safely be called from a context where the generic type definition is visible, because otherwise the caller does not know how large to allocate the array of member offsets.
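As a hedged illustration of the machinery just described, a generated layout function for a two-field generic such as !pair! could look roughly like the following (simplified; power-of-two alignments assumed, and all names hypothetical):

    void _layoutof_pair( size_t * size, size_t * align, size_t * offsets,
            size_t size1, size_t align1, size_t size2, size_t align2 ) {
        size_t off = 0;
        offsets[0] = off;                                // first field at offset 0
        off += size1;
        off = ( off + align2 - 1 ) & ~( align2 - 1 );    // first properly-aligned offset
        offsets[1] = off;
        off += size2;
        *align = align1 > align2 ? align1 : align2;      // struct alignment is the max
        *size = ( off + *align - 1 ) & ~( *align - 1 );  // pad struct to its alignment
    }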
The C standard does not specify a memory layout for structs, but the System V ABI \cite{SysVABI} does; compatibility with this standard is sufficient for \CFA{}'s currently-supported architectures, though future ports may require different layout-function generation algorithms. This algorithm, sketched below in pseudo-\CFA{}, is a straightforward mapping of consecutive fields into the first properly-aligned offset in the !struct! layout; layout functions for !union! types omit the offset array and simply calculate the maximum size and alignment over all union variants. Since \CFACC{} generates a distinct layout function for each type, constant-folding and loop unrolling are applied.

To validate the practicality of this generic type design, microbenchmark-based tests were conducted against a number of comparable code designs in C and \CC{}, first published in \cite{Moss18}. Since these languages are all C-based and compiled with the same compiler backend, maximal-performance benchmarks should show little runtime variance, differing only in length and clarity of source code. A more illustrative comparison measures the costs of idiomatic usage of each language's features. The code below shows the \CFA{} benchmark tests for a generic stack based on a singly-linked list; the test suite is equivalent for the other languages, code for which is included in Appendix~\ref{generic-bench-app}. The experiment uses element types !int! and !pair(short, char)! and pushes $N = 4M$ elements on a generic stack, copies the stack, clears one of the stacks, and finds the maximum value in the other stack.

The four versions of the benchmark implemented are C with !void*!-based polymorphism, \CFA{} with parametric polymorphism, \CC{} with templates, and \CC{} using only class inheritance for polymorphism, denoted \CCV{}. The \CCV{} variant illustrates an alternative object-oriented idiom where all objects inherit from a base !object! class, a language design similar to Java 4; in particular, runtime checks are necessary to safely downcast objects. The most notable difference among the implementations is the memory layout of generic types: \CFA{} and \CC{} inline the stack and pair elements into corresponding list and pair nodes, while C and \CCV{} lack such capability and, instead, must store generic objects via pointers to separately allocated objects. Note that the C benchmark uses unchecked casts as C has no runtime mechanism to perform such checks, whereas \CFA{} and \CC{} provide type safety statically.
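The benchmark code block itself is elided in this view; a minimal sketch of the test shape described above (names such as !stack!, !push!, !clear!, and !max! are assumptions, not the thesis code):

    stack(int) s;
    for ( int i = 0; i < N; i += 1 ) push( s, i );  // push N elements
    stack(int) t = s;                               // copy the stack
    clear( s );                                     // clear one of the stacks
    int m = max( t );                               // find the maximum in the other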
The C and \CCV{} variants are generally the slowest and have the largest memory footprint, due to their less-efficient memory layout and the pointer indirection necessary to implement generic types in those languages; this inefficiency is exacerbated by the second level of generic types in the pair benchmarks. By contrast, the \CFA{} and \CC{} variants run in noticeably less time for both the integer and pair because of the equivalent storage layout, with the inlined libraries (\ie{} no separate compilation) and greater maturity of the \CC{} compiler contributing to \CC{}'s lead. \CCV{} is slower than C largely due to the cost of runtime type checking of downcasts (implemented with !dynamic_cast!); the outlier for \CFA{}, pop !pair!, results from the complexity of the generated-C polymorphic code. The gcc compiler is unable to optimize some dead code and condense nested calls; a compiler designed for \CFA{} could more easily perform these optimizations. Finally, the binary size for \CFA{} is larger because of static linking with the \CFA{} prelude library, which includes function definitions for all the built-in operators. \CFA{} is also competitive in terms of source code size, measured as a proxy for programmer effort.
• ## doc/theses/aaron_moss_PhD/phd/introduction.tex

 rffe2fad \chapter{Introduction}

The C programming language~\cite{C11} has had a wide-ranging impact on the design of software and programming languages. In the 30 years since its first standardization, it has consistently been one of the most popular programming languages, with billions of lines of C code still in active use, and tens of thousands of trained programmers producing it. The TIOBE index~\cite{TIOBE} tracks popularity of programming languages over time, and C has never dropped below second place.

The impact of C on programming language design is also obvious from Table~\ref{tiobe-table}; with the exception of Python, all of the top five languages use C-like syntax and control structures. \CC{}~\cite{C++} is even a largely backwards-compatible extension of C. Though its lasting popularity and wide impact on programming language design point to the continued relevance of C, there is also widespread desire of programmers for languages with more expressive power and programmer-friendly features; accommodating both maintenance of legacy C code and development of the software of the future is a difficult task for a single programming language. The new features make \CFA{} more powerful and expressive than C, while maintaining strong backward-compatibility with both C code and the procedural paradigm expected by C programmers. Unlike other popular C extensions like \CC{} and Objective-C, \CFA{} adds modern features to C without imposing an object-oriented paradigm to use them. However, these new features do impose a compile-time cost, particularly in the expression resolver, which must evaluate the typing rules of a significantly more complex type system. This thesis is focused on making \CFA{} a more powerful and expressive language, both by adding new features to the \CFA{} type system and ensuring that both added and existing features can be efficiently implemented in \CFACC{}, the \CFA{} reference compiler.
The prototype system, which implements the algorithmic contributions of this thesis, is the first performant type-checker implementation for a \CFA{}-style type system. As the existence of an efficient compiler is necessary for the practical viability of a programming language, the contributions of this thesis comprise a validation of the \CFA{} language design that was previously lacking.

Though the direction and experimental validation of this work is fairly narrowly focused on the \CFA{} programming language, the tools used and results obtained should be of interest to a wider compiler and programming language design community. In particular, with the addition of \emph{concepts} in \CCtwenty{}~\cite{C++Concepts}, conforming \CC{} compilers must support a model of type assertions very similar to that in \CFA{}, and the algorithmic techniques used here may prove useful. Much of the difficulty of type-checking \CFA{} stems from the language design choice to allow overload selection from the context of a function call based on function return type in addition to the type of the arguments to the call; this feature allows the programmer to specify fewer redundant type annotations on functions that are polymorphic in their return type. As an example in \CC{}:

    \begin{C++}
    template<typename T> T* zero() { return new T( 0 ); }
    int* z = zero<int>();  $\C{// must specify int twice}$
    \end{C++}

\CFA{} allows !int* z = zero()!, which elides the second !int!. While the !auto! keyword in \CCeleven{} supports similar inference in a limited set of contexts (\eg{} !auto z = zero<int>()!), the demonstration of the richer inference in \CFA{} raises possibilities for similar features in future versions of \CC{}. By contrast to \CC{}, Java~8~\cite{Java8} and Scala~\cite{Scala} use comparably powerful forms of type inference to \CFA{}, so the algorithmic techniques in this thesis may be effective for those languages' compiler implementors. Type environments are also widely modelled in compiler implementations, particularly for functional languages, though also increasingly commonly for other languages (such as Rust~\cite{Rust}) that perform type inference; the type environment presented here may be useful to those language implementors.

One area of inquiry that is outside the scope of this thesis is formalization of the \CFA{} type system. Ditchfield~\cite{Ditchfield92} defined the $F_\omega^\ni$ polymorphic lambda calculus, which is the theoretical basis for the \CFA{} type system. Ditchfield did not, however, prove any soundness or completeness properties for $F_\omega^\ni$; such proofs remain future work. A number of formalisms other than $F_\omega^\ni$ could potentially be adapted to model \CFA{}. One promising candidate is \emph{local type inference} \cite{Pierce00,Odersky01}, which describes similar contextual propagation of type information; another is the polymorphic conformity-based model of the Emerald~\cite{Black90} programming language, which defines a subtyping relation on types that is conceptually similar to \CFA{} traits. These modelling approaches could potentially be used to extend an existing formal semantics for C such as Cholera \cite{Norrish98}, CompCert \cite{Leroy09}, or Formalin \cite{Krebbers14}.

• ## doc/theses/aaron_moss_PhD/phd/resolution-heuristics.tex

 rffe2fad \label{resolution-chap}

% consider using "satisfaction" throughout when talking about assertions
% "valid" instead of "feasible" interpretations

The main task of the \CFACC{} type-checker is \emph{expression resolution}: determining which declarations the identifiers in each expression correspond to. Resolution is a straightforward task in C, as no simultaneously-visible declarations share identifiers, but in \CFA{}, the name overloading features discussed in Section~\ref{overloading-sec} generate multiple candidate declarations for each identifier. A given matching between identifiers and declarations in an expression is an \emph{interpretation}; an interpretation also includes information about polymorphic type bindings and implicit casts to support the \CFA{} features discussed in Sections~\ref{poly-func-sec} and~\ref{implicit-conv-sec}, each of which increases the number of valid candidate interpretations. To choose among valid interpretations, a \emph{conversion cost} is used to rank interpretations. This conversion cost is summed over all subexpression interpretations in the interpretation of a top-level expression. Hence, the expression resolution problem is to find the unique minimal-cost interpretation for an expression, reporting an error if no such unique interpretation exists.

\section{Expression Resolution}

The expression resolution pass in \CFACC{} must traverse an input expression, match identifiers to available declarations, rank candidate interpretations according to their conversion cost, and check type assertion satisfaction for these candidates. Once the set of valid interpretations for the top-level expression is found, the expression resolver selects the unique minimal-cost candidate or reports an error.

The expression resolution problem in \CFA{} is more difficult than the analogous problems in C or \CC{}. As mentioned above, the lack of name overloading in C (except for built-in operators) makes its resolution problem substantially easier. A comparison of the richer type systems in \CFA{} and \CC{} highlights some of the challenges in \CFA{} expression resolution. The key distinction between \CFA{} and \CC{} resolution is that \CC{} uses a greedy algorithm for selection of candidate functions given their argument interpretations, whereas \CFA{} allows contextual information from superexpressions to influence the choice among candidate functions.
One key use of this contextual information is for type inference of polymorphic return types; \CC{} requires explicit specification of template parameters that only occur in a function's return type, while \CFA{} allows the instantiation of these type parameters to be inferred from context (and in fact does not allow explicit specification of type parameters to a function), as in the following example:

    \begin{cfa}
    forall(dtype T) T& deref(T*);  $\C{// dereferences pointer}$
    forall(otype T) T* def();      $\C{// new heap-allocated default-initialized value}$
    int& i = deref( def() );
    \end{cfa}

In this example, the \CFA{} compiler infers the type arguments of !deref! and !def! from the !int&! type of !i!; \CC{}, by contrast, requires a type parameter on !def!\footnote{The type parameter of \lstinline{deref} can be inferred from its argument.}, \ie{} !deref( def<int>() )!. Similarly, while both \CFA{} and \CC{} rank candidate functions based on a cost metric for implicit conversions, \CFA{} allows a suboptimal subexpression interpretation to be selected if it allows a lower-cost overall interpretation, while \CC{} requires that each subexpression interpretation have minimal cost. Because of this use of contextual information, the \CFA{} expression resolver must consider multiple interpretations of each function argument, while the \CC{} compiler has only a single interpretation for each argument\footnote{With the exception of address-of operations on functions.}. Additionally, until the introduction of concepts in \CCtwenty{} \cite{C++Concepts}, \CC{} expression resolution has no analogue to \CFA{} assertion satisfaction checking, a further complication for a \CFA{} compiler. The precise definition of \CFA{} expression resolution in this section further expands on the challenges of this problem.

\subsection{Type Unification}

\subsection{Conversion Cost} \label{conv-cost-sec}

\CFA{}, like C, allows inexact matches between the type of function parameters and function call arguments. Both languages insert \emph{implicit conversions} in these situations to produce an exact type match, and \CFA{} also uses the relative \emph{cost} of different conversions to select among overloaded function candidates. C does not have an explicit cost model for implicit conversions, but the ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11} used to decide which arithmetic operators to apply define one implicitly. The only context in which C has name overloading is the arithmetic operators, and the usual arithmetic conversions define a \emph{common type} for mixed-type arguments to binary arithmetic operators. Since for backward-compatibility purposes the conversion costs of \CFA{} must produce an equivalent result to these common type rules, it is appropriate to summarize \cite[\S{}6.3.1.8]{C11} here:
\begin{itemize}
\item If either operand is a floating-point type, the common type is the largest floating-point type. If either operand is !_Complex!, the common type is also !_Complex!.
\item If both operands are of integral type, the common type has the same size\footnote{Technically, the C standard defines a notion of \emph{rank} in \cite[\S{}6.3.1.1]{C11}, a distinct value for each \lstinline{signed} and \lstinline{unsigned} pair; integral types of the same size thus may have distinct ranks. For instance, though \lstinline{int} and \lstinline{long} may have the same size, \lstinline{long} always has greater rank. The standard-defined types are declared to have greater rank than any types of the same size added as compiler extensions.} as the larger type.
\item If the operands have opposite signedness, the common type is !signed! if the !signed! operand is strictly larger, or !unsigned! otherwise. If the operands have the same signedness, the common type shares it.
\end{itemize}

With more specificity, the cost is a lexicographically-ordered tuple, where each element corresponds to a particular kind of conversion. In Bilson's design, conversion cost is a 3-tuple, $(unsafe, poly, safe)$, where $unsafe$ is the count of unsafe (narrowing) conversions, $poly$ is the count of polymorphic type bindings, and $safe$ is the sum of the degree of safe (widening) conversions. Degree of safe conversion is calculated as path weight in a directed graph of safe conversions between types; Bilson's version of this graph is in Figure~\ref{bilson-conv-fig}. The safe conversion graph is designed such that the common type $c$ of two types $u$ and $v$ is compatible with the C standard definitions from \cite[\S{}6.3.1.8]{C11} and can be calculated as the unique type minimizing the sum of the path weights of $\overrightarrow{uc}$ and $\overrightarrow{vc}$. The following example lists the cost in the Bilson model of calling each of the following functions with two !int! parameters, where the interpretation with the minimum total cost will be selected:

    \begin{cfa}
    forall(dtype T | { T& ++?(T&); }) T& advance$$$_1$$$(T& i, int n);
    forall(dtype T | { T& ++?(T&); T& ?+=?(T&, int)}) T& advance$$$_2$$$(T& i, int n);
    \end{cfa}

    \begin{cfa}
    forall(otype T, otype U) void f$$$_1$$$(T, U);   $\C[3.125in]{// polymorphic}$
    forall(otype T) void f$$$_2$$$(T, T);    $\C[3.125in]{// less polymorphic}$
    forall(otype T) void f$$$_3$$$(T, int);  $\C[3.125in]{// even less polymorphic}$
    forall(otype T) void f$$$_4$$$(T*, int); $\C[3.125in]{// least polymorphic}$
    \end{cfa}

The new cost model accounts for the fact that functions with more polymorphic variables are less constrained, by introducing a $var$ cost element that counts the number of type variables on a candidate function. In the example above, !f!$_1$ has $var = 2$, while the others have $var = 1$.
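A hedged sketch of how calls would rank these declarations (illustrative only, with resolution details simplified):

    int i = 42, j = 37;
    f( i, j );    // f4 does not match (int is not a pointer); among the rest, the var
                  // element favours f2 and f3 (one type variable each) over f1 (two)
    int * p = &i;
    f( p, j );    // both f3 (T = int*) and f4 (T = int) match; the specialization
                  // element introduced below makes the more specific f4 cheaper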
The new cost model also accounts for a nuance unhandled by Ditchfield or Bilson, in that it makes the more specific !f!$_4$ cheaper than the more generic !f!$_3$; !f!$_4$ is presumably somewhat optimized for handling pointers, but the prior \CFA{} cost model could not account for the more specific binding, as it simply counted the number of polymorphic unifications. In the modified model, each level of constraint on a polymorphic type in the parameter list results in a decrement of the $specialization$ cost element, which is shared with the count of assertions due to their common nature as constraints on polymorphic type bindings. Thus, all else equal, if both a binding to !T! and a binding to !T*! are available, the model chooses the more specific !T*! binding with $specialization = -1$. For multi-argument generic types, the least-specialized polymorphic parameter sets the specialization cost, \eg{} the specialization cost of !pair(T, S*)! is $-1$ (from !T!) rather than $-2$ (from !S!). Specialization cost is not counted on the return type list; since $specialization$ is a property of the function declaration, a lower specialization cost prioritizes one declaration over another. User programmers can choose between functions with varying parameter lists by adjusting the arguments, but the same is not true in general of varying return types\footnote{In particular, as described in Section~\ref{expr-cost-sec}, cast expressions take the cheapest valid and convertible interpretation of the argument expression, and expressions are resolved as a cast to \lstinline{void}. As a result, including return types in the $specialization$ cost means that a function with return type \lstinline{T*} for some polymorphic type \lstinline{T} would \emph{always} be chosen over a function with the same parameter types returning \lstinline{void}, even for \lstinline{void} contexts, an unacceptably counter-intuitive result.}, so the return types are omitted from the $specialization$ element. Since both $vars$ and $specialization$ are properties of the declaration rather than any particular interpretation, they are prioritized less than the interpretation-specific conversion costs from Bilson's original 3-tuple. In the redesign, for consistency with the approach of the usual arithmetic conversions, which select a common type primarily based on size but secondarily on sign, arcs in the new graph are annotated with whether they represent a sign change, and such sign changes are summed in a new $sign$ cost element that lexicographically succeeds $safe$.
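The common-type behaviour that the $sign$ element mirrors can be seen in a short C-compatible fragment (illustrative only; a typical LP64 platform is assumed):

    short s; unsigned int u; long l; double d;
    s + s;   // operands promoted to int first: common type int
    u + 1;   // same size, mixed signedness: common type unsigned int
    u + l;   // signed operand strictly larger: common type long
    l + d;   // floating-point operand present: common type double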
Making sign changes lexicographically succeed $safe$ means that sign conversions are approximately the same cost as widening conversions, but slightly more expensive (as opposed to less expensive in Bilson's graph), so maintaining the same signedness is consistently favoured. This refined conversion graph is shown in Figure~\ref{extended-conv-fig}. With these modifications, the current \CFA{} cost tuple is as follows:

\subsection{Expression Cost} \label{expr-cost-sec}

The mapping from \CFA{} expressions to cost tuples is described by Bilson in \cite{Bilson03}, and remains effectively unchanged with the exception of the refinements to the cost tuple described above. Nonetheless, some salient details are repeated here for the sake of completeness. In terms of the core argument-parameter matching algorithm, overloaded variables are handled the same as zero-argument function calls, aside from a different pool of candidate declarations and setup for different code generation. Similarly, an aggregate member expression !a.m! can be modelled as a unary function !m! that takes one argument of the aggregate type. Literals do not require sophisticated resolution, as in C the syntactic form of each implies their result types: !42! is !int!, !"hello"! is !char*!, \etc{}\footnote{Struct literals (\eg{} \lstinline|(S)\{ 1, 2, 3 \}| for some struct \lstinline{S}) are a somewhat special case, as they are known to be of type \lstinline{S}, but require resolution of the implied constructor call described in Section~\ref{ctor-sec}.}. Since most expressions can be treated as function calls, nested function calls are the primary component of complexity in expression resolution.

Considered independently, !g!$_1$!(42)! is the cheapest interpretation of !g(42)!, with cost $(0,0,0,0,0,0,0)$ since the argument type is an exact match. However, in context, an unsafe conversion is required to downcast the return type of !g!$_1$ to an !int! suitable for !f!, for a total cost of $(1,0,0,0,0,0,0)$ for !f( g!$_1$!(42) )!. If !g!$_2$ is chosen, on the other hand, there is a safe upcast from the !int! type of !42!
to !long!, but no cast on the return of !g!$_2$, for a total cost of $(0,0,1,0,0,0,0)$ for !f( g!$_2$!(42) )!; as this is cheaper, !g!$_2$ is chosen. Due to this design, all valid interpretations of subexpressions must in general be propagated to the top of the expression tree before any can be eliminated, a lazy form of expression resolution, as opposed to the eager expression resolution allowed by C or \CC{}, where each expression can be resolved given only the resolution of its immediate subexpressions. If there are no valid interpretations of the top-level expression, expression resolution fails and must produce an appropriate error message.

In C semantics, this example is unambiguously upcasting !32! to !unsigned long long!, performing the shift, then downcasting the result to !unsigned!, at cost $(1,0,3,1,0,0,0)$. If ascription were allowed to be a first-class interpretation of a cast expression, it would be cheaper to select the !unsigned! interpretation of !?>>?! by downcasting !x! to !unsigned! and upcasting !32! to !unsigned!, at a total cost of $(1,0,1,1,0,0,0)$. However, this break from C semantics is not backwards compatible, so to maintain C compatibility, the \CFA{} resolver selects the lowest-cost interpretation of the cast argument for which a conversion or coercion to the target type exists (upcasting to !unsigned long long! in the example above, due to the lack of unsafe downcasts), using the cost of the conversion itself only as a tie-breaker. For example, in !int x; double x; (int)x;!, both declarations have zero-cost interpretations as !x!, but the !int x! interpretation is cheaper to cast to !int!, and is thus selected. Thus, in contrast to the lazy resolution of nested function-call expressions discussed above, where final interpretations for each subexpression are not chosen until the top-level expression is reached, cast expressions introduce eager resolution of their argument subexpressions, as if that argument was itself a top-level expression.

Pruning possible interpretations as early as possible is one way to reduce the real-world cost of expression resolution, provided that a sufficient proportion of interpretations are pruned to pay for the cost of the pruning algorithm. One opportunity for interpretation pruning is by the argument-parameter type matching, but the literature \cite{Baker82,Bilson03,Cormack81,Ganzinger80,Pennello80,PW:overload} provides no clear answers on whether candidate functions should be chosen according to their available arguments, or whether argument resolution should be driven by the available function candidates.
For programming languages without implicit conversions, argument-parameter matching is essentially the entirety of the expression resolution problem, and is generally referred to as ``overload resolution'' in the literature. All expression-resolution algorithms form a DAG of interpretations, some explicitly, some implicitly; in this DAG, arcs point from function-call interpretations to argument interpretations, as in Figure~\ref{res-dag-fig}. This approach of filtering out invalid types is unsuited to \CFA{} expression resolution, however, due to the presence of polymorphic functions and implicit conversions.

Some other language designs solve the matching problem by forcing a bottom-up order. \CC{}, for instance, defines its overload-selection algorithm in terms of a partial order between function overloads given a fixed list of argument candidates, implying that the arguments must be selected before the function. This design choice improves worst-case expression resolution time by only propagating a single candidate for each subexpression, but type annotations must be provided for any function call that is polymorphic in its return type, and these annotations are often redundant:

    \begin{C++}
    template<typename T> T* malloc() { /* ... */ }
    int* p = malloc<int>(); $\C{// T = int must be explicitly supplied}$
    \end{C++}

\CFA{} saves programmers from redundant annotations with its richer inference:

    \begin{cfa}
    forall(dtype T | sized(T)) T* malloc();
    int* p = malloc(); $\C{// Infers T = int from left-hand side}$
    \end{cfa}

Baker~\cite{Baker82} left empirical comparison of different overload resolution algorithms to future work; Bilson~\cite{Bilson03} described an extension of Baker's algorithm to handle implicit conversions and polymorphism, but did not further explore the space of algorithmic approaches to handle both overloaded names and implicit conversions. This thesis closes that gap in the literature by comparing the performance of both top-down and bottom-up expression resolution algorithms, with results reported in Chapter~\ref{expr-chap}.

The assertion satisfaction algorithm designed by Bilson~\cite{Bilson03} for the original \CFACC{} is the most-relevant prior work to this project. Before accepting any subexpression candidate, Bilson first checks that the candidate's assertions can all be resolved; this is necessary due to Bilson's addition of assertion satisfaction costs to candidate costs (discussed in Section~\ref{conv-cost-sec}). If this subexpression interpretation ends up not being used in the final resolution, then the (sometimes substantial) work of checking its assertions ends up wasted. Bilson's assertion checking function recurses on two lists, !need! and !newNeed!, the current declaration's assertion set and those implied by the assertion-satisfying declarations, respectively, as detailed in the pseudo-code below (ancillary aspects of the algorithm are omitted for clarity):
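The pseudo-code does not survive in this diff view; the following is a reconstructed sketch of the recursion as described (pseudo-\CFA{}; helper names such as !candidates! and !assertionsOf! are assumptions, not Bilson's code):

    bool checkAssertions( List(Assertion) need, List(Assertion) newNeed, Env env ) {
        if ( empty( need ) ) {
            if ( empty( newNeed ) ) return true;                  // everything satisfied
            return checkAssertions( newNeed, emptyList(), env );  // recurse on implied set
        }
        Assertion a = head( need );
        for ( Decl d : candidates( a, env ) ) {    // try each possibly-satisfying declaration
            Env env2 = copy( env );                // tentative type environment
            if ( unify( type( d ), type( a ), env2 )   // d matches assertion a
                    && checkAssertions( tail( need ),
                        append( newNeed, assertionsOf( d ) ), env2 ) )
                return true;                       // backtracking search succeeds
        }
        return false;                              // no satisfying declaration found
    }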
During the course of checking the assertions of a single top-level expression, the results are cached for each assertion checked. The search key for this cache is the assertion declaration with its type variables substituted according to the type environment, to distinguish satisfaction of the same assertion for different types. This adjusted assertion declaration is then run through the \CFA{} name-mangling algorithm to produce an equivalent string-type key.

One superficially-promising optimization, which I did not pursue, is caching assertion-satisfaction judgments among top-level expressions. This approach would be difficult to implement correctly in a \CFA{} compiler, due to the lack of a closed set of operations for a given type. New declarations related to a particular type can be introduced in any lexical scope in \CFA{}, and these added declarations may cause an assertion that was previously satisfiable to fail due to an introduced ambiguity. Furthermore, given the recursive nature of assertion satisfaction and the possibility of this satisfaction judgment depending on an inferred type, an added declaration may break satisfaction of an assertion with a different name that operates on different types. Given these concerns, correctly invalidating a cross-expression assertion satisfaction cache for \CFA{} is a non-trivial problem, and the overhead of such an approach may outweigh any benefits from the caching.

The assertion satisfaction aspect of \CFA{} expression resolution bears some similarity to satisfiability problems from logic, and as such, other languages with similar trait and assertion mechanisms make use of logic-program solvers in their compilers. The combination of the assertion satisfaction elements of the problem with the conversion-cost model of \CFA{} makes this logic-solver approach difficult to apply in \CFACC{}, however. Expressing assertion resolution as a satisfiability problem ignores the cost optimization aspect, which is necessary to decide among what are often many possible satisfying assignments of declarations to assertions. (MaxSAT solvers \cite{Morgado13}, which allow weights on solutions to satisfiability problems, may be a productive avenue for future investigation.) On the other hand, the deeply-recursive nature of the satisfiability problem makes it difficult to adapt to optimizing solver approaches such as linear programming. To maintain a well-defined programming language, any optimization algorithm used must provide an exact (rather than approximate) solution; this constraint also rules out a whole class of approximately-optimal generalized solvers. The main challenge to implementing this approach in \CFACC{} is applying the implicit conversions generated by the resolution process in the code generation for the thunk functions that \CFACC{} uses to pass type assertions to their requesting functions with the proper signatures.

One \CFA{} feature that could be added to improve the ergonomics of overload selection is an \emph{ascription cast}; as discussed in Section~\ref{expr-cost-sec}, the semantics of the C cast operator are to choose the cheapest argument interpretation which is convertible to the target type, using the conversion cost as a tie-breaker. An ascription cast would reverse these priorities, choosing the argument interpretation with the cheapest conversion to the target type, only using interpretation cost to break ties\footnote{A possible stricter semantics would be to select the cheapest interpretation with a zero-cost conversion to the target type, reporting a compiler error otherwise.}.
This would allow ascription casts to the desired return type to be used for overload selection:

    \begin{cfa}
    int f$$$_1$$$(int);  int f$$$_2$$$(double);
    int g$$$_1$$$(int);  double g$$$_2$$$(long);
    f((double)42);    $\C[4.5in]{// select f$$_2$$ by argument cast}$
    (as double)g(42); $\C[4.5in]{// select g$$_2$$ by return ascription cast}$
    (double)g(42);    $\C[4.5in]{// select g$$_1$$ NOT g$$_2$$ because of parameter conversion cost}$
    \end{cfa}

Though performance of the existing resolution algorithms is promising, some further optimizations do present themselves. The refined cost model discussed in Section~\ref{conv-cost-sec} is more expressive, but requires more than twice as many fields; it may be fruitful to investigate more tightly-packed in-memory representations of the cost tuple, as well as comparison operations that require fewer instructions than a full lexicographic comparison. Integer or vector operations on a more-packed representation may prove effective, though dealing with the negative-valued $specialization$ field may require some effort.
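One possible shape for such a packed representation, sketched under explicit assumptions (the 9-bit fields and $+256$ bias are invented for illustration; only the seven-element, lexicographically-ordered tuple is taken from the text):

    #include <stdint.h>
    // costFields[0..6] hold the tuple elements in lexicographic (priority) order;
    // specIndex marks the single field that may be negative.
    uint64_t packCost( const int costFields[7], int specIndex ) {
        uint64_t key = 0;
        for ( int i = 0; i < 7; i += 1 ) {
            int v = costFields[i] + ( i == specIndex ? 256 : 0 );  // bias negatives
            key = ( key << 9 ) | (uint64_t)( v & 0x1ff );          // 9 bits per field
        }
        return key;  // costA < costB iff packCost(A) < packCost(B), barring field overflow
    }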
• ## doc/theses/aaron_moss_PhD/phd/thesis.tex

 rffe2fad

    % Use the "hyperref" package
    % N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE
    %\usepackage[pdftex,pagebackref=false]{hyperref} % with basic options
    \usepackage[breaklinks,pagebackref=false]{hyperref}
    % N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing.
• ## doc/theses/aaron_moss_PhD/phd/type-environment.tex

 rffe2fad For purposes of this chapter, a \emph{type environment} $T$ is a set of \emph{type classes} $\myset{T_1, T_2, \cdots, T_{|T|}}$. Each type class $T_i$ contains a set of \emph{type variables} $\myset{v_{i,1}, v_{i,2}, \cdots, v_{i,|T_i|}}$. Since the type classes represent an equivalence relation over the type variables, the sets of variables contained in two distinct classes in the same environment must be \emph{disjoint}. Each individual type class $T_i$ may also be associated with a \emph{bound}, $b_i$; this bound contains the \emph{bound type} that the variables in the type class are replaced with, but also includes other information in \CFACC{}, including whether type conversions are permissible on the bound type and what sort of type variables are contained in the class (data types, function types, or variadic tuples). The following example demonstrates the use of a type environment for unification:

    \begin{cfa}
    forall(otype F) F f(F, F);
    forall(otype G) G g(G);
    f( g(10), g(20) );
    \end{cfa}

Expression resolution starts from an empty type environment; from this empty environment, the calls to !g! can be independently resolved. These resolutions result in two new type environments, $T = \{ \myset{\mathsf{G}_1} \rightarrow$ !int!$\}$ and $T' = \{ \myset{\mathsf{G}_2} \rightarrow$ !int!$\}$; the calls to !g! have generated distinct type variables !G!$_1$ and !G!$_2$, each bound to !int! by unification with the type of its argument (!10! and !20!, both !int!). To complete resolution of the call to !f!, both environments must be combined; resolving the first argument to !f! produces a new type environment $T'' = \{ \myset{\mathsf{G}_1, \mathsf{F}_1} \rightarrow$ !int!$\}$: the new type variable !F!$_1$ has been introduced and unified with !G!$_1$ (the return type of !g(10)!), and consequently bound to !int!. To resolve the second argument to !f!, $T''$ must be checked for compatibility with $T'$; since !F!$_1$ unifies with !G!$_2$, their type classes must be merged. Since both !F!$_1$ and !G!$_2$ are bound to !int!, this merge succeeds, producing the final environment $T'' = \{ \myset{\mathsf{G}_1, \mathsf{F}_1, \mathsf{G}_2} \rightarrow$ !int!$\}$.

Type environments in \CFACC{} need to support eleven basic operations, summarized in Table~\ref{env-op-table}. The first six operations are straightforward queries and updates on these data structures. The lookup operation $find(T, v_{i,j})$ produces $T_i$, the type class in $T$ that contains variable $v_{i,j}$, or an invalid sentinel value for no such class.
The other two query operations act on type classes, where $report(T_i)$ produces the set $\myset{v_{i,1}, v_{i,2}, \cdots, v_{i,|T_i|}}$ of all type variables in a class $T_i$ and $bound(T_i)$ produces the bound $b_i$ of that class, or a sentinel indicating no bound is set. The update operation $insert(T, v_{i,1})$ creates a new type class $T_i$ in $T$ that contains only the variable $v_{i,1}$ and no bound; due to the disjointness property, $v_{i,1}$ must not belong to any other type class in $T$. The $add(T_i, v_{i,j})$ operation adds a new type variable $v_{i,j}$ to class $T_i$; again, $v_{i,j}$ cannot exist elsewhere in $T$. $bind(T_i, b_i)$ mutates the bound for a type class, setting or updating the current bound.

\subsection{Na\"{\i}ve} \label{naive-env-sec}

The type environment data structure used in Bilson's~\cite{Bilson03} original implementation of \CFACC{} is a simple translation of the definitions in Section~\ref{env-defn-sec} to \CC{} code; a !TypeEnvironment! contains a list of !EqvClass! type equivalence classes, each of which contains the type bound information and a tree-based sorted set of type variables. This approach has the benefit of being easy to understand and not imposing life-cycle or inheritance constraints on its use, but, as can be seen in Table~\ref{env-bounds-table}, does not support many of the desired operations with any particular efficiency. Some variations on this structure may improve performance somewhat; for instance, replacing the !EqvClass! variable storage with a hash-based set reduces search and update times from $O(\log n)$ to amortized $O(1)$, while adding an index for the type variables in the entire environment removes the need to check each type class individually to maintain the disjointness property.

In particular, the type-class bound cannot be easily included in the union-find data structure, as the requirement to make it the class representative breaks the balancing properties of $union$, and requires too-close integration with the type environment's internal $unifyBound$ operation. This issue can be solved by including a side map from class representatives to the type-class bound. If placeholder values are inserted in this map for type classes without bounds, then this also has the useful property that the key set of the map provides an easily obtainable list of all the class representatives, a list which cannot be derived from the union-find data structure without a linear search for class representatives through all elements.
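For orientation, a minimal C-style sketch of the union-find core under discussion, with the side map reduced to a comment (\CFACC{}'s actual implementation differs in detail):

    enum { N = 1024 };     // illustrative fixed capacity
    int parent[N];         // parent[v] == v iff v is a class representative
    int size_[N];          // class sizes, for union-by-size balancing

    int find( int v ) {                          // near-constant amortized time
        while ( parent[v] != v ) {
            parent[v] = parent[parent[v]];       // path halving
            v = parent[v];
        }
        return v;
    }
    void unifyClasses( int u, int v ) {
        u = find( u );  v = find( v );
        if ( u == v ) return;
        if ( size_[u] < size_[v] ) { int t = u; u = v; v = t; }  // union by size
        parent[v] = u;  size_[u] += size_[v];
        // a side map from representative to bound must merge its entries for u
        // and v here (the unifyBound step discussed above)
    }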
\subsection{Union-Find with Classes} \label{env-union-find-classes-approach}

The na\"{\i}ve $combine$ operation must traverse each of the classes of one environment, merging in any class of the other environment that shares a type variable. Since there are at most $n$ classes to unify, the unification cost is $O(nm + nu(n))$, while traversal and $find$ costs to locate classes to merge total $O(n^2m)$, for an overall cost of $O(n^2m + nu(n))$. The incremental $combine$ operation works similarly, but only needs to consider classes modified in either environment with respect to the common ancestor of both environments, allowing the $n$ cost terms to be substituted for $p$, for an overall cost of $O(p^2m + pu(n))$. Neither variant supports the $split$ operation to undo a $unify$.

This persistent union-find data structure is efficient, but not thread-safe; as suggested in Section~\ref{resn-conclusion-sec}, it may be valuable to parallelize the \CFA{} expression resolver. However, allowing multiple threads concurrent access to the persistent data structure is likely to result in ``reroot thrashing'', as different threads reroot the data structure to their own versions of interest. This contention could be mitigated by partitioning the data structure into separate subtrees for each thread, with each subtree having its own root node, and the boundaries among them implemented with a lock-equipped !ThreadBoundary! edit node. Alternatively, the concurrent hash trie of Prokopec \etal{} \cite{Prokopec11,Prokopec12} may be a useful hash-table replacement.
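A rough sketch of the edit-node structure this describes (reconstructed from the description here, not the \CFACC{} code):

    enum EditKind { Base, SetParent, ThreadBoundary };
    struct Edit {
        enum EditKind kind;
        int var, oldParent;     // for SetParent: which union-find entry was overwritten
        struct Edit * next;     // toward the version currently holding the base array
    };
    // reroot( version ) replays SetParent edits in reverse so that `version` holds
    // the base array; a lock-equipped ThreadBoundary node keeps one thread's
    // rerooting from thrashing another thread's subtree.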

• ## libcfa/src/stdlib.hfa

 rffe2fad

    // Created On       : Thu Jan 28 17:12:35 2016
    // Last Modified By : Peter A. Buhr
    // Last Modified On : Wed Apr 24 17:35:43 2019
    // Update Count     : 352

    // } // malloc
    // T & malloc( void ) {
    //      int & p = *(T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc
    //      printf( "& malloc %p\n", &p );
    //      return p;
    //      //      return (T &)*(T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc
    // } // malloc

    T * calloc( size_t dim ) {
            return (T *)(void *)calloc( dim, sizeof(T) );   // C calloc

    T * alloc( char fill ) {
            T * ptr = (T *)(void *)malloc( (size_t)sizeof(T) );     // C malloc
            return (T *)memset( ptr, (int)fill, sizeof(T) );        // initialize with fill value
    } // alloc

    T * alloc( size_t dim, char fill ) {
            T * ptr = (T *)(void *)malloc( dim * (size_t)sizeof(T) ); // C malloc
            return (T *)memset( ptr, (int)fill, dim * sizeof(T) );    // initialize with fill value
    } // alloc
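A hypothetical use of the !alloc! routines above, relying on \CFA{} return-type inference to supply !T! (illustrative only):

    int * p = alloc( '\0' );         // one int, T inferred as int, zero-filled
    int * a = alloc( 10, '\xff' );   // array of 10 ints, every byte set to 0xff
    free( a );  free( p );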
• ## src/Makefile.am

 rffe2fad

    ___driver_cfa_cpp_LDADD = -ldl                  # yywrap

    if WITH_LIBPROFILER
    ___driver_cfa_cpp_LDADD += -lprofiler
    endif

    if WITH_LIBTCMALLOC
    ___driver_cfa_cpp_LDADD += -ltcmalloc
    endif

    AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O3 -g -std=c++14
    AM_LDFLAGS  = @HOST_FLAGS@ -Xlinker -export-dynamic
    ARFLAGS     = cr
• ## src/Makefile.in

 rffe2fad

    ___driver_cfa_cpp_SOURCES = $(SRC)
    ___driver_cfa_cpp_LDADD = -ldl # yywrap
    AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O3 -g -std=c++14
    AM_LDFLAGS = @HOST_FLAGS@ -Xlinker -export-dynamic
    ARFLAGS = cr
    @rm BasicTypes-gen
    @WITH_LIBPROFILER_TRUE@___driver_cfa_cpp_LDADD += -lprofiler
    @WITH_LIBTCMALLOC_TRUE@___driver_cfa_cpp_LDADD += -ltcmalloc
    # Tell versions [3.59,3.63) of GNU make to not export all variables.
    # Otherwise a system limit (for SysV at least) may be exceeded.