From 69e76897047257646c628b10324a3223fc7da166 Mon Sep 17 00:00:00 2001 From: Mathnerd314 Date: Mon, 15 Jan 2024 15:41:38 -0700 Subject: [PATCH] fix Sphinx warnings --- docs/Commentary/Implementation/Compiler.rst | 65 +- docs/Commentary/Implementation/CoreSyntax.rst | 2 +- docs/Commentary/Implementation/Errors.rst | 2 +- docs/Commentary/Implementation/IR.rst | 15 +- .../Implementation/Implementation.rst | 18 + .../Memory.rst} | 302 +-- .../Implementation/TermRewriting.rst | 161 +- .../Language/Evaluation-Strategy.rst | 151 +- .../{Implementation => Language}/Fastest.rst | 0 docs/Commentary/Language/FunctionalLogic.rst | 126 +- docs/Commentary/Language/LogicProgramming.rst | 36 - docs/Commentary/Language/Memory.rst | 186 ++ docs/Commentary/Language/Objects.rst | 8 +- docs/Commentary/Language/Sets.rst | 8 +- docs/Commentary/Language/State.rst | 6 +- docs/Commentary/Language/TermRewriting.rst | 133 +- docs/Commentary/Language/Types.rst | 2 +- docs/Commentary/Language/Values.rst | 4 +- .../Commentary/Libraries/Compiler-Library.rst | 9 +- docs/Commentary/Libraries/Parsing.rst | 18 +- docs/Commentary/Libraries/Syntax.rst | 337 ++- docs/Commentary/Libraries/Units.rst | 4 +- docs/Commentary/Meta/Code-of-conduct.rst | 2 +- docs/Commentary/Meta/Guidelines.rst | 5 +- docs/Commentary/Meta/Learning.rst | 3 +- docs/Commentary/Meta/OtherPL.rst | 2 +- docs/Commentary/index.rst | 6 +- docs/GettingStarted/FAQ.rst | 17 - docs/Reference/Errors.rst | 2 + docs/Reference/Modules.rst | 9 +- docs/Reference/Values.rst | 4 +- docs/conf.py | 3 + docs/index.rst | 1 - docs/references.bib | 2058 ++++++++++++++++- 34 files changed, 2750 insertions(+), 955 deletions(-) rename docs/Commentary/{Language/Memory-Management.rst => Implementation/Memory.rst} (53%) rename docs/Commentary/{Implementation => Language}/Fastest.rst (100%) create mode 100644 docs/Commentary/Language/Memory.rst diff --git a/docs/Commentary/Implementation/Compiler.rst b/docs/Commentary/Implementation/Compiler.rst index 32f78c8..f2e52ce 100644 --- a/docs/Commentary/Implementation/Compiler.rst +++ b/docs/Commentary/Implementation/Compiler.rst @@ -189,11 +189,14 @@ Being able to break big complicated bytecode instructions down into more simple Example: Fibonacci function -def fibonacci(n) - a, b = 0, 1 - for _ in range(n): # inner loop - a, b = b, a + b # update a and b by adding them together - return a + +.. code-block:: python + + def fibonacci(n) + a, b = 0, 1 + for _ in range(n): # inner loop + a, b = b, a + b # update a and b by adding them together + return a The bytecode for the loop is something like this: FOR_ITER @@ -229,35 +232,39 @@ LuaJIT see paper for benchmarks, of course multiple tiers are better, but tl;dr is copy-and-patch is a nice middle tier. It is a template JIT compiler. In particular, it works by copying over a static pre-compiled machine code "template" into executable memory, and then going through that machine code and patching up instructions that need to have runtime data encoded in them. This is sort of like the relocation phase of linking/loading an ELF file. And actually we can use LLVM to build an ELF object file and generate our templates. 
For example: -extern int MAGICALLY_INSERT_THE_OPARG; -extern int MAGICALLY_CONTINUE_EXECUTION(_PyInterpreterFrame *frame, PyObject **stack_pointer); -int load_fast(_PyInterpreterFrame *frame, PyObject **stack_pointer) -{ - int oparg = &MAGICALLY_INSERT_THE_OPARG; - PyObject *value = frame->localsplus[oparg]; - Py_INCREF(value); - *stack_pointer++ = value; - __attribute__((musttail)) return MAGICALLY_CONTINUE_EXECUTION(frame, stack_pointer); -} +.. code-block:: c + + extern int MAGICALLY_INSERT_THE_OPARG; + extern int MAGICALLY_CONTINUE_EXECUTION(_PyInterpreterFrame *frame, PyObject **stack_pointer); + int load_fast(_PyInterpreterFrame *frame, PyObject **stack_pointer) + { + int oparg = &MAGICALLY_INSERT_THE_OPARG; + PyObject *value = frame->localsplus[oparg]; + Py_INCREF(value); + *stack_pointer++ = value; + __attribute__((musttail)) return MAGICALLY_CONTINUE_EXECUTION(frame, stack_pointer); + } So there are extern placeholders for inserting the oparg and continuing execution. For the oparg, we use the address of the extern for our oparg. This generates more efficient code because the relocation inserts the constant directly, instead of needing to dereference the address. And for continuing execution, we use LLVM's `musttail` so we get a single jump to the next opcode, and even better, if that jump happens to be of length zero, we can just skip the jump entirely. So, the object file that we get out of this looks like this: -.static -00: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax -0a: 48 98 cltq -0c: 49 8b 44 c5 48 movq 0x48(%r13,%rax,8), %rax -11: 8b 08 movl (%rax), %ecx -13: ff c1 incl %ecx -15: 74 02 je 0x19 -17: 89 08 movl %ecx, (%rax) -19: 48 89 45 00 movq %rax, (%rbp) -1d: 48 83 c5 08 addq $0x8, %rbp -21: e9 00 00 00 00 jmp 0x26 -.reloc -02: R_X86_64_64 MAGICALLY_INSERT_THE_OPARG -22: R_X86_64_PLT32 MAGICALLY_CONTINUE_EXECUTION - 0x4 +.. code-block:: none + + .static + 00: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + 0a: 48 98 cltq + 0c: 49 8b 44 c5 48 movq 0x48(%r13,%rax,8), %rax + 11: 8b 08 movl (%rax), %ecx + 13: ff c1 incl %ecx + 15: 74 02 je 0x19 + 17: 89 08 movl %ecx, (%rax) + 19: 48 89 45 00 movq %rax, (%rbp) + 1d: 48 83 c5 08 addq $0x8, %rbp + 21: e9 00 00 00 00 jmp 0x26 + .reloc + 02: R_X86_64_64 MAGICALLY_INSERT_THE_OPARG + 22: R_X86_64_PLT32 MAGICALLY_CONTINUE_EXECUTION - 0x4 We have the machine code, and the relocations, and we know the calling convention. And so we can take this, parse it out and put it in static header files as data, and then we can implement copy and patch for real. There is python code https://github.com/brandtbucher/cpython/tree/justin/Tools/jit (c4904e44167de6d3f7a1f985697710fd8219b3b2) that handles actually extracting all the cases, compiling each one, parsing out the ELF (by dumping with LLVM to JSON), and then generating the header files. Then the final build has no LLVM dependency and is a self-contained JIT. And because clang/LLVM is portable, you can cross-compile for all platforms from Linux, or do whatever. diff --git a/docs/Commentary/Implementation/CoreSyntax.rst b/docs/Commentary/Implementation/CoreSyntax.rst index 6c425e0..74175bb 100644 --- a/docs/Commentary/Implementation/CoreSyntax.rst +++ b/docs/Commentary/Implementation/CoreSyntax.rst @@ -17,7 +17,7 @@ We use a simple program, boolean "and", presented in a Haskell-ish language: Derivation tree ~~~~~~~~~~~~~~~ -See the connectives :math:`\text{B} = \text{Bool}` and :math:`\multimap` defined :ref:`above `. :math:`\multimap` is right associative as usual. 
Our program then has the following derivation tree, among others (we could add a bang to the first argument, use a multiple-argument function instead of currying, expand out the identity, etc.). +See the connectives :math:`\text{B} = \text{Bool}` and :math:`\multimap` defined in :ref:`Reference/Logic:Common connectives`. :math:`\multimap` is right associative as usual. Our program then has the following derivation tree, among others (we could add a bang to the first argument, use a multiple-argument function instead of currying, expand out the identity, etc.). .. image:: /_static/Stroscot_AND_Proof_Tree.svg diff --git a/docs/Commentary/Implementation/Errors.rst b/docs/Commentary/Implementation/Errors.rst index 1090668..d6b30a3 100644 --- a/docs/Commentary/Implementation/Errors.rst +++ b/docs/Commentary/Implementation/Errors.rst @@ -101,7 +101,7 @@ The wording may be important. A Java editor called Decaf intercepted and re-word StackOverflow and compiler error messages used 3 argument layouts: claim alone, a simple argument consisting of claim, grounds, and warrant, and an extended argument which is a simple argument plus backing. These layouts are multiplied times 2 depending on whether there was a resolution in the claim; my notation is that "claim" means a claim without resolution. The tested results were claim < {simple,extended}, extended < claim+resolution (claim+resolution being dubbed a non-logical "quick fix" instruction). - Per the thesis :cite:`barikErrorMessagesRational` extended arguments are mainly useful for novices and unfamiliar code. Theorizing, if the developer knows what's going on, they likely want brief messages and their preference is claim+resolution > simple > extended > others. But with an ``--explain`` flag their preference is more like extended+resolution > simple+resolution > claim+resolution > extended > simple > others. It's probably worth a survey comparing error messages of varying verbosities to confirm. + Per the thesis :cite:`barikErrorMessagesRational2018` extended arguments are mainly useful for novices and unfamiliar code. Theorizing, if the developer knows what's going on, they likely want brief messages and their preference is claim+resolution > simple > extended > others. But with an ``--explain`` flag their preference is more like extended+resolution > simple+resolution > claim+resolution > extended > simple > others. It's probably worth a survey comparing error messages of varying verbosities to confirm. * Report errors at the right time: Generally one wants to see errors as soon as possible, using static analysis tools. diff --git a/docs/Commentary/Implementation/IR.rst b/docs/Commentary/Implementation/IR.rst index 3f10893..8b9f08b 100644 --- a/docs/Commentary/Implementation/IR.rst +++ b/docs/Commentary/Implementation/IR.rst @@ -195,7 +195,7 @@ Turner writes https://www.cs.kent.ac.uk/people/staff/dat/miranda/manual/30.html: 1. easier to read - six separate chunks of information rather than one big one 2. easier to debug - each of its functions can be exercised separately, on appropriate test data, within a Miranda session -3. more robust for future development - for example if we later wish to add a second `main' function that solves a different problem by using the same five auxiliary functions in another way, we can do so without having to restructure any existing code. +3. 
more robust for future development - for example if we later wish to add a second ``main`` function that solves a different problem by using the same five auxiliary functions in another way, we can do so without having to restructure any existing code. 4. in the current implementation, functions defined inside a "where" clause cannot have their types explicitly specified In practice, programmers tend to use fewer than ten parameters, but little nesting, with a mixture of parameter lifting/dropping and block floating/sinking. @@ -327,16 +327,11 @@ A basic block is a mixture of jump and non-jump instructions that is complete, i Although phi nodes were an interesting idea all the `cool kids `__ are now using block arguments. Blocks arguments fit better into various analysis passes. -Blocks -====== - From a user perspective there are two types of jumpable addresses: -memory - effective address computation -SIB addressing form, where the index register is not used in address calculation, Scale is ignored. Only the base and displacement are used in effective address calculation. -VSIB memory addressing - - +* memory - effective address computation +* SIB addressing form, where the index register is not used in address calculation, Scale is ignored. Only the base and displacement are used in effective address calculation. +* VSIB memory addressing Memory and the program counter are virtualized as well, using labels. A label refers to a memory location with a specific block of code loaded. The blocks are not ordered, so unconditional jumps must be inserted between blocks if necessary. The block order can be determined using profiling, removing the unconditional jump that is taken most often. @@ -355,7 +350,7 @@ There are also constraints from the ABI calling convention: https://gitlab.com/x Values ====== -Since all values are representable in memory, we could use bytes in the IR for values. But this would lose the type information. So instead we must support all the value types listed in :ref:`Values`. +Since all values are representable in memory, we could use bytes in the IR for values. But this would lose the type information. So instead we must support all the value types listed in `Values`_. Thorin ====== diff --git a/docs/Commentary/Implementation/Implementation.rst b/docs/Commentary/Implementation/Implementation.rst index aba5a7d..5240708 100644 --- a/docs/Commentary/Implementation/Implementation.rst +++ b/docs/Commentary/Implementation/Implementation.rst @@ -8,3 +8,21 @@ Steelman 1F. "The language shall be composed from features that [...] can be imp 13E. Translators for the language will be written in the language and will be able to produce code for a variety of object machines. The machine independent parts of translators should be separate from code generators. Although it is desirable, translators need not be able to execute on every object machine. The internal characteristics of the translator (i.e., the translation method) shall not be specified by the language definition or standards. 13F. Translators shall fail to translate otherwise correct programs only when the program requires more resources during translation than are available on the host machine or when the program calls for resources that are unavailable in the specified object system configuration. Neither the language nor its translators shall impose arbitrary restrictions on language features. 
For example, they shall not impose restrictions on the number of array dimensions, on the number of identifiers, on the length of identifiers, or on the number of nested parentheses levels. + +Language +======== + +A near-term goal is to write Stroscot in itself. However, it has to generate code first. I originally picked JavaScript to start for a number of reasons: + +* It's the fastest interpreted language available +* It has reasonably up-to-date syntax and features thanks to TC39 +* A lot of the inspiring projects were written in JS (Wat, macro lambda calculus) +* LLVM compiles to JS and there are LLVM bindings available for JS +* TypeScript doesn't add much besides compilation overhead + +Since then, development has shifted to Haskell, for other reasons: + +* The compiler/type system prevents a lot of common errors (particularly typos, which JS doesn't detect until late) +* A lot of other compiler-theory-heavy projects are written in Haskell or similar functional languages +* I'm most familiar with Haskell. + diff --git a/docs/Commentary/Language/Memory-Management.rst b/docs/Commentary/Implementation/Memory.rst similarity index 53% rename from docs/Commentary/Language/Memory-Management.rst rename to docs/Commentary/Implementation/Memory.rst index 864b4cf..de09c78 100644 --- a/docs/Commentary/Language/Memory-Management.rst +++ b/docs/Commentary/Implementation/Memory.rst @@ -1,219 +1,8 @@ -Memory management -################# - -The language should have automatic memory management. Manual memory management is slow, tedious, and error prone. Automatic memory management is better in all respects, but the implementation has to be flexible enough to be usable for all the things manual memory management is. - -Memory models -============= - -3-3I. It shall be possible to define types whose elements are indirectly accessed. Elements of such types may have components of their own type, may have substructure that can be altered during execution, and may be distinct while having identical component values. Such types shall be distinguishable from other composite types in their definitions. An element of an indirect type shall remain allocated as long as it can be referenced by the program. [Note that indirect types require pointers and sometimes heap storage in their implementation.] - -3-3J. Each execution of the constructor operation for an indirect type shall create a distinct element of the type. An operation that distinguishes between different elements, an operation that replaces all of the component values of an element without altering the element's identity, and an operation that produces a new element having the same component values as its argument, shall be automatically defined for each indirect type. - -Per :cite:`kangFormalMemoryModel2015` there are pretty much two models of memory, pointers and references. Pointers model memory as an integer-indexed array of 2^32 or 2^64 words, accessed by the OS/hardware APIs. References model memory as an associative array from symbolic "references" (potentially infinite in number) to "cells", values (stored in some unspecified format, but with lossless storage). - -Kang describes how combinations of these can be made, for example the "quasi-concrete model" which uses a data type that starts out containing a reference, implements various arithmetic operations symbolically, but switches to a pointer once an integer address is requested. 
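As a rough illustration (a sketch of the idea, not a committed design), the quasi-concrete model can be pictured in Haskell as a location type with a symbolic form and a concrete form, where only a request for the integer address forces a base address to be chosen; the names below are made up for the example.

.. code-block:: haskell

   -- Toy model of the quasi-concrete idea: a location is either symbolic
   -- (block plus offset) or a concrete integer address.
   import Data.IORef
   import qualified Data.Map as M

   newtype Block = Block Int deriving (Eq, Ord, Show)

   data Loc
     = Symbolic Block Integer   -- reference-like: arithmetic stays symbolic
     | Concrete Integer         -- pointer-like: a plain integer address
     deriving (Eq, Show)

   -- Offset arithmetic works on either form without forcing a concrete address.
   addOffset :: Integer -> Loc -> Loc
   addOffset n (Symbolic b off) = Symbolic b (off + n)
   addOffset n (Concrete a)     = Concrete (a + n)

   -- Requesting the integer address is what commits a symbolic location to a
   -- concrete base; the base table stands in for whatever the allocator decides.
   toAddress :: IORef (M.Map Block Integer) -> Loc -> IO Integer
   toAddress _     (Concrete a)     = pure a
   toAddress bases (Symbolic b off) = do
     m <- readIORef bases
     case M.lookup b m of
       Just base -> pure (base + off)
       Nothing   -> do
         let base = 0x10000 + 0x1000 * fromIntegral (M.size m)  -- arbitrary pick
         writeIORef bases (M.insert b base m)
         pure (base + off)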
You can also imagine the other direction, a pointer that masquerades as a reference but errors when attempting to store a value larger than the allocation. But references and pointers are the fundamental ideas and serve to implement all other possibilities.
-
-:cite:`brightProgrammingLanguageIdeas2022` brings up the old x86 16-bit pointer model. There were data, code, stack, and extra segment registers. A near pointer simply adds an offset to the appropriate segment register. Far and huge pointers set the segment register first, allowing access to other segments. Far pointers were unnormalized, while huge pointers were normalized to a canonical segment+offset pair. Nowadays, in x86-64, pointers are just represented as a uniform 64-bit absolute address. The only residue of segment addressing is that there are some "load relative" instructions that take offsets instead of absolute pointers.
-
-Bright suggests that the lesson is to only have one type of pointer. But I disagree. The lesson is really to ensure that a pointer is self-contained, in that it always points to the same location, and unique, in that no other pointer value refers to that location. In the 16-bit pointer model, only far and huge pointers were self-contained. And far and huge pointers had the issue of allowing multiple representations of the same address. The normalization solved this, but there were disagreements on how to normalize and it was often skipped for performance reasons. Comparatively, the 64-bit model has a unique pointer value for every address. Turning now to modern models, the concrete and symbolic models are both fine in this regard; integers and symbols are self-contained and unique.
-
-Bright also raises the specter that "You will wind up with two versions of every function, one with manually managed pointers and one with garbage collected pointers (references). The two versions will require different implementations. You'll be sorry." How worrisome is this?
-
-Well, first let's try to use a pointer as a reference. There are many issues to consider:
-
-* Allocation size: Generally it is assumed the pointer points to some fixed-size buffer of bytes. But this means we can't store arbitrary-sized values; they just don't fit. Usually this is solved by restricting the possible values to a finite set, so that the storage is fixed.
-* Serialization: To mimic the ability of a reference to store heterogeneous types of data, strings, numbers, lists, functions, and so on, we need a universal serialization function that e.g. stores a type tag. We can probably expose such a serialization function from the compiler, as the compiler needs such a function to implement references. Alternatively, for a restricted type, this is solved by writing a custom serialization function.
-* Ownership - Pointers can just be calculated out of thin air, so some other function could overwrite our buffer. The format could be corrupted, or the memory could be deallocated altogether. Usually this is solved by making a private copy of the buffer at some isolated address that no other part of the program uses, and only writing back changes at the end in one atomic operation.
-
-Is someone really going to work through these issues and write a second version of the function? When they could just make the pointer into a reference with ``newRef (deserialize (readBytes 10 ptr))`` and let the compiler do all the work? References should have decent performance so there will be no reason to try to shoehorn a pointer into a reference-like API.
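To make the comparison concrete, here is a small Haskell stand-in for that one-liner, with ``IORef`` playing the role of a reference; ``readBytes``, ``deserialize``, and ``newRef`` are the hypothetical primitives named above, not an existing API.

.. code-block:: haskell

   -- Sketch of wrapping a raw pointer as a reference by snapshotting its bytes.
   -- IORef stands in for the language's references; the helper names mirror the
   -- hypothetical call in the text.
   import Data.IORef
   import Data.Word (Word8)
   import Foreign.Ptr (Ptr)
   import Foreign.Storable (peekElemOff)

   -- Read n bytes starting at the pointer.
   readBytes :: Int -> Ptr Word8 -> IO [Word8]
   readBytes n p = mapM (peekElemOff p) [0 .. n - 1]

   -- Stand-in for a universal deserializer; here it just tags the raw bytes.
   newtype Value = Blob [Word8] deriving (Show)

   deserialize :: [Word8] -> Value
   deserialize = Blob

   newRef :: a -> IO (IORef a)
   newRef = newIORef

   -- The conversion from the text: newRef (deserialize (readBytes 10 ptr)).
   refFromPtr :: Ptr Word8 -> IO (IORef Value)
   refFromPtr ptr = newRef . deserialize =<< readBytes 10 ptr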
Pointers are really a low-level, byte-based abstraction whose only business is interfacing with C code. As evidence that they are needed I offer `C# `__ which has included them since 1.0.
-
-As far as using a reference as a pointer, as long as we don't want to do pointer arithmetic, we can just store an array of bytes in the reference. Such a pattern is common in Java, e.g. the ArrayList class. But when we want to materialize some bytes at a given memory address, there is no way to do it with references. References just don't support interfacing with C code.
-
-I guess it is possible that someone will have two versions of a function, one that implements it in pure Stroscot via references and one that calls out to a C library with pointers. But externally, I think both of them should interact with the rest of the code using references. Using pointers with the C code might avoid a few conversion calls, but references are a lot cleaner to use, e.g. avoiding the use of callbacks, and there is the guaranteed optimization that you can use a reference as a pointer with zero-cost. So I don't think this poses an issue. Even if the C wrapper did use pointers because it was easier than converting to/from references all the time, that's a judgement call on the part of the library author and I don't think there is a solution that would let everyone standardize on one universal type. The best a "pointerOrRef" type can support, even restricted to a small type like ``int8``, is get/set like a regular reference.
-
-Aliasing
---------
-
-Steelman 7I. The language shall attempt to prevent aliasing (i.e., multiple access paths to the same variable or record component) that is not intended, but shall not prohibit all aliasing. [...] All aliasing of components of elements of an indirect type shall be considered intentional.
-
-The wording of this requirement is convoluted and hard to understand, but the way I read it is that anyone who uses an indirection expects aliasing and the language should not do anything to prevent it. Certainly, if you don't need aliasing, you could just use a constant directly.
-
-Pointers
-========
-
-Pointers are the low-level API; they can interface with the OS or other languages (mainly C). I did a study of Windows/Linux memory APIs and concluded that memory is best modeled as the global mutable array ``Memory = Map (Word,BitIdx) Status``. The status allows storing metadata; it's `a complex ADT `__ which has various states like unallocated, committed, etc. The array is indexed at the bit level because that's the granularity `Valgrind's Memcheck `__ uses, but most of the status will be the same for a byte or page as the memory allocators / OS operations work at higher granularity.
-
-It is simple enough to maintain "extra" status bits, and instrument memory functions to check the status of memory before operating. This is essentially what Valgrind does. With this it is possible to identify many common errors, like double free, use after free, access to undefined memory, and null pointer dereferencing. But there is still the possibility of overflowing a buffer into an adjacent allocation, or more generally `type punning `__ by reading some memory as a format it was not written with. These sorts of possibilities are intrinsic to the "big array of bits" model, and many low-level hacks rely on such functionality, so I would say to use references if you want to avoid such things. But of course someone can easily add bounds-checking etc. on top of the basic pointer model as a library.
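As a toy illustration of that instrumentation idea, assuming byte granularity rather than the bit granularity described above and a simplified ``Status`` type, a checked read/write/free layer might look like the following sketch.

.. code-block:: haskell

   -- Toy status-tracked memory: enough metadata to flag wild or use-after-free
   -- access, reads of undefined memory, and double frees. Byte granularity is
   -- used here for brevity; the model in the text is per-bit.
   import qualified Data.Map.Strict as M
   import Data.Word (Word8, Word64)

   data Status = Free | Undefined | Defined Word8
     deriving (Eq, Show)

   type Memory = M.Map Word64 Status   -- absent keys are implicitly Free

   data MemError = BadAccess Word64 | ReadUndefined Word64 | BadFree Word64
     deriving (Show)

   statusAt :: Word64 -> Memory -> Status
   statusAt a = M.findWithDefault Free a

   range :: Word64 -> Int -> [Word64]
   range base n = take n [base ..]

   allocate :: Word64 -> Int -> Memory -> Memory
   allocate base n mem = foldr (\a -> M.insert a Undefined) mem (range base n)

   writeByte :: Word64 -> Word8 -> Memory -> Either MemError Memory
   writeByte a v mem = case statusAt a mem of
     Free -> Left (BadAccess a)                 -- unallocated or already freed
     _    -> Right (M.insert a (Defined v) mem)

   readByte :: Word64 -> Memory -> Either MemError Word8
   readByte a mem = case statusAt a mem of
     Defined v -> Right v
     Undefined -> Left (ReadUndefined a)        -- allocated but never written
     Free      -> Left (BadAccess a)

   free :: Word64 -> Int -> Memory -> Either MemError Memory
   free base n mem
     | any (\a -> statusAt a mem == Free) addrs = Left (BadFree base)  -- double free
     | otherwise = Right (foldr (\a -> M.insert a Free) mem addrs)
     where addrs = range base n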
- -Most addresses will not be allocated (status Free), hence the array is sparse in some sense. It is in fact possible to implement the typical `sparse array operations `__. There are functions to directly allocate memory at an address. Reading and writing are done directly in assembly. The list of currently mapped pages can be had from ``/proc/self/maps`` and `VirtualQueryEx `__, although this has to be filtered to remove pages reserved by the kernel and internal pages allocated by the runtime, and looks slow - it's easier to wrap the allocation functions and maintain a separate list of user-level allocations. Clearing mappings, hashing memory, and indexing by mapped pages all work when restricted to the list of user pages. It's a little more complicated than simple sparsity because there are many different statuses and the operations overlap. - -Storage vs. memory -------------------- - -In practice, the path from cloud to CPU is long, and accessible storage is not just RAM. Some latency numbers and the programming API: - -* Physical registers (0.3 ns): managed by the CPU -* Logical registers (0.3 ns): assembly read/write -* Memory Ordering Buffers (MOB), L1/L2/L3 Cache (0.5-7 ns): Managed by the CPU -* Main Memory (0.1us-4us): assembly read/write -* GPU memory (0.2us-0.5us): assembly read/write, driver ioctl's -* NVRAM (200us-250us): assembly read/write, special calls -* SSD (250-500us): kernel file APIs -* LAN (0.5-500ms): kernel network stack, driver bypass -* HDD (3 ms): kernel file APIs -* WAN (150ms): kernel network stack, driver bypass - -Not all applications will use all of these, but all will use some and there is an application that uses each. So all of these have to be modeled in order to create a performant application. Ideally the memory management system would be a "storage management system" that combines all of these into a single pointer-like abstraction and allows copying data between locations as appropriate. But it's a leaky abstraction, I'm not sure it can be pulled off except as a library. - -"You-choose" Allocation ------------------------ - -In practice, fixed-address allocation / assignment is not commonly used. Instead, there are ``mmap NULL``, ``malloc``, and the C library API alloc/realloc, which allocate memory with system-chosen / allocator-chosen location. For verifying behavior, the right model for this is adversarial, i.e. the allocator chooses the worst possible location, subject to restrictions such as that the allocation must be suitably aligned and disjoint from all unrevoked allocations. More formally, the behavior of a correct program should not depend on what addresses the system picks, i.e. all choices should be observationally equivalent. (The system can also return an out of memory error, but this doesn't have to result in equivalent behavior.) - -Of course, the actual allocation strategy should not be the worst, rather it should try to achieve the best performance. For the most part, people do not seem to pay much attention to allocator design, because it is pretty cheap. For example `in Doom 3 `__ the median time for is 31 nanoseconds, ranging from 21 nanoseconds to 201 microseconds, and free is comparable. - -But, speeding up allocation is actually fairly important. 
Combining operations into a single larger operation (allocate a larger buffer, call ``close_range`` to close several open FD's than to iterate over them individually) by pushing allocations forward and delaying frees, as long as there is sufficient memory or resource capacity available, can be a big win. In contrast, reads and writes are always real work, and besides SIMD there is not much way to optimize it. - -There are also a lot of locality and cache effects from the address allocation algorithm. In the trivial case, the memory usage can be predicted in advance and allocations given fixed assignments, giving zero cost memory allocation. In more practical applications, variable allocations will need to be tracked, but there are still tricks for grouping allocations based on access patterns, avoiding fragmentation. Most research has been on runtime allocation optimization, but many of these optimizations can be precomputed at compile time. For example: - -* A loop that allocates and deallocates a scratch buffer in the body is much more performant if the buffer is allocated to the same location every time - the allocation/deallocation code can even be pulled out of the loop. -* Grouping hot variables into a page, so the page is always loaded and ready -* Grouping things that will be freed together (pools/arenas) - -Optimizing access ------------------ - -Generally, optimizations are allowed to eliminate possibilities allowed by the memory model, but there could also be an option to strictly preserve the set of possibilities. - -Eliminating a pointer read amounts to tracking down the matching pointer write and propagating the value directly, which can be accomplished by tracing control flow. There is the issue of data races with concurrent writes, but the memory model dictates which values a read may resolve to, and the verifier already handles nondeterminism, so it is not much harder than normal value propagation. There is also modeling foreign code, specifically determining whether the foreign code can write a pointer (i.e, whether the pointer is shared or not). - -Eliminating a pointer write requires proving that the address is never read before deallocation or another pointer write. Again there are the issues of data races and foreign code. - -CHERI ------ - -CHERI pointers are 129-bit, consisting of a 1-bit validity tag, bounds, permissions, object type, and actual pointer. Valid pointers may only be materialized in a register or memory by transforming an initial unbounded pointer obtained from the OS. This means that the simple model of pointers as integers is no longer valid. Instead, a pointer is the combination of an integer address and a capability. The `CHERI C/C++ API `__ represents the address+capability value as ``void*`` and addresses as ``vaddr_t``; there doesn't seem to be a way to refer to a capability without an address. - -I tried to read further, but the model is complicated, essentially implementing a GC to avoid dangling pointers, so I am not sure it will ever become mainstream. - -Persistent memory ------------------ - -The pointer API, assembly wrapping, and OS calls cover using persistent memory via standard file APIs or memory-mapped DAX. Memory is volatile while persistent memory is not, so persistent memory is faster storage, not weird RAM. And storage is complex enough that it seems best handled by libraries. Making the memory management system memkind-aware seems possible, like memory bound to NUMA nodes. - -References -========== - -5B. 
Each variable must be declared explicitly. Variables may be of any type. The type of each variable must be specified as part of its declaration and must be determinable during translation. [Note, "variable" throughout this document refers not only to simple variables but also to composite variables and to components of arrays and records.] -5E. There shall be no default initial-values for variables. -5F. Assignment and an implicit value access operation shall be automatically defined for each variable. -9C. It shall be.possible to mark variables that are shared among parallel processes. An unmarked variable that is assigned on one path and used on another shall cause a warning. - -A reference is a symbolic index into a global associative array of objects, ``Map Reference Object``. The array allows allocating new references, deleting them, and reading/writing the reference. Reference symbols can be compared for equality, hashed to an integer, and packed/unpacked to/from an integer. - -The packing and hashing requires a little explanation. Packing the same reference always returns the same value during a program execution, and the packed value is distinct from the packed value of any other reference. But the exact value is internal to the memory system - it is an "adversarial" model similar to pointers where if the program's behavior depends on the choice of packed value it is incorrect. The hashing is similar to packing, it is again the same value for the same reference, it is just that there is no distinctiveness constraint (so the program must have the same behavior even if all references hash to 0), and also no way to unhash the value, so there is no need to worry about resolving unpack invocations. - -There are higher-level types like immutable references and reference wrappers, but those all translate away to normal references or pointer access and don't need involvement from the compiler. Per :cite:`ichbiahRationaleDesignADA1979` we should provide a "freeze" operation which recursively removes all reference indirections and turns a reference-containing value into a truly immutable/constant object, as this is "the most useful and should be retained as the unique meaning of constancy". - -Pointer conversion ------------------- - -The location of the data of a reference is not fixed. If it's small enough it could just be in a register, or there could be multiple copies of the data in memory. Also GC can move/copy the reference. The data could be produced on-demand and be represented by a thunk. All that can really be said is that the compiler will respect the semantics of storing and retrieving data. - -Foreign operations like OS calls require a pointer to a memory address, because references don't necessarily exist in memory. The canonical way of doing this is simply reading the reference value and storing it in a buffer represented by a pointer ("materializing" it in memory). Internally, when compiling away the reference, the compiler tries to find a good way to store the reference - if it's lucky, it can backpropagate the pointer request and store the data there from the beginning, so that the "read and store" operation is actually a no-op that makes zero copies. - -But, in the fallback case of storing a few words, where a memory allocation is appropriate, the reference translates directly to a pointer allocation. The memory is configured to trap on stray user-level access, so that only the compiler-generated code has access. 
Even in this case, though, the reference's internal value is not the pointer itself, rather there is a more complex strategy of using a "handle" identifier that allows moving the data around after it is allocated. - - - - -local (“arena”) allocators speed up short-running programs, keep long–running ones from slowing down over time. All global allocators eventually exhibit diffusion–i.e., memory initially dispensed and therefore (coincidentally) accessed contiguously, over time, ceases to remain so, hence runtime performance invariably degrades. This form of degradation has little to do with the runtime performance of the allocator used, but rather is endemic to the program itself as well as the underlying computer platform, which invariably thrives on locality of reference." -diffusion should not be confused with fragmentation–an entirely different phenomenon pertaining solely to (“coalescing”) allocators (not covered in this paper) where initially large chunks of contiguous memory decay into many smaller (non-adjacent) ones, thereby precluding larger ones from subsequently being allocated –even though there is sufficient total memory available to accommodate the request. Substituting a pooling allocator, such as theone used in this benchmark (AS7), is a well-known solution to the fragmentationproblems that might otherwise threaten long-running mission-critical systems." - - -Arena-based bump allocator for objects -Cheap write barrier in the common case -Mark-and-compact collection for oldest generation -Copying generational collection for younger generations -Special space (in cache?) for nursery generation -State Transitions - - -I think it's better to write a faster GC than to try to special-case various types of allocation. The GC itself can special case things. Optimizing requires global information and only the GC has a global view. - -Static immutable data should be interned. - -Compress strings with shoco https://github.com/Ed-von-Schleck/shoco or the sequitur algorithm http://www.sequitur.info/. Maybe can fit into a 64-bit word. Cleaning the dictionary periodically would probably have to happen to avoid resource leaks, which might have to recompress every relevant string. Fortunately, long strings tend to be fairly long-lived. - -https://github.com/ollef/sixten talks about being able to represent intrusive lists. I experimented with allowing the decision of pointer vs direct storage to be made in pack, but it really simplifies the code a lot to require all pack functions to produce flat blobs of data. - -Destructors are inspired by C++ RAII destructors, hence the name. Admittedly the actual API doesn't bear much resemblance. `Finalizers `__ can resurrect objects and don't have deterministic execution, hence would be a bad name. Go's defer statement and try-finally are related, but they only work locally and have imprecise execution semantics. - -Portable mmap: -* Yu virtualalloc https://github.com/alpha123/yu/tree/master/src/platform -* Go: https://github.com/edsrzf/mmap-go -* C: mmap on windows https://github.com/alitrack/mman-win32 -* C++: https://github.com/mandreyel/mio -* Rust: https://github.com/RazrFalcon/memmap2-rs - -Representation -============== - -11A. The language shall permit but not require programs to specify a single physical representation for the elements of a type. These specifications shall be separate from the logical descriptions. 
Physical representation shall include object representation of enumeration elements, order of fields, width of fields, presence of "don't care" fields, positions of word boundaries, and object machine addresses. In particular, the facility shall be sufficient to specify the physical representation of any record whose format is determined by considerations that are entirely external to the program, translator, and language. The language and its translators shall not guarantee any particular choice for those aspects of physical representation that are unspecified by the program. It shall be possible to specify the association of physical resources (e.g., interrupts) to program elements (e.g., exceptions or signals). - -A lot of languages have a fixed or default memory representation for values, e.g. a C struct, a Haskell ADT, and a Python object are always laid out in pretty much the same way. The more systems-level languages allow controlling the layout with flags, for example Rust has `type layout `__ and also C compatibility. Layout is then defined by its size, alignment, padding/stride, and field offsets. Now it's great to have a compact representation of the memory layout - but only if you can actually write the memory layout you want using these features. But these flags are't really that powerful. Here's some examples of what can't generally be done with the current memory DSL's: - -* specify the in-memory order of fields differently from their logical order -* specifying how to encode enumeration constants (per struct it appears in) -* turn array-of-structs into struct-of-arrays -* flattening a datatype, like ``Either Bool Int`` into ``(Bool,Int)``, or representing a linked list as a contiguous series of records. -* storing some parts via pointer indirections (non-contiguous memory layout) -* NaN-boxing and NuN-boxing (`ref `__ `2 `__), representing the JS ``Any`` type as a single 64-bit word. -* parsing network packets into structured data - -Maybe some of these could be addressed by flags, but from the last two, it is clear that we are really looking for a general-purpose memory serialization interface. I looked at `Data.Binary `__, `store `__, and :cite:`delawareNarcissusCorrectbyconstructionDerivation2019`. Narcissus is too complex IMO: - -:: - - Format = Set (S, St, T, St) - Encode = S -> St -> Option (T, St) - Decode = T -> St -> Option (S, St) - -The state parameter can be gotten rid of by defining ``S = (S,St), T = (T,St)``: - -:: - - Format = Set (S, T) - Encode = S -> Option T - Decode = T -> Option S - -And we can make encode/decode total by defining ``S = {s | exists t. (s,t) in Format}``, ``T = {t | exists s. (s,t) in Format}``. - -I thought about letting ``pack`` narrow the range of values, e.g. rounding 1.23 to 1.2, but concluded that it would be surprising if storing a value to memory changed it. The rounding can be defined as a pre-pass over the data to convert it to a ``Measurement`` type that then has optimized storage. - -One tricky part is that the naive way to specify types interferes with overloading, subtyping and implicit conversions. ``pack (Int8 1)`` can give a byte as expected, but it can also implicitly convert to an ``Int32`` and give 4 bytes. Since we have dependent types this isn't a real issue, just make sure the code generated after representation specialization passes the type explicitly: ``pack Int32 (Int8 1)``. - -A few things need to optimize away for reasonable performance. ``length . 
pack`` should optimize to something like ``const 20`` for most values, or at least something that doesn't allocate, so that field accesses are independent and values can be allocated sanely. These functions might have to be hacked in, specializing to constant-sized values. - -Since writing these serialization functions all the time would be tedious, we can make a format DSL that specifies the functions in a nicer way. Although one of these DSL's will be the standard / default, it'll be some kind of macro / constraint system, so defining new format DSLs for specific purposes shouldn't be hard. - -The translation to use pack is pretty simple: every value is wrapped in a call to pack, the result is stored as a tuple ``(cell,unpack)``, and every usage applies unpack to the cell. The translation uses whatever pack is in scope; pack can be overridden like any other implicit parameters. The unpack functions will end up getting passed around a lot, but function pointers are cheap constants, and constant propagation is a thing, so it shouldn't be an issue. - -So finally the most general API is ``Write = Alloc (Size,Align) (Addr -> Write) | Store, Store = Map Addr MaskedWord`` and ``Unpack a = Maybe Addr -> Read -> a, Read = Map Addr Word``. This allows masked writes and multiple or fixed allocation addresses, but does not allow failing to read the value back. Also the ``pack`` function allows passing arbitrary side-band data to the ``unpack`` function. Maybe though, it is still not general enough, we should just have lens-like functions like ``write : Memory -> a -> Memory`` and ``read :: Memory -> a``. There still need to be constraints though, like that you get back what you wrote and non-interference of writes. - -Now we also want to allow optimization of the memory representation. Consider some data points - if there is only one possible value, then the compiler should optimize this to a constant and not store it at all. If there are two possible values, the compiler should probably use a boolean flag and again hard-code the values as constants. If the potential values include all values of a given type (and nothing else), then the compiler should use the representation for that type. If the potential values include a given type, and also members of another type, then the compiler should use the most narrowly-defined representation that contains both of those types. And it should consider whether it can choose the representation of the union type so as to minimize the amount of conversion needed for the more commonly used type (as in NaN/NuN-boxing). If the potential values can be anything, then the compiler should use the universal representation. - -The process of fixing the memory representation of a program can be modeled as follows. We start with a program that passes around values. Then we insert conversion operations: on every declaration, we insert a conversion to binary, and on every use, we insert a conversion from binary. As the binary representation is defined so that a read of a write is is the identity, this transformation does not change the meaning of the program. Then we additionally write this binary representation to memory on the declaration, and read this binary representation from memory on use. Again this does not change the semantics due to the non-interference of writes property. Although, in reality it could change the semantics: maybe a cosmic ray or something could change what we have written. 
But at this point, our program operates purely on memory and does not have any values floating around. - -Model -===== +Memory model +############ For memory management we have to consider values, called objects. Pointers are manually freed and hence don't need to be managed. - - - An invalidate queue is more like a store buffer, but it's part of the memory system, not the CPU. Basically it is a queue that keeps track of invalidations and ensures that they complete properly so that a cache can take ownership of a cache line so it can then write that line. A load queue is a speculative structure that keeps track of in-flight loads in the out of order processor. For example, the following can occur CPU speculatively issue a load from X @@ -223,38 +12,6 @@ An invalidate queue is more like a store buffer, but it's part of the memory sys A store buffer is a speculative structure that exists in the CPU, just like the load queue and is for allowing the CPU to speculate on stores. A write combining buffer is part of the memory system and essentially takes a bunch of small writes (think 8 byte writes) and packs them into a single larger transaction (a 64-byte cache line) before sending them to the memory system. These writes are not speculative and are part of the coherence protocol. The goal is to save bus bandwidth. Typically, a write combining buffer is used for uncached writes to I/O devices (often for graphics cards). It's typical in I/O devices to do a bunch of programming of device registers by doing 8 byte writes and the write combining buffer allows those writes to be combined into larger transactions when shipping them out past the cache. -Allocator -========= - -ultimate allocator - steal features from all other allocators. It's one of those well-researched areas where a few percent lives. Substitution isn't really an option but maybe some components could be pluggable. Thread safe but values are pure and references can be determined to be thread-local so lots of optimizations. - -We want to automatically determine the number of allocation regions and their size to maximize locality. - -locate memory leaks - places where allocated memory is never getting freed - memory usage profiling - -Handling OOM gracefully - non-allocating subset of language. Should be enough to implement "Release some resources and try again" and "Save the user's work and exit" strategies. Dumping core is trivial so doesn't need to be considered. - -A derived pointer is a reference plus an offset. When the address and layout of the object is known we can store the derived pointer as the sum of the value address and offset, allowing direct pointer dereferencing. But since the address is known we could also just store the derived pointer as the offset, so it's only useful if computing the sum is necessary and expensive. - -An object can be treated as an array, N[i] and N.length. - -The array part of shared memory is necessary because there is a double-word CAS operation on x86 (CMPXCHG16B), and also for efficiency. - - -With persistent memory only word-sized stores are atomic, hence the choice of shared memory as an array of words. https://stackoverflow.com/questions/46721075/can-modern-x86-hardware-not-store-a-single-byte-to-memory says that there are in fact atomic x86 load/store instructions on the byte level. - -word - An integer ``i`` with ``0 <= i < MAX``. - - -Ternary: in current computers all words are some number of bits. 
Most discussion of ternary uses pure ternary, but IMO words will be a mixture of trits and bits - the mixture allows approximating the magic radix e more effectively. IDK. Whatever the case, the bit/trit (digit) is the smallest unit of memory, and all other data is a string of digits. - -Since no commercially available computers support ternary it is not worth supporting explicitly in the language. But for future-proofing, we must ensure that anytime there is a binary string, the APi can be extended to use a mixed binary/ternary string. - - -Eliminating pointers entirely is not possible. But we can minimize the lifetime of pointers in the standard library to the duration of the call, and use values / references everywhere else. - - Pieces ====== @@ -411,11 +168,6 @@ A very inefficient pattern is to randomly allocate and pin a large number of ran I concluded after looking at it again that sharing parts of data structures should be pure, so my plan to use immutable references wasn't going to work because allocating a reference would be impure. So instead there is an allocation interface. -Dumping ground -============== - - - Copying, quad-color incremental, generational garbage collector Arena-based bump allocator for heap-allocated values Memory allocator API @@ -546,9 +298,53 @@ But it fills a hole in the memory management design space. There is no concise built-in syntax for dereferencing pointers, because there are many different flavours of memory accesses: aligned or unaligned, cached or uncached, secure or non-secure, etc. and it is critical that every memory access is explicit about its flavor. A side effect of putting more information in the access operation is that pointers are untyped, simply a wrapper around bitvectors. -Memory model -~~~~~~~~~~~~ - The ‘load’ and ‘store’ instructions are specifically crafted to fully resolve to an element of a memref. These instructions take as arguments n+1 indices for an n-ranked tensor. This disallows the equivalent of pointer arithmetic or the ability to index into the same memref in other ways (something which C arrays allow for example). Furthermore, for the affine constructs, the compiler can follow use-def chains (e.g. through `affine.apply operations <../Dialects/Affine.md/#affineapply-affineapplyop>`__ or through the map attributes of `affine operations <../Dialects/Affine.md/#operations>`__) to precisely analyze references at compile-time using polyhedral techniques. This is possible because of the `restrictions on dimensions and symbols <../Dialects/Affine.md/#restrictions-on-dimensions-and-symbols>`__. A scalar of element-type (a primitive type or a vector type) that is stored in memory is modeled as a 0-d memref. This is also necessary for scalars that are live out of for loops and if conditionals in a function, for which we don’t yet have an SSA representation – `an extension <#affineif-and-affinefor-extensions-for-escaping-scalars>`__ to allow that is described later in this doc. + +local (“arena”) allocators speed up short-running programs, keep long–running ones from slowing down over time. All global allocators eventually exhibit diffusion–i.e., memory initially dispensed and therefore (coincidentally) accessed contiguously, over time, ceases to remain so, hence runtime performance invariably degrades. 
This form of degradation has little to do with the runtime performance of the allocator used, but rather is endemic to the program itself as well as the underlying computer platform, which invariably thrives on locality of reference."
+diffusion should not be confused with fragmentation–an entirely different phenomenon pertaining solely to (“coalescing”) allocators (not covered in this paper) where initially large chunks of contiguous memory decay into many smaller (non-adjacent) ones, thereby precluding larger ones from subsequently being allocated –even though there is sufficient total memory available to accommodate the request. Substituting a pooling allocator, such as the one used in this benchmark (AS7), is a well-known solution to the fragmentation problems that might otherwise threaten long-running mission-critical systems."
+
+
+Arena-based bump allocator for objects
+Cheap write barrier in the common case
+Mark-and-compact collection for oldest generation
+Copying generational collection for younger generations
+Special space (in cache?) for nursery generation
+State Transitions
+
+
+I think it's better to write a faster GC than to try to special-case various types of allocation. The GC itself can special case things. Optimizing requires global information and only the GC has a global view.
+
+Static immutable data should be interned.
+
+Compress strings with shoco https://github.com/Ed-von-Schleck/shoco or the sequitur algorithm http://www.sequitur.info/. Maybe can fit into a 64-bit word. Cleaning the dictionary periodically would probably have to happen to avoid resource leaks, which might have to recompress every relevant string. Fortunately, long strings tend to be fairly long-lived.
+
+
+ultimate allocator - steal features from all other allocators. It's one of those well-researched areas where a few percent lives. Substitution isn't really an option but maybe some components could be pluggable. Thread safe but values are pure and references can be determined to be thread-local so lots of optimizations.
+
+We want to automatically determine the number of allocation regions and their size to maximize locality.
+
+locate memory leaks - places where allocated memory is never getting freed - memory usage profiling
+
+Handling OOM gracefully - non-allocating subset of language. Should be enough to implement "Release some resources and try again" and "Save the user's work and exit" strategies. Dumping core is trivial so doesn't need to be considered.
+
+A derived pointer is a reference plus an offset. When the address and layout of the object is known we can store the derived pointer as the sum of the value address and offset, allowing direct pointer dereferencing. But since the address is known we could also just store the derived pointer as the offset, so it's only useful if computing the sum is necessary and expensive.
+
+An object can be treated as an array, N[i] and N.length.
+
+The array part of shared memory is necessary because there is a double-word CAS operation on x86 (CMPXCHG16B), and also for efficiency.
+
+
+With persistent memory only word-sized stores are atomic, hence the choice of shared memory as an array of words. https://stackoverflow.com/questions/46721075/can-modern-x86-hardware-not-store-a-single-byte-to-memory says that there are in fact atomic x86 load/store instructions on the byte level.
+
+word
+  An integer ``i`` with ``0 <= i < MAX``.
+
+
+Ternary: in current computers all words are some number of bits. 
Most discussion of ternary uses pure ternary, but IMO words will be a mixture of trits and bits - the mixture allows approximating the magic radix e more effectively. IDK. Whatever the case, the bit/trit (digit) is the smallest unit of memory, and all other data is a string of digits.
+
+Since no commercially available computers support ternary it is not worth supporting explicitly in the language. But for future-proofing, we must ensure that anytime there is a binary string, the API can be extended to use a mixed binary/ternary string.
+
+
+Eliminating pointers entirely is not possible. But we can minimize the lifetime of pointers in the standard library to the duration of the call, and use values / references everywhere else.
diff --git a/docs/Commentary/Implementation/TermRewriting.rst b/docs/Commentary/Implementation/TermRewriting.rst
index 2583d94..afbdff1 100644
--- a/docs/Commentary/Implementation/TermRewriting.rst
+++ b/docs/Commentary/Implementation/TermRewriting.rst
@@ -1,10 +1,59 @@
 Term rewriting
 ##############
 
+Higher-order matching
+=====================
+
+As a consequence of confluence of the substitution calculus, each rewrite step is composed of an expansion in the substitution calculus, a replacement by applying some rule, and a reduction in the substitution calculus, so it is M <<- C[l] and C[r] ->> N. Handling lambdas in RHSs is fairly straightforward, just treat beta-reduction as a normal reduction step and handle it with the evaluation machinery. But for the lambdas on the left hand side, in the pattern, it is more complex.
+
+Finding the contexts ``C`` is fairly straightforward, just enumerate all the subterms of ``t``. But solving the equation ``s = lθ`` is an instance of higher-order unification (specifically higher-order matching). The complexity of higher order matching is somewhere around :math:`{\mathcal {E}}^{4}`, "the minimal solution is of size at most 2^2^2^2..., the number of 2's proportional to the size of the problem". :cite:`stirlingDecidabilityHigherorderMatching2009` That proof is for the simply typed lambda calculus but the only properties of the STLC used are strong normalization and that terms have a defined eta long form (canonical form), so it is most likely also applicable to all lambda terms with unique normal forms. Naturally determining the normal form / lack of a normal form is of complexity :math:`\Sigma_0^1`, but most lambdas in programs are in normal form already.
+
+There are two main possibilities for implementing higher-order matching. One is to read Stirling's paper and extract an algorithm. He says "implicit in the analysis are positive sensible algorithms for dual interpolation problems", so there is definitely an algorithm to extract. Another is to implement a more general algorithm for solving higher-order unification as opposed to matching, along the lines of Gérard Huet's preunification. There is a semi-algorithm with good performance, :cite:`vukmirovicEfficientFullHigherorder2021`. The implementation is `open source `__ and only a few hundred lines.
+
+Stirling's method, involving dual interpolation, 3 transformations, and reduction to a tiling game, is conceptually complex. Extracting a usable algorithm from the paper seems like it will be challenging and time-consuming. It is also not clear if the resulting algorithm will be any better than Huet's; it is at least guaranteed to terminate, but perhaps Huet's algorithm terminates for matching problems as well.
+
+In contrast, Huet's algorithm is basically off-the-shelf. 
I will need unification anyways, for purposes such as determining if rules can overlap. Also the approach in Zipperposition allows plugging in "oracles" - I think Stirling's method can be plugged in as such an oracle. There is the issue of unification being :math:`\Sigma_0^1` and needing to deal with failed unifications but I think Huet's approach makes more sense to start with. + +Unification +----------- + +Unification is the problem of finding all solutions to a system of equations. First-order unification solves a set of equalities ``a1=b1, a2=b2, ...`` over tree terms and variables. This can be extended to the "dual unification" problem that also includes disequations ``c1 != d1`` in the list that must not be satisfied. Constraint logic programming requires solving systems of equations over reals or other sets. The solution takes the form of a complete set of unifiers, where each unifier is a substitution that may have its free variables substituted to obtain a solution, together with constraints over those free variables. A substitution is a set of assignments from variables to expressions. + +Unification isn't really part of the semantics of logic programming, as the semantics is phrased in terms of satisfiability. But it is a standard technique used in implementing logic programming, and in practice the implementation defines the semantics. Prolog only implements first-order unification. Teyjus / λProlog limit to higher-order "pattern lambdas". With ZipperPosition :cite:`vukmirovicEfficientFullHigherorder2021` there is outlined a full higher-order unification algorithm extending Huet's semi-algorithm - the need to support multiple unifiers for a complete set complicates things a bit. + +The outline of every unification algorithm is that it randomly applies simplifying reduction operations to an equation until it results in a substitution, then applies the substitution to the remaining equations (dereferencing). Here we show :cite:`vukmirovicEfficientFullHigherorder2021`'s, adapted to match the presentation on `Wikipedia `__: + +* delete: ``s=s`` is removed +* decompose: ``a s1 ... sm = a t1 ... tm`` to equations ``{s1 = t1, ..., sm = tm }`` +* rigid/rigid conflict: ``a sm = b tn`` fails if a and b are different rigid heads +* dereference: ``F s1 ... sn = t`` to ``(F /. σ) ... = t``, if the substitution σ from another equation maps F +* empty equation list: trivially soluble +* alpha/eta normalization: ``λxm.s = λyn.t`` to ``λxm.s = λxm.t' xn+1 . . . xm``, where ``m ≥ n``, ``xi`` disjoint from ``yj``, and ``t' = t /. {y1 → x1 , ... , yn → xn }`` +* beta normalization: reduce left/right to hnf +* under lambda: apply rule for ``a = b`` to ``λx. a = λx. b`` + +ZipperPosition has more complex reductions for hard cases: + +* oracle fail: ``s=t`` fails if oracle determines to be insoluble +* oracle success: ``s=t`` has finite CSU, branch to each solution σ_i +* bind: try projections with the following binding substitutions: + + * flex-rigid ``P(λx. F s = λx. a t)``: try an imitation of a for F, if a is constant, and all Huet-style projections for F, if F is not an identification variable. + * flex-flex with different heads ``P(λx. F s = λx. G t)``: all identifications and iterations for both F and G, and all JP-style projections for non-identification variables among F and G. + * flex-flex with identical heads and the head is an elimination variable, ``P(λx. s = λx. t)``: no bindings. + * flex-flex with identical heads, ``P(λx. F s = λx. 
F t)``: all iterations for F at arguments of functional type and all eliminations for F. + +The flex-binding step is slow, but a good set of oracles makes the algorithm efficient for most practical cases. Of course it would be better to find reduction rules that solve things generally rather than oracles which work on specific cases, but this is hard. + +The unifier search can be integrated with the overall logical search for satisfiable formulas. + +By default Prolog does not use the `occurs check `__ in unification. This means for ``x == f x`` the substitution ``x -> f x`` is obtained. Denotationally this can be accommodated by allowing states to contain infinite rational terms, :cite:`weijlandSemanticsLogicPrograms1990` ``x = f (f (f (...)))`` in this case. In most Prolog programs the occurs check does not make a difference and simply slows down unification. :cite:`aptWhyOccurcheckNot1992` Prolog defines a ``unify_with_occurs_check`` predicate, and has an option for doing the occurs check in the implicit unification when dispatching predicates. Meanwhile miniKanren always uses the occurs check. The occurs check is needed in first order logic theorem-proving, where skolemization turns quantifiers into variables and is sound only if the occurs check is used. + + Cycle detection =============== -So the first strategy is to prove the system is acyclic. Per :cite:`ketemaViciousCirclesRewriting2005` this includes orthogonal weakly head normalizing higher-order TRSs. +A first strategy for handling cycles is to prove the system is acyclic. Per :cite:`ketemaViciousCirclesRewriting2005` this includes orthogonal weakly head normalizing higher-order TRSs. For general-purpose detection there are SCC computation algorithms; Wikipedia has a `list `__. The DFS algorithms seem most appropriate as they can naturally be maintained during the reduction graph search; finding the normal forms of a term essentially already is a DFS. Kosaraju's algorithm is not appropriate as computing the transpose / converse of the reduction relation is not easy. Comparing Tarjan and the path-based algorithms, Tarjan uses a second index (pointer) while the path-based uses a stack. The stack manipulation of the path-based algorithm is simpler to understand than the invariants of Tarjan; the Wikipedia page for Tarjan is constantly vandalized with people who do not understand it. So I would say the path-based algorithm is better. @@ -18,11 +67,10 @@ This sounds a bit tricky to implement but it is not too bad. We can split into t * Find a value: Evaluate the expression to a value, any value including exceptional values. Because of the meaningless term reduction, every expression will evaluate to some kind of value. The search should be biased towards finding non-exception values but it does not need to be perfect, for example there are reduction strategies such as parallel outermost that are guaranteed to be normalizing for some classes of TRS. This is where cycle detection and infinite value handling come in. * Non-determinism check: We can analyze confluence and the reduction sequence of the value to see if the expression can evaluate to anything else. 
If there are no other values or all other values are exceptions, we are done; otherwise, we handle the non-determinism appropriately, such as replacing an exception with a non-exception, erroring on multiple non-exception values at the top-level, collecting and returning the multiple values if inside a logical combinator such as ``allOf``, or skipping the non-determinism check entirely for ``oneOf``. -Check for non-exception values: Here is where we have to analyze all reduction sequences for the possibility of avoiding exceptions. , so if these apply and the expression evaluates to an exception we can rule out non-exception values. Also, -* Nondeterminism check: If , collect all of the possible values; skip the check; if at the top-level, analyze confluence to see if the expression can evaluate to multiple distinct non-exception values, and error if multiple values. -* Find exceptional value: There may be multiple exceptional values but we don't care; the implementation is just allowed to pick one arbitrarily. +Infinitary rewriting +==================== -The common notions of an ARS carry over to infinitary reductions: :cite:`endrullisInfinitaryTermRewriting2014` +The common notions of an ARS carry over to infinitary reductions as follows: :cite:`endrullisInfinitaryTermRewriting2014` * transitive reduction: irreflexive kernel of reduction closure * normal form: irreducible term @@ -30,10 +78,109 @@ The common notions of an ARS carry over to infinitary reductions: :cite:`endrull * nonterminating reduction: infinite reduction sequence with no limit or that does not reduce to its limit * weakly normalizing (normalizing): every term has a reduction to a normal form * confluence: if t reduces to t1 and t2, then there is a common term s such that t1 and t2 reduce to s. -* Church-Rosser: if t1 is equivalent to t2, then there is a common term s such that t1 and t2 reduce to s. +* Church-Rosser: if t1 is equivalent via reductions and un-reductions to t2, then there is a common term s such that t1 and t2 reduce to s. * normal form property w.r.t. reduction:: if u reduces to t and s, and s is a normal form, then t reduces to s * normal form property: if t is equivalent to s and s is a normal form, then t reduces to s * unique normalization w.r.t. reduction: if t reduces to t1 and t2, and t1, t2 are normal forms, then t1=t2 * unique normalization: if t1 is equivalent to t2, and t1, t2 are normal forms, then t1=t2 -However common theorems such as Newman's lemma do not, so it is not clear how useful these are. +However, common theorems such as Newman's lemma do not carry over for the traditional "strong convergence" definition of closure. The examples of non-confluence do have confluent rewrites under T*, so I am not aware of any actual counterexamples, but I would like a proof that Newman's lemma holds under T* to feel assured. + + +Confluence +---------- + +Confluence has gotten a lot of attention as well and has automated provers. Confluence implies UN→; it is equivalent if the TRS is weakly normalizing. And there is an extension theorem: Similarly a system can be shown to be UN= by presenting an extension of it that is confluent. :cite:`klopExtendedTermRewriting1991` So a UN= program is just a partially specified system. UN→ is a little more complex though. And the equivalence classes of terms are uncomputable in general so the extension is as well. + +Confluence avoids situations where a system may branch into two distinct diverging states. 
It makes finding a normalizing strategy much easier as the strategy only has to avoid getting stuck evaluating a term infinitely (using the same rule infinitely often), as opposed to UN→ where the strategy must avoid using the wrong reduction rule at every step. + +The Knuth-Bendix algorithm produces a confluent system from a set of non-oriented equations, but the rules in programs are oriented, so using this would be confusing. Not to mention that the algorithm fails often. So that's out. + +A necessary condition for confluence is weak/local confluence, i.e. each critical pair is convergent. But this is not sufficient. Newman's lemma is that a terminating locally confluent TRS is confluent. But termination is quite strong. A generalization is a critical pair system :cite:`hirokawaDecreasingDiagramsRelative2009` (also called decreasingly confluent): the system must be left-linear, locally confluent, and its critical pair steps must be *relatively terminating*, i.e. the relation 'arbitrary steps followed by a critical pair step followed by arbitrary steps' is terminating. Trivial critical pair steps can be excluded, hence this includes weakly orthogonal TRSs. For a terminating TRS the TRS syntactic equality notion is equivalent to strict equality, hence the system is left linear in the CTRS sense, hence why this includes Newman's lemma. + +We say → has random descent (RD), if for each R:a ↔∗b with b in normal form, all maximal reductions from a have length d(R) and end in b. Systems with random descent are confluent. + +Normalization +------------- + + +A hypernormalizing strategy is a strategy that is normalizing even if arbitrary reduction steps are taken before and after steps of the strategy. This allows the compiler to make optimizations without changing the behavior of the program. A hypernormalizing strategy allows aggressive optimizations and program transforms. + +There are also stronger properties than normalization. A Church-Rosser strategy is one with common reducts, i.e. there exist m and n, such that :math:`F^m(t)=F^n(u)` for every t and u equal via forward/backward evaluation. A normalizing strategy is Church-Rosser if the system is confluent and weakly normalizing (i.e. all objects have a normal form). In general a many-step CR strategy exists for effective ARS's, i.e. countable (in a computable fashion) and with a computable reduction relation. But the strategy is quite hard to compute, as it has to synchronize reducing subterms so that all components are reduced the same amount. And it's not clear that this synchronization offers anything to the programmer. + +Cofinal strategies are weaker than Church-Rosser but stronger than normalizing: for every term a, if a reduces in a finite number of steps to b, then there is an object c obtained by applying the strategy some number of times to a such that b reduces to c. For critical pair TRSs any "fair" strategy that ensures every redex is eventually contracted is cofinal. The cofinal property provides slick proofs - it ensures every redex not part of a cycle is contracted. But at runtime non-normalizing terms have indistinguishable behavior (infinite loop), hence this means the cofinal strategy is doing unnecessary work. + +There are also termination properties like strong convergence that ensure that for every term, there exists some number of reduction steps after which the head cannot be rewritten. 
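+
+As a concrete reference point, here is a minimal Haskell sketch of one candidate strategy, parallel-outermost reduction for a toy first-order TRS, the simplest outermost-fair strategy mentioned below. The names (``Term``, ``Rule``, ``parallelOutermost``) are invented for this illustration and are not from the cited papers or the actual implementation:
+
+.. code-block:: haskell
+
+   import Data.Maybe (mapMaybe)
+
+   -- A toy first-order term representation.
+   data Term = Var String | App String [Term]
+     deriving (Eq, Show)
+
+   -- A rule either rewrites a term at the root or gives up.
+   type Rule = Term -> Maybe Term
+
+   -- One parallel-outermost step: contract every outermost redex; children
+   -- are only visited when the term itself is not a redex.
+   parallelOutermost :: [Rule] -> Term -> Term
+   parallelOutermost rules t =
+     case mapMaybe ($ t) rules of
+       (t' : _) -> t'
+       []       -> case t of
+         Var x      -> Var x
+         App f args -> App f (map (parallelOutermost rules) args)
+
+   -- Iterate to a fixed point (may loop forever if no normal form exists).
+   normalize :: [Rule] -> Term -> Term
+   normalize rules t =
+     let t' = parallelOutermost rules t
+     in if t' == t then t else normalize rules t'
+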
+To ensure that term rewriting halts we probably also want a property like strong convergence, but this is a property of the rewriting strategy, not the TRS proper. + +Evaluation strategy +=================== + +For convergent (confluent and strongly normalizing) programs, such as the simply typed lambda calculus, all strategies are normalizing and the result is the same no matter how they are reduced. So the focus is on inferring convergence and doing reduction efficiently. "In the small" leftmost innermost ensures "complete development", i.e. a subterm is reduced completely before the outer term, hence we can compute the subterm fully and only store an optimized representation of the normal form. So we can compile to fast assembly like a state machine. "In the large" optimal reduction ensures the smallest number of steps so we can avoid duplicating work and performing unneeded work. + +But strongly normalizing implies not Turing complete, hence the termination verification will cause problems for complex programs. We need a fallback for these complex programs. Leftmost outermost reduction is the basis of lazy evaluation and is hypernormalizing for the lambda calculus. But for TRSs LO is only normalizing for left-normal TRSs, where variables do not precede function symbols in the left-hand sides of the rewrite rule. A better strategy is outermost fair (ensuring each outermost redex will eventually be evaluated - the simplest example is parallel outermost) - it's hypernormalizing for critical pair TRSs (decreasingly confluent TRSs), in particular weakly orthogonal TRSs. :cite:`hirokawaStrategiesDecreasinglyConfluent2011` So outermost fair seems a reasonable default, but there are non-orthogonal systems where it fails. Optimal reduction is defined for match-sequential TRSs, but it is a normalizing strategy that computes a result in the smallest number of reduction steps. + +We could do user-specified strategies like Stratego, but then how would we know that they're normalizing? + +There is also lenient evaluation, which evaluates all redexes in parallel except inside the arms of conditionals and inside lambdas, but it adds extra memory overhead for parameter passing. + +Now, one can argue about which computational strategy is better (time, space, parallelism, ...). +Stroscot's position: be accepting of programs and ensure a normalizing strategy, but after that aim for the most efficient time/space behavior for strict programs. + +Q: can a normalizing strategy be as efficient as strict evaluation? The hope is that profiling and other optimization tricks can close the gap. + +So the way we handle cycles in the rewrite engine is something like: + +* detect a cyclic term via rule cycle detection or presence of an AC operator +* use specialized matching (e.g. AC matching or Tarjan SCC + memo hash table) to identify all reductions out of the SCC +* end with a condensed normal form if there is no reduction out of the SCC +* otherwise, pick a reduction out of the SCC + +Then this infinite term is computed in chunks and fed to the surrounding context on demand (laziness), ensuring that a finite normal form is reached if possible and otherwise implementing an infinite stream of commands. + +If the substitution calculus is convergent, then terms can be represented by preterms in normal form. + +If reduction does not end in a condensed normal form, then the sequence of terms must be infinitely expanding in the sense that for every size s there is a point in the reduction where terms are always at least size s.
Otherwise, assuming a finite number of term symbols, there are only finitely many terms of size < s, so there would be a cycle in the reduction and reduction would end in a condensed normal form. + +A context is linear if every hole occurs exactly once. + +Verifying confluence +==================== + +We often want to prove confluence. There are some key algorithms: + +* The decreasing diagrams technique is a complete method for confluence on countable abstract rewrite systems. + +* Computing critical pairs. A non-joinable critical pair means the system is not confluent. If all critical pairs are joinable the system is said to be locally confluent. An orthogonal system is one with no critical pairs, while a weakly orthogonal system is one with critical pairs that are trivially joinable. For an HORS there are more constraints to be orthogonal in addition to no critical pairs ("every set of redexes is pairwise simultaneous"). The substitution calculus must be complete, only needed for gluing, a descendant rewriting system, parametric, have head-defined rules, and be naturally closed under substitution. Parallel rewrite steps must be serializable and left-hand sides of rules must be linear. + + V. van Oostrom. Developing developments. TCS, 175(1):159–181, 1997. + V. van Oostrom and F. van Raamsdonk. Weak orthogonality implies confluence: The higher order case. In Proc. 3rd LFCS, volume 813 of LNCS, pages 379–392, 1994. + +* Proving termination. The Knuth Bendix Criterion (Newmann's lemma) says a terminating system is confluent iff it is locally confluent. Termination can be shown by exhibiting a well-ordering, such as recursive path ordering, dependency graph decomposition, and the subterm criterion. + + WANDA has more advanced techniques. Cynthia Kop. Higher Order Termination. PhD thesis, Vrije Universiteit, Amsterdam, 2012 + + TTT2 also has some good techniques. + + Gramlich–Ohlebusch’s criterion says for innermost-terminating TRSs R with no innermost critical pairs, R is confluent if and only if all critical pairs are joinable by innermost reduction. There are innermost terminating systems that aren't terminating so this criterion can prove some systems that Knuth-Bendix can't. + +* Decomposition: Several properties allow dividing the system into smaller, more tractable systems. First is modularity, that the disjoint union of two systems with the property has the property. We also usually have the converse, the disjoint union has the property only if the subsystems have the property. + + * Weak normalization and consistency (w.r.t. equivalence) are modular for first-order systems. + * Left linearity, confluence, and unique normal forms (w.r.t. equivalence) are modular for semi-equational CTRSs. + * Confluence is modular for join and semi-equational CTRSs. In fact if the disjoint union is confluent then the component systems must be confluent. + * Confluence plus left linearity is modular for higher-order TRSs. + * Weak termination, weak innermost termination, and strong innermost termination are modular for CTRSs in combination with confluence or the property that there are no extra variables in the conditions. + * NF, unique normal forms with respect to reduction, and consistency with respect to reduction are modular in combination with left linearity. Consistency w.r.t. reduction means that there is no term reducing to two distinct variables; it is implied by the unique normal form property w.r.t. reduction as variables are normal forms. + * Strong normalization plus consistency w.r.t. 
reduction plus left linearity is modular. This likely holds for CTRSs without extra variables as well. + + Order-sorted decomposition uses persistence of confluence. If sorts can be assigned to all terms and rule variables such that all rules don't increase the sort, then confluence can be separately considered for each sort and confluence as a whole follows from confluence on well-sorted terms. + + Decreasing diagrams allows decomposing a left-linear TRS into duplicating and non-duplicating rules. The TRS is confluent if all critical peaks are decreasing with respect to a rule labeling and the duplicating rules are terminating relative to the non-terminating rules. + + Layer-preserving decomposition decomposes TRSs into minimal pieces such that taking pieces pairwise they form layer-preserving combinations, i.e. rules in one piece operate only on terms of that piece. It is used in CSI. + + +* J. Nagele, B. Felgenhauer, and A. Middeldorp. Improving automatic confluence analysis of rewrite systems by redundant rules. In Proc. 26th RTA, volume 36 of LIPIcs, pages 257–268, 2015. + diff --git a/docs/Commentary/Language/Evaluation-Strategy.rst b/docs/Commentary/Language/Evaluation-Strategy.rst index 92bd9bc..8264da7 100644 --- a/docs/Commentary/Language/Evaluation-Strategy.rst +++ b/docs/Commentary/Language/Evaluation-Strategy.rst @@ -9,29 +9,28 @@ This page summarizes the arguments for different types of evaluation strategies: * eager vs lazy - whether to make arguments non-strict by default * call-by-need vs optimal - if arguments are non-strict, whether to evaluate once per closure or do deeper sharing - lazy evaluation and lazy vs optimal evaluation. The quick summary is that optimal reduction is optimal, hence has better reduction and expressiveness properties than lazy or strict, but it is a complex strategy and in some cases there may be significant space overhead compared to strict due to graph reduction overhead, and there are also cases where the graph reduction overhead exceeds the runtime of the program, so programs can be slower with optimal reduction. To address this Stroscot will special-case optimization for C-like programs to give the expected performance. +The quick summary is that optimal reduction is optimal, hence has better reduction and expressiveness properties than lazy or strict, but it is a complex strategy and in some cases there may be significant space overhead compared to strict due to graph reduction overhead, and there are also cases where the graph reduction overhead exceeds the runtime of the program, so programs can be slower with optimal reduction. To address this Stroscot will special-case optimization for C-like programs to give the expected performance. "The next Haskell will be strict". - not necessarily. laziness may yet have a role as well. Traditionally a function is only defined on values, but lazy evaluation allows functions to produce useful behavior for non-values as well. -Pure vs impure -============== - -SPJ calls laziness "the hair shirt", and says the big benefit of wearing this shirt was the discovery of the benefits of purity. It is really great when the denotation of an integer is that integer, rather than a function or a tuple or whatever structure. Certainly some things like I/O do need more complex denotations, but by and large it is just a big conceptual win. +Purity +====== -Strict vs non-strict -==================== +SPJ says laziness offers a form of "hair shirt", an excuse to keep the language pure. 
The big benefit of wearing this shirt was the discovery of the benefits of purity. It is really great when the denotation of an integer is that integer, rather than a function or a tuple or whatever structure. Certainly some things like I/O do need more complex denotations, but by and large it is just a big conceptual win. Strict languages are often undisciplined in their use of effects and have unclear semantics given by "whatever the compiler does". -Control constructs ------------------- +:cite:`jonesWearingHairShirt2003` concluded that laziness, in particular the purity that non-strictness requires, was jolly useful. I/O did cause prolonged embarrassment with a confusing variety of solutions (continuations, streams, monads) but Haskell has settled on monads and it is no longer a source of embarrassment. -Non-strictness allows defining if-then-else and short-circuit functions. So generally some amount of non-strictness is required in a language. E.g. ``and c t = if c then t else False``. With strictness ``and false undefined`` evaluates its arguments first and throws even though its substitution does not. Another example is ``fromMaybe (error "BOOO") x``. Haskell has put non-strictness to good use with parser combinator libraries and so on. +In Haskell, the interaction of ``seq`` and inlining is the source of `numerous bugs `__. In contrast, optimal reduction is based on a principled approach to sharing - any reduction sequence in the sharing graph will not duplicate work. +Control constructs +================== +Non-strictness is required to define if-then-else and short-circuit functions, e.g. ``and c t = if c then t else False``. With strictness ``and false undefined`` evaluates its arguments first and throws even though its substitution does not. Another example is ``fromMaybe (error "BOOO") x``. Haskell has put non-strictness to good use with parser combinator libraries and so on. Function composition --------------------- +==================== Consider the ``any`` function, which scans the list from the head forwards and as soon as an element that fulfills the predicate is found it returns true and stops scanning the list, otherwise returns false. It's quite natural to express the ``any`` function by reusing the ``map`` and ``or`` functions, ``any p = or . map p``. All the functions involved need to be non-strict to get the desired semantics, processing the list in constant memory. @@ -41,10 +40,12 @@ Similarly there is ``within eps (improve (differentiate h0 f x))`` in :cite:`hug The related deforestation optimization removes all intermediate cons cells from the non-strict definition of ``any``, making it as efficient as the expanded strict version. In a strict language deforestation can have the effect of making an undefined program defined, hence is invalid. More careful handling of termination can fix this for strict programs (says a random comment in a blog post). -Win for laziness. +Lazy evaluation of ``avg xs = sum xs / length xs`` keeps the whole list in memory because it does the sum and then the length (`ref __`). My implementation of optimal reduction switches evaluation back and forth between the sum and the length. More specifically, with the sequent calculus IR, cuts get pushed down continually and the natural strategy of reducing the topmost cut performs this alternation. So the average calculation can discard the beginning of the list once it is processed. 
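+
+To make the ``avg`` example concrete, here is the standard Haskell illustration of the leak (plain GHC Haskell, not Stroscot), together with the hand-written single pass that the alternating cut-reduction order described above is meant to achieve automatically:
+
+.. code-block:: haskell
+
+   {-# LANGUAGE BangPatterns #-}
+
+   -- Leaky under call-by-need: 'sum xs' keeps the whole list live until
+   -- 'length xs' has also traversed it.
+   avg :: [Double] -> Double
+   avg xs = sum xs / fromIntegral (length xs)
+
+   -- Manual single pass: the sum and the count advance together, so consumed
+   -- cells can be collected immediately.
+   avg' :: [Double] -> Double
+   avg' = go 0 (0 :: Int)
+     where
+       go !s !n []       = s / fromIntegral n
+       go !s !n (x : xs) = go (s + x) (n + 1) xs
+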
+ +But although this case is improved, evaluating a thunk can still be delayed arbitrarily long, in particular it can take a while to discard an unused value. Partial evaluation ------------------- +================== ``snd (undefined,3)`` only works in a non-strict language - ``undefined`` would throw in a strict language. So strict languages must do strictness analysis to discard any code as unneeded. @@ -53,14 +54,14 @@ Partial evaluation Overall, a win for non-strictness, and an argument for lax pattern match semantics and termination checking. Totality --------- +======== In a total language all evaluation strategies give the same result. But since in particular, strict evaluation must work, totality gives up all the benefits of non-strictness - exceptions and failing conditionals are simply forbidden. Meanwhile the actual evaluation strategy is compiler-specified. In practice, the strategy still has to be decided (e.g. Idris is strict, Agda/Coq have both strict and call-by-need backends), so this doesn't resolve the question. The number of times an expression is evaluated is still observable via the performance. Conclusion: totality is a compromise that means the worst of strict and non-strict, and in practice is a Trojan horse for strictness. Simulation ----------- +========== To emulate non-strict argument passing in a strict language, there are three options: @@ -70,25 +71,27 @@ To emulate non-strict argument passing in a strict language, there are three opt Generally, ignoring orthogonal aspects such as the handling of side effects, there is no issue with using non-strict argument passing with a program written with strict semantics in mind; the program can simply be used unmodified. It will have the same semantics in normal conditions and possibly terminate without error in conditions where the strict version would loop infinitely. Slowdown and space leaks are possible issues, though not non-termination. Efficiency can be recovered by compiler optimizations that add back strictness. +Lazy reduction can be simulated in a strict language using thunks, but the sharing graph of optimal reduction is intrusive, so one would have to represent functions via their AST. I guess it could be done. Generally, the issue is that optimal reduction is complicated. Although all of strict, lazy, and optimal reduction can be modeled as graph reduction, optimal reduction uses a more complex graph. + Conclusion: Non-strictness wins in terms of simulation usability (use programs as-is). Performance-wise, practically, both directions of simulation can introduce slowdown and space leaks. With invasive syntax and careful design, strict can simulate non-strict without overhead. Data structures ---------------- +=============== Laziness allows writing certain amortized data structures, as per :cite:`okasakiPurelyFunctionalData1998`. It also allows defining infinite data structures, e.g. ``omega = Succ omega`` or the Fibonacci stream, that have finite time and memory usage if only a part of the data is used. These are hard to replicate in strict code except via thunk simulation. When analyzing performance, the explicit simulation makes the data structures clearer and easier to analyze, but analyzing core IR of a lazy language should provide the same benefit. A strict, imperative stream (iterator) is one where reading from the stream is an operation ``next : Stream -> Op (Nil | Cons a Stream)``. It is not the same as a lazy stream - accessing elements does I/O, not just pure reduction of thunks. 
Iterators are ephemeral data structures (objects). An iterator can be turned into a pure data structure by reading it to exhaustion, or buffered using a thunk-like data structure to create a fake-lazy abstraction that still uses I/O but allows pure access to previous elements. Regardless, iterators can be implemented in a lazy language as well using an I/O monad, with little overhead. -Normal order ------------- +Normalizing +=========== Laziness has the joyous property that you can write down any cyclic rubbish and get a value out if there's any sensible evaluation order. Strict order can evaluate unnecessarily, so it can fail needlessly if there is an expression that errors when evaluated in the wrong conditions, e.g. ``a`` in ``r where a = b / c; r = if c != 0 then a else 0``. Time complexity ---------------- +=============== Regarding (sequential) time complexity, lazy reduction uses at most as many reduction steps as the corresponding strict reduction. Lazy corresponds to strict extended with an oracle that skips evaluation of unneeded terms. :cite:`hackettCallbyneedClairvoyantCallbyvalue2019` @@ -121,14 +124,46 @@ The overhead of the lazy bar function call is the creation of a thunk on the bumSo big-O time complexity is within a constant factor. In practice the constant factor is quite important; cache locality and memory access times play a large role in speed. There is some memory fetching overhead with laziness because by the time the thunk is evaluated all of its references may have gone cold. +Although thunks prevent some forms of duplication, and thus can give speedups, lazy reduction still duplicates work. An example is + +:: + + import System.IO.Unsafe + i = \w -> (unsafePerformIO (print "i")) `seq` w + z = 2 :: Integer + t = 3 :: Integer + f = \x -> (x z) + (x t) + main = print (f i :: Integer) + +Without GHC's optimizations, ``print "i"`` is evaluated twice. With ``-O`` GHC does a "hoisting" transformation that makes ``i = (unsafePerformIO (print "i")) `seq` \w -> w``. But it doesn't optimize another example: + +:: + + foo 0 = 1 + foo n = + let a = \x -> (foo (n - 1)) + in a r + a s + where r = 0; s = 1 -- dummy arguments so the example compiles; a ignores its argument + +Without optimizations, this is exponential with lazy, vs linear with optimal. The reason is that with optimal reduction, sub-expressions of function bodies are shared between calls. In particular, the only time an expression is re-evaluated is when it depends on the arguments. Again with ``-O`` this improves: GHC inlines ``a`` and does CSE, giving ``foo n = let a = foo (n-1) in a + a``. + +However, there should be more complex cases with higher-level sharing that no GHC code transformation mimics. TODO: find some. + +Regarding optimal evaluation, there are some results: + +* Optimal reduction has exponential savings over lazy evaluation when evaluating Church numeral exponentiation. :cite:`aspertiBolognaOptimalHigherorder1996` +* The optimal non-family reduction sequence is uncomputable for the lambda calculus (best known is essentially a brute force search over all reduction sequences shorter than leftmost-outermost reduction), while the optimal family reduction is simply leftmost-outermost. +* For elementary linear lambda terms the number of sharing graph reduction steps is at most quadratic compared to the number of leftmost-outermost reduction steps. :cite:`guerriniOptimalImplementationInefficient2017` Actually my implementation avoids bookkeeping and fan-fan duplication and hence is linear instead of quadratic (TODO: prove this).
It would be nice to have a bound of optimal graph reduction steps vs. call-by-value (strict) steps but I couldn't find one. I think it is just the same quadratic bound, because lazy is 1-1 with strict. +* A simply-typed term, when beta-eta expanded to a specific "optimal root" form, reduces to normal form in a number of family reduction steps linearly proportional to the "size" of the term ("size" is defined in a way polynomially more than its number of characters). Since the simply typed terms can compute functions in ℰ4\\ℰ3 of the Grzegorczyk hierarchy with linear size (Statman), one concludes there is a sequence of terms which reduces in a linear number of family reductions but takes ℰ4 time to compute on a Turing machine, for any implementation of family reduction. In particular there are terms taking optimal graph reduction steps proportional to the iterated exponential of 2 to the size of the term, i.e. :math:`2^{2^{2^n}}` for any number of 2's. :cite:`coppolaComplexityOptimalReduction2002` + + Implementation complexity -------------------------- +========================= Compiling a subset of C is succinct, 2048 bytes for the `obfuscated tiny C compiler `__. It's essentially a macro assembler - each operation translates to an assembly sequence that uses the stack. I can make a similar compiler for STG (lazy functional language) with a similar macro translation - I'd just need to write a GC library as heap allocation is not built into the hardware, unlike stack allocation. Meanwhile production-quality compilers (GCC, clang/LLVM) are huge and do so many code transformations that the original code is unrecognizable. Similarly GHC is huge. So strict languages don't really fit the hardware any better than lazy - they're both significant overhead for naive translations and huge compilers to remove that overhead. Space complexity ----------------- +================ The space complexity is very messy in a lazy language, whereas the stack in a strict language is predictable. For example, lazily evaluating the definition ``sum = foldl (+) 0``, unevaluated addition thunks pile up and are only forced at the end, hence the sum operation takes O(n) memory. @@ -137,26 +172,19 @@ GHC's demand analysis works for ``sum``, but is still incomplete. Haskell has ad Space leaks in particular are hard to spot. The difficulty lies in characterizing the evaluatedness of arguments being passed around. R fully evaluates expressions in a number of places which helps a lot, but there is still a lot of code that manually calls ``force`` and ``force_all`` to remove laziness, and each omission is a potential slowdown. And of course all this forcing means there are few libraries taking advantage of laziness. :cite:`goelDesignImplementationUse2019` Debugging ---------- +========= For debugging the logic, lazy and strict evaluation can both be modeled as term reduction, so it's just a matter of tracking the term being reduced. The logic that tracks lazy reduction state is more complex, hence is harder to show alongside the term, but not impossibly so. Parallelism and concurrency ---------------------------- +=========================== Parallel execution is slightly better in a strict language, because expressions are known to be evaluated and can be immediately sent off to a worker thread. Lazy evaluation requires proving or waiting for demand which can be slow. But lenient evaluation is non-strict and eager, and gives more parallelism than either strict or lazy. 
Even more parallelism can be obtained from speculative execution. Concurrency is bound up with I/O operations, which are sequential, so the evaluation strategy doesn't have any room to play a role. -Purity ------- - -Laziness offers a form of "hair shirt", an excuse to keep the language pure. Strict languages are often undisciplined in their use of effects and have unclear semantics given by "whatever the compiler does". - -:cite:`jonesWearingHairShirt2003` concluded that laziness, in particular the purity that non-strictness requires, was jolly useful. I/O did cause prolonged embarrassment with a confusing variety of solutions (continuations, streams, monads) but Haskell has settled on monads. - Types ------ +===== In Ocaml, a simple list type ``List Nat`` is guaranteed to be finite. In Haskell, a list type ``List Nat`` instead accepts infinite lists like ``fib = [1,1,2,3,...]``. In the denotational semantics, however, infinite lists are still values. So we should be able to define types independent of the evaluation semantics, i.e. have both finite and infinite types in both strict and lazy languages. @@ -165,7 +193,7 @@ With strict languages, using the thunk simulation one gets a natural "thunk" con With lazy languages, GHC has developed "strictness annotations" which seem about as useful. So uList. (Nat + List) is an infinite list, while uList. (Nat + !List) is a finite list. There is an alternate convention implied by StrictData which uses ``a`` to denote values of type a and ``~a`` to denote expressions evaluating to type ``a``. Pipes ------ +===== One practical case where laziness shows up is UNIX pipes. For finite streams the "strict" semantics of pipes suffices, namely that the first program generates all of its output, this output is sent to the next program, which generates all of its output that is then sent to the next program, etc., until the output is to the terminal. Most programs have finite output on finite input and block gracefully while waiting for input, so interleaved execution or laziness is not necessary. @@ -174,7 +202,7 @@ However, for long outputs, interleaved or "lazy" execution saves memory and impr But laziness means you can implement interleaving once in the language (as the evaluation strategy) as opposed to piecemeal for each program. Referential transparency ------------------------- +======================== Common subexpression elimination "pulls out" a repeated expression by giving it a fresh name and generally improves performance by sharing the result (although it could be a tie with the compiler inlining the expression again if it is really cheap). For instance ``e + e`` is the same as ``(\x -> x + x) e``, but in the second ``e`` is only evaluated once. @@ -188,63 +216,6 @@ A win for laziness. Non-strict arguments are passed as computations, so they can include non-terminating computations, whereas in a strict language arguments are evaluated values. But when we actually use a value it gets evaluated, so these computations resolve themselves. There is no way in a lazy language (barring runtime reflection or exception handling) to observe that an argument is non-termination as opposed to a real value, i.e. to make a function ``f _|_ = 0, f () = 1``. So stating that non-termination or ``undefined`` is a value in lazy languages is wrong. Similarly ``Succ undefined`` is not a value - it is WHNF but not normal form. These are programs (unevaluated expressions) that only come up when we talk about totality. 
Some people have confused the notions of "value" and "argument" in lazy languages. The term "laziness" has a lot of baggage, perhaps it is better to market the language as "normal order". -Lazy vs optimal -=============== - -Optimal reduction is similar to lazy reduction in that the evaluation loop computes a "needed" redex and reduces it. It differs in that it can share the bodies of lambda abstractions. It's optimal in the sense that it ensures the minimal amount of family reduction steps. but this does not imply the fastest real-world performance. - -Exponential speedup -------------------- - -Although thunks prevent some forms of duplication, lazy reduction still duplicates work. An example is - -:: - - import System.IO.Unsafe - i = \w -> (unsafePerformIO (print "i")) `seq` w - z = 2 :: Integer - t = 3 :: Integer - f = \x -> (x z) + (x t) - main = print (f i :: Integer) - -Without GHC's optimizations, ``print "i"`` is evaluated twice. With ``-O`` GHC does a "hoisting" transformation that makes ``i = (unsafePerformIO (print "i")) `seq` \w -> w``. But it doesn't optimize another example: - -:: - - foo 0 = 1 - foo n = - let a = \x -> (foo (n - 1)) - a r + a s - -Without optimizations, this is exponential with lazy, vs linear with optimal. The reason is that with optimal reduction, sub-expressions of function bodies are shared between calls. In particular, the only time an expression is re-evaluated is when it depends on the arguments. Again with ``-O`` this improves: GHC inlines ``a`` and does CSE, giving ``foo n = let a = foo (n-1) in a + a``. - -However, there should more complex cases have higher-level sharing that no GHC code transformation mimics. TODO: find some. - -Principled ----------- - -The interaction of ``seq`` and inlining is the source of `numerous bugs `__. In contrast, optimal reduction is based on a principled approach to sharing - any reduction sequence in the sharing graph will at most duplicate work. - -Better composition ------------------- - -Lazy evaluation of ``avg xs = sum xs / length xs`` keeps the whole list in memory because it does the sum and then the length (`ref __`). My implementation of optimal reduction switches evaluation back and forth between the sum and the length. More specifically, with the sequent calculus IR, cuts get pushed down continually and the natural strategy of reducing the topmost cut performs this alternation. So the average calculation can discard the beginning of the list once it is processed. - -But although this case is improved, evaluating a thunk can still be delayed arbitrarily long, in particular it can take a while to discard an unused value. - -Complicated ------------ - -Lazy reduction can be simulated in a strict language using thunks, but the sharing graph of optimal reduction is intrusive, so one would have to represent functions via their AST. I guess it could be done. Generally, the issue is that optimal reduction is complicated. Although all of strict, lazy, and optimal reduction can be modeled as graph reduction, optimal reduction uses a more complex graph. - -Time complexity ---------------- - -* Optimal reduction has exponential savings over lazy evaluation when evaluating Church numeral exponentiation. :cite:`aspertiBolognaOptimalHigherorder1996` -* The optimal non-family reduction sequence is uncomputable for the lambda calculus (best known is essentially a brute force search over all reduction sequences shorter than leftmost-outermost reduction), while the optimal family reduction is simply leftmost-outermost. 
-* For elementary linear lambda terms the number of sharing graph reduction steps is at most quadratic compared to the number of leftmost-outermost reduction steps. :cite:`guerriniOptimalImplementationInefficient2017` Actually my implementation avoids bookkeeping and fan-fan duplication and hence is linear instead of quadratic (TODO: prove this). It would be nice to have a bound of optimal graph reduction steps vs. call-by-value (strict) steps but I couldn't find one. I think it is just the same quadratic bound, because lazy is 1-1 with strict. -* A simply-typed term, when beta-eta expanded to a specific "optimal root" form, reduces to normal form in a number of family reduction steps linearly proportional to the "size" of the term ("size" is defined in a way polynomially more than its number of characters). Since the simply typed terms can compute functions in ℰ4\\ℰ3 of the Grzegorczyk hierarchy with linear size (Statman), one concludes there is a sequence of terms which reduces in a linear number of family reductions but takes ℰ4 time to compute on a Turing machine, for any implementation of family reduction. In particular there are terms taking optimal graph reduction steps proportional to the iterated exponential of 2 to the size of the term, i.e. :math:`2^{2^{2^n}}` for any number of 2's. :cite:`coppolaComplexityOptimalReduction2002` - Sharing strategies with non-strictness don't extend to ``while``, because the condition and body must be evaluated multiple times. So more generally for iteration constructs we need `call by name `__, macros, fexprs, or monads. :: diff --git a/docs/Commentary/Implementation/Fastest.rst b/docs/Commentary/Language/Fastest.rst similarity index 100% rename from docs/Commentary/Implementation/Fastest.rst rename to docs/Commentary/Language/Fastest.rst diff --git a/docs/Commentary/Language/FunctionalLogic.rst b/docs/Commentary/Language/FunctionalLogic.rst index fbf606e..5bc9c68 100644 --- a/docs/Commentary/Language/FunctionalLogic.rst +++ b/docs/Commentary/Language/FunctionalLogic.rst @@ -6,85 +6,99 @@ Curry, Verse, Flix, Oz, ... all functional logic languages. Values. A value v is either a variable x or a head-normal form hnf. A variable counts as a value because an expression may evaluate to an as-yet-unknown logical variable. A head-normal form hnf is a conventional value. Expressions e include values v and other constructs: - sequencing 𝑒𝑞; e. An "expression or equation" 𝑒𝑞 is either an ordinary (possibly failing) expression e, or an equation v = e; the syntax ensures that equations can only occur to the left of a “; ”. - ∃x. e - existential, introduces logical variable - fail - yield no values - choice e1 || e2 - yield multiple values (e1 followed by e2) - one{e} - if e fails, fails, otherwise returns the first of the values yielded by e - all{e} - reifies choices as a tuple; n choices mean tuple of length n + +* sequencing 𝑒𝑞; e. An "expression or equation" 𝑒𝑞 is either an ordinary (possibly failing) expression e, or an equation v = e; the syntax ensures that equations can only occur to the left of a “; ”. +* ∃x. e - existential, introduces logical variable +* fail - yield no values +* choice e1 || e2 - yield multiple values (e1 followed by e2) +* one{e} - if e fails, fails, otherwise returns the first of the values yielded by e +* all{e} - reifies choices as a tuple; n choices mean tuple of length n + A program 𝑝 ::= one{e} is a closed expression e (freevars(𝑒) = ∅) taking the first result. If the expression fails, the program fails. 
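+
+As an illustration of the intended operational reading of ``fail``, choice, ``one``, and ``all`` before the rules below, here is a Haskell sketch that models only the choice structure with lists; it deliberately ignores logical variables, equations, and unification, and the names are invented:
+
+.. code-block:: haskell
+
+   -- A computation that yields zero or more values, in order.
+   type Choices a = [a]
+
+   failV :: Choices a
+   failV = []                      -- fail: yield no values
+
+   choice :: Choices a -> Choices a -> Choices a
+   choice = (++)                   -- e1 || e2: values of e1 followed by values of e2
+
+   oneV :: Choices a -> Choices a
+   oneV []      = []               -- one{fail} fails
+   oneV (x : _) = [x]              -- otherwise keep only the first value
+
+   allV :: Choices a -> Choices [a]
+   allV xs = [xs]                  -- reify the n choices as a single tuple (here a list)
+
+   -- one{ (2 || 3) + 1 } evaluates to 3
+   example :: Choices Int
+   example = oneV (map (+ 1) (choice [2] [3]))
+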
Rewrite rules: -∃x1 x2 ··· xn. e means ∃x1. ∃x2. ···∃xn. e -x := e1; e2 means ∃x. x = e1; e2 -⟨e1, ···, en⟩ means x1 := e1; ···; xn := en; ⟨x1, ···, xn⟩ where xi are fresh (again skip rebinding values) -e1 = e2 means‡ x := e1; x = e2; x where x is fresh (skip if e1 is a value v and the equation is not the last in a sequence) -if (∃x1 ···xn. e1) then e2 else e3 means (one{(∃x1 ···xn. e1; 𝜆⟨⟩. e2) || (𝜆⟨⟩. e3)})⟨⟩ + +* ∃x1 x2 ··· xn. e means ∃x1. ∃x2. ···∃xn. e +* x := e1; e2 means ∃x. x = e1; e2 +* ⟨e1, ···, en⟩ means x1 := e1; ···; xn := en; ⟨x1, ···, xn⟩ where xi are fresh (again skip rebinding values) +* e1 = e2 means‡ x := e1; x = e2; x where x is fresh (skip if e1 is a value v and the equation is not the last in a sequence) +* if (∃x1 ···xn. e1) then e2 else e3 means (one{(∃x1 ···xn. e1; 𝜆⟨⟩. e2) || (𝜆⟨⟩. e3)})⟨⟩ + Note: In the rules marked with a superscript 𝛼, use 𝛼-conversion to satisfy the side condition +A multi-equation pattern match such as:: -A multi-equation pattern match such as -function pat1 = body1 -function pat2 = body2 + function pat1 = body1 + function pat2 = body2 desugars to -function := λp.((∃x1 ··· xn. p = pat1; body1) || (∃x1 ··· xn. p = pat2; body2)) +.. code-block:: none + + function = λp.((∃x1 ··· xn. p = pat1; body1) || (∃x1 ··· xn. p = pat2; body2)) Primops and literals: -Hnfs include integer constants and primitive operators +, > -e1 + e2 means add⟨e1, e2⟩ -e1 > e2 means gt⟨e1, e2⟩ -app-add add⟨k1, k2⟩ −→ k3 where 𝑘3 = 𝑘1 + 𝑘2 -app-gt gt⟨k1, k2⟩ −→ k1 if 𝑘1 > 𝑘2 -app-gt-fail gt⟨k1, k2⟩ −→ fail if 𝑘1 ⩽ 𝑘2 -u-lit k1 = k2; e −→ e if 𝑘1 = 𝑘2 + +* Hnfs include integer constants and primitive operators +, > +* e1 + e2 means add⟨e1, e2⟩ +* e1 > e2 means gt⟨e1, e2⟩ +* app-add add⟨k1, k2⟩ −→ k3 where 𝑘3 = 𝑘1 + 𝑘2 +* app-gt gt⟨k1, k2⟩ −→ k1 if 𝑘1 > 𝑘2 +* app-gt-fail gt⟨k1, k2⟩ −→ fail if 𝑘1 ⩽ 𝑘2 +* u-lit k1 = k2; e −→ e if 𝑘1 = 𝑘2 Lambdas: -A head-normal form hnf includes a lambda 𝜆x. e. -Expressions e include applications v1 v2 -e1 e2 means f := e1; x := e2; f x, where f,x are fresh (skip rebinding values) -𝜆⟨x1, ···, xn⟩. e means 𝜆p. ∃x1 ··· xn. p = ⟨x1, ···, xn⟩; e p fresh, n ⩾ 0 -app-beta𝛼 (𝜆x. e) (v) −→ e{v/x} if 𝑥 ∉ fvs(v) -u-lambda a=b is stuck if a or b is a lambda + +* A head-normal form hnf includes a lambda 𝜆x. e. +* Expressions e include applications v1 v2 +* e1 e2 means f := e1; x := e2; f x, where f,x are fresh (skip rebinding values) +* 𝜆⟨x1, ···, xn⟩. e means 𝜆p. ∃x1 ··· xn. p = ⟨x1, ···, xn⟩; e p fresh, n ⩾ 0 +* app-beta𝛼 (𝜆x. e) (v) −→ e{v/x} if 𝑥 ∉ fvs(v) +* u-lambda a=b is stuck if a or b is a lambda Tuples: -A head-normal form includes a tuple ⟨v1, ···, vn⟩. -app-tup ⟨v0, ···, vn⟩(v) −→ ∃x. x = v; (x = 0; v0) || ··· || (x = n; vn) fresh x ∉ fvs(v, v0, ···, vn) -app-tup-0 ⟨⟩(v) −→ fail -u-tup ⟨v1, ···, vn⟩ = ⟨v′1, ···, v′n⟩; e −→ v1 = v′1; ···; vn = v′n; e -all-choice all{v1 || ··· || vn } −→ ⟨v1, ···, vn⟩ -all-value all{v} −→ ⟨v⟩ -all-fail all{fail} −→ ⟨⟩ + +* A head-normal form includes a tuple ⟨v1, ···, vn⟩. +* app-tup ⟨v0, ···, vn⟩(v) −→ ∃x. 
x = v; (x = 0; v0) || ··· || (x = n; vn) fresh x ∉ fvs(v, v0, ···, vn) +* app-tup-0 ⟨⟩(v) −→ fail +* u-tup ⟨v1, ···, vn⟩ = ⟨v′1, ···, v′n⟩; e −→ v1 = v′1; ···; vn = v′n; e +* all-choice all{v1 || ··· || vn } −→ ⟨v1, ···, vn⟩ +* all-value all{v} −→ ⟨v⟩ +* all-fail all{fail} −→ ⟨⟩ Failure: -u-fail hnf1 = hnf2; e −→ fail if no unification -u-occurs x = V [ x ]; e −→ fail if V ≠ □ (i.e., all but x=x fail) -fail-elim 𝑋 [ fail] −→ fail -one-fail one{fail} −→ fail -choose-r fail || e −→ e -choose-l e || fail −→ e + +* u-fail hnf1 = hnf2; e −→ fail if no unification +* u-occurs x = V [ x ]; e −→ fail if V ≠ □ (i.e., all but x=x fail) +* fail-elim 𝑋 [ fail] −→ fail +* one-fail one{fail} −→ fail +* choose-r fail || e −→ e +* choose-l e || fail −→ e Existential: -exi-elim ∃x. e −→ e if x ∉ fvs(e) -eqn-elim ∃x. 𝑋 [ x = v; e ] −→ 𝑋 [ e ] if x ∉ fvs(𝑋 [ e ]) and v ≠ V [ x ] -exi-float𝛼 𝑋 [ ∃x. e ] −→ ∃x. 𝑋 [ e ] if 𝑥 ∉ fvs(𝑋 ) -exi-swap ∃x. ∃y. e −→ ∃y. ∃x. e + +* exi-elim ∃x. e −→ e if x ∉ fvs(e) +* eqn-elim ∃x. 𝑋 [ x = v; e ] −→ 𝑋 [ e ] if x ∉ fvs(𝑋 [ e ]) and v ≠ V [ x ] +* exi-float𝛼 𝑋 [ ∃x. e ] −→ ∃x. 𝑋 [ e ] if 𝑥 ∉ fvs(𝑋 ) +* exi-swap ∃x. ∃y. e −→ ∃y. ∃x. e Equality: -subst 𝑋 [ x = v; e ] −→ (𝑋 {v/x}) [ x = v; e{v/x} ] if v ≠ V [ x ] -hnf-swap hnf = v; e −→ v = hnf ; e -var-swap y = x; e −→ x = y; e if x ≺ y + +* subst 𝑋 [ x = v; e ] −→ (𝑋 {v/x}) [ x = v; e{v/x} ] if v ≠ V [ x ] +* hnf-swap hnf = v; e −→ v = hnf ; e +* var-swap y = x; e −→ x = y; e if x ≺ y Sequences: -seq-swap 𝑒𝑞; x = v; e −→ x = v; 𝑒𝑞; e unless (𝑒𝑞 is y = v′ and y ⪯ x) -val-elim v; e −→ e -seq-assoc (𝑒𝑞; e1); e2 −→ 𝑒𝑞; (e1; e2) -eqn-float v = (𝑒𝑞; e1); e2 −→ 𝑒𝑞; (v = e1; e2) + +* seq-swap 𝑒𝑞; x = v; e −→ x = v; 𝑒𝑞; e unless (𝑒𝑞 is y = v′ and y ⪯ x) +* val-elim v; e −→ e +* seq-assoc (𝑒𝑞; e1); e2 −→ 𝑒𝑞; (e1; e2) +* eqn-float v = (𝑒𝑞; e1); e2 −→ 𝑒𝑞; (v = e1; e2) Choice: -one-value one{v} −→ v -one-choice one{v || e} −→ v -choose-assoc (e1 || e2) || e3 −→ e1 || (e2 || e3) -choose SX [𝐶𝑋 [ e1 || e2 ] ] −→ SX [𝐶𝑋 [ e1 ] || 𝐶𝑋 [ e2 ] ] + +* one-value one{v} −→ v +* one-choice one{v || e} −→ v +* choose-assoc (e1 || e2) || e3 −→ e1 || (e2 || e3) +* choose SX [𝐶𝑋 [ e1 || e2 ] ] −→ SX [𝐶𝑋 [ e1 ] || 𝐶𝑋 [ e2 ] ] diff --git a/docs/Commentary/Language/LogicProgramming.rst b/docs/Commentary/Language/LogicProgramming.rst index 33ebd5f..428a220 100644 --- a/docs/Commentary/Language/LogicProgramming.rst +++ b/docs/Commentary/Language/LogicProgramming.rst @@ -161,42 +161,6 @@ Each mode is a function from inputs to a set of outputs (or output / Maybe, in t Logic programming allows writing very concise code, although it can be unusably inefficient in some cases. For this, we can allow writing optimized imperative code, and asserting that this implements a specific mode of a predicate. Then the predicate becomes optimized. But with a smart compiler, the imperative code can be avoided most of the time, saving the need for duplication - just tune the hot cases. Similarly writing imperative code in the first place avoids the issue altogether, although it precludes most of the benefits of logic programming. -Unification -=========== - -Unification is the problem of finding all solutions to a system of equations. First-order unification solves a set of equalities ``a1=b1, a2=b2, ...`` over tree terms and variables. This can be extended to the "dual unification" problem that also includes disequations ``c1 != d1`` in the list that must not be satisfied. Constraint logic programming requires solving systems of equations over reals or other sets. 
The solution takes the form of a complete set of unifiers, where each unifier is a substitution that may have its free variables substituted to obtain a solution, together with constraints over those free variables. A substitution is a set of assignments from variables to expressions. - -Unification isn't really part of the semantics of logic programming, as the semantics is phrased in terms of satisfiability. But it is a standard technique used in implementing logic programming, and in practice the implementation defines the semantics. Prolog only implements first-order unification. Teyjus / λProlog limit to higher-order "pattern lambdas". With ZipperPosition :cite:`vukmirovicEfficientFullHigherOrder2021` there is outlined a full higher-order unification algorithm extending Huet's semi-algorithm - the need to support multiple unifiers for a complete set complicates things a bit. - -The outline of every unification algorithm is that it randomly applies simplifying reduction operations to an equation until it results in a substitution, then applies the substitution to the remaining equations (dereferencing). Here we show :cite:`vukmirovicEfficientFullHigherOrder2021`'s, adapted to match the presentation on `Wikipedia `__: - -* delete: ``s=s`` is removed -* decompose: ``a s1 ... sm = a t1 ... tm`` to equations ``{s1 = t1, ..., sm = tm }`` -* rigid/rigid conflict: ``a sm = b tn`` fails if a and b are different rigid heads -* dereference: ``F s1 ... sn = t`` to ``(F /. σ) ... = t``, if the substitution σ from another equation maps F -* empty equation list: trivially soluble -* alpha/eta normalization: ``λxm.s = λyn.t`` to ``λxm.s = λxm.t' xn+1 . . . xm``, where ``m ≥ n``, ``xi`` disjoint from ``yj``, and ``t' = t /. {y1 → x1 , ... , yn → xn }`` -* beta normalization: reduce left/right to hnf -* under lambda: apply rule for ``a = b`` to ``λx. a = λx. b`` - -ZipperPosition has more complex reductions for hard cases: - -* oracle fail: ``s=t`` fails if oracle determines to be insoluble -* oracle success: ``s=t`` has finite CSU, branch to each solution σ_i -* bind: try projections with the following binding substitutions: - - * flex-rigid ``P(λx. F s = λx. a t)``: try an imitation of a for F, if a is constant, and all Huet-style projections for F, if F is not an identification variable. - * flex-flex with different heads ``P(λx. F s = λx. G t)``: all identifications and iterations for both F and G, and all JP-style projections for non-identification variables among F and G. - * flex-flex with identical heads and the head is an elimination variable, ``P(λx. s = λx. t)``: no bindings. - * flex-flex with identical heads, ``P(λx. F s = λx. F t)``: all iterations for F at arguments of functional type and all eliminations for F. - -The flex-binding step is slow, but a good set of oracles makes the algorithm efficient for most practical cases. Of course it would be better to find reduction rules that solve things generally rather than oracles which work on specific cases, but this is hard. - -The unifier search can be integrated with the overall logical search for satisfiable formulas. - -By default Prolog does not use the `occurs check `__ in unification. This means for ``x == f x`` the substitution ``x -> f x`` is obtained. Denotationally this can be accommodated by allowing states to contain infinite rational terms, :cite:`weijlandSemanticsLogicPrograms1990` ``x = f (f (f (...)))`` in this case. In most Prolog programs the occurs check does not make a difference and simply slows down unification. 
:cite:`aptWhyOccurcheckNot1992` Prolog defines a ``unify_with_occurs_check`` predicate, and has an option for doing the occurs check in the implicit unification when dispatching predicates. Meanwhile miniKanren always uses the occurs check. The occurs check is needed in first order logic theorem-proving, where skolemization turns quantifiers into variables and is sound only if the occurs check is used. - - Sources ======= diff --git a/docs/Commentary/Language/Memory.rst b/docs/Commentary/Language/Memory.rst new file mode 100644 index 0000000..dcdc889 --- /dev/null +++ b/docs/Commentary/Language/Memory.rst @@ -0,0 +1,186 @@ +Memory +###### + +The language should have automatic memory management. Manual memory management is slow, tedious, and error prone. Automatic memory management is better in all respects, but the implementation has to be flexible enough to be usable for all the things manual memory management is. + +Memory models +============= + +3-3I. It shall be possible to define types whose elements are indirectly accessed. Elements of such types may have components of their own type, may have substructure that can be altered during execution, and may be distinct while having identical component values. Such types shall be distinguishable from other composite types in their definitions. An element of an indirect type shall remain allocated as long as it can be referenced by the program. [Note that indirect types require pointers and sometimes heap storage in their implementation.] + +3-3J. Each execution of the constructor operation for an indirect type shall create a distinct element of the type. An operation that distinguishes between different elements, an operation that replaces all of the component values of an element without altering the element's identity, and an operation that produces a new element having the same component values as its argument, shall be automatically defined for each indirect type. + +Per :cite:`kangFormalMemoryModel2015` there are pretty much two models of memory, pointers and references. Pointers model memory as an integer-indexed array of 2^32 or 2^64 words, accessed by the OS/hardware APIs. References model memory as an associative array from symbolic "references" (potentially infinite in number) to "cells", values (stored in some unspecified format, but with lossless storage). + +Kang describes how combinations of these can be made, for example the "quasi-concrete model" which uses a data type that starts out containing a reference, implements various arithmetic operations symbolically, but switches to a pointer once an integer address is requested. You can also imagine the other direction, a pointer that masquerades as a reference but errors when attempting to store a value larger than the allocation. But references and pointers are the fundamental ideas and serve to implement all other possibilities. + +:cite:`brightProgrammingLanguageIdeas2022` brings up the old x86 16-bit pointer model. There were data, code, stack, and extra segment registers. A near pointer simply adds an offset to the appropriate segment register. Far and huge pointers set the segment register first, allowing access to other segments. Far pointers were unnormalized, while huge points were normalized to a canonical segment+offset pair. Nowadays, in x86-64, pointers are just represented as a uniform 64-bit absolute address. The only residue of segment addressing is there are some "load relative" instructions that take offsets instead of absolute pointers. 
+ +Bright suggests that the lesson is to only have one type of pointer. But I disagree. The lesson is really to ensure that a pointer is self-contained, in that it always points to the same location, and unique, in that no other pointer value refers to that location. In the 16-bit pointer model, only far and huge pointers were self-contained. And far and huge pointers had the issue of allowing multiple representations of the same address. The normalization solved this, but there were disagreements on how to normalize and it was often skipped for performance reasons. Comparatively, the 64-bit model has a unique pointer value for every address. Turning now to modern models, the concrete and symbolic models are both fine in this regard; integers and symbols are self-contained and unique. + +Bright also raises the specter that "You will wind up with two versions of every function, one with manually managed pointers and one with garbage collected pointers (references). The two versions will require different implementations. You'll be sorry." How worrisome is this? + +Well, first let's try to use a pointer as a reference. There are many issues to consider: + +* Allocation size: Generally it is assumed the pointer points to some fixed-size buffer of bytes. But this means we can't store arbitrary-sized values; they just don't fit. Usually this is solved by restricting the possible values to a finite set, then the storage is fixed. +* Serialization: To mimic the ability of a reference to store heterogeneous types of data, strings, numbers, lists, functions, and so on, we need a universal serialization function, that e.g. stores a type tag. We can probably expose such a serialization function from the compiler, as the compiler needs such a function to implement references. Alternatively, for a restricted type, this is solved by writing a custom serialization function. +* Ownership - Pointers can just be calculated out of thin air, so some other function could overwrite our buffer. The format could be corrupted, or the memory could be deallocated altogether. Usually this is solved by making a private copy of the buffer at some isolated address that no other part of the program uses, and only writing back changes at the end in one atomic operation. + +Is someone really going to work through these issues and write a second version of the function? When they could just make the pointer into a reference with ``newRef (deserialize (readBytes 10 ptr))`` and let the compiler do all the work? References should have decent performance so there will be no reason to try to shoehorn a pointer into a reference-like API. Pointers are really a low-level, byte-based abstraction whose only business is interfacing with C code. As evidence that they are needed I offer `C# `__ which has included them since 1.0. + +As far as using a reference as a pointer, as long as we don't want to do pointer arithmetic, we can just store an array of bytes in the reference. Such a pattern is common in Java, e.g. the ArrayList class. But when we want to materialize some bytes at a given memory address, there is no way to do it with references. References just don't support interfacing with C code. + +I guess it is possible that someone will have two versions of a function, one that implements it in pure Stroscot via references and one that calls out a C library with pointers. But externally, I think both of them should interact with the rest of the code using references. 
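As a sketch of what that looks like in practice (all names here are hypothetical; ``IORef`` stands in for a Stroscot reference and ``c_sort`` for some foreign routine), the pure version and the C-backed version differ internally but expose the same reference-based interface:

.. code-block:: haskell

   import Data.IORef (IORef, readIORef, writeIORef)
   import Data.List (sort)
   import Foreign.C.Types (CInt, CSize)
   import Foreign.Marshal.Array (peekArray, withArray)
   import Foreign.Ptr (Ptr)

   -- Stand-in for the foreign routine; a real binding would be a foreign import
   -- of some C function that sorts a raw buffer in place.
   c_sort :: Ptr CInt -> CSize -> IO ()
   c_sort _ _ = return ()

   -- Pure version: works directly on the reference contents.
   sortRefPure :: IORef [CInt] -> IO ()
   sortRefPure r = readIORef r >>= writeIORef r . sort

   -- C-backed version: materialize the data in a buffer, call out, read it back.
   -- Externally it has the same reference-based signature as the pure version.
   sortRefC :: IORef [CInt] -> IO ()
   sortRefC r = do
     xs <- readIORef r
     withArray xs $ \buf -> do
       c_sort buf (fromIntegral (length xs))
       peekArray (length xs) buf >>= writeIORef r

Only the wrapper ever touches a pointer; the rest of the program is written once, against references.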
Using pointers with the C code might avoid a few conversion calls, but references are a lot cleaner to use, e.g. avoiding the use of callbacks, and there is the guaranteed optimization that you can use a reference as a pointer with zero-cost. So I don't think this poses an issue. Even if the C wrapper did use pointers because it was easier than converting to/from references all the time, that's a judgement call on the part of the library author and I don't think there is a solution that would let everyone standardize on one universal type. The best a "pointerOrRef" type can support, even restricted to a small type like ``int8``, is get/set like a regular reference. + +Aliasing +-------- + +Steelman 7I. The language shall attempt to prevent aliasing (l.e., multiple access paths to the same variable or record component) that is not intended, but shall not prohibit all aliasing. [...] All aliasing of components of elements of an indirect type shall be considered intentional. + +The language is convoluted and hard to understand, but the way I read this is that anyone who uses an indirection expects aliasing and the language should not do anything to prevent it. Certainly, if you don't need aliasing, you could just use a constant directly. + +Pointers +======== + +Pointers are the low-level API, they can interface with the OS or other languages (mainly C). I did a study of Windows/Linux memory APIs and concluded that memory is best modeled as the global mutable array ``Memory = Map (Word,BitIdx) Status``. The status allows storing metadata, it's `a complex ADT `__ which has various states like unallocated, committed, etc. The array is indexed at the bit level because that's the granularity `Valgrind's Memcheck `__ uses, but most of the status will be the same for a byte or page as the memory allocators / OS operations work at higher granularity. + +It is simple enough to maintain "extra" status bits, and instrument memory functions to check the status of memory before operating. This is essentially what Valgrind does. With this it is possible to identify many common errors, like double free, use after free, access to undefined memory, and null pointer dereferencing. But there is still the possibility of overflowing a buffer into an adjacent allocation, or more generally `type punning `__ by reading some memory as a format it was not written with. These sorts of possibilities are intrinsic to the "big array of bits" model, and many low-level hacks rely on such functionality, so I would say to use references if you want to avoid such things. But of course someone can easily add bounds-checking etc. on top of the basic pointer model as a library. + +Most addresses will not be allocated (status Free), hence the array is sparse in some sense. It is in fact possible to implement the typical `sparse array operations `__. There are functions to directly allocate memory at an address. Reading and writing are done directly in assembly. The list of currently mapped pages can be had from ``/proc/self/maps`` and `VirtualQueryEx `__, although this has to be filtered to remove pages reserved by the kernel and internal pages allocated by the runtime, and looks slow - it's easier to wrap the allocation functions and maintain a separate list of user-level allocations. Clearing mappings, hashing memory, and indexing by mapped pages all work when restricted to the list of user pages. It's a little more complicated than simple sparsity because there are many different statuses and the operations overlap. 
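A minimal sketch of this status-tracking model, with a deliberately simplified two-state ``Status`` instead of the full ADT (the names are illustrative, not the actual runtime API):

.. code-block:: haskell

   import qualified Data.Map.Strict as Map
   import Data.Map.Strict (Map)
   import Data.Word (Word64)

   type BitAddr = (Word64, Int)   -- (word address, bit index), as in Memcheck

   data Status = Free | Allocated Bool   -- Bool: has this bit been written yet?
     deriving (Eq, Show)
   data MemError = InvalidAccess | UndefinedRead | DoubleFree deriving Show

   -- The sparse "big array of statuses"; absent keys mean Free.
   type Memory = Map BitAddr Status

   status :: Memory -> BitAddr -> Status
   status mem a = Map.findWithDefault Free a mem

   -- Instrumented operations check the status before acting, Valgrind-style.
   checkedRead :: Memory -> BitAddr -> Either MemError ()
   checkedRead mem a = case status mem a of
     Allocated True  -> Right ()
     Allocated False -> Left UndefinedRead   -- access to undefined memory
     Free            -> Left InvalidAccess   -- use after free / wild pointer

   checkedWrite :: Memory -> BitAddr -> Either MemError Memory
   checkedWrite mem a = case status mem a of
     Allocated _ -> Right (Map.insert a (Allocated True) mem)
     Free        -> Left InvalidAccess

   checkedFree :: Memory -> [BitAddr] -> Either MemError Memory
   checkedFree mem as
     | any ((== Free) . status mem) as = Left DoubleFree   -- or free of a wild pointer
     | otherwise = Right (foldr (`Map.insert` Free) mem as)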
+ +Storage vs. memory +------------------- + +In practice, the path from cloud to CPU is long, and accessible storage is not just RAM. Some latency numbers and the corresponding programming APIs: + +* Physical registers (0.3 ns): managed by the CPU +* Logical registers (0.3 ns): assembly read/write +* Memory Ordering Buffers (MOB), L1/L2/L3 Cache (0.5-7 ns): managed by the CPU +* Main Memory (0.1us-4us): assembly read/write +* GPU memory (0.2us-0.5us): assembly read/write, driver ioctls +* NVRAM (200us-250us): assembly read/write, special calls +* SSD (250-500us): kernel file APIs +* LAN (0.5-500ms): kernel network stack, driver bypass +* HDD (3 ms): kernel file APIs +* WAN (150ms): kernel network stack, driver bypass + +Not all applications will use all of these, but all will use some, and for each there is an application that uses it. So all of these have to be modeled in order to create a performant application. Ideally the memory management system would be a "storage management system" that combines all of these into a single pointer-like abstraction and allows copying data between locations as appropriate. But it's a leaky abstraction, and I'm not sure it can be pulled off except as a library. + +"You-choose" Allocation +----------------------- + +In practice, fixed-address allocation / assignment is not commonly used. Instead, there are ``mmap NULL``, ``malloc``, and the C library API alloc/realloc, which allocate memory at a system-chosen / allocator-chosen location. For verifying behavior, the right model for this is adversarial, i.e. the allocator chooses the worst possible location, subject to restrictions such as that the allocation must be suitably aligned and disjoint from all unrevoked allocations. More formally, the behavior of a correct program should not depend on what addresses the system picks, i.e. all choices should be observationally equivalent. (The system can also return an out of memory error, but this doesn't have to result in equivalent behavior.) + +Of course, the actual allocation strategy should not be the worst; rather, it should try to achieve the best performance. For the most part, people do not seem to pay much attention to allocator design, because it is pretty cheap. For example `in Doom 3 `__ the median time for an allocation is 31 nanoseconds, ranging from 21 nanoseconds to 201 microseconds, and free is comparable. + +But speeding up allocation is actually fairly important. Combining operations into a single larger operation (e.g. allocating one larger buffer, or calling ``close_range`` to close several open FDs rather than iterating over them individually) by pushing allocations forward and delaying frees, as long as there is sufficient memory or resource capacity available, can be a big win. In contrast, reads and writes are always real work, and besides SIMD there is not much way to optimize them. + +There are also a lot of locality and cache effects from the address allocation algorithm. In the trivial case, the memory usage can be predicted in advance and allocations given fixed assignments, giving zero-cost memory allocation. In more practical applications, variable allocations will need to be tracked, but there are still tricks for grouping allocations based on access patterns, avoiding fragmentation. Most research has been on runtime allocation optimization, but many of these optimizations can be precomputed at compile time.

For example: + +* A loop that allocates and deallocates a scratch buffer in the body is much more performant if the buffer is allocated to the same location every time - the allocation/deallocation code can even be pulled out of the loop. +* Grouping hot variables into a page, so the page is always loaded and ready +* Grouping things that will be freed together (pools/arenas) + +Optimizing access +----------------- + +Generally, optimizations are allowed to eliminate possibilities allowed by the memory model, but there could also be an option to strictly preserve the set of possibilities. + +Eliminating a pointer read amounts to tracking down the matching pointer write and propagating the value directly, which can be accomplished by tracing control flow. There is the issue of data races with concurrent writes, but the memory model dictates which values a read may resolve to, and the verifier already handles nondeterminism, so it is not much harder than normal value propagation. There is also modeling foreign code, specifically determining whether the foreign code can write a pointer (i.e, whether the pointer is shared or not). + +Eliminating a pointer write requires proving that the address is never read before deallocation or another pointer write. Again there are the issues of data races and foreign code. + +CHERI +----- + +CHERI pointers are 129-bit, consisting of a 1-bit validity tag, bounds, permissions, object type, and actual pointer. Valid pointers may only be materialized in a register or memory by transforming an initial unbounded pointer obtained from the OS. This means that the simple model of pointers as integers is no longer valid. Instead, a pointer is the combination of an integer address and a capability. The `CHERI C/C++ API `__ represents the address+capability value as ``void*`` and addresses as ``vaddr_t``; there doesn't seem to be a way to refer to a capability without an address. + +I tried to read further, but the model is complicated, essentially implementing a GC to avoid dangling pointers, so I am not sure it will ever become mainstream. + +Persistent memory +----------------- + +The pointer API, assembly wrapping, and OS calls cover using persistent memory via standard file APIs or memory-mapped DAX. Memory is volatile while persistent memory is not, so persistent memory is faster storage, not weird RAM. And storage is complex enough that it seems best handled by libraries. Making the memory management system memkind-aware seems possible, like memory bound to NUMA nodes. + +References +========== + +5B. Each variable must be declared explicitly. Variables may be of any type. The type of each variable must be specified as part of its declaration and must be determinable during translation. [Note, "variable" throughout this document refers not only to simple variables but also to composite variables and to components of arrays and records.] +5E. There shall be no default initial-values for variables. +5F. Assignment and an implicit value access operation shall be automatically defined for each variable. +9C. It shall be.possible to mark variables that are shared among parallel processes. An unmarked variable that is assigned on one path and used on another shall cause a warning. + +A reference is a symbolic index into a global associative array of objects, ``Map Reference Object``. The array allows allocating new references, deleting them, and reading/writing the reference. 
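A toy model of this global associative array (illustrative only - real references are compiler-managed rather than a ``Map`` threaded around by hand):

.. code-block:: haskell

   import qualified Data.Map.Strict as Map
   import Data.Map.Strict (Map)

   newtype Reference = Reference Int deriving (Eq, Ord, Show)

   -- The global associative array ``Map Reference Object``, plus a counter for
   -- allocating fresh symbols. ``obj`` stands in for arbitrary stored values.
   data Heap obj = Heap { cells :: Map Reference obj, next :: Int }

   emptyHeap :: Heap obj
   emptyHeap = Heap Map.empty 0

   newRef :: obj -> Heap obj -> (Reference, Heap obj)
   newRef v (Heap m n) = (r, Heap (Map.insert r v m) (n + 1))
     where r = Reference n

   readRef :: Reference -> Heap obj -> Maybe obj   -- Nothing = already deleted
   readRef r (Heap m _) = Map.lookup r m

   writeRef :: Reference -> obj -> Heap obj -> Heap obj
   writeRef r v (Heap m n) = Heap (Map.insert r v m) n

   deleteRef :: Reference -> Heap obj -> Heap obj
   deleteRef r (Heap m n) = Heap (Map.delete r m) n

   -- Packing exposes some distinct integer per reference; the program must not
   -- depend on which one (see the adversarial reading below).
   packRef :: Reference -> Int
   packRef (Reference n) = n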
Reference symbols can be compared for equality, hashed to an integer, and packed/unpacked to/from an integer. + +The packing and hashing requires a little explanation. Packing the same reference always returns the same value during a program execution, and the packed value is distinct from the packed value of any other reference. But the exact value is internal to the memory system - it is an "adversarial" model similar to pointers where if the program's behavior depends on the choice of packed value it is incorrect. The hashing is similar to packing, it is again the same value for the same reference, it is just that there is no distinctiveness constraint (so the program must have the same behavior even if all references hash to 0), and also no way to unhash the value, so there is no need to worry about resolving unpack invocations. + +There are higher-level types like immutable references and reference wrappers, but those all translate away to normal references or pointer access and don't need involvement from the compiler. Per :cite:`ichbiahRationaleDesignADA1979` we should provide a "freeze" operation which recursively removes all reference indirections and turns a reference-containing value into a truly immutable/constant object, as this is "the most useful and should be retained as the unique meaning of constancy". + +Pointer conversion +------------------ + +The location of the data of a reference is not fixed. If it's small enough it could just be in a register, or there could be multiple copies of the data in memory. Also GC can move/copy the reference. The data could be produced on-demand and be represented by a thunk. All that can really be said is that the compiler will respect the semantics of storing and retrieving data. + +Foreign operations like OS calls require a pointer to a memory address, because references don't necessarily exist in memory. The canonical way of doing this is simply reading the reference value and storing it in a buffer represented by a pointer ("materializing" it in memory). Internally, when compiling away the reference, the compiler tries to find a good way to store the reference - if it's lucky, it can backpropagate the pointer request and store the data there from the beginning, so that the "read and store" operation is actually a no-op that makes zero copies. + +But, in the fallback case of storing a few words, where a memory allocation is appropriate, the reference translates directly to a pointer allocation. The memory is configured to trap on stray user-level access, so that only the compiler-generated code has access. Even in this case, though, the reference's internal value is not the pointer itself, rather there is a more complex strategy of using a "handle" identifier that allows moving the data around after it is allocated. + +Destructors are inspired by C++ RAII destructors, hence the name. Admittedly the actual API doesn't bear much resemblance. `Finalizers `__ can resurrect objects and don't have deterministic execution, hence would be a bad name. Go's defer statement and try-finally are related, but they only work locally and have imprecise execution semantics. + +Portable mmap: +* Yu virtualalloc https://github.com/alpha123/yu/tree/master/src/platform +* Go: https://github.com/edsrzf/mmap-go +* C: mmap on windows https://github.com/alitrack/mman-win32 +* C++: https://github.com/mandreyel/mio +* Rust: https://github.com/RazrFalcon/memmap2-rs + +Representation +============== + +11A. 
The language shall permit but not require programs to specify a single physical representation for the elements of a type. These specifications shall be separate from the logical descriptions. Physical representation shall include object representation of enumeration elements, order of fields, width of fields, presence of "don't care" fields, positions of word boundaries, and object machine addresses. In particular, the facility shall be sufficient to specify the physical representation of any record whose format is determined by considerations that are entirely external to the program, translator, and language. The language and its translators shall not guarantee any particular choice for those aspects of physical representation that are unspecified by the program. It shall be possible to specify the association of physical resources (e.g., interrupts) to program elements (e.g., exceptions or signals). + +A lot of languages have a fixed or default memory representation for values, e.g. a C struct, a Haskell ADT, and a Python object are always laid out in pretty much the same way. The more systems-level languages allow controlling the layout with flags, for example Rust has `type layout `__ and also C compatibility. Layout is then defined by its size, alignment, padding/stride, and field offsets. Now it's great to have a compact representation of the memory layout - but only if you can actually write the memory layout you want using these features. But these flags are't really that powerful. Here's some examples of what can't generally be done with the current memory DSL's: + +* specify the in-memory order of fields differently from their logical order +* specifying how to encode enumeration constants (per struct it appears in) +* turn array-of-structs into struct-of-arrays +* flattening a datatype, like ``Either Bool Int`` into ``(Bool,Int)``, or representing a linked list as a contiguous series of records. +* storing some parts via pointer indirections (non-contiguous memory layout) +* NaN-boxing and NuN-boxing (`ref `__ `2 `__), representing the JS ``Any`` type as a single 64-bit word. +* parsing network packets into structured data + +Maybe some of these could be addressed by flags, but from the last two, it is clear that we are really looking for a general-purpose memory serialization interface. I looked at `Data.Binary `__, `store `__, and :cite:`delawareNarcissusCorrectbyconstructionDerivation2019`. Narcissus is too complex IMO: + +:: + + Format = Set (S, St, T, St) + Encode = S -> St -> Option (T, St) + Decode = T -> St -> Option (S, St) + +The state parameter can be gotten rid of by defining ``S = (S,St), T = (T,St)``: + +:: + + Format = Set (S, T) + Encode = S -> Option T + Decode = T -> Option S + +And we can make encode/decode total by defining ``S = {s | exists t. (s,t) in Format}``, ``T = {t | exists s. (s,t) in Format}``. + +I thought about letting ``pack`` narrow the range of values, e.g. rounding 1.23 to 1.2, but concluded that it would be surprising if storing a value to memory changed it. The rounding can be defined as a pre-pass over the data to convert it to a ``Measurement`` type that then has optimized storage. + +One tricky part is that the naive way to specify types interferes with overloading, subtyping and implicit conversions. ``pack (Int8 1)`` can give a byte as expected, but it can also implicitly convert to an ``Int32`` and give 4 bytes. 
Since we have dependent types this isn't a real issue, just make sure the code generated after representation specialization passes the type explicitly: ``pack Int32 (Int8 1)``. + +A few things need to optimize away for reasonable performance. ``length . pack`` should optimize to something like ``const 20`` for most values, or at least something that doesn't allocate, so that field accesses are independent and values can be allocated sanely. These functions might have to be hacked in, specializing to constant-sized values. + +Since writing these serialization functions all the time would be tedious, we can make a format DSL that specifies the functions in a nicer way. Although one of these DSL's will be the standard / default, it'll be some kind of macro / constraint system, so defining new format DSLs for specific purposes shouldn't be hard. + +The translation to use pack is pretty simple: every value is wrapped in a call to pack, the result is stored as a tuple ``(cell,unpack)``, and every usage applies unpack to the cell. The translation uses whatever pack is in scope; pack can be overridden like any other implicit parameters. The unpack functions will end up getting passed around a lot, but function pointers are cheap constants, and constant propagation is a thing, so it shouldn't be an issue. + +So finally the most general API is ``Write = Alloc (Size,Align) (Addr -> Write) | Store, Store = Map Addr MaskedWord`` and ``Unpack a = Maybe Addr -> Read -> a, Read = Map Addr Word``. This allows masked writes and multiple or fixed allocation addresses, but does not allow failing to read the value back. Also the ``pack`` function allows passing arbitrary side-band data to the ``unpack`` function. Maybe though, it is still not general enough, we should just have lens-like functions like ``write : Memory -> a -> Memory`` and ``read :: Memory -> a``. There still need to be constraints though, like that you get back what you wrote and non-interference of writes. + +Now we also want to allow optimization of the memory representation. Consider some data points - if there is only one possible value, then the compiler should optimize this to a constant and not store it at all. If there are two possible values, the compiler should probably use a boolean flag and again hard-code the values as constants. If the potential values include all values of a given type (and nothing else), then the compiler should use the representation for that type. If the potential values include a given type, and also members of another type, then the compiler should use the most narrowly-defined representation that contains both of those types. And it should consider whether it can choose the representation of the union type so as to minimize the amount of conversion needed for the more commonly used type (as in NaN/NuN-boxing). If the potential values can be anything, then the compiler should use the universal representation. + +The process of fixing the memory representation of a program can be modeled as follows. We start with a program that passes around values. Then we insert conversion operations: on every declaration, we insert a conversion to binary, and on every use, we insert a conversion from binary. As the binary representation is defined so that a read of a write is is the identity, this transformation does not change the meaning of the program. Then we additionally write this binary representation to memory on the declaration, and read this binary representation from memory on use. 
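For concreteness, here is what one hand-written ``pack``/``unpack`` pair might look like under the simplified ``Encode``/``Decode`` interface above, for an ``Either Bool Int32`` value (a fixed-width stand-in for the ``Either Bool Int`` flattening example); the tag-byte layout is made up purely for illustration:

.. code-block:: haskell

   import Data.Bits (shiftL, shiftR)
   import Data.Int (Int32)
   import Data.Word (Word32, Word8)

   -- Made-up flat layout: one tag byte, then either a boolean byte or a
   -- little-endian 32-bit payload, padded to a constant 5-byte size so that
   -- ``length . pack`` is a constant.
   pack :: Either Bool Int32 -> [Word8]
   pack (Left b)  = 0 : (if b then 1 else 0) : [0, 0, 0]
   pack (Right n) = 1 : [fromIntegral (n `shiftR` (8 * i)) | i <- [0 .. 3]]

   unpack :: [Word8] -> Maybe (Either Bool Int32)
   unpack (0 : b : _) = Just (Left (b /= 0))
   unpack (1 : bs) | length bs >= 4 =
     let w = sum [fromIntegral b `shiftL` (8 * i) | (i, b) <- zip [0 .. 3] bs] :: Word32
     in Just (Right (fromIntegral w))
   unpack _ = Nothing

   -- The translation described in the text relies on reads of writes being the
   -- identity: unpack (pack x) == Just x for every x.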
Again this does not change the semantics due to the non-interference of writes property. Although, in reality it could change the semantics: maybe a cosmic ray or something could change what we have written. But at this point, our program operates purely on memory and does not have any values floating around. + +https://github.com/ollef/sixten talks about being able to represent intrusive lists. I experimented with allowing the decision of pointer vs direct storage to be made in pack, but it really simplifies the code a lot to require all pack functions to produce flat blobs of data. diff --git a/docs/Commentary/Language/Objects.rst b/docs/Commentary/Language/Objects.rst index a813fc8..af9db74 100644 --- a/docs/Commentary/Language/Objects.rst +++ b/docs/Commentary/Language/Objects.rst @@ -54,8 +54,8 @@ Briefly looking at the other definitions on C2: * Chris Date "An object is essentially just a value (if immutable) or a variable (otherwise)." - certainly a good property, but I think the dictionary definitions capture much more of the intuition * Binding Behavior to References - unless "binding behavior" means dictionaries, it's missing a key component of OO, the field and method names -Objects -======= +Definition of object +==================== For the first part of OO, we must define objects, and explain how they can contain properties and methods. For the most part I agree with `ObjectsAreDictionaries `__, i.e. objects are a mapping from strings to values. But I want one fewer concept in the core language so I will instead define objects to be modules, in the ML sense: a module is a list of definitions, and these definitions collectively define a mapping from expressions to values. The expression evaluated in the context of an object/module will usually be an identifier, corresponding to a dictionary lookup, but this definition lets us define values for function calls and other bits of syntax as well, so we can write DSLs more easily. @@ -102,7 +102,7 @@ ML modules, because only ML modules provide sophisticated sharing mechanisms tha Cook goes on to state that "any programming model that allows inspection of the representation of more than one abstraction at a time is not object-oriented." So by his definition C++ and Java are not object-oriented - bleh. In fact this is just a limitation of ML - ML cannot inspect/pattern match on functions; they are opaque. In Stroscot, it is possible to match on the lambdas in Cook's Figure 8 and determine if an ISet was constructed via the Empty, Insert, or Union implementations. We might as well have written ``data ISet = Empty | Insert int ISet | Union ISet ISet`` as in the ADT implementation, except that the lambda presentation is an open data type that allows adding more cases. In Stroscot, we use multimethods to solve the expression problem, so it is just defining symbols and adding more dispatch cases to the relevant multimethods. -:: +.. code-block:: none interface ISet = { isEmpty : bool, @@ -206,7 +206,7 @@ More generally, all combinations of subtyping and inheritance are possible: * S is not a subtype but is a child type of T - S child of T, S -> S is not a subtype of T -> T * S is both a subtype and a child type of T - when all inherited fields and methods of the derived type have types which are subtypes of the corresponding fields and methods from the inherited type, and the type is an "open record" -Note that subtype + derived type is only possible with open records - with closed records no derived type is a proper subtype. 
:cite:`abdelgawadNOOPDomainTheoreticModel2018` formalizes this notion of open records and shows that in Java and other nominally-typed OOP languages, "inheritance is subtyping". More specifically, "a class B is a subtype +Note that subtype + derived type is only possible with open records - with closed records no derived type is a proper subtype. :cite:`abdelgawadNOOPDomaintheoreticModel2018` formalizes this notion of open records and shows that in Java and other nominally-typed OOP languages, "inheritance is subtyping". More specifically, "a class B is a subtype of a class A, in the open record sense, iff B inherits from A." But this property is obtained by placing restrictions on inheritance - in Java, a method only overrides its parent method if its type matches the parent method, and methods cannot be removed. :cite:`taivalsaariNotionInheritance1996` calls this "strict inheritance". Strict inheritance is a pretty weird restriction from a unityped perspective - for example in Smalltalk we can override a field and change its value from an int to a string. So this "inheritance is subtyping" property is a form of type discipline, rather than a free property. Inheritance-as-subtyping is easy to misuse and the Java platform libraries made numerous mistakes: Stack extends Vector, Properties extends Hashtable - in both cases, not using inheritance and thus avoiding the accompanying field/property inclusion would have been preferable. For example, with Properties (`1 `__), ``(Properties) p.getProperty(key)`` takes defaults into account, while ``p.get(key)`` which is inherited from Hashtable does not, and direct access to the underlying Hashtable allows adding non-String objects, when the designers intended that Properties should only be Strings. Once this invariant is violated, it is no longer possible to use other parts of the Properties API (load and store). Without inheritance-as-subtyping, ``get`` could have been overridden to be a subtype, and the other Hashtable methods deleted. diff --git a/docs/Commentary/Language/Sets.rst b/docs/Commentary/Language/Sets.rst index b023045..9119001 100644 --- a/docs/Commentary/Language/Sets.rst +++ b/docs/Commentary/Language/Sets.rst @@ -1,13 +1,13 @@ Sets #### -Stroscot allows specifying properties about execution, which the compiler then attempts to prove or at least fuzz (see :ref:`Verification `). The most common form of property is membership in a set of values, described here. Constraining the set of values enables many useful optimizations to be performed. - -Sets -==== +Stroscot allows specifying properties about execution, which the compiler then attempts to prove or at least fuzz (see :ref:`Commentary/Language/Verification:Verification`). The most common form of property is membership in a set of values, described here. Constraining the set of values enables many useful optimizations to be performed. Steelman 3C "It shall be possible to define new data types in programs. A type may be defined as an enumeration, an array or record type, an indirect type, an existing type, or a subtype of an existing type. It shall be possible to process type definitions entirely during translation. An identifier may be associated with each type. No restriction shall be imposed on user defined types unless it is imposed on all types." +Definition +========== + Sets in Stroscots are defined by a predicate ``a isElementOf S : Value -> Set -> Bool``, where ``Set = { s : Any | forall a : Any. (a isElemof s : Bool) }``. 
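As a rough model (illustrative Haskell, ignoring dependent types and the well-foundedness requirement discussed next), a set is nothing more than its membership predicate:

.. code-block:: haskell

   -- A set is represented by its membership predicate.
   newtype Set a = Set (a -> Bool)

   isElementOf :: a -> Set a -> Bool
   x `isElementOf` (Set p) = p x

   evens :: Set Int
   evens = Set (\x -> x `mod` 2 == 0)

   -- Any side-effect-free boolean function gives a set, and the usual
   -- operations are just boolean combinations of predicates.
   union, intersect :: Set a -> Set a -> Set a
   union     (Set p) (Set q) = Set (\x -> p x || q x)
   intersect (Set p) (Set q) = Set (\x -> p x && q x)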
The predicate can describe any side effect free computation, so a set can describe any boolean function. As described in :ref:`paradoxes` Stroscot avoids the set-theoretic paradoxes by requiring every set definition to be well-founded. Sets don't contain null by default, you have to add it to the predicate as an allowed value. diff --git a/docs/Commentary/Language/State.rst b/docs/Commentary/Language/State.rst index 7cb3ccd..94146c5 100644 --- a/docs/Commentary/Language/State.rst +++ b/docs/Commentary/Language/State.rst @@ -370,14 +370,14 @@ Generally it seems that Yoneda solves a different problem than an I/O monad. Algebraic effects ----------------- -Codensity and algebraic effects are quite similar, both using a data type to represent operations. In fact the two are macro-expressively equivalent. :cite:`forsterExpressivePowerUserDefined2017` But Codensity doesn't require new syntax unlike the handler functionality. In the effect approach, computations are not first-class values. +Codensity and algebraic effects are quite similar, both using a data type to represent operations. In fact the two are macro-expressively equivalent. :cite:`forsterExpressivePowerUserdefined2017` But Codensity doesn't require new syntax unlike the handler functionality. In the effect approach, computations are not first-class values. OTOH effect types are quite useful, because you can define code that is polymorphic over the effect type, hence can be used as both pure and impure code. They use a monadic translation and then pure code is the identity monad. This can be shoehorned into continuations too by using a symbol marker with cases for pure and impure but maybe it is not as nice. Call by push value ------------------ -CBPV has "values" and "computations". The original presentation has these as separate categories, but :cite:`eggerEnrichedEffectCalculus2014` presents an alternative calculus EC+ where every computation is also a value. There is exactly one primitive that sequences computation, ``M to x. N``, which acts like the monadic bind ``M >>= \x -> N``, and similarly there is ``return``. And the evaluation is CBV. So stripping away the thunk stuff it seems to be a disguised version of monads. And the thunk stuff is a rather fragile way to implement CBN - it doesn't generalize to call by need. :cite:`mcdermottExtendedCallbyPushValueReasoning2019` And then there is jump-with-argument (JWA) which uses continuations and is equivalent to CBPV. +CBPV has "values" and "computations". The original presentation has these as separate categories, but :cite:`eggerEnrichedEffectCalculus2014` presents an alternative calculus EC+ where every computation is also a value. There is exactly one primitive that sequences computation, ``M to x. N``, which acts like the monadic bind ``M >>= \x -> N``, and similarly there is ``return``. And the evaluation is CBV. So stripping away the thunk stuff it seems to be a disguised version of monads. And the thunk stuff is a rather fragile way to implement CBN - it doesn't generalize to call by need. :cite:`mcdermottExtendedCallbypushvalueReasoning2019` And then there is jump-with-argument (JWA) which uses continuations and is equivalent to CBPV. Applicative ----------- @@ -565,7 +565,7 @@ It has been established that PURE ⊆ CYCLE ⊊ HASK ⊆ IMPURE as far as expres * For the third relation, the thunk mechanism of HASK can be emulated in IMPURE, showing inclusion. 
:cite:`ben-amramNotesPippengerComparison1996` theorizes that for IMPURE programs following a read-update-write structure, there is a correspondingly efficient HASK program. Since Haskell 1.0 programs use a lazy stream ``[Response] -> [Request]`` for I/O, this read-update-write model seems to encompass all programs, hence it seems likely that the two languages are of equal efficiency, although nobody has formally proved this (TODO). -The log(n) gap between CYCLE and HASK is calculated using the cost of updating a balanced binary tree. This is the cost of the predecessor problem in the `pointer machine `__. In the more accurate RAM model the update cost is optimally O(log log m) time under some assumptions. (:cite:`strakaFunctionalDataStuctures2013`, chapter 5) Straka's implementation uses vEB trees which have a huge constant factor and space usage, but y-fast trees probably work too for a practical implementation. +The log(n) gap between CYCLE and HASK is calculated using the cost of updating a balanced binary tree. This is the cost of the predecessor problem in the `pointer machine `__. In the more accurate RAM model the update cost is optimally O(log log m) time under some assumptions. (:cite:`strakaFunctionalDataStructures2013`, chapter 5) Straka's implementation uses vEB trees which have a huge constant factor and space usage, but y-fast trees probably work too for a practical implementation. Still though, a gap is a gap, so to get performance we must provide laziness or destructive update. And programming efficient amortized pure lazy data structures is quite complex, and not well-studied. It seems that any practical programming language will have to provide destructive update. diff --git a/docs/Commentary/Language/TermRewriting.rst b/docs/Commentary/Language/TermRewriting.rst index 931089e..94f6346 100644 --- a/docs/Commentary/Language/TermRewriting.rst +++ b/docs/Commentary/Language/TermRewriting.rst @@ -4,7 +4,7 @@ Term rewriting Higher-order rewriting ====================== -The definition is vaguely based on :cite:`vanoostromConfluenceAbstractHigherOrder1994`. But Ooostrom's HORS definition, like Terese, uses closed terms and no separate substitution, avoiding free variables by adding additional binders, e.g. he writes the rule ``f x = 1`` as ``\x. f x -> \x. 1``. I don't like this because it's unclear how it interacts with currying, eta reduction, and conditional rewriting, so I added the substitution back. +The definition is vaguely based on :cite:`vanoostromConfluenceAbstractHigherorder1994`. But Ooostrom's HORS definition, like Terese, uses closed terms and no separate substitution, avoiding free variables by adding additional binders, e.g. he writes the rule ``f x = 1`` as ``\x. f x -> \x. 1``. I don't like this because it's unclear how it interacts with currying, eta reduction, and conditional rewriting, so I added the substitution back. There is some question of whether context substitution is capture-avoiding, i.e. does ``(\x. □)[□ := x]`` not resolve to ``(\x. x)``. Terese says it captures. With Oostrom this substitution is forbidden since ``x`` is not a closed term. In our more liberal definition this resolves to ``(\y. x)`` by alpha-renaming the variable. @@ -85,18 +85,18 @@ This set, like the real numbers, is uncountably large and includes terms with no There are various extensions of the transitive closure to infinitary reduction, so the question arises as to which one to use. 
:cite:`kahrsInfinitaryRewritingClosure2013` discusses several and provides an ordering so that each is mostly a proper subset of the next (not sure about P* subset bi-infinite). Many of these use the monotonic closure operator \*. Specifically X* is the least fixedpoint of the function G defined as G(R) = X(R) union R, which by the (transfinite) Kleene fixed-point theorem exists and is the limit/union of the sequence :math:`X^0 = \emptyset, X^{n+1} = G(X^n), X^\delta = \bigcup_{\alpha < \delta} X^\alpha`. * S*, the monotonic closure of strongly converging reduction sequences, "strong" being a requirement that the depth of the redexes contracted in successive steps tends to infinity. S=S* for "compatible" TRSs, ones where t R u imply C[t] R C[u] for any context C, which all iTRSs satisfy. -* W*=A=A*, the monotonic closure of weakly converging reduction sequences, and also the `adherent points `__ of reduction sequences in the metric space. Weak convergence by itself is not transitively closed, e.g. ``a = b; f x a = f (g x) a`` has ``f c a -ω> f (g (g (g ...))) a -> f (g (g (g ...))) b`` :cite:`dershowitzRwriteRewriteRewrite1991` :cite:`simonsenWeakConvergenceUniform2010`, hence the need for closure. By definition of adherent point, each w-reduct is either an accumulation point, i.e. a appears arbitrarily close infinitely often in a reduction sequence, or an isolated point which can be reached in a finite number of reductions. +* W*=A=A*, the monotonic closure of weakly converging reduction sequences, and also the `adherent points `__ of reduction sequences in the metric space. Weak convergence by itself is not transitively closed, e.g. ``a = b; f x a = f (g x) a`` has ``f c a -ω> f (g (g (g ...))) a -> f (g (g (g ...))) b`` :cite:`dershowitzRewriteRewriteRewrite1991` :cite:`simonsenWeakConvergenceUniform2010`, hence the need for closure. By definition of adherent point, each w-reduct is either an accumulation point, i.e. a appears arbitrarily close infinitely often in a reduction sequence, or an isolated point which can be reached in a finite number of reductions. * P*: the monotonic closure of the pointwise closure of the reflexive transitive closure (finite multi-step relation). * bi-infinite rewriting, defined in :cite:`endrullisCoinductiveFoundationsInfinitary2018` Section 6.2 as the greatest relation R such that R = the reflexive transitive closure of single-step rewriting union R lifted to apply to subterms. * T*: the monotonic closure of T, the topological closure of the reflexive transitive closure. T itself is not transitively closed, e.g. ``leq 0 x = true; leq (s x) (s y) = leq x y; inf = s inf`` has ``leq inf inf T leq (mu x. s x) (mu y. s y) T true`` (by topological closure of finite approximations of the S towers) but not ``leq inf inf T true`` (because the terms are of finite depth). Alternatively I have defined T* as the smallest relation M such that M is reflexively, transitively, and topologically closed and contains the single-step relation, which I think is equivalent. S* is the standard in the literature but doesn't have much going for it besides that. If there is a reduction that switches heads, ``a X = b (c X); b X = a (c X)``, then S* says there are no w-reductions. W* has ``a e -w> a (mu x. c x)`` and ``a e -w> b (mu x. c x)``. TRSs are in general nondeterministic, so the "strongly converging" definition that requires a single limit to exist is too strong. 
-For cycle condensation we would like to equate as many terms as possible to get large SCCs, and similarly a large reduction relation means there will be an escape from infinite regresses. As an example, with bi-infinite rewriting or T*, the hypercollapsing term ``mu x. C x`` with rule ``C x = x`` will reduce to every term (limit of approximations ``C^n f = f``), making it ambiguous, while with W* and P* the hypercollapsing term only reduces to itself hence is a condensed normal form. Similarly with ``C A = A`` where ``A`` is a constant, ``mu x. C x = A`` with bi-infinite/T* but W*/P* don't reduce at all. Bi-infinite and T* seem equally simple to formalize since they are both single fixed points, so it seems T* wins because it's larger. +Hypercollapsing terms are a trickier case. With bi-infinite rewriting or T*, the hypercollapsing term ``mu x. C x`` with rule ``C x = x`` will reduce to every term (limit of approximations ``C^n f = f``), making it ambiguous, while with W* and P* the hypercollapsing term only reduces to itself hence is a condensed normal form. In contrast, with ``C A = A`` where ``A`` is a constant, ``mu x. C x`` reduces to ``A`` with bi-infinite/T* but W*/P* don't reduce at all. More generally, if we have a predicate ``P``, then ``mu x. C x`` with ``C x | P x = x`` reduces to all terms where ``P`` holds under bi-infinite / T* but does not reduce under W* / P*. The other alternative is to reduce hypercollapsing terms to a ``Meaningless`` exception. I think the behavior of ``T*`` is the most useful - most likely it will give an error due to ambiguity, but there are cases where it can produce usable values. In contrast W* / P* / meaningless all essentially give up on the reduction of hypercollapsing terms. Also conditional rewriting can interact with infinite reduction and cause unwanted behavior with a weak closure. For example consider the system ``ds x y | x == y = e`` and reducing the infinite term ``G = ds G G`` (in :cite:`klopCombinatoryReductionSystems1980` this is achieved by the system ``G = a = c a; c x = ds x (c x)``). Since ``e`` is a normal form hence equal to itself, all finite terms defined by ``T = { x : x == e or x in ds T T }`` reduce to ``e``. So using a bi-infinite closure, ``G`` uniquely reduces to ``e``. But with a weak closure ``X = ds e X`` is a normal form and the system becomes nondeterministic. Similarly with ``dk x y | x == y = e x`` and ``G = dk G G``, we should get ``e (e (e ...))`` as the unique result, but with a weak closure we don't. Another tricky system is ``c x | x == c x = e; b = c b`` - the obvious reduction is ``b = mu x. c x = e``, but this system has a hidden circularity of the form ``mu x. c x = e`` if ``mu x. c x = e``. So again based on this we would like a bi-infinite or T* closure. -Overall I think T* is the simplest and easiest to understand, so that's what I picked. +Overall, from these examples, it seems clear that allowing a reduction is better than forbidding it. Cycle condensation means that we would like to equate as many terms as possible to get large SCCs, and similarly a large reduction relation means there will be an escape from infinite regresses. Bi-infinite and T* seem equally simple to formalize since they are both single fixed points, so it seems T* wins because it's larger. Meaningless terms ================= @@ -129,6 +129,13 @@ A meaningless term set forms an easy set, :cite:`bucciarelliGraphEasySets2016` m With these reductions every term has a normal form.
Proof :cite:`kennawayInfinitaryLambdaCalculus1997`: A term t is either meaningless or not (ignoring reductions to ``Meaningless``). If it is meaningless, it reduces to the normal form ``Meaningless``. If it is not, then it can be reduced to a root-stable term ``s``. Repeating the construction recursively on the subterms of s at depth 1 constructs a reduction of t to a term which is stable at every depth, i.e. a normal form. +It is a bit tricky to come up with an example of a meaningless term, as the cycle condensation and infinitary rewriting make a lot of examples meaningful. For example, :cite:`klopInfinitaryNormalization2005` gives ``A(1)`` with the reduction rule ``A(x) = A(B(x)``. Without infinitary rewriting, the limit would not be in the reduction closure, therefore the reduction closure would consist only of partial reducts, each having a top-level redex, and ``A(1)`` would be root-active and meaningless. Similarly, without cycle condensation, there would be the reduction ``A(B(B(...))) -> A(B(B(...)))``, so the limit would be root-active and meaningless. But in our semantics, the limit ``A(B(B(...)))`` exists, and the cycle is condensed, therefore it is a normal form and ``A(1)`` is not meaningless. Similarly in :cite:`kennawayMeaninglessTermsRewriting1999` there are some examples: + +* ``Last(Ones)`` with the rules ``Last(Cons(x,y)) = Last(y)`` and ``Ones=Cons(1,Ones)``. This rewrites to ``Last([1,1,...])`` which then rewrites to every term following the reduction ``Last([1,1,...,x])=x``. So it is ambiguous, but not meaningless. +* ``A`` with ``A = B A; B x = x``. Again this rewrites to ``B (B (...))`` which rewrites to every term and is not meaningless. +* ``fix identity`` (``Y I``), where ``fix f = f (fix f); identity x = x`` - This rewrites to terms of the form ``I I ... (fix I)``, which again rewrites to every term. + +To be meaningless in our system, a term cannot be root-stable, and to avoid cycle condensation, it must cycle through an infinite non-repeating set of roots. So for example, ``1`` in a system like ``1 = 2; 2 = 3; 3 = 4; ...`` is meaningless; it is not affected by cycle condensation and has no limit points. But note how fragile this is; for example ``1`` in the system ``x=x+1`` reduces to the limit ``((...+1)+1)+1`` which most likely is not meaningless. Every TRS with unique normal forms (UN=) can be extended to a confluent TRS with the same set of normal forms by adding bottom terms and reductions to normal forms and bottoms that preserve the equivalence classes of terms. :cite:`middeldorpModularAspectsProperties1989` Meaningless terms don't accomplish this extension because a term ``1 amb meaningless`` can reduce to ``Meaningless`` instead of ``1`` hence breaking even UNR. @@ -165,121 +172,3 @@ Terese's example 4.11.5 that join equality is not confluent does not work becaus Overall strict equality is the most conservative (least accepting), and the one whose behavior seems easiest to understand. It does reduce the laziness of the language a bit but even Haskell's ``==`` function is strict. So we'll go with strict equality. There is some question about reducible expressions as patterns, e.g. ``a = b; f a@x x = x``. I think this can be handled separately from non-linear patterns. - -Confluence ----------- - -Confluence has gotten a lot of attention as well and has automated provers. Confluence implies UN→; it is equivalent if the TRS is weakly normalizing. 
And there is an extension theorem: Similarly a system can be shown to be UN= by presenting an extension of it that is confluent. :cite:`klopExtendedTermRewriting1991` So a UN= program is just a partially specified system. UN→ is a little more complex though. And the equivalence classes of terms are uncomputable in general so the extension is as well. - -Confluence avoids situations where a system may branch into two distinct diverging states. It makes finding a normalizing strategy much easier as the strategy only has to avoid getting stuck evaluating a term infinitely (using the same rule infinitely often), as opposed to UN→ where the strategy must avoid using the wrong reduction rule at every step. - -The Knuth-Bendix algorithm produces a confluent system from a set of non-oriented equations, but the rules in programs are oriented, so using this would be confusing. Not to mention that the algorithm fails often. So that's out. - -A necessary condition for confluence is weak/local confluence, i.e. each critical pair is convergent. But this is not sufficient. Newman's lemma is that a terminating locally confluent TRS is confluent. But termination is quite strong. A generalization is a critical pair system :cite:`hirokawaDecreasingDiagramsRelative2009` (also called decreasingly confluent): the system must be left-linear, locally confluent, and its critical pair steps must be *relatively terminating*, i.e. the relation 'arbitrary steps followed by a critical pair step followed by arbitrary steps' is terminating. Trivial critical pair steps can be excluded, hence this includes weakly orthogonal TRSs. For a terminating TRS the TRS syntactic equality notion is equivalent to strict equality, hence the system is left linear in the CTRS sense, hence why this includes Newman's lemma. - -We say → has random descent (RD), if for each R:a ↔∗b with b in normal form, all maximal reductions from a have length d(R) and end in b. Systems with random descent are confluent. - -Normalization -------------- - - -A hypernormalizing strategy is a strategy that is normalizing even if arbitrary reduction steps are taken before and after steps of the strategy. This allows the compiler to make optimizations without changing the behavior of the program. A hypernormalizing strategy allows aggressive optimizations and program transforms. - -There are also stronger properties than normalization. A Church-Rosser strategy is one with common reducts, i.e. there exist m and n, such that :math:`F^m(t)=F^n(u)` for every t and u equal via forward/backward evaluation. A normalizing strategy is Church-Rosser if the system is confluent and weakly normalizing (i.e. all objects have a normal form). In general a many-step CR strategy exists for effective ARS's, i.e. countable (in a computable fashion) and with a computable reduction relation. But the strategy is quite hard to compute, as it has to synchronize reducing subterms so that all components are reduced the same amount. And it's not clear that this synchronization offers anything to the programmer. - -Cofinal strategies are weaker than Church-Rosser but stronger than normalizing: for every term a, if a reduces in a finite number of steps to b, then there is an object c obtained by applying the strategy some number of times to a such that b reduces to c. For critical pair TRSs any "fair" strategy that ensures every redex is eventually contracted is cofinal. The cofinal property provides slick proofs - it ensures every redex not part of a cycle is contracted. 
But at runtime non-normalizing terms have indistinguishable behavior (infinite loop), hence this means the cofinal strategy is doing unnecessary work. - -There are also termination properties like strong convergence that ensure that for every term, there exists some number of reduction steps after which the head cannot be rewritten. -To ensure that term rewriting halts we probably also want a property like strong convergence, but this is a property of the rewriting strategy, not the TRS proper. - -Evaluation strategy -=================== - -For convergent (confluent and strongly normalizing) programs, such as the simply typed lambda calculus, all strategies are normalizing and the result is the same no matter how they are reduced. So the focus is on inferring convergence and doing reduction efficiently. "In the small" leftmost innermost ensures "complete development", i.e. a subterm is reduced completely before the outer term, hence we can compute the subterm fully and only store an optimized representation of the normal form. So we can compile to fast assembly like a state machine. "In the large" optimal reduction ensures the smallest number of steps so we can avoid duplicating work and performing unneeded work. - -But strongly normalizing implies not Turing complete, hence the termination verification will cause problems for complex programs. We need a fallback for these complex programs. Leftmost outermost reduction is the basis of lazy evaluation and is hypernormalizing for the lambda calculus. But for TRSs LO is only normalizing for left-normal TRSs, where variables do not precede function symbols in the left-hand sides of the rewrite rule. A better strategy is outermost fair (ensuring each outermost redex will eventually be evaluated - the simplest example is parallel outermost) - it's hypernormalizing for critical pair TRSs (decreasingly confluent TRSs), in particular weakly orthogonal TRSs. :cite:`hirokawaStrategiesDecreasinglyConfluent2011` So outermost fair seems a reasonable default, but there are non-orthogonal systems where it fails. The optimal reduction stuff is defined for match sequential TRSs but is a normalizing strategy that computes a result in the smallest number of reduction steps. - -We could do user-specified strategies like Stratego, but then how would we know that they're normalizing? - -There is also lenient evaluation, which evaluates all redexes in parallel except inside the arms of conditionals and inside lambdas, but it adds extra memory overhead for parameter passing. - -Now, one can argue about which computational strategy is better (time, space, parallelism, ...). -Stroscot's position: be accepting of programs and ensure a normalizing strategy. But after that, aim for the most efficient time/space behavior for strict programs. - -Q: can a normalizing strategy be as efficient as strict evaluation? -Profiling and other optimization tricks may help close the gap. - -So the way we handle cycles in the rewrite engine is something like: - -* detect a cyclic term via rule cycle detection or presence of an AC operator -* use specialized matching (e.g. AC matching or Tarjan SCC + memo hash table) to identify all reductions out of the SCC -* end with a condensed normal form if there is no reduction out of the SCC -* otherwise, pick a reduction out of the SCC - -Then this infinite term is computed in chunks and fed to the surrounding context on demand (laziness), ensuring that a finite normal form is reached if possible and otherwise implementing an infinite stream of commands.
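A toy version of this loop is sketched below; it is illustrative only, assuming a finite reachable set of reducts and a hypothetical one-step function ``step``, whereas the real engine would use memo hash tables and AC-aware matching:

.. code-block:: haskell

   import Data.Graph (flattenSCC, stronglyConnComp)
   import qualified Data.Set as Set

   -- All reducts reachable from t0, assuming this set is finite.
   reachable :: Ord t => (t -> [t]) -> t -> [t]
   reachable step t0 = go [t0] Set.empty
     where
       go [] _ = []
       go (t : ts) seen
         | t `Set.member` seen = go ts seen
         | otherwise           = t : go (step t ++ ts) (Set.insert t seen)

   -- The strongly connected component of t in its reachable reduction graph.
   sccOf :: Ord t => (t -> [t]) -> t -> [t]
   sccOf step t =
     let nodes = reachable step t
         comps = map flattenSCC (stronglyConnComp [(n, n, step n) | n <- nodes])
     in head ([c | c <- comps, t `elem` c] ++ [[t]])

   -- One chunk of evaluation: Nothing means t's SCC has no outgoing reduction,
   -- i.e. t is already a condensed normal form; otherwise pick an escape.
   stepOutOfSCC :: Ord t => (t -> [t]) -> t -> Maybe t
   stepOutOfSCC step t =
     let scc     = Set.fromList (sccOf step t)
         escapes = [u | s <- Set.toList scc, u <- step s, u `Set.notMember` scc]
     in case escapes of
          []      -> Nothing
          (u : _) -> Just u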
-
-Higher-order matching
----------------------
-
-If the substitution calculus is convergent, then terms can be represented by preterms in normal form.
-
-Handling lambdas in RHSs is fairly straightforward: just beta-reduce as much as possible when they are encountered. But in higher-order term rewriting systems the lambdas can show up on the left-hand side, in the pattern. The rewriting system is then defined modulo lambda reduction.
-
-Finding the contexts ``C`` is fairly straightforward: just enumerate all the subterms of ``t``. But solving the equation ``s = lθ`` is an instance of higher-order unification (specifically higher-order matching). The λ-superposition calculus relies on complete sets of unifiers (CSUs). The CSU for s and t, with respect to a set of variables V, denoted :math:`\mathrm{CSU}_V(s, t)`, is a set of unifiers such that for any unifier :math:`\varrho` of s and t, there exist :math:`\sigma \in \mathrm{CSU}_V(s, t)` and :math:`\theta` such that :math:`\varrho(X) = (\sigma \circ \theta)(X)` for all :math:`X \in V`. The set V is used to distinguish between important and auxiliary variables; we can normally leave it implicit.
-
-Higher-order matching is decidable for the simply typed lambda calculus. But the proof is of the form "the minimal solution is of size at most 2^2^2^2..., the number of 2's being proportional to the size of the problem". There are 3 transformations presented in the proof which reduce a larger solution to a smaller solution. These might be usable to prune the search tree. But at the end of the day it's mostly brute-force.
-
-The proof relies on some properties of the STLC, namely normalization and that terms have a defined eta-long form (canonical form).
-
-It is not clear if there is a way to do untyped higher-order matching for general lambda patterns.
-
-As a consequence of confluence, each rewrite step is composed of an expansion in the substitution calculus, a replacement by applying some rule, and a reduction in the substitution calculus, so it is M <<- C[l] and C[r] ->> N.
-
-If reduction does not end in a condensed normal form, then the sequence of terms must be infinitely expanding, in the sense that for every size s there is a point in the reduction after which terms are always at least size s. Otherwise, assuming a finite number of term symbols, there are only finitely many terms of size < s, so there would be a cycle in the reduction and reduction would end in a condensed normal form.
-
-A context is linear if every hole occurs exactly once.
-
-Verifying confluence
-====================
-
-We often want to prove confluence. There are some key algorithms:
-
-* The decreasing diagrams technique is a complete method for confluence on countable abstract rewrite systems.
-
-* Computing critical pairs (a small first-order sketch follows this list). A non-joinable critical pair means the system is not confluent. If all critical pairs are joinable the system is said to be locally confluent. An orthogonal system is one with no critical pairs, while a weakly orthogonal system is one with critical pairs that are trivially joinable. For an HORS there are more constraints to be orthogonal in addition to no critical pairs ("every set of redexes is pairwise simultaneous"). The substitution calculus must be complete, only needed for gluing, a descendant rewriting system, parametric, have head-defined rules, and be naturally closed under substitution. Parallel rewrite steps must be serializable and left-hand sides of rules must be linear.
-
-  V. van Oostrom. Developing developments. TCS, 175(1):159–181, 1997.
-  V. van Oostrom and F. van Raamsdonk.
Weak orthogonality implies confluence: the higher-order case. In Proc. 3rd LFCS, volume 813 of LNCS, pages 379–392, 1994.
-
-* Proving termination. The Knuth-Bendix criterion (Newman's lemma) says a terminating system is confluent iff it is locally confluent. Termination can be shown by exhibiting a well-founded ordering, using techniques such as recursive path orderings, dependency graph decomposition, and the subterm criterion.
-
-  WANDA has more advanced techniques. Cynthia Kop. Higher Order Termination. PhD thesis, Vrije Universiteit, Amsterdam, 2012.
-
-  TTT2 also has some good techniques.
-
-  Gramlich–Ohlebusch's criterion says that for an innermost-terminating TRS R with no innermost critical pairs, R is confluent if and only if all critical pairs are joinable by innermost reduction. There are innermost-terminating systems that aren't terminating, so this criterion can prove some systems confluent that Knuth-Bendix can't.
-
-* Decomposition: Several properties allow dividing the system into smaller, more tractable systems. First is modularity: a property is modular if the disjoint union of two systems with the property has the property. We also usually have the converse, that the disjoint union has the property only if the subsystems have the property.
-
-  * Weak normalization and consistency (w.r.t. equivalence) are modular for first-order systems.
-  * Left linearity, confluence, and unique normal forms (w.r.t. equivalence) are modular for semi-equational CTRSs.
-  * Confluence is modular for join and semi-equational CTRSs. In fact if the disjoint union is confluent then the component systems must be confluent.
-  * Confluence plus left linearity is modular for higher-order TRSs.
-  * Weak termination, weak innermost termination, and strong innermost termination are modular for CTRSs in combination with confluence or the property that there are no extra variables in the conditions.
-  * NF, unique normal forms with respect to reduction, and consistency with respect to reduction are modular in combination with left linearity. Consistency w.r.t. reduction means that there is no term reducing to two distinct variables; it is implied by the unique normal form property w.r.t. reduction, as variables are normal forms.
-  * Strong normalization plus consistency w.r.t. reduction plus left linearity is modular. This likely holds for CTRSs without extra variables as well.
-
-  Order-sorted decomposition uses persistence of confluence. If sorts can be assigned to all terms and rule variables such that no rule increases the sort, then confluence can be considered separately for each sort, and confluence as a whole follows from confluence on well-sorted terms.
-
-  Decreasing diagrams allow decomposing a left-linear TRS into duplicating and non-duplicating rules. The TRS is confluent if all critical peaks are decreasing with respect to a rule labeling and the duplicating rules are terminating relative to the non-duplicating rules.
-
-  Layer-preserving decomposition decomposes TRSs into minimal pieces such that, taking pieces pairwise, they form layer-preserving combinations, i.e. rules in one piece operate only on terms of that piece. It is used in CSI.
-
-* J. Nagele, B. Felgenhauer, and A. Middeldorp. Improving automatic confluence analysis of rewrite systems by redundant rules. In Proc. 26th RTA, volume 36 of LIPIcs, pages 257–268, 2015.
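To make the critical pair bullet above concrete, here is a small sketch for string rewriting systems (a toy special case of first-order TRSs, not what CSI or the other cited tools do). It only enumerates suffix/prefix overlaps, ignores the case where one left-hand side contains another, and approximates joinability by a bounded search, so it only witnesses local confluence up to the bound; the ``Rule`` type and function names are hypothetical:

.. code-block:: haskell

   import           Data.List (inits, isPrefixOf, tails)
   import qualified Data.Set  as Set

   -- A string rewriting rule l -> r.
   type Rule = (String, String)

   -- Overlaps: a nonempty suffix u of l1 is a prefix of l2, i.e. l1 = x ++ u
   -- and l2 = u ++ y, so the word x ++ u ++ y rewrites both to r1 ++ y and
   -- to x ++ r2. (Trivial self-overlaps are included for simplicity.)
   criticalPairs :: [Rule] -> [(String, String)]
   criticalPairs rules =
     [ (r1 ++ y, x ++ r2)
     | (l1, r1) <- rules
     , (l2, r2) <- rules
     , (x, u)   <- zip (inits l1) (tails l1)
     , not (null u)
     , u `isPrefixOf` l2
     , let y = drop (length u) l2
     ]

   -- All one-step rewrites of a word.
   step :: [Rule] -> String -> [String]
   step rules s =
     [ pre ++ r ++ drop (length l) post
     | (l, r)      <- rules
     , (pre, post) <- zip (inits s) (tails s)
     , l `isPrefixOf` post
     ]

   -- Reducts reachable within n steps.
   reducts :: [Rule] -> Int -> String -> Set.Set String
   reducts _     0 s = Set.singleton s
   reducts rules n s =
     Set.insert s (Set.unions [reducts rules (n - 1) t | t <- step rules s])

   -- A pair is joinable (within the bound) if both sides share a reduct;
   -- if every critical pair is joinable, the system is locally confluent.
   joinable :: [Rule] -> Int -> (String, String) -> Bool
   joinable rules n (a, b) =
     not (Set.null (reducts rules n a `Set.intersection` reducts rules n b))

For example, ``all (joinable rules 5) (criticalPairs rules)`` is a cheap, bounded (and therefore incomplete) local confluence check for a small rule set.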
- diff --git a/docs/Commentary/Language/Types.rst b/docs/Commentary/Language/Types.rst index 79e2773..94b1f08 100644 --- a/docs/Commentary/Language/Types.rst +++ b/docs/Commentary/Language/Types.rst @@ -109,7 +109,7 @@ Type inference is often used with the idea that its failure means there is a typ Type inference means many signatures can be omitted, like unityping with implicitly assigning the universal type. But type-inference algorithms are complex- they can fail, and even if they succeed their results are not obvious to humans. Unityping means the semantics doesn't depend on types at all, meaning one less thing to think about, hence making programming easier. Type inference allows writing some programs without thinking about types, but there is always the chance the program is untypeable - and there are many examples of untypeable programs, e.g. ``\z. (z 1, z "x")`` for H-M. Cliff Click's system can type this but fails on `a more complex program `__ that runs fine in a unityped system. The errors on these untypeable programs will always be verbose (because of the inferred type) and confusing (because the programmer was not thinking about the inference algorithm). -Types can used to describe the ABI, :cite:`jonesTypesAreCalling2009` hence type inference is a form of ABI selection. But the ABI selection is based on performance. Furthermore the ABI types can be conditioned on state, and there is a fallback catch-all format for hard cases. So overall ABI selection uses a separate notion of type based on representation, with no principality requirement like for normal type inference. +Types can used to describe the ABI, :cite:`bolingbrokeTypesAreCalling2009` hence type inference is a form of ABI selection. But the ABI selection is based on performance. Furthermore the ABI types can be conditioned on state, and there is a fallback catch-all format for hard cases. So overall ABI selection uses a separate notion of type based on representation, with no principality requirement like for normal type inference. At the REPL systems such Haskell provide a command to display the inferred type of an expression, and similarly Haddock can show pretty-printed inferred type signatures. But this doesn't extend well to complex type systems: diff --git a/docs/Commentary/Language/Values.rst b/docs/Commentary/Language/Values.rst index 8b74a5c..f38a143 100644 --- a/docs/Commentary/Language/Values.rst +++ b/docs/Commentary/Language/Values.rst @@ -11,7 +11,7 @@ A value represent an element within the `universe of discourse `__). In terms of memory management, values are just data - they can be copied freely, and discarded once they will no longer be used. @@ -65,7 +65,7 @@ Binders/Logical proofs Conceptually a binder is a structure with some number of "slots" and numbered references to these slots in the body (a value but with "holes" for the slots). In practice, the binder is specified using variables (symbols), and these variable names are preserved for debugging purposes. Nonetheless, renaming the variables should not change the meaning of the binder (alpha-equivalence). The easiest way to ensure this is to use a nameless graph representation but to preserve variable names at the site of the binder as metadata. For example, a lambda expression ``\x. x x`` is really more like ``(\. 1 1, x)``. -The sequent calculus is used to represent binders via derivation trees built up using the various rules. We use the logic described in :ref:`Logic`. 
+The sequent calculus is used to represent binders via derivation trees built up using the various rules. We use the logic described in :ref:`Reference/Logic:Logic`. * Jumbo: The Jumbo break rule contains a set of values and a function from those values to a derivation tree. It also specifies a target in each derivation for each side formula. The Jumbo build rule takes a value and a list of left/right derivation trees and combines them, specifying a target for each tree. * Exponentials: Promotion specifies a target for each formula in the sequent. Dereliction has one target, Weakening has no target, contraction has n (n>2) targets. diff --git a/docs/Commentary/Libraries/Compiler-Library.rst b/docs/Commentary/Libraries/Compiler-Library.rst index bcf1466..b4b0406 100644 --- a/docs/Commentary/Libraries/Compiler-Library.rst +++ b/docs/Commentary/Libraries/Compiler-Library.rst @@ -169,6 +169,8 @@ A decimal number consists of a sign and a list of digits, a decimal marker, and Representation -------------- +Normal number literals are simply terms, but a real number is a function ``ω+1 -> {-1,+1}`` excluding certain trivial sequences (c.f. `surreal numbers `__ S_ω "the field of real numbers" and sign expansion representation), and similarly p-adic numbers need special handling as the different metric completion of the rational numbers. + 3-3B. Range, precision, and scale specifications shall be required for each component of appropriate numeric type. There are various ways to represent these numbers. Naturals are generally represented as a list of digits in some base (a decimal). Integers are naturals with a sign. Rationals may be written as a (possibly improper) fraction of integers, a terminating or infinitely repeating decimal, a "mixed number" an integer and a proper fraction, or a floating point of a decimal times an exponent 1/2^n. For the complete fields such as reals and p-adics there are even more representations: @@ -918,11 +920,6 @@ timeouts - is it better to just churn through a computation and let the user do Infinite values separate control and data - first you construct an infinite value (data), then you decide what to do with it (control). In many languages, if you wanted to generate a list of 10 primes, and a list of 100 primes, you'd need to write a function to generate a finite list, and call it twice with a different amount of stuff to generate. But with more complicated usages of infinite data structures, it's not that simple - for example, if you want the first ten even numbers, with an infinite list it's just taking elements from the filtered list, but without, you would have to back-propagate the demand to generate 1 through 20. In general there is no clear way to do it up front by allocating a finite list. -Numbers -======= - -Normal numbers are simply terms, but a real number is a function ``ω+1 -> {-1,+1}`` excluding certain trivial sequences (c.f. `surreal numbers `__ S_ω "the field of real numbers" and sign expansion representation), and similarly p-adic numbers need special handling as the different metric completion of the rational numbers. - Modules ======= @@ -956,7 +953,7 @@ Testing Per `Borretti `__, the process for writing and running a unit test in Rust, from scratch, is: -:: +.. 
code-block:: none $ cargo new hello-world $ cd hello-world diff --git a/docs/Commentary/Libraries/Parsing.rst b/docs/Commentary/Libraries/Parsing.rst index 8801010..290990d 100644 --- a/docs/Commentary/Libraries/Parsing.rst +++ b/docs/Commentary/Libraries/Parsing.rst @@ -40,7 +40,7 @@ Per :cite:`mightParsingDerivativesFunctional2011`, a parser combinator consumes Then we have derived combinators: -* ``p - q = p <&&> not q``, ``p {reject} q`` - set subtraction, relative complement, reject production. Acts as p but fails if q would succeed. :cite:`brachthauserParsingFirstClassDerivatives` has ``p <|> not (p <> always) <&&> q``. This maps 1-1 to the "prefer literals" implicit lexer rule, via the transform in :cite:`visserSyntaxDefinitionLanguage1997` section 3.6. +* ``p - q = p <&&> not q``, ``p {reject} q`` - set subtraction, relative complement, reject production. Acts as p but fails if q would succeed. :cite:`brachthauserParsingFirstclassDerivatives` has ``p <|> not (p <> always) <&&> q``. This maps 1-1 to the "prefer literals" implicit lexer rule, via the transform in :cite:`visserSyntaxDefinitionLanguage1997` section 3.6. * ``p {prefer} q = p <|> (q - p)``, ``q {avoid} p`` - Biased (preferential) choice, similar to that used in PEG's or backtracking parsers. If ``p`` matches, returns only the parses from ``p``. Otherwise, acts as ``q``. * ``always = not mzero`` - always succeeds, consumes an arbitrary amount of input * ``x <&&> y = lookAhead x <> y, and x y`` - conjunction / set intersection. must match both x and y, returns parse tree of ``y``. @@ -111,7 +111,7 @@ Character/byte/token combinators: "Selective" combinators (Mokhov et al. 2019) decide which branch to take based on the result of another parser, somewhere between monads and applicatives. For example ``branch either left right`` parses ``either``, then, if successful and ``Left`` is returned, tries ``left`, otherwise, if ``Right`` is produced, the parser ``right`` is executed. This can be mimicked without the dependent behavior by narrowing the productions, ``eitherL left <|> eitherR right`` where ``eitherL`` is the language of ``either`` that returns ``Left`` and similarly for ``eitherR``. I don't really like having to compute the set of all strings for which a function returns a given value, so it seems good to avoid this. But maybe it can be implemented easily. -Per :cite:`brachthauserParsingFirstClassDerivatives` it is worth exposing the derivative function as a parser combinator ``feed p c = p << c``. It's not clear though if this functionality is useful without being able to do monadic bind and write something like ``char >>= \c -> feed p c``. +Per :cite:`brachthauserParsingFirstclassDerivatives` it is worth exposing the derivative function as a parser combinator ``feed p c = p << c``. It's not clear though if this functionality is useful without being able to do monadic bind and write something like ``char >>= \c -> feed p c``. Layout: per :cite:`erdwegLayoutsensitiveGeneralizedParsing2013`, can be implemented with "layout constraints", specialized semantic predicates. The constraints examine the starting/ending line and column of the first/last/leftmost of middle lines/rightmost of middle lines characters of each direct sub-tree of the parse. Then they can express boolean formulas of comparison constraints (equal, less than, greater than), e.g. the offside rule is ``1.first.startCol < 1.left.startCol``. 
:cite:`adamsPrincipledParsingIndentationsensitive2013` says it can be done in a more principled manner by annotating each production with its column and using constraints that the sub-production must be at column 0 or must be equal, greater than, or greater than or each to to the column of the start of of the production. :cite:`amorimDeclarativeSpecificationIndentation2018` specifies some higher-level constaints like ``align`` that can be used for both parsing (translating to column-based layout constraints) and for pretty-printing, and gives the full algorithm for incrementally constructing parse trees with column information. @@ -215,7 +215,9 @@ Intersection: closed for recursively enumerable languages it's easier and faster to match in a byte oriented way than to decode utf-8 in a preprocessing step. It works because there is only one representation of each character as a UTF-8 byte sequence. -Normalizing/compacting grammars is important for equality comparison and efficiency:: +Normalizing/compacting grammars is important for equality comparison and efficiency + +.. code-block:: none (r∗)∗ ≈ r∗ ∅∗ ≈ ε @@ -244,7 +246,7 @@ Normalizing/compacting grammars is important for equality comparison and efficie A nullable expression is one that matches the empty string. Nullability is important to know, as the derivative of a concatenation (defined next) depends on whether the first expression is nullable. Recursion is handled via the least fixed point of the equations (e.g., ``L = L & L`` is not nullable). -:: +.. code-block:: none ν(∅) = F ν(ε) = T @@ -257,7 +259,9 @@ A nullable expression is one that matches the empty string. Nullability is impor ν(r & s) = ν(r) && ν(s) ν(r + s) = ν(r) || ν(s) -The derivative of an grammar expression E with respect to a character (or set of strings) C is a grammar expression d_C E such that its language is { s : exists c in C. c s in L(E) }. I.e., you take the strings in L(E) that begin with C, and then you chop off the C. For example the derivative of ``ab|ac|de`` w.r.t. ``a`` is ``b|c``. Some derivatives are as follows:: +The derivative of an grammar expression E with respect to a character (or set of strings) C is a grammar expression d_C E such that its language is { s : exists c in C. c s in L(E) }. I.e., you take the strings in L(E) that begin with C, and then you chop off the C. For example the derivative of ``ab|ac|de`` w.r.t. ``a`` is ``b|c``. Some derivatives are as follows: + +.. code-block:: none ∂a ∅ = ∅ ∂a ε = ∅ @@ -276,7 +280,9 @@ With this we can already implement an interpreter-style recognizer, by computing To compile a derivative parser to a DFA, we do a traversal of the state graph of grammar expressions, e.g. depth-first. Starting at the original expression ``E``, we compute successive derivatives with respect to all possible characters, normalize the resulting expressions, and minimize the resulting DFA state graph by interning equivalent grammar expressions. The nullable expressions are accepting states. The textbook approach to compiling regular expressions constructs an NFA, constructs the DFA from that, and then minimizes the DFA. But derivative parsing allows you to avoid the NFA entirely, and produces a result much closer to the minimal DFA right off the bat, saving a lot of work. -An important speedup of minimization is identifying partitions of state transitions w.r.t. byte values. Basically, rather than computing the derivatives w.r.t. 
0, 1, 2, up to 255 individually and checking for equality afterwards, you can determine from the structure of the expression that it can transition to up to n other states and that each of some set of byte values will transition to a given state. This can be represented by n bitsets of length 256 for n possible next states, with the AND of any two bitsets 0 and the OR of all of them the bitset of all 1's (basically redgrep's representation, although it specifically inverts the first one to model it as a "default" case), or as a packed array-table with ceil(log_2(n)) bits for each byte value, or maybe with ranges if the states are generally clustered in contiguous ranges. The rules for partitions are as follows:: +An important speedup of minimization is identifying partitions of state transitions w.r.t. byte values. Basically, rather than computing the derivatives w.r.t. 0, 1, 2, up to 255 individually and checking for equality afterwards, you can determine from the structure of the expression that it can transition to up to n other states and that each of some set of byte values will transition to a given state. This can be represented by n bitsets of length 256 for n possible next states, with the AND of any two bitsets 0 and the OR of all of them the bitset of all 1's (basically redgrep's representation, although it specifically inverts the first one to model it as a "default" case), or as a packed array-table with ceil(log_2(n)) bits for each byte value, or maybe with ranges if the states are generally clustered in contiguous ranges. The rules for partitions are as follows: + +.. code-block:: none C(∅) = {Σ} C(ε) = {Σ} diff --git a/docs/Commentary/Libraries/Syntax.rst b/docs/Commentary/Libraries/Syntax.rst index bc7252a..0099c00 100644 --- a/docs/Commentary/Libraries/Syntax.rst +++ b/docs/Commentary/Libraries/Syntax.rst @@ -31,9 +31,7 @@ Another set of principles is Constantine and Lockwood's principles of usage-cent * The simplicity principle: I would rephrase this as "common tasks should be easy and clear, difficult tasks should not be obscured by the syntax". This is an overall principle for Stroscot but I guess it needs special attention in the syntax. * The visibility principle: This can go a few different ways. One is that a function should generally be at most one screenful of code. Another is that IDE navigation support is a must. Another is that if the programmer has a mockup, design document, or reference paper, then this should be able to be included in the code somehow. You can pretty much do this with comments (VSCode even hyperlinks them) but maybe there is a better solution. On the flip side, it also means syntax should not be noisy and it should always be clear how to write a program in the best style. * The feedback principle: A program by itself is inert, no feedback. This principle therefore applies to the compiler: compilation progress, execution progress, program state, debugger state, warnings and errors. I am not sure it applies to syntax except to ensure that each syntactic construct is localized so it can give a good underline error in VSCode. -* The tolerance principle: This is sort of Stroscot's principle of no fatal compiler errors. Another interpretation is to tolerate syntax errors, e.g. using a large language model that will identify (some approximation of) intent of the program and allow varied syntaxes and input notation. 
- - The design should be flexible and tolerant, reducing the cost of mistakes and misuse while also preventing errors +* The tolerance principle: This is sort of Stroscot's principle of no fatal compiler errors. Another interpretation is to tolerate syntax errors, e.g. using a large language model that will identify (some approximation of) intent of the program and allow varied syntaxes and input notation. "The design should be flexible and tolerant, reducing the cost of mistakes and misuse while also preventing errors" * The reuse principle: The design should reuse internal and external components and behaviors, maintaining consistency with purpose rather than merely arbitrary consistency, thus reducing the need for users to rethink and remember. Going through these, they are a bit hard to apply. Per `Wikipedia `__ there is the law of least astonishment, which in 1972 read as follows: @@ -212,36 +210,6 @@ TODO: test it out by modifying https://github.com/isral/elastic_tabstops_mono.vs The advantage of tablike spaces over elastic tabstops is that the underlying text file looks fine when viewed in a monospaced font. So it's only the visual presentation that changes, hence it can be used on a team. -DSLs ----- - -Stroscot aims to be a "pluggable" language, where you can write syntax, type checking, etc. for a DSL and have it embed seamlessly into the main language. This may be possible due to macros, which allows pattern-matching the AST of any expression, like ``javascript (1 + "abc" { 234 })``, or may need special handling in the parser to also do character-level embedding or seamless integration of parsers / escape sequences. - -Example DSLs: - -* SQL - -:: - - run_sql_statement { SELECT ... } - -* Assembly and C++. - -:: - - result = asm { sumsq (toregister x), (toregister y) } - my_func = load("foo.cpp").lookup("my_func") - -* TeX / mathematical expressions: - -:: - - tex { result = ax^4+cx^2 } - math { beta = phi lambda } - - -It is not just fancy syntax. DSLs that use vanilla syntax are useful for staging computations, like passes that fuse multiple operations such as expmod and accuracy optimizers that figure out the best way to stage a computation. - Typeable -------- @@ -360,12 +328,12 @@ There are several reasons to allow the use of Unicode mathematical symbols in St Stroscot's user-defined syntax is flexible enough to create symbolic operators if desired. But compare this example of computing the prime numbers less than ``R`` in APL vs. a Haskell lexical+prefix style: -:: +.. code-block:: apl T←1↓⍳R (~T∊T∘.×T)/T -:: +.. code-block:: haskell T = drop 1 (count R) scan (not (isElementOf T (tie 0 (*) T T))) T @@ -397,7 +365,9 @@ NFKC NFKC is often brought up as an alternative/extension of NFC. For example `Python `__ uses NKFC for identifiers, and Go similarly has a `proposal `__ to use NFKC. -There are two choices for using NFKC, requiring input to be NFKC or applying NFKC to the input. Python only applies NFKC, so `the following `__ is a valid Python program:: +There are two choices for using NFKC, requiring input to be NFKC or applying NFKC to the input. Python only applies NFKC, so `the following `__ is a valid Python program + +.. code-block:: python def 𝚑𝓮𝖑𝒍𝑜(): try: @@ -420,7 +390,9 @@ There are two choices for using NFKC, requiring input to be NFKC or applying NFK 𝒉eℓˡ𝗈() -If we required the input to be in NFKC it would have to look like:: +If we required the input to be in NFKC it would have to look like: + +.. 
code-block:: python def hello(): try: @@ -443,7 +415,7 @@ If we required the input to be in NFKC it would have to look like:: Meanwhile with NFC the variable names would have to be consistent and built-in names could not be transformed, so a program could look like: -:: +.. code-block:: python def 𝚑𝓮𝖑𝒍𝑜(): try: @@ -541,7 +513,7 @@ TR31 where these classes are defined as follows - :: + .. code-block:: none $T = \p{Joining_Type=Transparent} $RJ = [\p{Joining_Type=Dual_Joining}\p{Joining_Type=Right_Joining}] @@ -624,6 +596,104 @@ One feature of Atomo I liked and thought was cool was that all the syntax was de Assignment ========== +Steelman 5A. It shall be possible to declare constants of any type. Such constants shall include both those whose values-are determined during translation and those whose value cannot be determined until allocation. Programs may not assign to constants. + +Steelman 5D. Procedures, functions, types, labels, exception situations, and statements shall not be assignable to variables, be computable as values of expressions, or be usable as nongeneric parameters to procedures or functions. + +Stroscot does allow sets to be assigned to variables, also exceptions and labels and statements (blocks). Procedures and functions are a bit strange; you can store the symbol of the procedure/function, and you can store a lambda, but the rewrite rule itself is stored in a module and you can't really access it alone except as an AST. + + +`Discussion `__. Stroscot's assignment syntax is complicated because I want separate initialization (declarative assignment) and reassignment (mutating assignment). + +.. list-table:: Comparison + :header-rows: 1 + + * - Language + - Initialization + - Reassignment + - Equality + * - Mathematics + - ``=`` + - ``⟹`` or ``=>`` + - ``=`` + * - Algol + - ``:=`` + - ``:=`` + - ``=`` + * - Fortran + - ``=`` + - ``=`` + - ``.EQ.`` + * - PL/I + - ``=`` + - ``=`` + - ``=`` + * - BCPL + - ``=`` + - ``:=`` + - ``=`` + * - B + - ``=`` + - ``:=`` + - ``==`` + * - C + - ``=`` + - ``=`` + - ``==`` + * - APL + - ``←`` + - ``←`` + - ``=`` + * - R + - ``<-`` + - ``<-`` + - ``==`` + * - J + - ``=:`` + - ``=:`` + - ``=`` + +Looking at precedents, the only languages with distinct initialization and reassignment are B and BCPL, so reassignment should definitely be ``:=``. Then we can either follow mathematical convention and PL/I in making initialization and comparison use the same symbol, or simplify parsing by making equality ``==``. Quorum uses the same symbol and apparently this is what novices expect. :cite:`stefikEmpiricalInvestigationProgramming2013` + +Chained assignment +------------------ + +Chained assignment is an expression like ``w = x = y = z``. The value of ``z`` is assigned to multiple variables ``w``, ``x``, and ``y``. The `literature `__ classifies this as "syntactic sugar", so handling it in the parser like Python seems the reasonable solution - C's "the assignment returns the lvalue" semantics seems contrived. + +The evaluation strategy differs between languages. For simple chained assignments, like initializing multiple variables, the evaluation strategy does not matter, but if the targets (l-values) in the assignment are connected in some way, the evaluation strategy affects the result. Here C's RTL semantics makes more sense and seems more useful than `Python's LTR `__ semantics. So a chain ``a := b := 2`` should expand to ``b := 2; a := b`` rather than ``t = 2; a := t; b := t`` . 
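A minimal sketch of this right-to-left expansion, using a hypothetical toy AST rather than Stroscot's actual parser types:

.. code-block:: haskell

   -- Hypothetical toy AST, for illustration only.
   data Expr = Lit Int | Var String deriving (Eq, Show)
   data Stmt = Assign String Expr   deriving (Eq, Show)   -- x := e

   -- Expand `v1 := v2 := ... := vn := e` right to left: the last variable
   -- receives the expression, and every earlier variable receives the
   -- variable to its right, so the expression is evaluated exactly once.
   desugarChain :: [String] -> Expr -> [Stmt]
   desugarChain vars e = zipWith Assign rvars (e : map Var rvars)
     where rvars = reverse vars

   -- desugarChain ["a", "b"] (Lit 2)
   --   == [Assign "b" (Lit 2), Assign "a" (Var "b")]   -- i.e. b := 2; a := b

Because the right-hand side appears only once, in the last assignment, any I/O in it runs once, which matters for the aliasing discussion below.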
+ +Chained update with ``:=``, like ``a := b := 2``, seems the most useful to shorten some assignments. Chained ``a = b = 2`` with value semantics doesn't really seem that useful when you could just replace ``a`` with ``b`` in the rest of the expression and save yourself an identifier. Also it conflicts with using ``=`` for comparison, because it can be interpreted as ``a = (b == 2)``. + +There is an issue with running I/O multiple times. For example if you need multiple variables with the same value then you would write ``[a,b,c] = replicateM 3 (ref 0)`` rather than using a chain, because a chain would alias to the same variable. Python already has this problem with aliasing for ``a = b = []``, because ``[]`` is mutable, but in Stroscot ``[]`` is pure so this is fine. + +Embedded assignment +------------------- + +This embeds assignments in expressions, like + +:: + + a = (b = 1) + (c = 2) + +Clearly it conflicts with ``=`` as comparison. + +But for chained update it is unambiguous and returning the value would be possible: + +:: + + a = (b := 1) + (c := 2) + +But then statements like + +:: + + b := 1 + +would have an unused return value. Maybe this value could be marked as optional somehow. + +Binding ambiguity +----------------- + As a syntax ambiguity, there are two different interpretations of assignment, pattern binding and clause definition. The difference: :: @@ -681,7 +751,7 @@ Haskell has a division between constructors and functions: * identifiers starting with lowercase letters are functions, and can only be used with function bindings. * identifiers starting with uppercase letters are constructors, and assignments of the form ``X a b = ...`` are pattern bindings. -This rule reduces maintainability. If the representation is changed there is no way to replace the dumb constructor with a smart constructor. So instead libraries are littered with boilerplate pseudo-constructors like ``mkThing = Thing`` to get around this syntactic restriction. In fact in :cite:`kahrsNonOmegaOverlappingTRSsAre2016` there is a boilerplate trick to turn any TRS into a constructor TRS, by duplicating ``foo`` into a constructor ``Foo`` and a function ``foo``, converting subterms of the original rules to match on constructors, and adding rules that turn stuck patterns into constructors. For example ``k x y = x; s x y z = (x z) (y z)`` turns into: +This rule reduces maintainability. If the representation is changed there is no way to replace the dumb constructor with a smart constructor. So instead libraries are littered with boilerplate pseudo-constructors like ``mkThing = Thing`` to get around this syntactic restriction. In fact in :cite:`kahrsNonomegaoverlappingTRSsAre2016` there is a boilerplate trick to turn any TRS into a constructor TRS, by duplicating ``foo`` into a constructor ``Foo`` and a function ``foo``, converting subterms of the original rules to match on constructors, and adding rules that turn stuck patterns into constructors. For example ``k x y = x; s x y z = (x z) (y z)`` turns into: :: @@ -749,6 +819,11 @@ There is also the question of how module declaration work - technically these sh Type declarations ================= +:: + + a = 2 : s8 + a = s8 2 + ``a = 2 : s8`` and ``a = s8 2`` seem more logical compared to other choices such as ``a : s8 = 2`` (Swift,Jai - hard to find the = with long types) or ``s8 a = 2`` (C,Rust - overlaps with function definition). The name is simply a syntactic handle to refer to the value; it doesn't have an innate type. 
In contrast the representation of the value must be specified to compile the program. The second syntax ``s8 2`` is similar to assembler syntax such as ``dword 0``. `This `__ says name should be ahead of type annotation, which only ``s8 a = 2`` breaks. The consistency stuff is not relevant. @@ -773,6 +848,17 @@ This might seem overly complicated, but it's based on Zarf's `rule-based program Comments ======== +Steelman 2I "The language shall permit comments that are introduced by a special (one or two character) symbol and terminated by the next line boundary of the source program." This is just the simplest EOL comment, but there are other types. + +:: + + // comment + /* multiline + comment */ + (* nesting (* comment *) *) + if(false) { code_comment - lexed but not parsed except for start/end } + #! shebang at beginning of file + Comments allow writing documentation inline with the code. This speeds up development by keeping all the information in one file and avoiding having to jump around. It also encourages a standardized documentation format. Tool support can be incomplete because there is a lot of freedom in comments. People can put comments in random places and they can be attached to whatever and indented strangely. With such freedom the reformatter will likely mangle some comments, but probably people will just learn not to do that. @@ -840,7 +926,7 @@ Indentation The tabs vs. spaces debate is still going. So let's make some people unhappy by baking the decision into the default syntax. * `Pike `__ says tabs allow choosing 2,4,8 spaces. But this flexibility means linebreaking suffers. For example, assume 100 character lines. Then someone with a 2-space tab and an 8 tab indent can fit 84 characters of code, but someone with an 8-space tab will see that 84 characters of code as a 148 character line, 150% of a line and needing a linebreak. It's better that everyone sees pretty much the same thing. Linus Torvalds `says `__ tabs are 8 spaces and not adjustable. Also `he says `__ the line-limit argument is invalid because 3 levels of indentation suffices, but deep indentation often comes up with nested literal data. Another point against Pike is that browsers offer no means to change the width of tabs, so this customization is incomplete - using spaces will at least ensure the display is consistent with the editor. -* Style guides for large companies/projects all agree on "no tabs" (e.g. `this `__) +* Style guides for large companies/projects all agree on "no tabs" (e.g. `this `__) * `GitHub stats `__ show spaces winning in the majority of languages * The `2017 SO survey `__ showed spaces make 8.6% more salary * "Tabs + spaces" still has the issues with resizing tabs, and more because the hardcoded spaces may be larger than the tabs. For example resizing an 8-space tab plus 4 spaces to a 2-space tab plus spaces will break. And it is even less common. @@ -1213,104 +1299,6 @@ Pattern matching / conditionals The condition can be split between a common discriminator and individual cases. This requires doing away with mandatory parentheses around the conditions. This strongly suggests using a keyword (then) to introduce branches, instead of using curly braces, based on readability considerations. -Assignment -========== - -Steelman 5A. It shall be possible to declare constants of any type. Such constants shall include both those whose values-are determined during translation and those whose value cannot be determined until allocation. Programs may not assign to constants. - -Steelman 5D. 
Procedures, functions, types, labels, exception situations, and statements shall not be assignable to variables, be computable as values of expressions, or be usable as nongeneric parameters to procedures or functions. - -Stroscot does allow sets to be assigned to variables, also exceptions and labels and statements (blocks). Procedures and functions are a bit strange; you can store the symbol of the procedure/function, and you can store a lambda, but the rewrite rule itself is stored in a module and you can't really access it alone except as an AST. - - -`Discussion `__. Stroscot's assignment syntax is complicated because I want separate initialization (declarative assignment) and reassignment (mutating assignment). - -.. list-table:: Comparison - :header-rows: 1 - - * - Language - - Initialization - - Reassignment - - Equality - * - Mathematics - - ``=`` - - ``⟹`` or ``=>`` - - ``=`` - * - Algol - - ``:=`` - - ``:=`` - - ``=`` - * - Fortran - - ``=`` - - ``=`` - - ``.EQ.`` - * - PL/I - - ``=`` - - ``=`` - - ``=`` - * - BCPL - - ``=`` - - ``:=`` - - ``=`` - * - B - - ``=`` - - ``:=`` - - ``==`` - * - C - - ``=`` - - ``=`` - - ``==`` - * - APL - - ``←`` - - ``←`` - - ``=`` - * - R - - ``<-`` - - ``<-`` - - ``==`` - * - J - - ``=:`` - - ``=:`` - - ``=`` - -Looking at precedents, the only languages with distinct initialization and reassignment are B and BCPL, so reassignment should definitely be ``:=``. Then we can either follow mathematical convention and PL/I in making initialization and comparison use the same symbol, or simplify parsing by making equality ``==``. Quorum uses the same symbol and apparently this is what novices expect. :cite:`stefikEmpiricalInvestigationProgramming2013` - -Chained assignment ------------------- - -Chained assignment is an expression like ``w = x = y = z``. The value of ``z`` is assigned to multiple variables ``w``, ``x``, and ``y``. The `literature `__ classifies this as "syntactic sugar", so handling it in the parser like Python seems the reasonable solution - C's "the assignment returns the lvalue" semantics seems contrived. - -The evaluation strategy differs between languages. For simple chained assignments, like initializing multiple variables, the evaluation strategy does not matter, but if the targets (l-values) in the assignment are connected in some way, the evaluation strategy affects the result. Here C's RTL semantics makes more sense and seems more useful than `Python's LTR ` semantics. So a chain ``a := b := 2`` should expand to ``b := 2; a := b`` rather than ``t = 2; a := t; b := t`` . - -Chained update with ``:=``, like ``a := b := 2``, seems the most useful to shorten some assignments. Chained ``a = b = 2`` with value semantics doesn't really seem that useful when you could just replace ``a`` with ``b`` in the rest of the expression and save yourself an identifier. Also it conflicts with using ``=`` for comparison, because it can be interpreted as ``a = (b == 2)``. - -There is an issue with running I/O multiple times. For example if you need multiple variables with the same value then you would write ``[a,b,c] = replicateM 3 (ref 0)`` rather than using a chain, because a chain would alias to the same variable. Python already has this problem with aliasing for ``a = b = []``, because ``[]`` is mutable, but in Stroscot ``[]`` is pure so this is fine. - -Embedded assignment -------------------- - -This embeds assignments in expressions, like - -:: - - a = (b = 1) + (c = 2) - -Clearly it conflicts with ``=`` as comparison. 
- -But for chained update it is unambiguous and returning the value would be possible: - -:: - - a = (b := 1) + (c := 2) - -But then statements like - -:: - - b := 1 - -would have an unused return value. Maybe this value could be marked as optional somehow. - Conditionals ============ @@ -1464,6 +1452,33 @@ variables, simple control flow are expressed using a light-weight notation (shor DSLs ==== +Stroscot aims to be a "pluggable" language, where you can write syntax, type checking, etc. for a DSL and have it embed seamlessly into the main language. This may be possible due to macros, which allows pattern-matching the AST of any expression, like ``javascript (1 + "abc" { 234 })``, or may need special handling in the parser to also do character-level embedding or seamless integration of parsers / escape sequences. + +Example DSLs: + +* SQL + +:: + + run_sql_statement { SELECT ... } + +* Assembly and C++. + +:: + + result = asm { sumsq (toregister x), (toregister y) } + my_func = load("foo.cpp").lookup("my_func") + +* TeX / mathematical expressions: + +:: + + tex { result = ax^4+cx^2 } + math { beta = phi lambda } + + +It is not just fancy syntax. DSLs that use vanilla syntax are useful for staging computations, like passes that fuse multiple operations such as expmod and accuracy optimizers that figure out the best way to stage a computation. + Scala and JS have support for native XML literals. Scala had one syntax in 2 and a new syntax in 3. Similarly JS had E4X, but client-side E4X is dead today, replaced by server-side React JSX which is honestly pretty similar. More recently XML has been perceived as verbose and JSON has become popular. JSON is native to Javascript hence literals/interpolation are aleady supported, and then browsers added parsers/pretty printers. The lesson is not (like Flix says) that no support should be present - rather it is that the design should be flexible and allow easily adding markup syntaxes. :: @@ -1589,6 +1604,10 @@ Most operators are textual: Minus is both a unary prefix operator and a binary infix operator with special support to disambiguate the two. ``(-)`` denotes the binary minus operator and ``neg`` the unary minus operation. +Operators are syntactic sugar for functions. Enclosing an operator in parentheses turns it into an ordinary function symbol, thus ``x+y`` is exactly the same as ``(+) x y``. + +String concatenation is ``++``. + Expressions =========== @@ -1793,15 +1812,6 @@ The general idea of inheritance is, for ``Foo`` a child of ``Bar`` to rewrite ca } } -Operators ---------- - -Operators are syntactic sugar for functions. Enclosing an operator -in parentheses turns it into an ordinary function symbol, thus ``x+y`` is -exactly the same as ``(+) x y``. - -String concatenation is ``++``. - Lambdas ======= @@ -1916,7 +1926,7 @@ This is pretty interesting, sort of a Python-style thing where variables are in This is pretty vague with regards to what constitutes a "point" but I interpret it as the condition enforced on imperative program threads that they execute one action at a time and this action must be chosen deterministically. It seems that Stroscot's use of the continuation monad enforces this structured approach so it isn't an issue. -:: +.. code-block:: none a = if true then 1 else 2 -- just a function if_then_else_ : Bool -> a -> a -> a { @@ -1933,7 +1943,7 @@ This is pretty vague with regards to what constitutes a "point" but I interpret if (x % 2 == 0) break -:: +.. 
code-block:: none check { risky_procedure @@ -1947,7 +1957,7 @@ More here: https://docs.microsoft.com/en-us/dotnet/fsharp/language-reference/com Also the do-while block from `Rune `__: -:: +.. code-block:: none do c = getNextChar() @@ -1956,7 +1966,7 @@ Also the do-while block from `Rune `__ and `Frame Shift Consulting `__. -One idea is to use `Debian's constitution `__ as the basic administrative structure. There is the suggestion in :cite:`parkesComplexityofStrategicBehaviorComparisonSchulze2021` to use ranked pairs instead of Schulze - it seems Schulze has less ties, but this is also why it is easier to game. Also ranked pairs satisfies LIIA which Schulze does not (c.f. `Table `__). +One idea is to use `Debian's constitution `__ as the basic administrative structure. There is the suggestion in :cite:`parkesComplexityofstrategicbehaviorComparisonSchulze2021` to use ranked pairs instead of Schulze - it seems Schulze has less ties, but this is also why Schulze is easier to game. Also ranked pairs satisfies LIIA which Schulze does not (c.f. `Table `__). Also check out these posts: `Python-style governance `__, `Rust-style governance `__ diff --git a/docs/Commentary/Meta/Guidelines.rst b/docs/Commentary/Meta/Guidelines.rst index 3db992a..21cb8c4 100644 --- a/docs/Commentary/Meta/Guidelines.rst +++ b/docs/Commentary/Meta/Guidelines.rst @@ -25,8 +25,7 @@ Principles * If it isn't documented, it doesn't exist. Not only does it have to be doc'd, but it has to be explained and taught and demonstrated. Do that, and people will be excited -- not about your documentation, but about your product. (`Mike Pope `__ via `Coding Horror `__). Corollary: There is no undefined behavior, only undocumented behavior. * Liking increases monotonically for both higher complexity and higher number of presentations. :cite:`madisonRepeatedListeningIncreases2017` (Originally this was "some complexity is desirable" from :cite:`normanLivingComplexity2010` page 13, but then I looked for sources and there was a stronger conclusion. Fig. 2 seems to have a liking dip for the highest complexity. Other studies used unnatural stimuli or did not control for familiarity, resulting in averaging experienced and inexperienced participants. :cite:`gucluturkDecomposingComplexityPreferences2019` There is a difference between objective vs. subjective complexity measures; using Fisher information to convert from objective to subjective measures of information produces the typical inverted U-shape for objective measures. :cite:`grzywaczDoesAmountInformation2022`) * Better depends on your customer's goodness metric. It is time for us to reject the simple-minded interpretation of the slogan "worse is better", and start putting out software that really is better (on the dimension of goodness that our customers have, not necessarily our own). (`Jim Waldo `__) -* "Good Design Is Easier to Change Than Bad Design". A thing is well designed if it adapts to the people who use it. For code, that means it must adapt by changing. So, a good design is Easier To Change (ETC). As far as we can tell, every design principle out there is a special case of ETC. Why is decoupling good? Because by isolating concerns we make each easier to change. Why is the single responsibility principle useful? Because a change in -requirements is mirrored by a change in just one module. Why is naming important? Because good names make code easier to read, and you have to read it to change it. 
(Pragmatic Programmer 2019 edition, page 28) +* "Good Design Is Easier to Change Than Bad Design". A thing is well designed if it adapts to the people who use it. For code, that means it must adapt by changing. So, a good design is Easier To Change (ETC). As far as we can tell, every design principle out there is a special case of ETC. Why is decoupling good? Because by isolating concerns we make each easier to change. Why is the single responsibility principle useful? Because a change in requirements is mirrored by a change in just one module. Why is naming important? Because good names make code easier to read, and you have to read it to change it. (Pragmatic Programmer 2019 edition, page 28) Paradigms ========= @@ -510,7 +509,7 @@ Now what we care about is the impact of programming language choice on project c Lines of code ~~~~~~~~~~~~~ -:cite:`brooksMythicalManMonth1995` makes the observation that "productivity seems to be constant in terms of elementary statements." Similarly :cite:`boehmSoftwareEngineeringEconomics1981` says (page 477) "COCOMO uses Delivered Source Instructions as the basic size parameter". DSI was correlated more closely with total effort than executable machine instructions. In COCOMO II, total effort in person-months is a power law of the project size in SLOC. +:cite:`brooksMythicalManmonth1995` makes the observation that "productivity seems to be constant in terms of elementary statements." Similarly :cite:`boehmSoftwareEngineeringEconomics1981` says (page 477) "COCOMO uses Delivered Source Instructions as the basic size parameter". DSI was correlated more closely with total effort than executable machine instructions. In COCOMO II, total effort in person-months is a power law of the project size in SLOC. This is borne out by :cite:`clarkDepartmentDefenseSoftware2017`, an analysis of DoD projects. Per Figure 25, fitting a power law to the relationship between SLOC and actual hours explains 67% of the variance. Per Figure 73 the languages were primarily C, C++, C#, Ada, and Java; they did not conduct a by-language analysis. :cite:`clarkDoDSoftwareFactbook2015` Figure 12 provides a more interesting graph which shows basically the same thing, although it is colorized by project type. The overlap in rates by project type is such that I would not consider it a significant factor. diff --git a/docs/Commentary/Meta/Learning.rst b/docs/Commentary/Meta/Learning.rst index 1ba4ee2..03d4003 100644 --- a/docs/Commentary/Meta/Learning.rst +++ b/docs/Commentary/Meta/Learning.rst @@ -19,8 +19,7 @@ Learning content can be provided in different ways: * Literature (10% retention): Books, online tutorials, and official languages references provide comprehensive and structured learning material. They often offer in-depth explanations, examples, exercises, and best practices. How long should it be? A `121 page Python book (60 pages double spaced) `__ is derided as terse and useless, requiring to google every new keyword. `K&R C `__ has 272 pages, but is "not beginner friendly". The `C# Programming Yellow Book `__ is 217 8.5x11 pages or about 322 of the standard 7x9 pages. `Python for Kids `__ clocks in at 344 pages but is still missing critical functions such as the input command. On the other hand some chapters such as turtle graphics, tkinter, and classes/objects can be skipped (74 pages). My first programming book `Beginning Programming with Java For Dummies `__ had 408 pages. The `5th edition `__ is the most popular and has 560 pages. But it still only covers the basics. 
`Head First Java `__ is recommended by the r/learnprogramming subreddit and has 688 pages. -* Video Courses (20%-30% retention): Video courses offer visual and auditory learning experiences. mainly in the form of lectures and coding examples. YouTube has numerous X-hour courses on various subjects, from universities and individuals. On YouTube `MIT -6.0001 ` is around 12x45=540 minutes. `CS50P `__ is 14x1.2=1005 minutes. The amateur `CS Dojo ` is 16x~13=217 minutes. `Digilent Inc.'s course `__ is 87x6.5=561 minutes. +* Video Courses (20%-30% retention): Video courses offer visual and auditory learning experiences. mainly in the form of lectures and coding examples. YouTube has numerous X-hour courses on various subjects, from universities and individuals. On YouTube `MIT 6.0001 `__ is around 12x45=540 minutes. `CS50P `__ is 14x1.2=1005 minutes. The amateur `CS Dojo `__ is 16x~13=217 minutes. `Digilent Inc.'s course `__ is 87x6.5=561 minutes. * Quizzes (? retention): Some basic tests allow quickly identifying areas where one's knowledge is deficient. Students will learn things from seeing their wrong answers and an explanation of the correct answer. diff --git a/docs/Commentary/Meta/OtherPL.rst b/docs/Commentary/Meta/OtherPL.rst index 0e10243..8eac311 100644 --- a/docs/Commentary/Meta/OtherPL.rst +++ b/docs/Commentary/Meta/OtherPL.rst @@ -120,7 +120,7 @@ PyPL index (top 28) * good standard library design and documentation, probably worth copying * voted "most loved" by StackOverflow -* ownership model/borrow checker has been found difficult to use by several studies (`1 `__, `2 `__, `https://dl.acm.org/doi/pdf/10.1145/3510003.3510107`__). Also it is incomplete - can't even write linked lists without `endless pain `__. In practice Rust programmers `end up `__ using reference counting or GC to ensure memory safety in complex cases +* ownership model/borrow checker has been found difficult to use by several studies (`1 `__, `2 `__, `3 `__). Also it is incomplete - can't even write linked lists without `endless pain `__. In practice Rust programmers `end up `__ using reference counting or GC to ensure memory safety in complex cases * concurrency safe, but async suffers from "borrow checker"-itis and uses atomic reference counting * learning experience circa 2015 was "rough" * compatibility pendulum has swung towards "too stable" - many changes that "should" be made for a better language that can't be diff --git a/docs/Commentary/index.rst b/docs/Commentary/index.rst index 999b2ce..30f85c2 100644 --- a/docs/Commentary/index.rst +++ b/docs/Commentary/index.rst @@ -5,6 +5,8 @@ Every language needs an explanation of all the decisions made and possible optio .. toctree:: :maxdepth: 2 - :glob: - * \ No newline at end of file + Language/index + Implementation/index + Libraries/index + Meta/index diff --git a/docs/GettingStarted/FAQ.rst b/docs/GettingStarted/FAQ.rst index b9b2a65..a98e716 100644 --- a/docs/GettingStarted/FAQ.rst +++ b/docs/GettingStarted/FAQ.rst @@ -6,23 +6,6 @@ Does it run? No, it's in the design stage. But there are Haskell experiments in the ``src/`` and ``test/`` directories that can be run. -What language is the compiler written in? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A near-term goal is to write Stroscot in itself. However, it has to generate code first. 
I originally picked JavaScript to start for a number of reasons: - -* It's the fastest interpreted language available -* It has reasonably up-to-date syntax and features thanks to TC39 -* A lot of the :ref:`inspiring projects ` were written in JS -* LLVM compiles to JS and there are LLVM bindings available for JS -* TypeScript doesn't add much besides compilation overhead - -Since then, development has shifted to Haskell, for other reasons: - -* The compiler/type system prevents a lot of common errors (particularly typos, which JS doesn't detect until late) -* A lot of other type-system-heavy projects are written in Haskell -* I'm most familiar with Haskell. - Where can I ask questions? ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/Reference/Errors.rst b/docs/Reference/Errors.rst index 1bf8dea..f463903 100644 --- a/docs/Reference/Errors.rst +++ b/docs/Reference/Errors.rst @@ -207,7 +207,9 @@ Missing override - Correct * - .. literalinclude:: errors/missing-override.txt + :language: none - .. literalinclude:: errors/missing-override-fixed.txt + :language: none .. include:: errors/missing-override-message.rst diff --git a/docs/Reference/Modules.rst b/docs/Reference/Modules.rst index 9ac279a..1e91743 100644 --- a/docs/Reference/Modules.rst +++ b/docs/Reference/Modules.rst @@ -128,7 +128,9 @@ Overrides By default, methods are scoped to their module. Every definition ``foo = a`` binds the identifier ``Module.foo``, and each module creates a new identifier. This means a use ``Module.foo`` refers to only the declarations within that module. -Dispatch will resolve bare identifiers to their appropriate modules when it can be determined from context, and nondeterminism even allows some overloading. For example:: +Dispatch will resolve bare identifiers to their appropriate modules when it can be determined from context, and nondeterminism even allows some overloading. For example: + +.. code-block:: none A = module foo (x:{A}) = 1 @@ -140,10 +142,11 @@ Dispatch will resolve bare identifiers to their appropriate modules when it can # [1,2] # print (force foo) - This defines two symbols ``A.foo : A -> {1}`` and ``B.foo : B -> {2}``, and resolves ``map foo [A,B]`` to ``[A.foo A, B.foo B]``. However, because the bare identifier ends up resolving to different symbols, there are cases where dispatch is not sufficient to resolve the ambiguity. In this example, ``force foo`` results in an error because it could be either ``A.foo`` or ``B.foo``. -The ``override`` statement allows reusing an identifier defined in a different module, extending a method definition as if the clauses were all defined in the same module. This avoids the nondeterminism issues. For example with the following:: +The ``override`` statement allows reusing an identifier defined in a different module, extending a method definition as if the clauses were all defined in the same module. This avoids the nondeterminism issues. For example with the following: + +.. code-block:: none A = module foo (x:{A}) = 1 diff --git a/docs/Reference/Values.rst b/docs/Reference/Values.rst index 95d0990..26bf440 100644 --- a/docs/Reference/Values.rst +++ b/docs/Reference/Values.rst @@ -106,7 +106,7 @@ Mutable arrays are a reference pointing to an immutable array. Operations are op There is also an array of mutable cells (bytes), similar to C pointers / arrays. You can do something like ``readOffset Int 0 ptr``. You can read a different type than you wrote, and it doesn't have to be aligned (although aligned accesses may be faster depending on architecture). 
This type is useful for low-level munging but mutable arrays are probably safer. -:cite:`Tremblay` says that "allowing the size of arrays to be decided at run time [...] introduces considerable implementation problems and interferes with compile-time error checking. This feature may be of only limited value in certain applications areas." But Storscot is based on an interpeter model - so the only time the size of an array could be decided is at run-time. +:cite:`tremblayTheoryPracticeCompiler1985` page 73 says that "allowing the size of arrays to be decided at run time [...] introduces considerable implementation problems and interferes with compile-time error checking. This feature may be of only limited value in certain applications areas." But Stroscot is based on an interpreter model - so the only time the size of an array could be decided is at run-time. Most languages these days have dynamically-sized arrays. Tensors ------- @@ -267,7 +267,7 @@ Sets are the mathematical definition, i.e. a function ``isElemOf : Any -> {Prese b = map { 1 = Present, 2 = Present, 3 = Present } a = set (\x -> lookup {default=Absent} b x) -More notation for sets is discussed on the :ref:`Sets` page. +More notation for sets is discussed on the `Sets`_ page. Bags ==== diff --git a/docs/conf.py b/docs/conf.py index c58803c..0428166 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,6 +13,7 @@ extensions = [ 'sphinx.ext.mathjax', 'sphinx.ext.graphviz', + "sphinx.ext.autosectionlabel", 'sphinxcontrib.bibtex', # ,'sphinxcontrib.fulltoc' 'sphinx_rtd_theme', @@ -27,6 +28,8 @@ # References bibtex_bibfiles = ['references.bib'] +# Make sure cross-reference targets are unique +autosectionlabel_prefix_document = True # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/docs/index.rst b/docs/index.rst index 9288936..6476ec3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,7 +14,6 @@ Links: Contents: .. toctree:: - :maxdepth: 2 GettingStarted/index HowTo/index diff --git a/docs/references.bib b/docs/references.bib index 91b415f..ca8b6c0 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -1,5 +1,5 @@ -@misc{abdelgawadNOOPDomainTheoreticModel2018, - title = {{{NOOP}}: {{A Domain-Theoretic Model}} of {{Nominally-Typed OOP}}}, +@misc{abdelgawadNOOPDomaintheoreticModel2018, + title = {{{NOOP}}: A Domain-Theoretic Model of Nominally-Typed {{OOP}}}, shorttitle = {{{NOOP}}}, author = {AbdelGawad, Moez and Cartwright, Robert}, year = {2018}, @@ -16,8 +16,24 @@ @misc{abdelgawadNOOPDomainTheoreticModel2018 keywords = {Computer Science - Logic in Computer Science,Computer Science - Programming Languages}, note = {Comment: 23 pages} } -% == BibTeX quality report for abdelgawadNOOPDomainTheoreticModel2018: -% ? Title looks like it was stored in title-case in Zotero + +@article{abelUiCAAccurateThroughput2021, + ids = {abelAccurateThroughputPrediction2021a}, + title = {{{uiCA}}: Accurate Throughput Prediction of Basic Blocks on Recent {{Intel}} Microarchitectures}, + author = {Abel, Andreas and Reineke, Jan}, + year = {2021}, + month = jul, + journal = {arXiv:2107.14210 [cs]}, + eprint = {2107.14210}, + primaryclass = {cs}, + url = {http://arxiv.org/abs/2107.14210}, + urldate = {2021-07-30}, + abstract = {Tools to predict the throughput of basic blocks on a specific microarchitecture are useful to optimize software performance and to build optimizing compilers. In recent work, several such tools have been proposed.
However, the accuracy of their predictions has been shown to be relatively low. In this paper, we identify the most important factors for these inaccuracies. To a significant degree these inaccuracies are due to elements and parameters of the pipelines of recent CPUs that are not taken into account by previous tools. A primary reason for this is that the necessary details are often undocumented. In this paper, we build more precise models of relevant components by reverse engineering using microbenchmarks. Based on these models, we develop a simulator for predicting the throughput of basic blocks. In addition to predicting the throughput, our simulator also provides insights into how the code is executed. Our tool supports all Intel Core microarchitecture generations released in the last decade. We evaluate it on an improved version of the BHive benchmark suite. On many recent microarchitectures, its predictions are more accurate than the predictions of state-of-the-art tools by more than an order of magnitude.}, + archiveprefix = {arxiv}, + keywords = {Computer Science - Performance} +} +% == BibTeX quality report for abelUiCAAccurateThroughput2021: +% ? Possibly abbreviated journal title arXiv:2107.14210 [cs] @article{abramskyGeometryInteractionLinear2002, title = {Geometry of Interaction and Linear Combinatory Algebras.}, @@ -52,8 +68,30 @@ @incollection{abrusciNoncommutativeProofNets1995 note = {Proceedings of the Workshop on Linear Logic, Ithaca, New York, June 1993} } +@article{adamsPrincipledParsingIndentationsensitive2013, + ids = {adamsPrincipledParsingIndentationSensitive,adamsPrincipledParsingIndentationSensitivea}, + title = {Principled Parsing for Indentation-Sensitive Languages: Revisiting {{Landin}}'s Offside Rule}, + shorttitle = {Principled Parsing for Indentation-Sensitive Languages}, + author = {Adams, Michael D.}, + year = {2013}, + month = jan, + journal = {ACM SIGPLAN Notices}, + volume = {48}, + number = {1}, + pages = {511--522}, + issn = {0362-1340}, + doi = {10.1145/2480359.2429129}, + url = {https://doi.org/10.1145/2480359.2429129}, + urldate = {2022-05-18}, + abstract = {Several popular languages, such as Haskell, Python, and F\#, use the indentation and layout of code as part of their syntax. Because context-free grammars cannot express the rules of indentation, parsers for these languages currently use ad hoc techniques to handle layout. These techniques tend to be low-level and operational in nature and forgo the advantages of more declarative specifications like context-free grammars. For example, they are often coded by hand instead of being generated by a parser generator. This paper presents a simple extension to context-free grammars that can express these layout rules, and derives GLR and LR(k) algorithms for parsing these grammars. These grammars are easy to write and can be parsed efficiently. Examples for several languages are presented, as are benchmarks showing the practical efficiency of these algorithms.}, + keywords = {indentation,offside rule,parsing} +} +% == BibTeX quality report for adamsPrincipledParsingIndentationsensitive2013: +% ? unused Journal abbreviation ("SIGPLAN Not.") +% ? 
unused Library catalog ("January 2013") + @article{albertResourceAnalysisDriven2019, - title = {Resource {{Analysis}} Driven by ({{Conditional}}) {{Termination Proofs}}}, + title = {Resource Analysis Driven by (Conditional) Termination Proofs}, author = {Albert, Elvira and Bofill, Miquel and Borralleras, Cristina and {Martin-Martin}, Enrique and Rubio, Albert}, year = {2019}, month = sep, @@ -72,6 +110,27 @@ @article{albertResourceAnalysisDriven2019 note = {Comment: Paper presented at the 35th International Conference on Logic Programming (ICLP 2019), Las Cruces, New Mexico, USA, 20-25 September 2019, 16 pages} } +@article{albrechtSoftwareFunctionSource1983, + title = {Software Function, Source Lines of Code, and Development Effort Prediction: A Software Science Validation}, + shorttitle = {Software Function, Source Lines of Code, and Development Effort Prediction}, + author = {Albrecht, A.J. and Gaffney, J.E.}, + year = {1983}, + month = nov, + journal = {IEEE Transactions on Software Engineering}, + volume = {SE-9}, + number = {6}, + pages = {639--648}, + issn = {0098-5589}, + doi = {10.1109/TSE.1983.235271}, + url = {http://ieeexplore.ieee.org/document/1703110/}, + urldate = {2023-12-28}, + abstract = {One of the most important problems faced by software developers and users is the prediction of the size of a programming system and its development effort. As an alternative to "size," one might deal with a measure of the "function" that the software is to perform. Albrecht [1] has developed a methodology to estimate the amount of the "function" the software is to perform, in terms of the data it is to use (absorb) and to generate (produce). The "function" is quantified as "function points," essentially, a weighted sum of the numbers of "inputs," "outputs," master files," and "inquiries" provided to, or generated by, the software. This paper demonstrates the equivalence between Albrecht's external input/output data flow representative of a program (the "function points" metric) and Halstead's [2] "software science" or "software linguistics" model of a program as well as the "soft content" variation of Halstead's model suggested by Gaffney [7].}, + langid = {english} +} +% == BibTeX quality report for albrechtSoftwareFunctionSource1983: +% ? unused Journal abbreviation ("IIEEE Trans. Software Eng.") +% ? unused Library catalog ("DOI.org (Crossref)") + @article{allenCatalogueOptimizingTransformations1971, title = {A Catalogue of Optimizing Transformations}, author = {Allen, Frances E and Cocke, John}, @@ -143,6 +202,24 @@ @article{appelGarbageCollectionCan1987 % == BibTeX quality report for appelGarbageCollectionCan1987: % ? unused Library catalog ("DOI.org (Crossref)") +@article{appelSSAFunctionalProgramming1998, + title = {{{SSA}} Is Functional Programming}, + author = {Appel, Andrew W.}, + year = {1998}, + month = apr, + journal = {ACM SIGPLAN Notices}, + volume = {33}, + number = {4}, + pages = {17--20}, + issn = {0362-1340}, + doi = {10.1145/278283.278285}, + url = {https://dl.acm.org/doi/10.1145/278283.278285}, + urldate = {2023-05-03} +} +% == BibTeX quality report for appelSSAFunctionalProgramming1998: +% ? unused Journal abbreviation ("SIGPLAN Not.") +% ? unused Library catalog ("ACM Digital Library") + @incollection{aptWhyOccurcheckNot1992, title = {Why the Occur-Check Is Not a Problem}, booktitle = {Programming {{Language Implementation}} and {{Logic Programming}}}, @@ -226,8 +303,78 @@ @book{aspertiOptimalImplementationFunctional1999 % ? 
unused Library catalog ("ACM Digital Library") % ? unused Number of pages ("408") +@article{aspertiParallelBetaReduction2001, + title = {Parallel Beta Reduction Is Not Elementary Recursive}, + author = {Asperti, Andrea and Mairson, Harry G.}, + year = {2001}, + month = oct, + journal = {Information and Computation}, + volume = {170}, + number = {1}, + pages = {49--80}, + issn = {08905401}, + doi = {10.1006/inco.2001.2869}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S089054010192869X}, + urldate = {2020-06-15}, + langid = {english} +} +% == BibTeX quality report for aspertiParallelBetaReduction2001: +% ? unused Library catalog ("DOI.org (Crossref)") + +@techreport{baikCOCOMOIIModel2000, + title = {{{COCOMO II}} Model Manual 2000.0}, + author = {Baik, Jongmoon and Horowitz, Ellis}, + year = {2000}, + institution = {{University of Southern California}}, + url = {http://web.archive.org/web/20181123110403/http://csse.usc.edu/csse/research/COCOMOII/cocomo2000.0/CII_modelman2000.0.pdf}, + urldate = {2023-12-26} +} + +@article{balabonskiUnifiedApproachFully2011, + title = {A Unified Approach to Fully Lazy Sharing}, + author = {Balabonski, Thibaut}, + year = {2011}, + month = oct, + abstract = {We give an axiomatic presentation of sharing-via-labelling for weak λ-calculi, that allows to formally compare many different approaches to fully lazy sharing, and obtain two important results. We prove that the known implementations of full laziness are all equivalent in terms of the number of β-reductions performed, although they behave differently regarding the duplication of terms. We establish a link between the optimality theories of weak λ-calculi and first-order rewriting systems by expressing fully lazy λ-lifting in our framework, thus emphasizing the first-order essence of weak reduction.}, + langid = {english} +} +% == BibTeX quality report for balabonskiUnifiedApproachFully2011: +% Missing required field 'journal' +% ? unused Library catalog ("Zotero") + +@phdthesis{barikErrorMessagesRational2018, + title = {Error {{Messages}} as {{Rational Reconstructions}}}, + author = {Barik, Titus}, + year = {2018}, + langid = {english}, + school = {North Carolina State University} +} +% == BibTeX quality report for barikErrorMessagesRational2018: +% ? Title looks like it was stored in title-case in Zotero +% ? unused Library catalog ("Zotero") + +@inproceedings{barikHowShouldCompilers2018, + title = {How Should Compilers Explain Problems to Developers?}, + booktitle = {Proceedings of the 2018 26th {{ACM Joint Meeting}} on {{European Software Engineering Conference}} and {{Symposium}} on the {{Foundations}} of {{Software Engineering}}}, + author = {Barik, Titus and Ford, Denae and {Murphy-Hill}, Emerson and Parnin, Chris}, + year = {2018}, + month = oct, + pages = {633--643}, + publisher = {{ACM}}, + address = {{Lake Buena Vista FL USA}}, + doi = {10.1145/3236024.3236040}, + url = {https://dl.acm.org/doi/10.1145/3236024.3236040}, + urldate = {2022-10-15}, + abstract = {Compilers primarily give feedback about problems to developers through the use of error messages. Unfortunately, developers routinely find these messages to be confusing and unhelpful. In this paper, we postulate that because error messages present poor explanations, theories of explanation—such as Toulmin’s model of argument—can be applied to improve their quality. 
To understand how compilers should present explanations to developers, we conducted a comparative evaluation with 68 professional software developers and an empirical study of compiler error messages found in Stack Overflow questions across seven different programming languages.}, + isbn = {978-1-4503-5573-5}, + langid = {english} +} +% == BibTeX quality report for barikHowShouldCompilers2018: +% ? unused Conference name ("ESEC/FSE '18: 26th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering") +% ? unused Library catalog ("DOI.org (Crossref)") + @inproceedings{bauerIndentationSimplyMatter2019, - title = {Indentation: {{Simply}} a {{Matter}} of {{Style}} or {{Support}} for {{Program Comprehension}}?}, + title = {Indentation: Simply a Matter of Style or Support for Program Comprehension?}, shorttitle = {Indentation}, booktitle = {2019 {{IEEE}}/{{ACM}} 27th {{International Conference}} on {{Program Comprehension}} ({{ICPC}})}, author = {Bauer, Jennifer and Siegmund, Janet and Peitek, Norman and Hofmeister, Johannes C. and Apel, Sven}, @@ -245,7 +392,27 @@ @inproceedings{bauerIndentationSimplyMatter2019 } % == BibTeX quality report for bauerIndentationSimplyMatter2019: % ? Unsure about the formatting of the booktitle -% ? Title looks like it was stored in title-case in Zotero +% ? unused Library catalog ("DOI.org (Crossref)") + +@inproceedings{beckerCompilerErrorMessages2019, + title = {Compiler Error Messages Considered Unhelpful: {{The}} Landscape of Text-Based Programming Error Message Research}, + shorttitle = {Compiler Error Messages Considered Unhelpful}, + booktitle = {Proceedings of the {{Working Group Reports}} on {{Innovation}} and {{Technology}} in {{Computer Science Education}}}, + author = {Becker, Brett A. and Denny, Paul and Pettit, Raymond and Bouchard, Durell and Bouvier, Dennis J. and Harrington, Brian and Kamil, Amir and Karkare, Amey and McDonald, Chris and Osera, Peter-Michael and Pearce, Janice L. and Prather, James}, + year = {2019}, + month = dec, + pages = {177--210}, + publisher = {{ACM}}, + address = {{Aberdeen Scotland Uk}}, + doi = {10.1145/3344429.3372508}, + url = {https://dl.acm.org/doi/10.1145/3344429.3372508}, + urldate = {2021-03-30}, + abstract = {Diagnostic messages generated by compilers and interpreters such as syntax error messages have been researched for over half of a century. Unfortunately, these messages which include error, warning, and run-time messages, present substantial difficulty and could be more effective, particularly for novices. Recent years have seen an increased number of papers in the area including studies on the effectiveness of these messages, improving or enhancing them, and their usefulness as a part of programming process data that can be used to predict student performance, track student progress, and tailor learning plans. Despite this increased interest, the long history of literature is quite scattered and has not been brought together in any digestible form.}, + isbn = {978-1-4503-7567-2}, + langid = {english} +} +% == BibTeX quality report for beckerCompilerErrorMessages2019: +% ? unused Conference name ("ITiCSE '19: Innovation and Technology in Computer Science Education") % ? unused Library catalog ("DOI.org (Crossref)") @article{beckerWhatDoesSaying2021, @@ -268,19 +435,95 @@ @article{beckerWhatDoesSaying2021 % ? unused Journal abbreviation ("Commun. ACM") % ? 
unused Library catalog ("DOI.org (Crossref)") +@article{beierHowDoesTypeface2013, + title = {How Does Typeface Familiarity Affect Reading Performance and Reader Preference?}, + author = {Beier, Sofie and Larson, Kevin}, + year = {2013}, + month = oct, + journal = {Information Design Journal}, + volume = {20}, + number = {1}, + pages = {16--31}, + issn = {0142-5471, 1569-979X}, + doi = {10.1075/idj.20.1.02bei}, + url = {http://www.jbe-platform.com/content/journals/10.1075/idj.20.1.02bei}, + urldate = {2023-01-13}, + abstract = {Some typographers have proposed that typeface familiarity is defined by the amount of time that a reader has been exposed to a typeface design, while other typographers have proposed that familiarity is defined by the commonalities in letter shapes. These two hypotheses were tested by measuring the reading speed and preferences of participants. Participants were tested twice with common and uncommon letter shapes, once before and once after spending 20 minutes reading a story with the font. The results indicate that the exposure period has an effect on the speed of reading, but the uncommon letter shapes did not. Readers did not like the uncommon letter shapes. This has implications for the selection of type and the design of future typefaces.}, + langid = {english} +} +% == BibTeX quality report for beierHowDoesTypeface2013: +% ? unused Journal abbreviation ("IDJ") +% ? unused Library catalog ("DOI.org (Crossref)") + +@phdthesis{beierTypefaceLegibilityDefining2009, + type = {Thesis}, + title = {Typeface Legibility: {{Towards}} Defining Familiarity}, + shorttitle = {Typeface {{Legibility}}}, + author = {Beier, Sofie}, + year = {2009}, + month = may, + url = {https://researchonline.rca.ac.uk/957/}, + urldate = {2023-02-11}, + abstract = {The aim of the project is to investigate the influence of fa- miliarity on reading. Three new fonts were created in order to examine the familiarity of fonts that readers could not have seen before. Each of the new fonts contains lowercase letters with fa- miliar and unfamiliar skeleton variations. The different skeleton variations were tested with distance threshold and time thresh- old methods in order to account for differences in visibility. This investigation helped create final typeface designs where the fa- miliar and unfamiliar skeleton variations have roughly similar and good performance. The typefaces were later applied as the test material in the familiarity investigation. Some typographers have proposed that familiarity means the amount of time that a reader has been exposed to a typeface design, while other typographers have proposed that familiarity is the commonalities in letterforms. These two hypotheses were tested by measuring the reading speed and preference of partici- pants, as they read fonts that had either common or uncommon letterforms, the fonts were then re-measured after an exposure period. The results indicate that exposure has an immediate ef- fect on the speed of reading, but that unfamiliar letter features only have an effect of preference and not on reading speed. By combining the craftsmen’s knowledge of designing with the methods of experimental research, the project takes a new step forward towards a better understanding of how different type- faces can influence the reading process.}, + langid = {english}, + school = {Royal College of Art} +} +% == BibTeX quality report for beierTypefaceLegibilityDefining2009: +% ? unused Library catalog ("researchonline.rca.ac.uk") +% ? 
unused Number of pages ("268") + @misc{ben-amramNotesPippengerComparison1996, - title = {Notes on {{Pippenger}}'s {{Comparison}} of {{Pure}} and {{Impure LISP}}}, + title = {Notes on {{Pippenger}}'s Comparison of Pure and Impure {{LISP}}}, author = {{Ben-amram}, Amir M.}, year = {1996}, url = {citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.55.3024&rep=rep1&type=pdf}, abstract = {any impure-LISP program running in time t can be compiled into a pure-LISP program running in time O(t log t): first implement the impure-LISP operations using an array of size at most t. Then represent the array as a balanced binary tree, which can be done in pure LISP. The main result of the paper is a lower-bound theorem. It can roughly be described as follows. A problem P is presented, that can be solved in linear time, t = O(n), in impure LISP. It is proved that for any pure-LISP program p for P , the worst-case time complexity is \textbackslash Omega\textbackslash Gamma n log n). 2 Restrictions of the Proof and Open Problems The lower-bound result requires two restrictive assumptions. We first describe the restrictions and their technical implications. Next, we discuss the two questions that} } % == BibTeX quality report for ben-amramNotesPippengerComparison1996: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("CiteSeer") +@article{bergerImpactProgrammingLanguages2019, + title = {On the {{Impact}} of {{Programming Languages}} on {{Code Quality}}: {{A Reproduction Study}}}, + shorttitle = {On the {{Impact}} of {{Programming Languages}} on {{Code Quality}}}, + author = {Berger, Emery D. and Hollenbeck, Celeste and Maj, Petr and Vitek, Olga and Vitek, Jan}, + year = {2019}, + month = dec, + journal = {ACM Transactions on Programming Languages and Systems}, + volume = {41}, + number = {4}, + pages = {1--24}, + issn = {0164-0925, 1558-4593}, + doi = {10.1145/3340571}, + url = {https://dl.acm.org/doi/10.1145/3340571}, + urldate = {2023-12-27}, + abstract = {In a 2014 article, Ray, Posnett, Devanbu, and Filkov claimed to have uncovered a statistically significant association between 11 programming languages and software defects in 729 projects hosted on GitHub. Specifically, their work answered four research questions relating to software defects and programming languages. With data and code provided by the authors, the present article first attempts to conduct an experimental repetition of the original study. The repetition is only partially successful, due to missing code and issues with the classification of languages. The second part of this work focuses on their main claim, the association between bugs and languages, and performs a complete, independent reanalysis of the data and of the statistical modeling steps undertaken by Ray et al. in 2014. This reanalysis uncovers a number of serious flaws that reduce the number of languages with an association with defects down from 11 to only 4. Moreover, the practical effect size is exceedingly small. These results thus undermine the conclusions of the original study. Correcting the record is important, as many subsequent works have cited the 2014 article and have asserted, without evidence, a causal link between the choice of programming language for a given task and the number of software defects. 
Causation is not supported by the data at hand; and, in our opinion, even after fixing the methodological flaws we uncovered, too many unaccounted sources of bias remain to hope for a meaningful comparison of bug rates across languages.}, + langid = {english} +} +% == BibTeX quality report for bergerImpactProgrammingLanguages2019: +% ? Title looks like it was stored in title-case in Zotero +% ? unused Journal abbreviation ("ACM Trans. Program. Lang. Syst.") +% ? unused Library catalog ("DOI.org (Crossref)") + +@incollection{bergeronSystemsProgrammingLanguages1972, + title = {Systems {{Programming Languages}}}, + booktitle = {Advances in {{Computers}}}, + author = {Bergeron, R.D. and Gannon, J.D. and Shecter, D.P. and Tompa, F.W. and Dam, A. Van}, + year = {1972}, + volume = {12}, + pages = {175--284}, + publisher = {{Elsevier}}, + doi = {10.1016/S0065-2458(08)60510-0}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S0065245808605100}, + urldate = {2023-12-23}, + isbn = {978-0-12-012112-0}, + langid = {english} +} +% == BibTeX quality report for bergeronSystemsProgrammingLanguages1972: +% ? Title looks like it was stored in title-case in Zotero +% ? unused Library catalog ("DOI.org (Crossref)") + @inproceedings{bhatotiaIThreadsThreadingLibrary2015, - title = {{{iThreads}}: {{A Threading Library}} for {{Parallel Incremental Computation}}}, + title = {{{iThreads}}: A Threading Library for Parallel Incremental Computation}, shorttitle = {{{iThreads}}}, booktitle = {Proceedings of the {{Twentieth International Conference}} on {{Architectural Support}} for {{Programming Languages}} and {{Operating Systems}} - {{ASPLOS}} '15}, author = {Bhatotia, Pramod and Fonseca, Pedro and Acar, Umut A. and Brandenburg, Björn B. and Rodrigues, Rodrigo}, @@ -295,10 +538,28 @@ @inproceedings{bhatotiaIThreadsThreadingLibrary2015 langid = {english} } % == BibTeX quality report for bhatotiaIThreadsThreadingLibrary2015: -% ? Title looks like it was stored in title-case in Zotero % ? unused Conference name ("the Twentieth International Conference") % ? unused Library catalog ("DOI.org (Crossref)") +@inproceedings{billotStructureSharedForests1989, + title = {The Structure of Shared Forests in Ambiguous Parsing}, + booktitle = {Proceedings of the 27th Annual Meeting on {{Association}} for {{Computational Linguistics}}}, + author = {Billot, Sylvie and Lang, Bernard}, + year = {1989}, + month = jun, + series = {{{ACL}} '89}, + pages = {143--151}, + publisher = {{Association for Computational Linguistics}}, + address = {{USA}}, + doi = {10.3115/981623.981641}, + url = {https://dl.acm.org/doi/10.3115/981623.981641}, + urldate = {2023-05-22}, + abstract = {The Context-Free backbone of some natural language analyzers produces all possible CF parses as some kind of shared forest, from which a single tree is to be chosen by a disambiguation process that may be based on the finer features of the language. We study the structure of these forests with respect to optimality of sharing, and in relation with the parsing schema used to produce them. In addition to a theoretical and experimental framework for studying these issues, the main results presented are:- sophistication in chart parsing schemata (e.g. 
use of look-ahed) may reduce time and space efficiency instead of improving it,- there is a shared forest structure with at most cubic size for any CF grammar,- when O(n3) complexity is required, the shape of a shared forest is dependent on the parsing schema used.Though analyzed on CF grammars for simplicity, these results extend to more complex formalisms such as unification based grammars.}, + keywords = {Ambiguity,Chart Parsing,Context-Free Parsing,Dynamic Programming,Earley Parsing,Parse Forest,Parse Tree,Parsing Schemata,Parsing Strategies} +} +% == BibTeX quality report for billotStructureSharedForests1989: +% ? unused Library catalog ("ACM Digital Library") + @article{birdMoreHasteLess1997, title = {More Haste, Less Speed: Lazy versus Eager Evaluation}, shorttitle = {More Haste, Less Speed}, @@ -319,7 +580,7 @@ @article{birdMoreHasteLess1997 } @inproceedings{blaisdellNonassociativeNoncommutativeMultimodal2022, - title = {Non-Associative, {{Non-commutative Multi-modal Linear Logic}}}, + title = {Non-Associative, Non-Commutative Multi-Modal Linear Logic}, booktitle = {Automated {{Reasoning}}}, author = {Blaisdell, Eben and Kanovich, Max and Kuznetsov, Stepan L. and Pimentel, Elaine and Scedrov, Andre}, editor = {Blanchette, Jasmin and Kovács, Laura and Pattinson, Dirk}, @@ -337,6 +598,23 @@ @inproceedings{blaisdellNonassociativeNoncommutativeMultimodal2022 % ? Unsure about the formatting of the booktitle % ? unused Library catalog ("Springer Link") +@book{boehmSoftwareEngineeringEconomics1981, + title = {Software Engineering Economics}, + author = {Boehm, Barry W.}, + year = {1981}, + publisher = {{Englewood Cliffs, N.J. : Prentice-Hall}}, + url = {http://archive.org/details/softwareengineer0000boeh}, + urldate = {2023-12-29}, + abstract = {xxvii, 767 p. : 25 cm. --; Includes indexes; Bibliography: p. 733-749}, + collaborator = {{Internet Archive}}, + isbn = {978-0-13-822122-5}, + langid = {english}, + keywords = {Software engineering -- Economic aspects} +} +% == BibTeX quality report for boehmSoftwareEngineeringEconomics1981: +% ? unused Library catalog ("Internet Archive") +% ? unused Number of pages ("812") + @inproceedings{bolingbrokeSupercompilationEvaluation2010, title = {Supercompilation by Evaluation}, booktitle = {Proceedings of the Third {{ACM Haskell}} Symposium on {{Haskell}}}, @@ -381,6 +659,48 @@ @inproceedings{bolingbrokeTypesAreCalling2009 % ? unused Conference name ("the 2nd ACM SIGPLAN symposium") % ? unused Library catalog ("DOI.org (Crossref)") +@inproceedings{borningClassesPrototypesObjectoriented1986, + title = {Classes versus Prototypes in Object-Oriented Languages}, + booktitle = {Proceedings of 1986 {{ACM Fall}} Joint Computer Conference}, + author = {Borning, A. H.}, + year = {1986}, + month = nov, + series = {{{ACM}} '86}, + pages = {36--40}, + publisher = {{IEEE Computer Society Press}}, + address = {{Washington, DC, USA}}, + urldate = {2023-03-02}, + isbn = {978-0-8186-4743-7} +} +% == BibTeX quality report for borningClassesPrototypesObjectoriented1986: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("ACM Digital Library") + +@article{boucherTrojanSourceInvisible, + title = {Trojan Source: Invisible Vulnerabilities}, + author = {Boucher, Anderson}, + pages = {15}, + abstract = {We present a new type of attack in which source code is maliciously encoded so that it appears different to a compiler and to the human eye. 
This attack exploits subtleties in text-encoding standards such as Unicode to produce source code whose tokens are logically encoded in a different order from the one in which they are displayed, leading to vulnerabilities that cannot be perceived directly by human code reviewers. ‘Trojan Source’ attacks, as we call them, pose an immediate threat both to first-party software and of supply-chain compromise across the industry. We present working examples of Trojan-Source attacks in C, C++, C\#, JavaScript, Java, Rust, Go, and Python. We propose definitive compiler-level defenses, and describe other mitigating controls that can be deployed in editors, repositories, and build pipelines while compilers are upgraded to block this attack.}, + langid = {english} +} +% == BibTeX quality report for boucherTrojanSourceInvisible: +% Missing required field 'journal' +% Missing required field 'year' +% ? unused Library catalog ("Zotero") + +@article{brachthauserParsingFirstclassDerivatives, + ids = {brachthaeuserParsingFirstClassDerivatives}, + title = {Parsing with First-Class Derivatives}, + author = {Brachthauser, Jonathan Immanuel and Rendel, Tillmann and Ostermann, Klaus}, + pages = {19}, + abstract = {Brzozowski derivatives, well known in the context of regular expressions, have recently been rediscovered to give a simplified explanation to parsers of context-free languages. We add derivatives as a novel first-class feature to a standard parser combinator language. First-class derivatives enable an inversion of the control flow, allowing to implement modular parsers for languages that previously required separate preprocessing steps or cross-cutting modifications of the parsers. We show that our framework offers new opportunities for reuse and supports a modular definition of interesting use cases of layout-sensitive parsing.}, + langid = {english} +} +% == BibTeX quality report for brachthauserParsingFirstclassDerivatives: +% Missing required field 'journal' +% Missing required field 'year' +% ? unused Library catalog ("Zotero") + @article{brausseCDCLstyleCalculusSolving2019, title = {A {{CDCL-style}} Calculus for Solving Non-Linear Constraints}, author = {Brauße, Franz and Korovin, Konstantin and Korovina, Margarita and Müller, Norbert Th}, @@ -401,7 +721,7 @@ @article{brausseCDCLstyleCalculusSolving2019 % ? Possibly abbreviated journal title arXiv:1905.09227 [cs] @misc{brightProgrammingLanguageIdeas2022, - title = {Programming {{Language Ideas That Work And Don}}'t {{Work}}}, + title = {Programming Language Ideas That Work and Don't Work}, year = {2022}, month = jun, publisher = {{Code Europe}}, @@ -411,10 +731,25 @@ @misc{brightProgrammingLanguageIdeas2022 collaborator = {Bright, Walter} } % == BibTeX quality report for brightProgrammingLanguageIdeas2022: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("YouTube") % ? unused Running time ("57:09") +@book{brooksMythicalManmonth1995, + title = {The Mythical Man-Month}, + author = {Brooks, Jr., Frederick P.}, + year = {1995}, + month = aug, + edition = {Anniversary}, + url = {http://archive.org/details/MythicalManMonth}, + urldate = {2023-12-30}, + abstract = {Few books on software project management have been as influential and timeless as~The Mythical Man-Month. With a blend of software engineering facts and thought-provoking opinions, Fred Brooks offers insight for anyone managing complex projects. 
These essays draw from his experience as project manager for the IBM System/360 computer family and then for OS/360, its massive software system. Now, 20 years after the initial publication of his book, Brooks has revisited his original ideas and added new thoughts and advice, both for readers already familiar with his work and for readers discovering it for the first time. ~ The added chapters contain (1) a crisp condensation of all the propositions asserted in the original book, including Brooks' central argument in~The Mythical Man-Month:~that large programming projects suffer management problems different from small ones due to the division of labor; that the conceptual integrity of the product is therefore critical; and that it is difficult but possible to achieve this unity; (2) Brooks' view of these propositions a generation later; (3) a reprint of his classic 1986 paper "No Silver Bullet"; and (4) today's thoughts on the 1986 assertion, "There will be no silver bullet within ten years."}, + langid = {english}, + keywords = {software} +} +% == BibTeX quality report for brooksMythicalManmonth1995: +% Missing required field 'publisher' +% ? unused Library catalog ("Internet Archive") + @article{bucciarelliGraphEasySets2016, title = {Graph Easy Sets of Mute Lambda Terms}, author = {Bucciarelli, A. and Carraro, A. and Favro, G. and Salibra, A.}, @@ -526,7 +861,7 @@ @phdthesis{byrdRelationalProgrammingMinikanren2009 % ? unused Type ("PhD") @article{caiDistillingRealCost2022, - title = {Distilling the {{Real Cost}} of {{Production Garbage Collectors}}}, + title = {Distilling the Real Cost of Production Garbage Collectors}, author = {Cai, Zixian and Blackburn, Stephen M and Bond, Michael D and Maas, Martin}, year = {2022}, journal = {IEEE International Symposium on Performance Analysis of Systems and Software}, @@ -536,11 +871,10 @@ @article{caiDistillingRealCost2022 langid = {english} } % == BibTeX quality report for caiDistillingRealCost2022: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("Zotero") -@article{castagnaCovarianceControvarianceFresh2020, - title = {Covariance and {{Controvariance}}: A Fresh Look at an Old Issue (a Primer in Advanced Type Systems for Learning Functional Programmers)}, +@article{castagnaCovarianceContravarianceFresh2020, + title = {Covariance and Contravariance: A Fresh Look at an Old Issue (a Primer in Advanced Type Systems for Learning Functional Programmers)}, shorttitle = {Covariance and {{Controvariance}}}, author = {Castagna, Giuseppe}, year = {2020}, @@ -557,7 +891,7 @@ @article{castagnaCovarianceControvarianceFresh2020 archiveprefix = {arxiv}, keywords = {Computer Science - Programming Languages} } -% == BibTeX quality report for castagnaCovarianceControvarianceFresh2020: +% == BibTeX quality report for castagnaCovarianceContravarianceFresh2020: % ? Possibly abbreviated journal title arXiv:1809.01427 [cs] @article{chenComputationalInterpretationCompact2021, @@ -581,6 +915,27 @@ @article{chenComputationalInterpretationCompact2021 % ? unused Journal abbreviation ("Proc. ACM Program. Lang.") % ? unused Library catalog ("DOI.org (Crossref)") +@article{clarkDepartmentDefenseSoftware2017, + title = {Department of {{Defense}} Software Factbook}, + author = {Clark, Bradford and Miller, Christopher and {etc.}}, + year = {2017}, + month = apr, + langid = {english} +} +% == BibTeX quality report for clarkDepartmentDefenseSoftware2017: +% Missing required field 'journal' +% ? 
unused Library catalog ("Zotero") + +@article{clarkDoDSoftwareFactbook2015, + title = {{{DoD}} Software Factbook}, + author = {Clark, Brad}, + year = {2015}, + langid = {english} +} +% == BibTeX quality report for clarkDoDSoftwareFactbook2015: +% Missing required field 'journal' +% ? unused Library catalog ("Zotero") + @incollection{clarkNegationFailure1978, title = {Negation as {{Failure}}}, booktitle = {Logic and {{Data Bases}}}, @@ -623,8 +978,8 @@ @inproceedings{clickPauselessGCAlgorithm2005 % ? unused Library catalog ("DOI.org (Crossref)") @book{coburnChangeFunctionWhy2006, - title = {The {{Change Function}}: {{Why Some Technologies Take Off}} and {{Others Crash}} and {{Burn}}}, - shorttitle = {The {{Change Function}}}, + title = {The Change Function: Why Some Technologies Take off and Others Crash and Burn}, + shorttitle = {The Change Function}, author = {Coburn, Pip}, year = {2006}, publisher = {{Penguin}}, @@ -635,7 +990,6 @@ @book{coburnChangeFunctionWhy2006 keywords = {Technology \& Engineering / Industrial Technology} } % == BibTeX quality report for coburnChangeFunctionWhy2006: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("Google Books") % ? unused Number of pages ("252") @@ -657,6 +1011,19 @@ @article{coniglioEqualityLinearLogic2002 % == BibTeX quality report for coniglioEqualityLinearLogic2002: % ? unused Library catalog ("JSTOR") +@phdthesis{cookDenotationalSemanticsInheritance1989, + title = {A Denotational Semantics of Inheritance}, + author = {Cook, William R}, + year = {1989}, + month = may, + url = {https://www.cs.utexas.edu/~wcook/papers/thesis/cook89.pdf}, + langid = {english}, + school = {Brown University} +} +% == BibTeX quality report for cookDenotationalSemanticsInheritance1989: +% ? unused Library catalog ("Zotero") +% ? unused Type ("PhD") + @inproceedings{cookInheritanceNotSubtyping1989, title = {Inheritance Is Not Subtyping}, booktitle = {Proceedings of the 17th {{ACM SIGPLAN-SIGACT}} Symposium on {{Principles}} of Programming Languages}, @@ -677,8 +1044,19 @@ @inproceedings{cookInheritanceNotSubtyping1989 % ? Unsure about the formatting of the booktitle % ? unused Library catalog ("ACM Digital Library") +@article{cookUnderstandingDataAbstraction2009, + title = {On Understanding Data Abstraction, Revisited}, + author = {Cook, William R}, + year = {2009}, + abstract = {In 1985 Luca Cardelli and Peter Wegner, my advisor, published an ACM Computing Surveys paper called “On understanding types, data abstraction, and polymorphism”. Their work kicked off a flood of research on semantics and type theory for object-oriented programming, which continues to this day. Despite 25 years of research, there is still widespread confusion about the two forms of data abstraction, abstract data types and objects. This essay attempts to explain the differences and also why the differences matter.}, + langid = {english} +} +% == BibTeX quality report for cookUnderstandingDataAbstraction2009: +% Missing required field 'journal' +% ? 
unused Library catalog ("Zotero") + @phdthesis{coppolaComplexityOptimalReduction2002, - title = {On the {{Complexity}} of {{Optimal Reduction}} of {{Functional Programming Languages}}}, + title = {On the Complexity of Optimal Reduction of Functional Programming Languages}, author = {Coppola, Paolo}, year = {2002}, month = feb, @@ -687,7 +1065,6 @@ @phdthesis{coppolaComplexityOptimalReduction2002 school = {Università degli Studi di Udine} } % == BibTeX quality report for coppolaComplexityOptimalReduction2002: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("Zotero") @phdthesis{corbynPracticalStaticMemory2020, @@ -737,6 +1114,19 @@ @article{crolardFormulaeastypesInterpretationSubtractive2004 % ? unused Journal abbreviation ("J Logic Computation") % ? unused Library catalog ("academic.oup.com") +@article{CrossTalkCostEstimation2005, + title = {{{CrossTalk}}: Cost Estimation}, + year = {2005}, + month = apr, + volume = {18}, + number = {4}, + url = {https://apps.dtic.mil/sti/pdfs/ADA487403.pdf}, + urldate = {2023-12-26} +} +% == BibTeX quality report for CrossTalkCostEstimation2005: +% Missing required field 'author' +% Missing required field 'journal' + @misc{dahlCommonBaseLanguage1970, title = {Common {{Base Language}}}, author = {Dahl, Ole-Johan and Myhrhaug, Bjørn and Nygaard, Kristen}, @@ -749,6 +1139,43 @@ @misc{dahlCommonBaseLanguage1970 % == BibTeX quality report for dahlCommonBaseLanguage1970: % ? Title looks like it was stored in title-case in Zotero +@inproceedings{danosStructureExponentialsUncovering1993, + title = {The Structure of Exponentials: Uncovering the Dynamics of Linear Logic Proofs}, + shorttitle = {The Structure of Exponentials}, + booktitle = {Proceedings of the {{Third Kurt Gödel Colloquium}} on {{Computational Logic}} and {{Proof Theory}}}, + author = {Danos, Vincent and Joinet, Jean-Baptiste and Schellinx, Harold}, + editor = {Gottlob, G. and Leitsch, A. and Mundici, D.}, + year = {1993}, + month = aug, + pages = {159--171}, + publisher = {{Springer-Verlag LNCS 348}}, + address = {{Brno, Czech Republic}}, + url = {https://eprints.illc.uva.nl/1334/}, + langid = {english} +} +% == BibTeX quality report for danosStructureExponentialsUncovering1993: +% ? unused Library catalog ("eprints.illc.uva.nl") + +@article{davidsonDesignApplicationRetargetable1980, + title = {The Design and Application of a Retargetable Peephole Optimizer}, + author = {Davidson, Jack W. and Fraser, Christopher W.}, + year = {1980}, + month = apr, + journal = {ACM Transactions on Programming Languages and Systems}, + volume = {2}, + number = {2}, + pages = {191--202}, + issn = {0164-0925, 1558-4593}, + doi = {10.1145/357094.357098}, + url = {https://dl.acm.org/doi/10.1145/357094.357098}, + urldate = {2021-07-30}, + abstract = {Peephole optimizers improve object code by replacing certain sequences of instructions with better sequences. This paper describes PO, a peephole optimizer that uses a symbolic machine description to simulate pairs of adjacent instructions, replacing them, where possible, with an equivalent sing!e instruction. As a result of this organization, PO is machine independent and can be described formally and concisely: when PO is finished, no instruction, and no pair of adjacent instructions, can be replaced with a cheaper single instruction that has the same effect. 
This thoroughness allows PO to relieve code generators of much case analysis; for example, they might produce only load/add-register sequences and rely on PO to, where possible, discard them in favor of add-memory, add-immediate, or increment instructions. Experiments indicate that naive code generators can give good code if used with PO.}, + langid = {english} +} +% == BibTeX quality report for davidsonDesignApplicationRetargetable1980: +% ? unused Journal abbreviation ("ACM Trans. Program. Lang. Syst.") +% ? unused Library catalog ("DOI.org (Crossref)") + @article{delawareNarcissusCorrectbyconstructionDerivation2019, title = {Narcissus: Correct-by-Construction Derivation of Decoders and Encoders from Binary Formats}, shorttitle = {Narcissus}, @@ -769,6 +1196,22 @@ @article{delawareNarcissusCorrectbyconstructionDerivation2019 % ? unused Journal abbreviation ("Proc. ACM Program. Lang.") % ? unused Library catalog ("DOI.org (Crossref)") +@inproceedings{deloreyProgrammingLanguagesAffect2007, + title = {Do Programming Languages Affect Productivity? {{A}} Case Study Using Data from Open Source Projects}, + shorttitle = {Do Programming Languages Affect Productivity?}, + booktitle = {First {{International Workshop}} on {{Emerging Trends}} in {{FLOSS Research}} and {{Development}}, {{FLOSS}}'07}, + author = {Delorey, Daniel and Knutson, Charles and Chun, Scott}, + year = {2007}, + month = jun, + pages = {8--8}, + doi = {10.1109/FLOSS.2007.5}, + abstract = {Brooks and others long ago suggested that on average computer programmers write the same number of lines of code in a given amount of time regardless of the programming language used. We examine data collected from the CVS repositories of 9,999 open source projects hosted on SourceForge.net to test this assumption for 10 of the most popular programming languages in use in the open source community. We find that for 24 of the 45 pairwise comparisons, the programming language is a significant factor in determining the rate at which source code is written, even after accounting for variations between programmers and projects.}, + isbn = {978-0-7695-2961-5} +} +% == BibTeX quality report for deloreyProgrammingLanguagesAffect2007: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("ResearchGate") + @article{dershowitzRewriteRewriteRewrite1991, title = {Rewrite, Rewrite, Rewrite, Rewrite, Rewrite, …}, author = {Dershowitz, Nachum and Kaplan, Stéphane and Plaisted, David A.}, @@ -971,6 +1414,27 @@ @article{dyvbigMonadicFrameworkDelimited2007 % ? unused Journal abbreviation ("J. Funct. Program.") % ? unused Library catalog ("November 2007") +@incollection{economopoulosFasterScannerlessGLR2009, + title = {Faster Scannerless {{GLR}} Parsing}, + booktitle = {Compiler {{Construction}}}, + author = {Economopoulos, Giorgios and Klint, Paul and Vinju, Jurgen}, + editor = {{de Moor}, Oege and Schwartzbach, Michael I.}, + year = {2009}, + volume = {5501}, + pages = {126--141}, + publisher = {{Springer Berlin Heidelberg}}, + address = {{Berlin, Heidelberg}}, + doi = {10.1007/978-3-642-00722-4_10}, + url = {http://link.springer.com/10.1007/978-3-642-00722-4_10}, + urldate = {2020-06-15}, + abstract = {Analysis and renovation of large software portfolios requires syntax analysis of multiple, usually embedded, languages and this is beyond the capabilities of many standard parsing techniques. 
The traditional separation between lexer and parser falls short due to the limitations of tokenization based on regular expressions when handling multiple lexical grammars. In such cases scannerless parsing provides a viable solution. It uses the power of context-free grammars to be able to deal with a wide variety of issues in parsing lexical syntax. However, it comes at the price of less efficiency. The structure of tokens is obtained using a more powerful but more time and memory intensive parsing algorithm. Scannerless grammars are also more non-deterministic than their tokenized counterparts, increasing the burden on the parsing algorithm even further.}, + isbn = {978-3-642-00721-7 978-3-642-00722-4}, + langid = {english} +} +% == BibTeX quality report for economopoulosFasterScannerlessGLR2009: +% ? unused Library catalog ("DOI.org (Crossref)") +% ? unused Series title ("Lecture Notes in Computer Science") + @article{eggerEnrichedEffectCalculus2014, title = {The Enriched Effect Calculus: Syntax and Semantics}, shorttitle = {The Enriched Effect Calculus}, @@ -1011,6 +1475,48 @@ @article{elemamConfoundingEffectClass2001 % ? unused Journal abbreviation ("IIEEE Trans. Software Eng.") % ? unused Library catalog ("DOI.org (Crossref)") +@inproceedings{elizarovKotlinCoroutinesDesign2021, + title = {Kotlin Coroutines: Design and Implementation}, + shorttitle = {Kotlin Coroutines}, + booktitle = {Proceedings of the 2021 {{ACM SIGPLAN International Symposium}} on {{New Ideas}}, {{New Paradigms}}, and {{Reflections}} on {{Programming}} and {{Software}}}, + author = {Elizarov, Roman and Belyaev, Mikhail and Akhin, Marat and Usmanov, Ilmir}, + year = {2021}, + month = oct, + series = {Onward! 2021}, + pages = {68--84}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + doi = {10.1145/3486607.3486751}, + url = {https://doi.org/10.1145/3486607.3486751}, + urldate = {2022-11-29}, + abstract = {Asynchronous programming is having its “renaissance” moment in recent years. Created in the 1980s, it was in use for quite some time, but with the advent of multi-core processors, it has been sidestepped by multi-threaded programming, which was (for a long time) the de facto standard of performing concurrent computations. However, since the 2000s, more and more programming languages have begun to include the support for asynchronous programming, some built around asynchronicity from the start, others including it later in their evolution. In this paper, we explore the design and implementation of asynchronous programming in Kotlin, a multiplatform programming language from JetBrains, which uses coroutines for asynchronicity. Kotlin provides a compact built-in API for coroutine support, thus giving a lot of implementation freedom to the developer; this flexibility allows to transparently support different flavours of asynchronous programming within the same language. We overview existing approaches to asynchronous programming, zoom in and talk about coroutines in detail, and describe how they are used in Kotlin as the basis for asynchronous computations. 
Along the way, we show the flexibility of Kotlin coroutines, highlight several existing problems with asynchronicity, how they are fixed or worked-around in Kotlin, and also mention future directions asynchronous programming might explore.}, + isbn = {978-1-4503-9110-8}, + keywords = {asynchronous programming,continuations,coroutines,Kotlin,language design} +} +% == BibTeX quality report for elizarovKotlinCoroutinesDesign2021: +% ? unused Library catalog ("ACM Digital Library") + +@article{endrullisCoinductiveFoundationsInfinitary2018, + ids = {endrullisCOINDUCTIVEFOUNDATIONSINFINITARY}, + title = {Coinductive Foundations of Infinitary Rewriting and Infinitary Equational Logic}, + author = {Endrullis, Jörg and Hansen, Helle Hvid and Hendriks, Dimitri and Polonsky, Andrew and Silva, Alexandra}, + year = {2018}, + month = jan, + eprint = {1706.00677}, + primaryclass = {cs}, + pages = {44}, + doi = {10.23638/LMCS-14(1:3)2018}, + url = {http://arxiv.org/abs/1706.00677}, + urldate = {2022-10-23}, + abstract = {We present a coinductive framework for defining and reasoning about the infinitary analogues of equational logic and term rewriting in a uniform, coinductive way. The setup captures rewrite sequences of arbitrary ordinal length, but it has neither the need for ordinals nor for metric convergence. This makes the framework especially suitable for formalizations in theorem provers.}, + archiveprefix = {arxiv}, + langid = {english}, + keywords = {Computer Science - Logic in Computer Science}, + note = {Comment: arXiv admin note: substantial text overlap with arXiv:1505.01128, arXiv:1306.6224} +} +% == BibTeX quality report for endrullisCoinductiveFoundationsInfinitary2018: +% Missing required field 'journal' + @article{endrullisCoinductiveTreatmentInfinitary2013, title = {A Coinductive Treatment of Infinitary Term Rewriting}, author = {Endrullis, J. and Hansen, H. H. and Hendriks, D. and Polonsky, A. and Silva, A.}, @@ -1046,6 +1552,28 @@ @article{endrullisHighlightsInfinitaryRewriting2012 % == BibTeX quality report for endrullisHighlightsInfinitaryRewriting2012: % ? unused Library catalog ("ScienceDirect") +@article{endrullisInfinitaryTermRewriting2014, + title = {Infinitary Term Rewriting for Weakly Orthogonal Systems: Properties and Counterexamples}, + shorttitle = {Infinitary Term Rewriting for Weakly Orthogonal Systems}, + author = {Endrullis, Jörg and Grabmayer, Clemens and Hendriks, Dimitri and Klop, Jan Willem and Oostrom, Vincent}, + editor = {Lynch, Christopher}, + year = {2014}, + month = jun, + journal = {Logical Methods in Computer Science}, + volume = {10}, + number = {2}, + pages = {7}, + issn = {18605974}, + doi = {10.2168/LMCS-10(2:7)2014}, + url = {https://lmcs.episciences.org/752}, + urldate = {2022-07-10}, + abstract = {We present some contributions to the theory of infinitary rewriting for weakly orthogonal term rewrite systems, in which critical pairs may occur provided they are trivial. We show that the infinitary unique normal form property (UN∞) fails by an example of a weakly orthogonal TRS with two collapsing rules. By translating this example, we show that UN∞ also fails for the infinitary λβη-calculus.}, + langid = {english} +} +% == BibTeX quality report for endrullisInfinitaryTermRewriting2014: +% ? unused Journal abbreviation ("Log.Meth.Comput.Sci.") +% ? 
unused Library catalog ("DOI.org (Crossref)") + @incollection{erdwegLayoutsensitiveGeneralizedParsing2013, title = {Layout-Sensitive Generalized Parsing}, booktitle = {Software {{Language Engineering}}}, @@ -1067,27 +1595,29 @@ @incollection{erdwegLayoutsensitiveGeneralizedParsing2013 % ? unused Library catalog ("DOI.org (Crossref)") % ? unused Series title ("Lecture Notes in Computer Science") -@inproceedings{erdwegSoundOptimalIncremental2015, +@inproceedings{erdwegSoundOptimalIncremental2015b, + ids = {erdwegSoundOptimalIncremental2015,erdwegSoundOptimalIncremental2015a}, title = {A Sound and Optimal Incremental Build System with Dynamic Dependencies}, - booktitle = {Proceedings of the 2015 {{ACM SIGPLAN International Conference}} on {{Object-Oriented Programming}}, {{Systems}}, {{Languages}}, and {{Applications}} - {{OOPSLA}} 2015}, + booktitle = {Proceedings of the 2015 {{ACM SIGPLAN International Conference}} on {{Object-Oriented Programming}}, {{Systems}}, {{Languages}}, and {{Applications}}}, author = {Erdweg, Sebastian and Lichter, Moritz and Weiel, Manuel}, year = {2015}, + month = oct, pages = {89--106}, - publisher = {{ACM Press}}, - address = {{Pittsburgh, PA, USA}}, + publisher = {{ACM}}, + address = {{Pittsburgh PA USA}}, doi = {10.1145/2814270.2814316}, - url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.725.6063&rep=rep1&type=pdf}, - urldate = {2020-06-15}, + url = {https://dl.acm.org/doi/10.1145/2814270.2814316}, + urldate = {2023-12-08}, abstract = {Build systems are used in all but the smallest software projects to invoke the right build tools on the right files in the right order. A build system must be sound (after a build, generated files consistently reflect the latest source files) and efficient (recheck and rebuild as few build units as possible). Contemporary build systems provide limited efficiency because they lack support for expressing finegrained file dependencies. We present a build system called pluto that supports the definition of reusable, parameterized, interconnected builders. When run, a builder notifies the build system about dynamically required and produced files as well as about other builders whose results are needed. To support fine-grained file dependencies, we generalize the traditional notion of time stamps to allow builders to declare their actual requirements on a file’s content. pluto collects the requirements and products of a builder with their stamps in a build summary. This enables pluto to provides provably sound and optimal incremental rebuilding. To support dynamic dependencies, our rebuild algorithm interleaves dependency analysis and builder execution and enforces invariants on the dependency graph through a dynamic analysis. We have developed pluto as a Java API and used it to implement more than 25 builders. We describe our experience with migrating a larger Ant build script to pluto and compare the respective build times.}, isbn = {978-1-4503-3689-5}, langid = {english} } -% == BibTeX quality report for erdwegSoundOptimalIncremental2015: -% ? unused Conference name ("the 2015 ACM SIGPLAN International Conference") +% == BibTeX quality report for erdwegSoundOptimalIncremental2015b: +% ? unused Conference name ("SPLASH '15: Conference on Systems, Programming, Languages, and Applications: Software for Humanity") % ? 
unused Library catalog ("DOI.org (Crossref)") @phdthesis{erkokValueRecursionMonadic2002, - title = {Value {{Recursion}} in {{Monadic Computations}}}, + title = {Value Recursion in Monadic Computations}, author = {Erkok, Levent}, year = {2002}, month = oct, @@ -1096,13 +1626,12 @@ @phdthesis{erkokValueRecursionMonadic2002 school = {Oregon Health and Science University} } % == BibTeX quality report for erkokValueRecursionMonadic2002: -% ? Title looks like it was stored in title-case in Zotero % ? unused Archive ("10.6083/M4SQ8XBW") % ? unused Library catalog ("Zotero") % ? unused Number of pages ("170") @mastersthesis{filinskiDeclarativeContinuationsCategorical1989, - title = {Declarative {{Continuations}} and {{Categorical Duality}}}, + title = {Declarative Continuations and Categorical Duality}, author = {Filinski, Andrzej}, year = {1989}, month = aug, @@ -1111,7 +1640,6 @@ @mastersthesis{filinskiDeclarativeContinuationsCategorical1989 school = {University of Copenhagen} } % == BibTeX quality report for filinskiDeclarativeContinuationsCategorical1989: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("CiteSeer") % ? unused Type ("Master's") @@ -1135,9 +1663,45 @@ @inproceedings{filinskiLinearContinuations1992 % ? Unsure about the formatting of the booktitle % ? unused Library catalog ("ACM Digital Library") -@article{forsterExpressivePowerUserDefined2017, - title = {On the {{Expressive Power}} of {{User-Defined Effects}}: {{Effect Handlers}}, {{Monadic Reflection}}, {{Delimited Control}}}, - shorttitle = {On the {{Expressive Power}} of {{User-Defined Effects}}}, +@techreport{fisherCommonProgrammingLanguage1976, + title = {A Common Programming Language for the {{Department}} of {{Defense--Background}} and Technical Requirements}, + author = {Fisher, David A.}, + year = {1976}, + month = jun, + url = {https://apps.dtic.mil/sti/citations/ADA028297}, + urldate = {2024-01-14}, + abstract = {This paper presents the set of characteristics needed for a common programming language of embedded computer systems applications in the DoD. In addition, it describes the background, purpose, and organization of the DoD Common Programming Language efforts. It reviews the issues considered in developing the needed language characteristics, explains how certain trade-offs and potential conflicts were resolved, and discusses the criteria used to ensure that any language satisfying the criteria will be suitable for embedded computer applications, will not aggravate existing software problems, and will be suitable for standardization.}, + chapter = {Technical Reports}, + langid = {english} +} +% == BibTeX quality report for fisherCommonProgrammingLanguage1976: +% Missing required field 'institution' +% ? unused Library catalog ("apps.dtic.mil") + +@article{flanaganEssenceCompilingContinuations1993, + ids = {flanaganEssenceCompilingContinuations}, + title = {The Essence of Compiling with Continuations}, + author = {Flanagan, Cormac and Sabry, Amr and Duba, Bruce F. 
and Felleisen, Matthias},
+  year = {1993},
+  month = jun,
+  journal = {ACM SIGPLAN Notices},
+  volume = {28},
+  number = {6},
+  pages = {237--247},
+  issn = {0362-1340},
+  doi = {10.1145/173262.155113},
+  url = {https://dl.acm.org/doi/10.1145/173262.155113},
+  urldate = {2023-05-03},
+  abstract = {In order to simplify the compilation process, many compilers for higher-order languages use the continuation-passing style (CPS) transformation in a first phase to generate an intermediate representation of the source program. The salient aspect of this intermediate form is that all procedures take an argument that represents the rest of the computation (the “continuation”). Since the naïve CPS transformation considerably increases the size of programs, CPS compilers perform reductions to produce a more compact intermediate representation. Although often implemented as a part of the CPS transformation, this step is conceptually a second phase. Finally, code generators for typical CPS compilers treat continuations specially in order to optimize the interpretation of continuation parameters. A thorough analysis of the abstract machine for CPS terms show that the actions of the code generator invert the naïve CPS translation step. Put differently, the combined effect of the three phases is equivalent to a source-to-source transformation that simulates the compaction phase. Thus, fully developed CPS compilers do not need to employ the CPS transformation but can achieve the same results with a simple source-level transformation.},
+  langid = {english}
+}
+% == BibTeX quality report for flanaganEssenceCompilingContinuations1993:
+% ? unused Journal abbreviation ("SIGPLAN Not.")
+% ? unused Library catalog ("ACM Digital Library")
+
+@article{forsterExpressivePowerUserdefined2017,
+  title = {On the Expressive Power of User-Defined Effects: {{Effect}} Handlers, Monadic Reflection, Delimited Control},
+  shorttitle = {On the Expressive Power of User-Defined Effects},
   author = {Forster, Yannick and Kammar, Ohad and Lindley, Sam and Pretnar, Matija},
   year = {2017},
   month = feb,
@@ -1151,9 +1715,8 @@ @article{forsterExpressivePowerUserDefined2017
   langid = {english},
   keywords = {Computer Science - Logic in Computer Science,Computer Science - Programming Languages}
 }
-% == BibTeX quality report for forsterExpressivePowerUserDefined2017:
+% == BibTeX quality report for forsterExpressivePowerUserdefined2017:
 % ? Possibly abbreviated journal title arXiv:1610.09161 [cs]
-% ? Title looks like it was stored in title-case in Zotero
 
 @incollection{forsterQuineNewFoundations2019,
   title = {Quine’s {{New Foundations}}},
@@ -1171,14 +1734,49 @@ @incollection{forsterQuineNewFoundations2019
 % ? Title looks like it was stored in title-case in Zotero
 % ? unused Library catalog ("Stanford Encyclopedia of Philosophy")
 
-@article{ghilezanStrongNormalizationTypability1996,
-  title = {Strong {{Normalization}} and {{Typability}} with {{Intersection Types}}},
-  author = {Ghilezan, Silvia},
-  year = {1996},
-  month = jan,
-  journal = {Notre Dame Journal of Formal Logic},
-  volume = {37},
-  number = {1},
+@misc{furiaApplyingBayesianAnalysis2021,
+  title = {Applying {{Bayesian}} Analysis Guidelines to Empirical Software Engineering Data: The Case of Programming Languages and Code Quality},
+  shorttitle = {Applying Bayesian Analysis Guidelines to Empirical Software Engineering Data},
+  author = {Furia, Carlo A. 
and Torkar, Richard and Feldt, Robert}, + year = {2021}, + month = jul, + number = {arXiv:2101.12591}, + eprint = {2101.12591}, + primaryclass = {cs}, + publisher = {{arXiv}}, + url = {http://arxiv.org/abs/2101.12591}, + urldate = {2023-12-27}, + abstract = {Statistical analysis is the tool of choice to turn data into information, and then information into empirical knowledge. The process that goes from data to knowledge is, however, long, uncertain, and riddled with pitfalls. To be valid, it should be supported by detailed, rigorous guidelines, which help ferret out issues with the data or model, and lead to qualified results that strike a reasonable balance between generality and practical relevance. Such guidelines are being developed by statisticians to support the latest techniques for Bayesian data analysis. In this article, we frame these guidelines in a way that is apt to empirical research in software engineering.}, + archiveprefix = {arxiv}, + langid = {english}, + keywords = {Computer Science - Software Engineering} +} + +@book{gammaDesignPatternsElements1994, + title = {Design Patterns: Elements of Reusable Object-Oriented Software}, + shorttitle = {Design Patterns}, + author = {Gamma, Erich and Helm, Richard and Johnson, Ralph and Vlissides, John and Booch, Grady}, + year = {1994}, + month = oct, + edition = {1st edition}, + publisher = {{Addison-Wesley Professional}}, + address = {{Reading, Mass}}, + abstract = {Capturing a wealth of experience about the design of object-oriented software, four top-notch designers present a catalog of simple and succinct solutions to commonly occurring design problems. Previously undocumented, these 23 patterns allow designers to create more flexible, elegant, and ultimately reusable designs without having to rediscover the design solutions themselves. The authors begin by describing what patterns are and how they can help you design object-oriented software. They then go on to systematically name, explain, evaluate, and catalog recurring designs in object-oriented systems. With Design Patterns as your guide, you will learn how these important patterns fit into the software development process, and how you can leverage them to solve your own design problems most efficiently. Each pattern describes the circumstances in which it is applicable, when it can be applied in view of other design constraints, and the consequences and trade-offs of using the pattern within a larger design. All patterns are compiled from real systems and are based on real-world examples. Each pattern also includes code that demonstrates how it may be implemented in object-oriented programming languages like C++ or Smalltalk.}, + isbn = {978-0-201-63361-0}, + langid = {english} +} +% == BibTeX quality report for gammaDesignPatternsElements1994: +% ? unused Library catalog ("Amazon") +% ? unused Number of pages ("416") + +@article{ghilezanStrongNormalizationTypability1996, + title = {Strong Normalization and Typability with Intersection Types}, + author = {Ghilezan, Silvia}, + year = {1996}, + month = jan, + journal = {Notre Dame Journal of Formal Logic}, + volume = {37}, + number = {1}, pages = {44--52}, publisher = {{Duke University Press}}, issn = {0029-4527, 1939-0726}, @@ -1189,7 +1787,6 @@ @article{ghilezanStrongNormalizationTypability1996 keywords = {03B15,03B40,03B70,68Q55} } % == BibTeX quality report for ghilezanStrongNormalizationTypability1996: -% ? Title looks like it was stored in title-case in Zotero % ? 
unused Library catalog ("Project Euclid") @inproceedings{girardGeometryInteraction1989, @@ -1231,6 +1828,47 @@ @article{girardLocusSolumRules2001 langid = {english} } +@article{goelDesignImplementationUse2019, + title = {On the Design, Implementation, and Use of Laziness in {{R}}}, + author = {Goel, Aviral and Vitek, Jan}, + year = {2019}, + month = oct, + journal = {Proceedings of the ACM on Programming Languages}, + volume = {3}, + number = {OOPSLA}, + eprint = {1909.08958}, + primaryclass = {cs}, + pages = {1--27}, + issn = {2475-1421}, + doi = {10.1145/3360579}, + url = {http://arxiv.org/abs/1909.08958}, + urldate = {2022-10-25}, + abstract = {The R programming language has been lazy for over twenty-five years. This paper presents a review of the design and implementation of call-by-need in R, and a data-driven study of how generations of programmers have put laziness to use in their code. We analyze 16,707 packages and observe the creation of 270.9 B promises. Our data suggests that there is little supporting evidence to assert that programmers use laziness to avoid unnecessary computation or to operate over infinite data structures. For the most part R code appears to have been written without reliance on, and in many cases even knowledge of, delayed argument evaluation. The only significant exception is a small number of packages which leverage call-by-need for meta-programming. CCS Concepts: • General and reference → Empirical studies; • Software and its engineering → General programming languages; Scripting languages; Semantics.}, + archiveprefix = {arxiv}, + langid = {english}, + keywords = {Computer Science - Programming Languages,D.3}, + note = {Comment: 27 pages, 4 tables, 21 figures} +} +% == BibTeX quality report for goelDesignImplementationUse2019: +% ? unused Journal abbreviation ("Proc. ACM Program. Lang.") + +@book{graverCostReportingElements1977, + title = {Cost Reporting Elements and Activity Cost Tradeoffs for Defense System Software. {{Volume I}}. {{Study}} Results.}, + shorttitle = {{{DTIC ADA053020}}}, + author = {Graver, C.A.}, + year = {1977}, + month = may, + url = {http://archive.org/details/DTIC_ADA053020}, + urldate = {2023-12-30}, + abstract = {In April 1976, General Research Corporation (GRC) began a study of 'Life-Cycle Costing of Major Defense System Software and Computer Resources,' Contract F19628- 76-C-0180. The purpose was to assist Air Force Program Offices and staff agencies in estimating, reporting and controlling the life-cycle costs of software. The study was performed under direction of the Electronic Systems Division (AFSC), Computer Systems Engineering Office (TOI).}, + langid = {english}, + keywords = {*COMPUTER PROGRAMS,*COSTS,*DEFENSE SYSTEMS,DTIC Archive,ESTIMATES,GENERAL RESEARCH CORP SANTA BARBARA CALIF,{Graver,C A},LIFE CYCLE COSTS,MAINTENANCE,MANHOURS,SIZES(DIMENSIONS)} +} +% == BibTeX quality report for graverCostReportingElements1977: +% Missing required field 'publisher' +% ? unused Library catalog ("Internet Archive") +% ? unused Number of pages ("303") + @misc{gravgaardElasticTabstopsBetter, title = {Elastic Tabstops - a Better Way to Indent and Align Code}, author = {Gravgaard, Nick}, @@ -1240,6 +1878,34 @@ @misc{gravgaardElasticTabstopsBetter langid = {english} } +@article{grzywaczDoesAmountInformation2022, + title = {Does Amount of Information Support Aesthetic Values?}, + author = {Grzywacz, Norberto M. 
and Aleem, Hassan}, + year = {2022}, + journal = {Frontiers in Neuroscience}, + volume = {16}, + issn = {1662-453X}, + url = {https://www.frontiersin.org/articles/10.3389/fnins.2022.805658}, + urldate = {2024-01-08}, + abstract = {Obtaining information from the world is important for survival. The brain, therefore, has special mechanisms to extract as much information as possible from sensory stimuli. Hence, given its importance, the amount of available information may underlie aesthetic values. Such information-based aesthetic values would be significant because they would compete with others to drive decision-making. In this article, we ask, “What is the evidence that amount of information support aesthetic values?” An important concept in the measurement of informational volume is entropy. Research on aesthetic values has thus used Shannon entropy to evaluate the contribution of quantity of information. We review here the concepts of information and aesthetic values, and research on the visual and auditory systems to probe whether the brain uses entropy or other relevant measures, specially, Fisher information, in aesthetic decisions. We conclude that information measures contribute to these decisions in two ways: first, the absolute quantity of information can modulate aesthetic preferences for certain sensory patterns. However, the preference for volume of information is highly individualized, with information-measures competing with organizing principles, such as rhythm and symmetry. In addition, people tend to be resistant to too much entropy, but not necessarily, high amounts of Fisher information. We show that this resistance may stem in part from the distribution of amount of information in natural sensory stimuli. Second, the measurement of entropic-like quantities over time reveal that they can modulate aesthetic decisions by varying degrees of surprise given temporally integrated expectations. We propose that amount of information underpins complex aesthetic values, possibly informing the brain on the allocation of resources or the situational appropriateness of some cognitive models.} +} +% == BibTeX quality report for grzywaczDoesAmountInformation2022: +% ? unused Library catalog ("Frontiers") + +@article{gucluturkDecomposingComplexityPreferences2019, + title = {Decomposing Complexity Preferences for Music}, + author = {Güçlütürk, Yaǧmur and {van Lier}, Rob}, + year = {2019}, + journal = {Frontiers in Psychology}, + volume = {10}, + issn = {1664-1078}, + url = {https://www.frontiersin.org/articles/10.3389/fpsyg.2019.00674}, + urldate = {2024-01-08}, + abstract = {Recently, we demonstrated complexity as a major factor for explaining individual differences in visual preferences for abstract digital art. We have shown that participants could best be separated into two groups based on their liking ratings for abstract digital art comprising geometric patterns: one group with a preference for complex visual patterns and another group with a preference for simple visual patterns. In the present study, building up on these results, we extended our investigations for complexity preferences from highly controlled visual stimuli to ecologically valid stimuli in the auditory modality. Similar to visual preferences, we showed that music preferences are highly influenced by stimulus complexity. We demonstrated this by clustering a large number of participants based on their liking ratings for song excerpts from various musical genres. 
Our results show that, based on their liking ratings, participants can best be separated into two groups: one group with a preference for more complex songs and another group with a preference for simpler songs. Finally, we considered various demographic and personal characteristics to explore differences between the groups, and reported that at least for the current data set age and gender to be significant factors separating the two groups.} +} +% == BibTeX quality report for gucluturkDecomposingComplexityPreferences2019: +% ? unused Library catalog ("Frontiers") + @inproceedings{guerriniOptimalImplementationInefficient2017, title = {Is the Optimal Implementation Inefficient? {{Elementarily}} Not.}, shorttitle = {Is the Optimal Implementation Inefficient?}, @@ -1276,6 +1942,28 @@ @phdthesis{guerriniTheoreticalPracticalIssues1996 % == BibTeX quality report for guerriniTheoreticalPracticalIssues1996: % ? unused Library catalog ("Google Scholar") +@inproceedings{guyerFreeMeStaticAnalysis2006, + ids = {guyerFreeMeStaticAnalysis}, + title = {Free-{{Me}}: A Static Analysis for Automatic Individual Object Reclamation}, + shorttitle = {Free-{{Me}}}, + booktitle = {Proceedings of the 27th {{ACM SIGPLAN Conference}} on {{Programming Language Design}} and {{Implementation}}}, + author = {Guyer, Samuel Z. and McKinley, Kathryn S. and Frampton, Daniel}, + year = {2006}, + month = jun, + series = {{{PLDI}} '06}, + pages = {364--375}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + doi = {10.1145/1133981.1134024}, + url = {https://doi.org/10.1145/1133981.1134024}, + urldate = {2023-07-18}, + abstract = {Garbage collection has proven benefits, including fewer memory related errors and reduced programmer effort. Garbage collection, however, trades space for time. It reclaims memory only when it is invoked: invoking it more frequently reclaims memory quickly, but incurs a significant cost; invoking it less frequently fills memory with dead objects. In contrast, explicit memory management provides prompt low cost reclamation, but at the expense of programmer effort.This work comes closer to the best of both worlds by adding novel compiler and runtime support for compiler inserted frees to a garbage-collected system. The compiler's free-me analysis identifies when objects become unreachable and inserts calls to free. It combines a lightweight pointer analysis with liveness information that detects when short-lived objects die. Our approach differs from stack and region allocation in two crucial ways. First, it frees objects incrementally exactly when they become unreachable, instead of based on program scope. Second, our system does not require allocation-site lifetime homogeneity, and thus frees objects on some paths and not on others. It also handles common patterns: it can free objects in loops and objects created by factory methods.We evaluate free() variations for free-list and bump-pointer allocators. Explicit freeing improves performance by promptly reclaiming objects and reducing collection load. Compared to marksweep alone, free-me cuts total time by 22\% on average, collector time by 50\% to 70\%, and allows programs to run in 17\% less memory. 
This combination retains the software engineering benefits of garbage collection while increasing space efficiency and improving performance, and thus is especially appealing for real-time and space constrained systems.},
+  isbn = {978-1-59593-320-1},
+  keywords = {adaptive,compiler-assisted,copying,generational,liveness,locality,mark-sweep,pointer analysis}
+}
+% == BibTeX quality report for guyerFreeMeStaticAnalysis2006:
+% ? unused Library catalog ("ACM Digital Library")
+
 @article{hackettCallbyneedClairvoyantCallbyvalue2019,
   title = {Call-by-Need Is Clairvoyant Call-by-Value},
   author = {Hackett, Jennifer and Hutton, Graham},
@@ -1319,15 +2007,13 @@ @article{hemannFrameworkExtendingMicrokanren2017
 % ? unused Journal abbreviation ("Electron. Proc. Theor. Comput. Sci.")
 
 @inproceedings{hemannMicroKanrenMinimalFunctional2013,
-  title = {{{microKanren}}: {{A Minimal Functional Core}} for {{Relational Programming}}.},
+  title = {{{microKanren}}: A Minimal Functional Core for Relational Programming.},
   booktitle = {Proceedings of the 2013 {{Workshop}} on {{Scheme}} and {{Functional Programming}}},
   author = {Hemann, Jason and Friedman, Daniel P. .},
   year = {2013},
   url = {http://webyrd.net/scheme-2013/papers/HemannMuKanren2013.pdf},
   urldate = {2022-07-14}
 }
-% == BibTeX quality report for hemannMicroKanrenMinimalFunctional2013:
-% ? Title looks like it was stored in title-case in Zotero
 
 @inproceedings{hendersonDeterminismAnalysisMercury1996,
   ids = {hendersonDeterminismAnalysisMercury},
@@ -1343,7 +2029,7 @@ @inproceedings{hendersonDeterminismAnalysisMercury1996
 % ? unused Library catalog ("CiteSeer")
 
 @article{hirokawaDecreasingDiagramsRelative2009,
-  title = {Decreasing {{Diagrams}} and {{Relative Termination}}},
+  title = {Decreasing Diagrams and Relative Termination},
   author = {Hirokawa, Nao and Middeldorp, Aart},
   year = {2009},
   month = oct,
@@ -1359,10 +2045,9 @@ @article{hirokawaDecreasingDiagramsRelative2009
 }
 % == BibTeX quality report for hirokawaDecreasingDiagramsRelative2009:
 % ? Possibly abbreviated journal title arXiv:0910.2853 [cs]
-% ? Title looks like it was stored in title-case in Zotero
 
 @article{hirokawaStrategiesDecreasinglyConfluent2011,
-  title = {Strategies for {{Decreasingly Confluent Rewrite Systems}}},
+  title = {Strategies for Decreasingly Confluent Rewrite Systems},
   author = {Hirokawa, Nao and Middeldorp, Aart},
   year = {2011},
   journal = {Reduction Strategies in Rewriting and Programming},
@@ -1370,7 +2055,6 @@ @article{hirokawaStrategiesDecreasinglyConfluent2011
   url = {http://elp.webs.upv.es/workshops/wrs2011/pre-proceedings.pdf#page=31}
 }
 % == BibTeX quality report for hirokawaStrategiesDecreasinglyConfluent2011:
-% ? Title looks like it was stored in title-case in Zotero
 % ? unused Library catalog ("Google Scholar")
 
 @article{holmesAPLProgrammingLanguage1978,
@@ -1425,8 +2109,56 @@ @inproceedings{hudakAggregateUpdateProblem1985
 % ? unused Conference name ("the 12th ACM SIGACT-SIGPLAN symposium")
 % ? unused Library catalog ("DOI.org (Crossref)")
 
+@article{hudakExpressivenessPurelyFunctional1989,
+  title = {On the {{Expressiveness}} of {{Purely Functional I}}/{{O Systems}}},
+  author = {Hudak, Paul and Sundaresh, Raman S},
+  year = {1989},
+  month = mar,
+  pages = {28},
+  abstract = {Functional programming languages have traditionally lacked complete, flexible, and yet referentially transparent I/O mechanisms. Previous proposals for I/O have used either the notion of lazy streams or continuations to model interaction with the external world. 
We discuss and generalize these models and introduce a third, which we call the systems model, to perform I/O. The expressiveness of the styles are compared by means of an example. We then give a series of surprisingly simple translations between the three models, demonstrating that they are not as different as their programming styles suggest, and implying that the styles could be mixed within a single program.},
+  langid = {english}
+}
+% == BibTeX quality report for hudakExpressivenessPurelyFunctional1989:
+% Missing required field 'journal'
+% ? Title looks like it was stored in title-case in Zotero
+% ? unused Library catalog ("Zotero")
+
+@techreport{hudakHaskellVsAda1994,
+  title = {Haskell vs. {{Ada}} vs. {{C}}++ vs. {{Awk}} vs. ... {{An}} Experiment in Software Prototyping Productivity},
+  author = {Hudak, Paul and Jones, Mark P},
+  year = {1994},
+  month = oct,
+  number = {YALEU/DCS/RR-1049},
+  institution = {{Yale University}},
+  abstract = {We describe the results of an experiment in which several conventional programming languages, together with the functional language Haskell, were used to prototype a Naval Surface Warfare Center (NSWC) requirement for a Geometric Region Server. The resulting programs and development metrics were reviewed by a committee chosen by the Navy. The results indicate that the Haskell prototype took significantly less time to develop and was considerably more concise and easier to understand than the corresponding prototypes written in several different imperative languages, including Ada and C++.},
+  langid = {english}
+}
+% == BibTeX quality report for hudakHaskellVsAda1994:
+% ? unused Library catalog ("Zotero")
+
+@inproceedings{hudakHistoryHaskellBeing2007,
+  title = {A History of {{Haskell}}: Being Lazy with Class},
+  shorttitle = {A History of {{Haskell}}},
+  booktitle = {Proceedings of the Third {{ACM SIGPLAN}} Conference on {{History}} of Programming Languages},
+  author = {Hudak, Paul and Hughes, John and Peyton Jones, Simon and Wadler, Philip},
+  year = {2007},
+  month = jun,
+  publisher = {{ACM}},
+  address = {{San Diego California}},
+  doi = {10.1145/1238844.1238856},
+  url = {https://dl.acm.org/doi/10.1145/1238844.1238856},
+  urldate = {2022-06-03},
+  abstract = {This paper describes the history of Haskell, including its genesis and principles, technical contributions, implementations and tools, and applications and impact.},
+  isbn = {978-1-59593-766-7},
+  langid = {english}
+}
+% == BibTeX quality report for hudakHistoryHaskellBeing2007:
+% ? Unsure about the formatting of the booktitle
+% ? unused Conference name ("HOPL-III '07: ACM SIGPLAN History of Programming Languages Conference III")
+% ? unused Library catalog ("DOI.org (Crossref)")
+
 @article{hughesWhyFunctionalProgramming1989,
-  title = {Why {{Functional Programming Matters}}},
+  title = {Why Functional Programming Matters},
   author = {Hughes, J.},
   year = {1989},
   month = feb,
@@ -1442,9 +2174,25 @@ @article{hughesWhyFunctionalProgramming1989
   langid = {english}
 }
 % == BibTeX quality report for hughesWhyFunctionalProgramming1989:
-% ? Title looks like it was stored in title-case in Zotero
 % ? unused Library catalog ("DOI.org (Crossref)")
 
+@techreport{ichbiahRationaleDesignADA1979,
+  title = {Rationale for the Design of the {{ADA}} Programming Language},
+  author = {Ichbiah, Jean D. and Heliard, Jean-Claude and Roubine, Olivier and Barnes, John G.P. and {Krieg-Brueckner}, Bernd and Wichmann, Brian A.},
+  year = {1979},
+  month = jun,
+  number = {Volume 14. Number 6. 
Part B,}, + pages = {267}, + url = {https://apps.dtic.mil/sti/citations/ADA073854}, + urldate = {2024-01-08}, + abstract = {This document, the Rationale for the design of the Green programming language, and the companion Reference Manual, are the two defining documents for the Green language. They serve different purposes. The Reference Manual contains a complete and concise definition of the language. Following Wirth we believe in the virtue of having a rather short reference manual. This has the advantage of providing the information in a form that can easily be consulted, read and reread several times, as the basis for developing a good familarity with the language.}, + chapter = {Technical Reports}, + langid = {english} +} +% == BibTeX quality report for ichbiahRationaleDesignADA1979: +% Missing required field 'institution' +% ? unused Library catalog ("apps.dtic.mil") + @article{iversonNotationToolThought1980, title = {Notation as a Tool of Thought}, author = {Iverson, Kenneth E.}, @@ -1472,7 +2220,7 @@ @misc{jakobsDifferentialModularSoftware } @inproceedings{jamesTheseusHighLevel2014, - title = {Theseus: {{A High Level Language}} for {{Reversible Computing}}}, + title = {Theseus: A High Level Language for Reversible Computing}, booktitle = {Work-in-Progress Report at {{Conference}} on {{Reversible Computation}}}, author = {James, Roshan P and Sabry, Amr}, year = {2014}, @@ -1483,10 +2231,69 @@ @inproceedings{jamesTheseusHighLevel2014 } % == BibTeX quality report for jamesTheseusHighLevel2014: % ? Unsure about the formatting of the booktitle -% ? Title looks like it was stored in title-case in Zotero % ? unused Conference name ("Conference on Reversible Computation") % ? unused Library catalog ("Zotero") +@incollection{johnsonComputationalComplexityGLR1991, + title = {The {{Computational Complexity}} of {{GLR Parsing}}}, + booktitle = {Generalized {{LR Parsing}}}, + author = {Johnson, Mark}, + editor = {Tomita, Masaru}, + year = {1991}, + pages = {35--42}, + publisher = {{Springer US}}, + address = {{Boston, MA}}, + doi = {10.1007/978-1-4615-4034-2_3}, + url = {https://doi.org/10.1007/978-1-4615-4034-2_3}, + urldate = {2023-05-22}, + abstract = {The Tomita parsing algorithm adapts Knuth’s (1967) well-known parsing algorithm for LR(K) grammars to non-LR grammars, including ambiguous grammars. Knuth’s algorithm is provably efficient: it requires at most O(n|G|) units of time, where \textbackslash G\textbackslash{} is the size of (i.e. the number of symbols in) G and n is the length of the string to be parsed. This is often significantly better than the O(n3|G|2) worst case time required by standard parsing algorithms such as the Earley algorithm. Since the Tomita algorithm is closely related to Knuth’s algorithm, one might expect that it too is provably more efficient than the Earley algorithm, especially as actual computational implementations of Tomita’s algorithm outperform implementations of the Earley algorithm (Tomita 1986, 1987).}, + isbn = {978-1-4615-4034-2}, + langid = {english}, + keywords = {Computational Linguistics,Input Length,Input String,Input Symbol,Parse Tree} +} +% == BibTeX quality report for johnsonComputationalComplexityGLR1991: +% ? Title looks like it was stored in title-case in Zotero +% ? 
unused Library catalog ("Springer Link") + +@article{johnsonFirstclassStoresPartial1994, + title = {First-Class Stores and Partial Continuations in a Programming Language and Environment}, + author = {Johnson, Gregory F and Duggan, Dominic}, + year = {1994}, + month = mar, + journal = {Computer Languages}, + volume = {20}, + number = {1}, + pages = {53--68}, + issn = {00960551}, + doi = {10.1016/0096-0551(94)90014-0}, + url = {https://linkinghub.elsevier.com/retrieve/pii/0096055194900140}, + urldate = {2022-12-05}, + abstract = {Inthe GL programming language and its debugger, it is possible at essentially any point during program execution to capture the entire data state (the extant store) or the control state (the sequence of pending function invocations, represented using a new construct called partial continuations). These artifacts of program execution can be bound to identifiers or placed in storage cells and subsequently manipulated either from within the executing program or interactively from the debugging environment. The captured data states and control states can be interactively examined, reasoned about and experimentally executed. This paper describes the design of a language and debugging environment that supports these two capabilities, with particular attention given to the implications of having them both in the same language. For the new features to be useful they must be efficient, and a description is given of the techniques that were used to achieve an acceptable level of efficiency.}, + langid = {english} +} +% == BibTeX quality report for johnsonFirstclassStoresPartial1994: +% ? unused Library catalog ("DOI.org (Crossref)") + +@inproceedings{johnsonStoresPartialContinuations1988, + ids = {johnsonStoresPartialContinuations1988a}, + title = {Stores and Partial Continuations as First-Class Objects in a Language and Its Environment}, + booktitle = {Proceedings of the 15th {{ACM SIGPLAN-SIGACT}} Symposium on {{Principles}} of Programming Languages}, + author = {Johnson, G. F. and Duggan, D.}, + year = {1988}, + month = jan, + series = {{{POPL}} '88}, + pages = {158--168}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + doi = {10.1145/73560.73574}, + url = {https://doi.org/10.1145/73560.73574}, + urldate = {2022-12-02}, + isbn = {978-0-89791-252-5} +} +% == BibTeX quality report for johnsonStoresPartialContinuations1988: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("ACM Digital Library") + @article{jonesCallbyvalueTerminationUntyped2008, title = {Call-by-Value Termination in the Untyped Lambda-Calculus}, author = {Jones, Neil D. and Bohr, Nina}, @@ -1508,6 +2315,22 @@ @article{jonesCallbyvalueTerminationUntyped2008 % == BibTeX quality report for jonesCallbyvalueTerminationUntyped2008: % ? unused Journal abbreviation ("Log.Meth.Comput.Sci.") +@book{jonesEstimatingSoftwareCosts2007, + title = {Estimating Software Costs: Bringing Realism to Estimating}, + shorttitle = {Estimating Software Costs}, + author = {Jones, Capers}, + year = {2007}, + month = may, + edition = {2nd edition}, + publisher = {{McGraw Hill}}, + address = {{New York}}, + isbn = {978-0-07-148300-1}, + langid = {english} +} +% == BibTeX quality report for jonesEstimatingSoftwareCosts2007: +% ? unused Library catalog ("Amazon") +% ? 
unused Number of pages ("644") + @book{jonesImplementationFunctionalProgramming1987, title = {The Implementation of Functional Programming Languages}, author = {Jones, Simon Peyton}, @@ -1539,6 +2362,17 @@ @book{jonesImplementationFunctionalProgramming1987 % == BibTeX quality report for jonesImplementationFunctionalProgramming1987: % ? unused Library catalog ("www.microsoft.com") +@techreport{jonesSoftwareEconomicsFunction2017, + title = {Software Economics and Function Point Metrics: Thirty Years of {{IFPUG}} Progress}, + author = {Jones, Capers}, + year = {2017}, + month = apr, + url = {https://www.ifpug.org/wp-content/uploads/2017/04/IYSM.-Thirty-years-of-IFPUG.-Software-Economics-and-Function-Point-Metrics-Capers-Jones.pdf}, + urldate = {2023-12-28} +} +% == BibTeX quality report for jonesSoftwareEconomicsFunction2017: +% Missing required field 'institution' + @inproceedings{jonesTacklingAwkwardSquad2001, ids = {jonesTacklingAwkwardSquad2001a}, title = {Tackling the Awkward Squad: Monadic Input/Output, Concurrency, Exceptions, and Foreign-Language Calls in {{Haskell}}}, @@ -1569,8 +2403,63 @@ @article{jonesWearingHairShirt2003 % Missing required field 'journal' % ? unused Library catalog ("www.microsoft.com") +@article{joostenTeachingFunctionalProgramming1993, + title = {Teaching Functional Programming to First-Year Students}, + author = {Joosten, Stef and Berg, Klaas Van Den and Hoeven, Gerrit Van Der}, + year = {1993}, + month = jan, + journal = {Journal of Functional Programming}, + volume = {3}, + number = {1}, + pages = {49--65}, + publisher = {{Cambridge University Press}}, + issn = {1469-7653, 0956-7968}, + doi = {10.1017/S0956796800000599}, + url = {https://www.cambridge.org/core/journals/journal-of-functional-programming/article/teaching-functional-programming-to-firstyear-students/041D6A27806B921685CD562695270216}, + urldate = {2022-11-14}, + abstract = {In the period 1986–1991, experiments have been carried out with an introductory course in computer programming, based on functional programming. Due to thorough educational design and evaluation, a successful course has been developed. This has led to a revision of the computer programming education in the first year of the computer science curriculum at the University of Twente.This article describes the approach, the aim of the computer programming course, the outline and subject matter of the course, and the evaluation. Educational research has been done to assess the quality of the course.}, + langid = {english} +} + +@article{jorgensenInterpretationProblemsRelated2012, + title = {Interpretation Problems Related to the Use of Regression Models to Decide on Economy of Scale in Software Development}, + author = {Jørgensen, Magne and Kitchenham, Barbara}, + year = {2012}, + month = nov, + journal = {Journal of Systems and Software}, + volume = {85}, + number = {11}, + pages = {2494--2503}, + issn = {01641212}, + doi = {10.1016/j.jss.2012.05.068}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S0164121212001549}, + urldate = {2023-12-26}, + langid = {english} +} +% == BibTeX quality report for jorgensenInterpretationProblemsRelated2012: +% ? 
unused Library catalog ("DOI.org (Crossref)") + +@article{jorgensenReviewStudiesExpert2004, + title = {A Review of Studies on Expert Estimation of Software Development Effort}, + author = {Jørgensen, M.}, + year = {2004}, + month = feb, + journal = {Journal of Systems and Software}, + volume = {70}, + number = {1-2}, + pages = {37--60}, + issn = {01641212}, + doi = {10.1016/S0164-1212(02)00156-5}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S0164121202001565}, + urldate = {2023-12-26}, + abstract = {This paper provides an extensive review of studies related to expert estimation of software development effort. The main goal and contribution of the review is to support the research on expert estimation, e.g., to ease other researcher’s search for relevant expert estimation studies. In addition, we provide software practitioners with useful estimation guidelines, based on the research-based knowledge of expert estimation processes. The review results suggest that expert estimation is the most frequently applied estimation strategy for software projects, that there is no substantial evidence in favour of use of estimation models, and that there are situations where we can expect expert estimates to be more accurate than formal estimation models. The following 12 expert estimation ‘‘best practice’’ guidelines are evaluated through the review: (1) evaluate estimation accuracy, but avoid high evaluation pressure; (2) avoid conflicting estimation goals; (3) ask the estimators to justify and criticize their estimates; (4) avoid irrelevant and unreliable estimation information; (5) use documented data from previous development tasks; (6) find estimation experts with relevant domain background and good estimation records; (7) Estimate top-down and bottom-up, independently of each other; (8) use estimation checklists; (9) combine estimates from different experts and estimation strategies; (10) assess the uncertainty of the estimate; (11) provide feedback on estimation accuracy and development task relations; and, (12) provide estimation training opportunities. We found supporting evidence for all 12 estimation principles, and provide suggestions on how to implement them in software organizations.}, + langid = {english} +} +% == BibTeX quality report for jorgensenReviewStudiesExpert2004: +% ? unused Library catalog ("DOI.org (Crossref)") + @article{jurkiewiczCostAddressTranslation2014, - title = {The {{Cost}} of {{Address Translation}}}, + title = {The Cost of Address Translation}, author = {Jurkiewicz, Tomasz and Mehlhorn, Kurt}, year = {2014}, month = apr, @@ -1586,10 +2475,29 @@ @article{jurkiewiczCostAddressTranslation2014 } % == BibTeX quality report for jurkiewiczCostAddressTranslation2014: % ? Possibly abbreviated journal title arXiv:1212.0703 [cs] -% ? 
Title looks like it was stored in title-case in Zotero -@inproceedings{kahrsNonOmegaOverlappingTRSsAre2016, - title = {Non-{{Omega-Overlapping TRSs}} Are {{UN}}}, +@article{kahrsInfinitaryRewritingClosure2013, + title = {Infinitary Rewriting: Closure Operators, Equivalences and Models}, + shorttitle = {Infinitary Rewriting}, + author = {Kahrs, Stefan}, + year = {2013}, + month = mar, + journal = {Acta Informatica}, + volume = {50}, + number = {2}, + pages = {123--156}, + issn = {0001-5903, 1432-0525}, + doi = {10.1007/s00236-012-0174-y}, + url = {http://link.springer.com/10.1007/s00236-012-0174-y}, + urldate = {2022-10-22}, + abstract = {Infinitary Term Rewriting allows to express infinite terms and transfinite reductions that converge to those terms. Underpinning the machinery of infinitary rewriting are closure operators on relations that facilitate the formation of transfinite reductions and transfinite equivalence proofs. The literature on infinitary rewriting has largely neglected to single out such closure operators, leaving them implicit in definitions of (transfinite) rewrite reductions, or equivalence relations. This paper unpicks some of those definitions, extracting the underlying closure principles used, as well as constructing alternative operators that lead to alternative notions of reduction and equivalence. A consequence of this unpicking is an insight into the abstraction level at which these operators can be defined. Some of the material in this paper already appeared in Kahrs (2010). The paper also generalises the notion of equational model for infinitary rewriting. This leads to semantics-based notions of equivalence that tie in with the equivalences constructed from the closure operators.}, + langid = {english} +} +% == BibTeX quality report for kahrsInfinitaryRewritingClosure2013: +% ? unused Library catalog ("DOI.org (Crossref)") + +@inproceedings{kahrsNonomegaoverlappingTRSsAre2016, + title = {Non-Omega-Overlapping {{TRSs}} Are {{UN}}}, booktitle = {1st {{International Conference}} on {{Formal Structures}} for {{Computation}} and {{Deduction}} ({{FSCD}} 2016)}, author = {Kahrs, Stefan and Smith, Connor}, editor = {Kesner, Delia and Pientka, Brigitte}, @@ -1606,7 +2514,7 @@ @inproceedings{kahrsNonOmegaOverlappingTRSsAre2016 isbn = {978-3-95977-010-1}, keywords = {consistency,omega-substitutions,uniqueness of normal forms} } -% == BibTeX quality report for kahrsNonOmegaOverlappingTRSsAre2016: +% == BibTeX quality report for kahrsNonomegaoverlappingTRSsAre2016: % ? Unsure about the formatting of the booktitle % ? unused Library catalog ("Dagstuhl Research Online Publication Server") @@ -1652,7 +2560,7 @@ @article{karazerisFinalCoalgebrasAccessible2011 @article{kennawayComparingCurriedUncurried1995, ids = {kennawayComparingCurriedUncurried}, - title = {Comparing {{Curried}} and {{Uncurried Rewriting}}}, + title = {Comparing Curried and Uncurried Rewriting}, author = {Kennaway, Richard and Klop, Jan Willem and Sleep, Ronan and Vries, Fer-jan De}, year = {1995}, pages = {25}, @@ -1661,9 +2569,27 @@ @article{kennawayComparingCurriedUncurried1995 } % == BibTeX quality report for kennawayComparingCurriedUncurried1995: % Missing required field 'journal' -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("Zotero") +@article{kennawayInfinitaryLambdaCalculus1997, + title = {Infinitary Lambda Calculus}, + author = {Kennaway, J.R. and Klop, J.W. and Sleep, M.R. 
and {de Vries}, F.J.},
+  year = {1997},
+  month = mar,
+  journal = {Theoretical Computer Science},
+  volume = {175},
+  number = {1},
+  pages = {93--125},
+  issn = {03043975},
+  doi = {10.1016/S0304-3975(96)00171-5},
+  url = {https://linkinghub.elsevier.com/retrieve/pii/S0304397596001715},
+  urldate = {2022-11-05},
+  abstract = {In a previous paper we have established the theory of transfinite reduction for orthogonal term rewriting systems. In this paper we perform the same task for the lambda calculus. From the viewpoint of infinitary rewriting, the Böhm model of the lambda calculus can be seen as an infinitary term model. In contrast to term rewriting, there are several different possible notions of infinite term, which give rise to different Böhm-like models, which embody different notions of lazy or eager computation.},
+  langid = {english}
+}
+% == BibTeX quality report for kennawayInfinitaryLambdaCalculus1997:
+% ? unused Library catalog ("DOI.org (Crossref)")
+
 @article{kennawayMeaninglessTermsRewriting1999,
   title = {Meaningless Terms in Rewriting},
   author = {Kennaway, R. and van Oostrom, V. and de Vries, F.-J.},
@@ -1711,8 +2637,31 @@ @inproceedings{kennawayTransfiniteReductionsOrthogonal1991
 % ? Unsure about the formatting of the booktitle
 % ? unused Library catalog ("Springer Link")
 
+@inproceedings{kennedyCompilingContinuationsContinued2007,
+  ids = {kennedyCompilingContinuationsContinued},
+  title = {Compiling with Continuations, Continued},
+  booktitle = {Proceedings of the 12th {{ACM SIGPLAN}} International Conference on {{Functional}} Programming},
+  author = {Kennedy, Andrew},
+  year = {2007},
+  month = oct,
+  series = {{{ICFP}} '07},
+  pages = {177--190},
+  publisher = {{Association for Computing Machinery}},
+  address = {{New York, NY, USA}},
+  doi = {10.1145/1291151.1291179},
+  url = {https://doi.org/10.1145/1291151.1291179},
+  urldate = {2023-05-03},
+  abstract = {We present a series of CPS-based intermediate languages suitable for functional language compilation, arguing that they have practical benefits over direct-style languages based on A-normal form (ANF) or monads. Inlining of functions demonstrates the benefits most clearly: in ANF-based languages, inlining involves a re-normalization step that rearranges let expressions and possibly introduces a new 'join point' function, and in monadic languages, commuting conversions must be applied; in contrast, inlining in our CPS language is a simple substitution of variables for variables. We present a contification transformation implemented by simple rewrites on the intermediate language. Exceptions are modelled using so-called 'double-barrelled' CPS. Subtyping on exception constructors then gives a very straightforward effect analysis for exceptions. We also show how a graph-based representation of CPS terms can be implemented extremely efficiently, with linear-time term simplification.},
+  isbn = {978-1-59593-815-2},
+  langid = {english},
+  keywords = {continuation passing style,continuations,functional programming languages,monads,optimizing compilation}
+}
+% == BibTeX quality report for kennedyCompilingContinuationsContinued2007:
+% ? Unsure about the formatting of the booktitle
+% ?
unused Library catalog ("ACM Digital Library") + @inproceedings{ketemaViciousCirclesRewriting2005, - title = {Vicious {{Circles}} in {{Rewriting Systems}}}, + title = {Vicious Circles in Rewriting Systems}, booktitle = {5th {{International Workshop}} on {{Reduction Strategies}} in {{Rewriting}} and {{Programming}}}, author = {Ketema, Jeroen and Klop, Jan Willem and {van Oostrom}, Vincent}, year = {2005}, @@ -1725,7 +2674,6 @@ @inproceedings{ketemaViciousCirclesRewriting2005 } % == BibTeX quality report for ketemaViciousCirclesRewriting2005: % ? Unsure about the formatting of the booktitle -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("Zotero") @inproceedings{kiselyovBacktrackingInterleavingTerminating2005, @@ -1773,7 +2721,7 @@ @misc{kiselyovManyFacesFixedpoint2013 % ? Possibly abbreviated journal title okmij.org @phdthesis{klopCombinatoryReductionSystems1980, - title = {Combinatory {{Reduction Systems}}}, + title = {Combinatory Reduction Systems}, author = {Klop, Jan Willem}, year = {1980}, month = jun, @@ -1783,7 +2731,6 @@ @phdthesis{klopCombinatoryReductionSystems1980 school = {Rijksuniversiteit Utrecht} } % == BibTeX quality report for klopCombinatoryReductionSystems1980: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("eprints.illc.uva.nl") % ? unused Type ("PhD") @@ -1808,9 +2755,22 @@ @inproceedings{klopExtendedTermRewriting1991 % ? Unsure about the formatting of the booktitle % ? unused Library catalog ("Springer Link") +@inproceedings{klopInfinitaryNormalization2005, + title = {Infinitary Normalization}, + booktitle = {We {{Will Show Them}}! {{Essays}} in {{Honour}} of {{Dov Gabbay}}, {{Volume Two}}}, + author = {Klop, Jan Willem and de Vrijer, Roel C.}, + editor = {Artëmov, Sergei N. and Barringer, Howard and d'Avila Garcez, Artur S. and Lamb, Luís C. and Woods, John}, + year = {2005}, + pages = {169--192}, + publisher = {{College Publications}} +} +% == BibTeX quality report for klopInfinitaryNormalization2005: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("DBLP Computer Science Bibliography") + @article{knuthStructuredProgrammingGo1974, ids = {knuthStructuredProgrammingGo1974a}, - title = {Structured {{Programming}} with Go to {{Statements}}}, + title = {Structured Programming with Go to Statements}, author = {Knuth, Donald E.}, year = {1974}, month = dec, @@ -1843,6 +2803,28 @@ @phdthesis{krishnamurthiLinguisticReuse2001 % ? unused Library catalog ("scholarship.rice.edu") % ? unused Type ("PhD") +@inproceedings{kristensenWhenWhyWhy2007, + title = {The When, Why and Why Not of the {{BETA}} Programming Language}, + booktitle = {Proceedings of the Third {{ACM SIGPLAN}} Conference on {{History}} of Programming Languages}, + author = {Kristensen, Bent Bruun and Madsen, Ole Lehrmann and {Møller-Pedersen}, Birger}, + year = {2007}, + month = jun, + series = {{{HOPL III}}}, + pages = {10-1--10-57}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + doi = {10.1145/1238844.1238854}, + url = {https://beta.alexandra.dk/sites/default/files/pdf/BETA-HOPL-V4.7_ref.pdf}, + urldate = {2023-08-04}, + abstract = {This paper tells the story of the development of BETA: a programming language with just one abstraction mechanism, instead of one abstraction mechanism for each kind of program element (classes, types, procedures, functions, etc.). 
The paper explains how this single abstraction mechanism, the pattern, came about and how it was designed to be so powerful that it covered the other mechanisms. In addition to describing the technical challenge of capturing all programming elements with just one abstraction mechanism, the paper also explains how the language was based upon a modeling approach, so that it could be used for analysis, design and implementation. It also illustrates how this modeling approach guided and settled the design of specific language concepts. The paper compares the BETA programming language with other languages and explains how such a minimal language can still support modeling, even though it does not have some of the language mechanisms found in other object-oriented languages. Finally, the paper tries to convey the organization, working conditions and social life around the BETA project, which turned out to be a lifelong activity for Kristen Nygaard, the authors of this paper, and many others.}, + isbn = {978-1-59593-766-7}, + keywords = {history of programming,object-oriented analysis,object-oriented design,object-oriented modeling,object-oriented programming,programming languages} +} +% == BibTeX quality report for kristensenWhenWhyWhy2007: +% ? Unsure about the formatting of the booktitle +% ? unused Conference name ("HOPL-III '07: ACM SIGPLAN History of Programming Languages Conference III") +% ? unused Library catalog ("ACM Digital Library") + @misc{lafontLinearLogicPages, title = {Linear Logic Pages}, author = {Lafont, Yves}, @@ -1850,6 +2832,26 @@ @misc{lafontLinearLogicPages langid = {english} } +@article{lagerstromIdentifyingFactorsAffecting2012, + title = {Identifying Factors Affecting Software Development Cost and Productivity}, + author = {Lagerström, Robert and {von Würtemberg}, Liv Marcks and Holm, Hannes and Luczak, Oscar}, + year = {2012}, + month = jun, + journal = {Software Quality Journal}, + volume = {20}, + number = {2}, + pages = {395--417}, + issn = {0963-9314, 1573-1367}, + doi = {10.1007/s11219-011-9137-8}, + url = {http://link.springer.com/10.1007/s11219-011-9137-8}, + urldate = {2023-12-26}, + abstract = {Software systems of today are often complex, making development costs difficult to estimate. This paper uses data from 50 projects performed at one of the largest banks in Sweden to identify factors that have an impact on software development cost. Correlation analysis of the relationship between factor states and project costs was assessed using ANOVA and regression analysis. Ten out of the original 31 factors turned out to have an impact on software development project cost at the Swedish bank including the: number of function points, involved risk, number of budget revisions, primary platform, project priority, commissioning body’s unit, commissioning body, number of project participants, project duration, and number of consultants. In order to be able to compare projects of different size and complexity, this study also considers the software development productivity defined as the amount of function points per working hour in a project. The study at the bank indicates that the productivity is affected by factors such as performance of estimation and prognosis efforts, project type, number of budget revisions, existence of testing conductor, presentation interface, and number of project participants. A discussion addressing how the productivity factors relate to cost estimation models and their factors is presented. 
Some of the factors found to have an impact on cost are already included in estimation models such as COCOMO II, TEAMATe, and SEER-SEM, for instance function points and software platform. Thus, this paper validates these wellknown factors for cost estimation. However, several of the factors found in this study are not included in established models for software development cost estimation. Thus, this paper also provides indications for possible extensions of these models.}, + langid = {english} +} +% == BibTeX quality report for lagerstromIdentifyingFactorsAffecting2012: +% ? unused Journal abbreviation ("Software Qual J") +% ? unused Library catalog ("DOI.org (Crossref)") + @inproceedings{leaJavaForkJoin2000, ids = {leaJavaForkJoin}, title = {A {{Java}} Fork/Join Framework}, @@ -1892,6 +2894,84 @@ @inproceedings{levyJumboLcalculus2006 % ? Unsure about the formatting of the booktitle % ? unused Library catalog ("Springer Link") +@inproceedings{mackieGeometryInteractionMachine1995, + title = {The Geometry of Interaction Machine}, + booktitle = {Proceedings of the 22nd {{ACM SIGPLAN-SIGACT}} Symposium on {{Principles}} of Programming Languages - {{POPL}} '95}, + author = {Mackie, Ian}, + year = {1995}, + pages = {198--208}, + publisher = {{ACM Press}}, + address = {{San Francisco, California, United States}}, + doi = {10.1145/199448.199483}, + url = {http://portal.acm.org/citation.cfm?doid=199448.199483}, + urldate = {2023-04-28}, + isbn = {978-0-89791-692-9}, + langid = {english} +} +% == BibTeX quality report for mackieGeometryInteractionMachine1995: +% ? Unsure about the formatting of the booktitle +% ? unused Conference name ("the 22nd ACM SIGPLAN-SIGACT symposium") +% ? unused Library catalog ("DOI.org (Crossref)") + +@article{madisonRepeatedListeningIncreases2017, + title = {Repeated Listening Increases the Liking for Music Regardless of Its Complexity: Implications for the Appreciation and Aesthetics of Music}, + shorttitle = {Repeated Listening Increases the Liking for Music Regardless of Its Complexity}, + author = {Madison, Guy and Schiölde, Gunilla}, + year = {2017}, + month = mar, + journal = {Frontiers in Neuroscience}, + volume = {11}, + pages = {147}, + issn = {1662-4548}, + doi = {10.3389/fnins.2017.00147}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5374342/}, + urldate = {2024-01-08}, + abstract = {Psychological and aesthetic theories predict that music is appreciated at optimal, peak levels of familiarity and complexity, and that appreciation of music exhibits an inverted U-shaped relationship with familiarity as well as complexity. Because increased familiarity conceivably leads to improved processing and less perceived complexity, we test whether there is an interaction between familiarity and complexity. Specifically, increased familiarity should render the music subjectively less complex, and therefore move the apex of the U curve toward greater complexity. A naturalistic listening experiment was conducted, featuring 40 music examples (ME) divided by experts into 4 levels of complexity prior to the main experiment. The MEs were presented 28 times each across a period of approximately 4 weeks, and individual ratings were assessed throughout the experiment. Ratings of liking increased monotonically with repeated listening at all levels of complexity; both the simplest and the most complex MEs were liked more as a function of listening time, without any indication of a U-shaped relation. 
Although the MEs were previously unknown to the participants, the strongest predictor of liking was familiarity in terms of having listened to similar music before, i.e., familiarity with musical style. We conclude that familiarity is the single most important variable for explaining differences in liking among music, regardless of the complexity of the music.}, + pmcid = {PMC5374342}, + pmid = {28408864} +} +% == BibTeX quality report for madisonRepeatedListeningIncreases2017: +% ? unused Journal abbreviation ("Front Neurosci") +% ? unused Library catalog ("PubMed Central") + +@book{madsenObjectOrientedProgrammingBeta1993, + title = {Object-{{Oriented}} Programming in the {{Beta}} Programming Language}, + author = {Madsen, Ole Lehrmann and {Moller-Pedersen}, Birger and Nygaard, Kristen}, + year = {1993}, + month = jan, + edition = {2nd edition}, + publisher = {{Assn for Computing Machinery}}, + address = {{Wokingham, England ; Reading, Mass}}, + url = {https://beta.cs.au.dk/Books/betabook.pdf}, + abstract = {Object-oriented programming originated with the Simula language developed by Kristen Nygaard in Oslo in the 1960s. Now, from the birthplace of OOP, comes the new BETA programming language, for which this book is both tutorial and reference. It provides a clear introduction to the basic concepts of OOP and to more advanced topics.}, + isbn = {978-0-201-62430-4}, + langid = {english} +} +% == BibTeX quality report for madsenObjectOrientedProgrammingBeta1993: +% ? unused Library catalog ("Amazon") +% ? unused Number of pages ("400") + +@inproceedings{madsenWhatObjectorientedProgramming1988, + title = {What Object-Oriented Programming May Be - and What It Does Not Have to Be}, + booktitle = {{{ECOOP}} ’88 {{European Conference}} on {{Object-Oriented Programming}}}, + author = {Madsen, Ole Lehrmann and {Møller-Pedersen}, Birger}, + editor = {Gjessing, Stein and Nygaard, Kristen}, + year = {1988}, + series = {Lecture {{Notes}} in {{Computer Science}}}, + pages = {1--20}, + publisher = {{Springer}}, + address = {{Berlin, Heidelberg}}, + doi = {10.1007/3-540-45910-3_1}, + url = {https://tidsskrift.dk/daimipb/article/view/7627}, + abstract = {A conceptual framework for object-oriented programming is presented. The framework is independent of specific programming language constructs. It is illustrated how this framework is reflected in an object-oriented language and the language mechanisms are compared with the corresponding elements of other object-oriented languages. Main issues of object-oriented programming are considered on the basis of the framework presented here.}, + isbn = {978-3-540-45910-1}, + langid = {english}, + keywords = {Action Sequence,Message Passing,Part Object,Pattern Attribute,Program Execution} +} +% == BibTeX quality report for madsenWhatObjectorientedProgramming1988: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("Springer Link") + @article{maraistCallbynameCallbyvalueCallbyneed1995, title = {Call-by-Name, Call-by-Value, Call-by-Need, and the Linear Lambda Calculus}, author = {Maraist, John and Odersky, Martin and Turner, David N. and Wadler, Philip}, @@ -1976,6 +3056,24 @@ @incollection{martiniFineStructureExponential1995 % == BibTeX quality report for martiniFineStructureExponential1995: % ? 
unused Library catalog ("DOI.org (Crossref)") +@inproceedings{martinPEARLAgeThree1979, + title = {{{PEARL}} at the Age of Three: {{Case}} Study of Development and Application of a Common High Order Realtime Programming Language}, + shorttitle = {{{PEARL}} at the Age of Three}, + booktitle = {Proceedings of the 4th International Conference on {{Software}} Engineering}, + author = {Martin, T.}, + year = {1979}, + month = sep, + series = {{{ICSE}} '79}, + pages = {100--109}, + publisher = {{IEEE Press}}, + address = {{Munich, Germany}}, + urldate = {2023-12-30}, + abstract = {Programming language PEARL for industrial process automation has been developed since 1969. After extensive studies its final definition was frozen in 1976. Since then considerable experience has been gained with some 10 implementations and 140 applications. With PEARL at the age of three, these experiences are published in this paper for the first time and offered as a case study. The paper starts with a characterization of PEARL by indicating the requirements for such a tool, by classifying it with respect to other language types and by pleading for embedded realtime features. As PEARL has been submitted to ISO for international standardization, the question of how it meets the requirements set up by ISO is more thoroughly answered in Appendix A. The report of experience is given separately for implementation, application, and standardization with as much quantitative results as possible, including compiler behaviour, application areas, and economical benefit. For readers not yet familiar with PEARL a tutorial introduction by means of a simple but completely} +} +% == BibTeX quality report for martinPEARLAgeThree1979: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("ACM Digital Library") + @article{mauborgneIncrementalUniqueRepresentation2000, title = {An Incremental Unique Representation for Regular Trees}, author = {Mauborgne, Laurent}, @@ -2007,9 +3105,57 @@ @phdthesis{mauborgneRepresentationSetsTrees1999 % ? unused Library catalog ("Google Scholar") % ? unused Number of pages ("197") -@incollection{mcdermottExtendedCallbyPushValueReasoning2019, - title = {Extended {{Call-by-Push-Value}}: {{Reasoning About Effectful Programs}} and {{Evaluation Order}}}, - shorttitle = {Extended {{Call-by-Push-Value}}}, +@article{maurerCompilingContinuations2016, + ids = {maurerCompilingContinuation,maurerCompilingContinuations2016a}, + title = {Compiling without Continuations}, + author = {Maurer, Luke and Ariola, Zena and Downen, Paul and Jones, Simon Peyton}, + year = {2016}, + month = nov, + url = {https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/}, + urldate = {2020-05-13}, + abstract = {Many fields of study in compilers give rise to the concept of~ a join point — a place where different execution paths come together. ~ While they have often been treated by representing them as functions or~ continuations, we believe it is time to study them in their own right. We show~ that adding them …}, + langid = {american} +} +% == BibTeX quality report for maurerCompilingContinuations2016: +% Missing required field 'journal' +% ? 
unused Library catalog ("www.microsoft.com") + +@article{maziarzHashingModuloAlphaEquivalence2021, + title = {Hashing {{Modulo Alpha-Equivalence}}}, + author = {Maziarz, Krzysztof and Ellis, Tom and Lawrence, Alan and Fitzgibbon, Andrew and Jones, Simon Peyton}, + year = {2021}, + pages = {17}, + abstract = {Syntax Tree (AST), which represents computational expressions using a tree structure. Subtrees of such an AST —referred to as subexpressions — are useful, because they often correspond to semantically meaningful parts of the program, such as functions. Many applications need to quickly identify all equivalent subexpressions in an AST. Examples include common subexpression elimination (CSE), as mentioned above; structure sharing to save memory, by representing all occurrences of the same subexpression by a pointer to a single shared tree; or pre-processing for machine learning, where subexpression equivalence can be used as an additional feature, for example by turning an AST into a graph with equality links.}, + langid = {english} +} +% == BibTeX quality report for maziarzHashingModuloAlphaEquivalence2021: +% Missing required field 'journal' +% ? Title looks like it was stored in title-case in Zotero +% ? unused Library catalog ("Zotero") + +@article{mccallNewLookNovice2019, + title = {A New Look at Novice Programmer Errors}, + author = {McCall, Davin and Kölling, Michael}, + year = {2019}, + month = dec, + journal = {ACM Transactions on Computing Education}, + volume = {19}, + number = {4}, + pages = {1--30}, + issn = {1946-6226}, + doi = {10.1145/3335814}, + url = {https://dl.acm.org/doi/10.1145/3335814}, + urldate = {2022-10-15}, + abstract = {The types of programming errors that novice programmers make and struggle to resolve have long been of interest to researchers. Various past studies have analyzed the frequency of compiler diagnostic messages. This information, however, does not have a direct correlation to the types of errors students make, due to the inaccuracy and imprecision of diagnostic messages. Furthermore, few attempts have been made to determine the severity of different kinds of errors in terms other than frequency of occurrence. Previously, we developed a method for meaningful categorization of errors, and produced a frequency distribution of these error categories; in this paper, we extend the previous method to also make a determination of error difficulty, in order to give a better measurement of the overall severity of different kinds of errors. An error category hierarchy was developed and validated, and errors in snapshots of students source code were categorized accordingly. The result is a frequency table of logical error categories rather than diagnostic messages. Resolution time for each of the analyzed errors was calculated, and the average resolution time for each category of error was determined; this defines an error difficulty score. The combination of frequency and difficulty allow us to identify the types of error that are most problematic for novice programmers. The results show that ranking errors by severity—a product of frequency and difficulty—yields a significantly different ordering than ranking them by frequency alone, indicating that error frequency by itself may not be a suitable indicator for which errors are actually the most problematic for students. CCS Concepts: • Social and professional topics → Computing education.}, + langid = {english} +} +% == BibTeX quality report for mccallNewLookNovice2019: +% ? 
unused Journal abbreviation ("ACM Trans. Comput. Educ.") +% ? unused Library catalog ("DOI.org (Crossref)") + +@incollection{mcdermottExtendedCallbypushvalueReasoning2019, + title = {Extended Call-by-Push-Value: Reasoning about Effectful Programs and Evaluation Order}, + shorttitle = {Extended Call-by-Push-Value}, booktitle = {Programming {{Languages}} and {{Systems}}}, author = {McDermott, Dylan and Mycroft, Alan}, editor = {Caires, Luís}, @@ -2025,8 +3171,7 @@ @incollection{mcdermottExtendedCallbyPushValueReasoning2019 isbn = {978-3-030-17183-4 978-3-030-17184-1}, langid = {english} } -% == BibTeX quality report for mcdermottExtendedCallbyPushValueReasoning2019: -% ? Title looks like it was stored in title-case in Zotero +% == BibTeX quality report for mcdermottExtendedCallbypushvalueReasoning2019: % ? unused Library catalog ("DOI.org (Crossref)") % ? unused Series title ("Lecture Notes in Computer Science") @@ -2051,6 +3196,25 @@ @inproceedings{meyerovichSocioPLTPrinciplesProgramming2012 % ? unused Conference name ("the ACM international symposium") % ? unused Library catalog ("DOI.org (Crossref)") +@article{miaraProgramIndentationComprehensibility1983, + title = {Program Indentation and Comprehensibility}, + author = {Miara, Richard J. and Musselman, Joyce A. and Navarro, Juan A. and Shneiderman, Ben}, + year = {1983}, + month = nov, + journal = {Communications of the ACM}, + volume = {26}, + number = {11}, + pages = {861--867}, + issn = {0001-0782}, + doi = {10.1145/182.358437}, + url = {https://dl.acm.org/doi/10.1145/182.358437}, + urldate = {2023-03-29}, + keywords = {indentation,program format,program readability} +} +% == BibTeX quality report for miaraProgramIndentationComprehensibility1983: +% ? unused Journal abbreviation ("Commun. ACM") +% ? unused Library catalog ("ACM Digital Library") + @incollection{middeldorpModularAspectsProperties1989, title = {Modular Aspects of Properties of Term Rewriting Systems Related to Normal Forms}, booktitle = {Rewriting {{Techniques}} and {{Applications}}}, @@ -2072,8 +3236,29 @@ @incollection{middeldorpModularAspectsProperties1989 % ? unused Library catalog ("DOI.org (Crossref)") % ? unused Series title ("Lecture Notes in Computer Science") +@article{mightParsingDerivativesFunctional2011, + title = {Parsing with Derivatives: A Functional Pearl}, + shorttitle = {Parsing with Derivatives}, + author = {Might, Matthew and Darais, David and Spiewak, Daniel}, + year = {2011}, + month = sep, + journal = {ACM SIGPLAN Notices}, + volume = {46}, + number = {9}, + pages = {189--195}, + issn = {0362-1340}, + doi = {10.1145/2034574.2034801}, + url = {https://doi.org/10.1145/2034574.2034801}, + urldate = {2023-05-18}, + abstract = {We present a functional approach to parsing unrestricted context-free grammars based on Brzozowski's derivative of regular expressions. If we consider context-free grammars as recursive regular expressions, Brzozowski's equational theory extends without modification to context-free grammars (and it generalizes to parser combinators). The supporting actors in this story are three concepts familiar to functional programmers - laziness, memoization and fixed points; these allow Brzozowski's original equations to be transliterated into purely functional code in about 30 lines spread over three functions. Yet, this almost impossibly brief implementation has a drawback: its performance is sour - in both theory and practice. The culprit? Each derivative can double the size of a grammar, and with it, the cost of the next derivative. 
Fortunately, much of the new structure inflicted by the derivative is either dead on arrival, or it dies after the very next derivative. To eliminate it, we once again exploit laziness and memoization to transliterate an equational theory that prunes such debris into working code. Thanks to this compaction, parsing times become reasonable in practice. We equip the functional programmer with two equational theories that, when combined, make for an abbreviated understanding and implementation of a system for parsing context-free languages.}, + keywords = {context-free grammar,derivative,formal languages,parser combinator,parsing,regular expressions} +} +% == BibTeX quality report for mightParsingDerivativesFunctional2011: +% ? unused Journal abbreviation ("SIGPLAN Not.") +% ? unused Library catalog ("ACM Digital Library") + @incollection{millerOverviewLinearLogic2004, - title = {An {{Overview}} of {{Linear Logic Programming}}}, + title = {An Overview of Linear Logic Programming}, booktitle = {Linear {{Logic}} in {{Computer Science}}}, author = {Miller, Dale}, editor = {Ehrhard, Thomas and Girard, Jean-Yves and Ruet, Paul and Scott, Philip}, @@ -2090,7 +3275,6 @@ @incollection{millerOverviewLinearLogic2004 langid = {english} } % == BibTeX quality report for millerOverviewLinearLogic2004: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("DOI.org (Crossref)") @article{mokhovBuildSystemsCarte2020, @@ -2114,8 +3298,8 @@ @article{mokhovBuildSystemsCarte2020 % ? unused Library catalog ("DOI.org (Crossref)") @inproceedings{monperrusExplainableSoftwareBot2019, - title = {Explainable {{Software Bot Contributions}}: {{Case Study}} of {{Automated Bug Fixes}}}, - shorttitle = {Explainable {{Software Bot Contributions}}}, + title = {Explainable Software Bot Contributions: Case Study of Automated Bug Fixes}, + shorttitle = {Explainable Software Bot Contributions}, booktitle = {2019 {{IEEE}}/{{ACM}} 1st {{International Workshop}} on {{Bots}} in {{Software Engineering}} ({{BotSE}})}, author = {Monperrus, Martin}, year = {2019}, @@ -2133,7 +3317,18 @@ @inproceedings{monperrusExplainableSoftwareBot2019 } % == BibTeX quality report for monperrusExplainableSoftwareBot2019: % ? Unsure about the formatting of the booktitle -% ? Title looks like it was stored in title-case in Zotero + +@inproceedings{morrisettRefiningFirstclassStores1993, + title = {Refining First-Class Stores}, + booktitle = {Proceedings of the {{ACM SIGPLAN}} Workshop on State in Programming Languages}, + author = {Morrisett, J. Gregory}, + year = {1993}, + pages = {73--87}, + publisher = {{Citeseer}} +} +% == BibTeX quality report for morrisettRefiningFirstclassStores1993: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("Google Scholar") @article{naikTypeSystemEquivalent2008, title = {A Type System Equivalent to a Model Checker}, @@ -2204,8 +3399,43 @@ @misc{nishanovFibersMagnifyingGlass2018 urldate = {2021-11-04} } -@incollection{nygaardDevelopmentSIMULALanguages1978, - title = {The Development of the {{SIMULA}} Languages}, +@book{normanLivingComplexity2010, + title = {Living with Complexity}, + author = {Norman, Donald A.}, + year = {2010}, + month = jan, + publisher = {{Mit Pr}}, + address = {{Cambridge, Mass.}}, + isbn = {978-0-262-01486-1}, + langid = {english} +} +% == BibTeX quality report for normanLivingComplexity2010: +% ? unused Library catalog ("Amazon") +% ? 
unused Number of pages ("298") + +@article{norvigDesignPatternsDynamic1996, + title = {Design Patterns in Dynamic Programming}, + author = {Norvig, Peter}, + year = {1996}, + month = may, + langid = {english} +} +% == BibTeX quality report for norvigDesignPatternsDynamic1996: +% Missing required field 'journal' +% ? unused Library catalog ("Zotero") + +@article{nougrahiyaIMOPSourcetosourceCompiler, + title = {{{IMOP}}: A Source-to-Source Compiler Framework for {{OpenMP C}} Programs}, + author = {Nougrahiya, Aman and Nandivada, V Krishna}, + langid = {english} +} +% == BibTeX quality report for nougrahiyaIMOPSourcetosourceCompiler: +% Missing required field 'journal' +% Missing required field 'year' +% ? unused Library catalog ("Zotero") + +@incollection{nygaardDevelopmentSIMULALanguages1978, + title = {The Development of the {{SIMULA}} Languages}, booktitle = {History of Programming Languages}, author = {Nygaard, Kristen and Dahl, Ole-Johan}, year = {1978}, @@ -2220,6 +3450,15 @@ @incollection{nygaardDevelopmentSIMULALanguages1978 % == BibTeX quality report for nygaardDevelopmentSIMULALanguages1978: % ? unused Library catalog ("ACM Digital Library") +@misc{obrienDesignPatterns15, + title = {Design Patterns 15 Years Later: An Interview}, + author = {O'Brien, Larry}, + journal = {InformIT}, + url = {https://www.informit.com/articles/article.aspx?p=1404056}, + urldate = {2023-07-26}, + collaborator = {Gamma, Erich and Helm, Richard and Johnson, Ralph} +} + @inproceedings{oderskySecondLookOverloading1995, title = {A Second Look at Overloading}, booktitle = {Proceedings of the Seventh International Conference on {{Functional}} Programming Languages and Computer Architecture - {{FPCA}} '95}, @@ -2239,6 +3478,21 @@ @inproceedings{oderskySecondLookOverloading1995 % ? unused Conference name ("the seventh international conference") % ? unused Library catalog ("DOI.org (Crossref)") +@book{odochertyObjectorientedAnalysisDesign2005, + title = {Object-Oriented Analysis and Design: Understanding System Development with {{UML}} 2.0}, + shorttitle = {Object-Oriented Analysis and Design}, + author = {O'Docherty, Mike}, + year = {2005}, + publisher = {{Wiley}}, + address = {{Chichester, England ; Hoboken, NJ}}, + isbn = {978-0-470-09240-8}, + lccn = {QA76.64 .O35 2005}, + keywords = {Computer software,Development,Object-oriented programming (Computer science)} +} +% == BibTeX quality report for odochertyObjectorientedAnalysisDesign2005: +% ? unused Library catalog ("Library of Congress ISBN") +% ? unused Number of pages ("559") + @article{okasakiCallbyneedContinuationpassingStyle1994, ids = {okasakiCallbyneedContinuationpassingStyle1994a}, title = {Call-by-Need and Continuation-Passing Style}, @@ -2275,6 +3529,23 @@ @book{okasakiPurelyFunctionalData1998 % ? unused Library catalog ("Library of Congress ISBN") % ? unused Number of pages ("220") +@techreport{palaimoComparisonRADCNASA1982, + title = {A Comparison of {{RADC}} and {{NASA}}/{{SEL}} Software Development Data}, + author = {Palaimo, John}, + year = {1982}, + month = dec, + number = {ADA131756}, + url = {https://apps.dtic.mil/sti/citations/ADA131756}, + urldate = {2023-12-30}, + abstract = {In September of 1978, Richard Nelson of Rome Air Development Center RADC completed a report entitled Software Data Collection and Analysis in which he examined several statistical relationships within the RADC Software Productivity Database. 
The relationships studied attempted to relate the size of a software project with various other metrics describing the development process The seven primary relationships studied by Nelson are given below Project Size vs. Productivity source lines per manmonth Project Size vs. Development Effort manmonths Project Size vs. Development Duration months Project Size vs. Average Manloading manmonths per month Project Size vs. Number of Errors Project Size vs. Spatial Error Rate number of errors per 1000 source lines Project Size vs. Effort Based Error Rate number of errors per 10 manmonths of development effort. This report summarizes the results of a similar examination of all but one of these relationships when data from the NASASEL databases is merged with the RADC data. The relationship between Project Size and Average Manloading will not be examined because of the different methods used on computing this metric for the two databases. However, another possible relationship, given below, is examined. Project Size vs. Temporal Error Rate number of errors per month of development time.}, + chapter = {Technical Reports}, + langid = {english} +} +% == BibTeX quality report for palaimoComparisonRADCNASA1982: +% Missing required field 'institution' +% ? unused Library catalog ("apps.dtic.mil") +% ? unused Series title ("DACS technical monograph series") + @inproceedings{pankratiusCombiningFunctionalImperative2012, title = {Combining Functional and Imperative Programming for Multicore Software: {{An}} Empirical Study Evaluating {{Scala}} and {{Java}}}, shorttitle = {Combining Functional and Imperative Programming for Multicore Software}, @@ -2297,6 +3568,57 @@ @inproceedings{pankratiusCombiningFunctionalImperative2012 % ? unused Conference name ("2012 34th International Conference on Software Engineering (ICSE 2012)") % ? unused Library catalog ("DOI.org (Crossref)") +@article{parkesComplexityofstrategicbehaviorComparisonSchulze2021, + title = {A Complexity-of-Strategic-Behavior Comparison between {{Schulze}}'s Rule and Ranked Pairs}, + author = {Parkes, David and Xia, Lirong}, + year = {2021}, + month = sep, + journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, + volume = {26}, + number = {1}, + pages = {1429--1435}, + issn = {2374-3468, 2159-5399}, + doi = {10.1609/aaai.v26i1.8258}, + url = {https://ojs.aaai.org/index.php/AAAI/article/view/8258}, + urldate = {2024-01-07}, + abstract = {Schulze’s rule and ranked pairs are two Condorcet methods that both satisfy many natural axiomatic properties. Schulze’s rule is used in the elections of many organizations, including the Wikimedia Foundation, the Pirate Party of Sweden and Germany, the Debian project, and the Gento Project. Both rules are immune to control by cloning alternatives, but little is otherwise known about their strategic robustness, including resistance to manipulation by one or more voters, control by adding or deleting alternatives, adding or deleting votes, and bribery. Considering computational barriers, we show that these types of strategic behavior are NP-hard for ranked pairs (both constructive, in making an alternative a winner, and destructive, in precluding an alternative from being a winner). Schulze’s rule, in comparison, remains vulnerable at least to constructive manipulation by a single voter and destructive manipulation by a coalition. 
As the first such polynomialtime rule known to resist all such manipulations, and considering also the broad axiomatic support, ranked pairs seems worthwhile to consider for practical applications.}, + langid = {english} +} +% == BibTeX quality report for parkesComplexityofstrategicbehaviorComparisonSchulze2021: +% ? unused Journal abbreviation ("AAAI") +% ? unused Library catalog ("DOI.org (Crossref)") + +@inproceedings{parnasAbstractTypesDefined1976, + title = {Abstract Types Defined as Classes of Variables}, + booktitle = {Proceedings of the 1976 Conference on {{Data}} : {{Abstraction}}, Definition and Structure}, + author = {Parnas, D. L. and Shore, John E. and Weiss, David}, + year = {1976}, + month = mar, + pages = {149--154}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + doi = {10.1145/800237.807133}, + url = {https://dl.acm.org/doi/10.1145/800237.807133}, + urldate = {2023-08-29}, + abstract = {The concept of “type” has been used without a precise definition in discussions about programming languages for 20 years. Before the concept of user defined data types was introduced, a definition was not necessary for discussions of specific programming languages. The meaning of the term was implicit in the small list of possible types supported by the language. There was even enough similarity between different languages so that this form of definition allowed discussions of languages in general. The need for a widely accepted definition of type became clear in discussions of languages that allow users to add to the set of possible types without altering the compiler. In such languages, the concept of type is no longer implicitly defined by the set of built-in types. A consistent language must be based on a clearer definition of the notion of type than we now have.}, + isbn = {978-1-4503-7898-7} +} +% == BibTeX quality report for parnasAbstractTypesDefined1976: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("ACM Digital Library") + +@article{parreauxUltimateConditionalSyntax, + title = {The {{Ultimate Conditional Syntax}}}, + author = {Parreaux, Lionel}, + abstract = {ML-language dialects and related typically support expressive pattern-matching syntaxes which allow programmers to write concise, expressive, and type-safe code to manipulate algebraic data types. Many features have been proposed to enhance the expressiveness of these pattern-matching syntaxes, such as pattern bindings, pattern alternatives (aka disjunction), pattern conjunction, view patterns, pattern guards, ‘if-let’ patterns, multi-way if-expressions, etc.}, + langid = {english} +} +% == BibTeX quality report for parreauxUltimateConditionalSyntax: +% Missing required field 'journal' +% Missing required field 'year' +% ? Title looks like it was stored in title-case in Zotero +% ? unused Library catalog ("Zotero") + @article{peytonjonesSecretsGlasgowHaskell2002, ids = {jonesSecretsGlasgowHaskell2002}, title = {Secrets of the {{Glasgow Haskell Compiler}} Inliner}, @@ -2380,8 +3702,8 @@ @article{pippengerPureImpureLisp1997 % ? 
unused Library catalog ("DOI.org (Crossref)") @inproceedings{pradelGoodBadUgly2015, - title = {The {{Good}}, the {{Bad}}, and the {{Ugly}}: {{An Empirical Study}} of {{Implicit Type Conversions}} in {{JavaScript}}}, - shorttitle = {The {{Good}}, the {{Bad}}, and the {{Ugly}}}, + title = {The Good, the Bad, and the Ugly: An Empirical Study of Implicit Type Conversions in {{JavaScript}}}, + shorttitle = {The Good, the Bad, and the Ugly}, booktitle = {29th {{European Conference}} on {{Object-Oriented Programming}} ({{ECOOP}} 2015)}, author = {Pradel, Michael and Sen, Koushik}, editor = {Boyland, John Tang}, @@ -2404,11 +3726,34 @@ @inproceedings{pradelGoodBadUgly2015 } % == BibTeX quality report for pradelGoodBadUgly2015: % ? Unsure about the formatting of the booktitle -% ? Title looks like it was stored in title-case in Zotero % ? unused Artwork size ("23 pages") % ? unused Library catalog ("Dagstuhl Research Online Publication Server") % ? unused Medium ("application/pdf") +@article{precheltEmpiricalComparisonJava2000, + title = {An Empirical Comparison of {{C}}, {{C}}++, {{Java}}, {{Perl}}, {{Python}}, {{Rexx}}, and {{Tcl}} for a Search/String-Processing Program.}, + author = {Prechelt, Lutz}, + year = {2000}, + month = apr, + abstract = {80 implementations of the same set of requirements, created by 74 different programmers in various languages, are compared for several properties, such as run time, memory consumption, source text length, comment density, program structure, reliability, and the amount of effort required for writing them. The results indicate that, for the given programming problem, ``scripting languages'' (Perl, Python, Rexx, Tcl) are more productive than conventional languages. In terms of run time and memory consumption, they often turn out better than Java and not much worse than C or C++. In general, the differences between languages tend to be smaller than the typical differences due to different programmers within the same language.} +} +% == BibTeX quality report for precheltEmpiricalComparisonJava2000: +% Missing required field 'journal' +% ? unused Library catalog ("ResearchGate") + +@techreport{precheltPlat_FormsWebDevelopment2007, + title = {Plat\_{{Forms}}: The Web Development Platform Comparison — Evaluation and Results}, + author = {Prechelt, Lutz}, + year = {2007}, + month = jun, + number = {B-07-10}, + institution = {{Institut für Informatik, Freie Universität Berlin}}, + abstract = {Plat\_Forms” is a competition in which top-class teams of three professional programmers competed to implement the same requirements for a web-based system within 30 hours, each team using a different technology platform (Java EE, PHP, or Perl). Plat\_Forms intends to provide new insights into the real (rather than purported) pros, cons, and emergent properties of each platform.}, + langid = {english} +} +% == BibTeX quality report for precheltPlat_FormsWebDevelopment2007: +% ? unused Library catalog ("Zotero") + @techreport{proustASAPStaticPossible2017, title = {{{ASAP}}: {{As Static As Possible}} Memory Management}, author = {Proust, Raphaël L}, @@ -2483,7 +3828,7 @@ @article{quinceyRangeOptionsHandling2016 % ? 
unused Library catalog ("DOI.org (Crossref)") @article{quinceyReplyCommentAngles2022, - title = {Reply to {{Comment}} on `{{Angles}} in the {{SI}}: A Detailed Proposal for Solving the Problem'}, + title = {Reply to Comment on `{{Angles}} in the {{SI}}: A Detailed Proposal for Solving the Problem'}, shorttitle = {Reply to {{Comment}} on `{{Angles}} in the {{SI}}}, author = {Quincey, Paul}, year = {2022}, @@ -2539,8 +3884,60 @@ @article{rivasNotionsComputationMonoids2014 langid = {english} } +@inproceedings{sabryReasoningProgramsContinuationpassing1992, + title = {Reasoning about Programs in Continuation-Passing Style.}, + booktitle = {Proceedings of the 1992 {{ACM}} Conference on {{LISP}} and Functional Programming}, + author = {Sabry, Amr and Felleisen, Matthias}, + year = {1992}, + month = jan, + series = {{{LFP}} '92}, + pages = {288--298}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + doi = {10.1145/141471.141563}, + url = {https://dl.acm.org/doi/10.1145/141471.141563}, + urldate = {2023-04-21}, + abstract = {Plotkin's λ-value calculus is sound but incomplete for reasoning about βeegr;-transformations on programs in continuation-passing style (CPS). To find a complete extension, we define a new, compactifying CPS transformation and an “inverse”mapping, un-CPS, both of which are interesting in their own right. Using the new CPS transformation, we can determine the precise language of CPS terms closed under β7eegr;-transformations. Using the un-CPS transformation, we can derive a set of axioms such that every equation between source programs is provable if and only if βη can prove the corresponding equation between CPS programs. The extended calculus is equivalent to an untyped variant of Moggi's computational λ-calculus.}, + isbn = {978-0-89791-481-9} +} +% == BibTeX quality report for sabryReasoningProgramsContinuationpassing1992: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("ACM Digital Library") + +@article{sammetWhyAdaNot1986, + title = {Why {{Ada}} Is Not Just Another Programming Language}, + author = {Sammet, Jean E}, + year = {1986}, + journal = {Communications of the ACM}, + volume = {29}, + number = {8}, + langid = {english} +} +% == BibTeX quality report for sammetWhyAdaNot1986: +% ? unused Library catalog ("Zotero") + +@article{scottSPPFStyleParsingEarley2008, + ids = {scottSPPFStyleParsingEarley2008a}, + title = {{{SPPF-Style}} Parsing from {{Earley}} Recognisers}, + author = {Scott, Elizabeth}, + year = {2008}, + month = apr, + journal = {Electronic Notes in Theoretical Computer Science}, + volume = {203}, + number = {2}, + pages = {53--67}, + issn = {15710661}, + doi = {10.1016/j.entcs.2008.03.044}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S1571066108001497}, + urldate = {2020-06-15}, + abstract = {In its recogniser form, Earley’s algorithm for testing whether a string can be derived from a grammar is worst case cubic on general context free grammars (CFG). Earley gave an outline of a method for turning his recognisers into parsers, but it turns out that this method is incorrect. Tomita’s GLR parser returns a shared packed parse forest (SPPF) representation of all derivations of a given string from a given CFG but is worst case unbounded polynomial order. We have given a modified worst-case cubic version, the BRNGLR algorithm, that, for any string and any CFG, returns a binarised SPPF representation of all possible derivations of a given string. 
In this paper we apply similar techniques to develop two versions of an Earley parsing algorithm that, in worst-case cubic time, return an SPPF representation of all derivations of a given string from a given CFG.}, + langid = {english} +} +% == BibTeX quality report for scottSPPFStyleParsingEarley2008: +% ? unused Library catalog ("DOI.org (Crossref)") + @inproceedings{severiDecomposingLatticeMeaningless2011, - title = {Decomposing the {{Lattice}} of {{Meaningless Sets}} in the {{Infinitary Lambda Calculus}}}, + title = {Decomposing the Lattice of Meaningless Sets in the Infinitary Lambda Calculus}, booktitle = {Logic, {{Language}}, {{Information}} and {{Computation}}}, author = {Severi, Paula and {de Vries}, Fer-Jan}, editor = {Beklemishev, Lev D. and {de Queiroz}, Ruy}, @@ -2557,7 +3954,6 @@ @inproceedings{severiDecomposingLatticeMeaningless2011 } % == BibTeX quality report for severiDecomposingLatticeMeaningless2011: % ? Unsure about the formatting of the booktitle -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("Springer Link") @article{sewellX86TSORigorousUsable2010, @@ -2672,6 +4068,16 @@ @inproceedings{shirahataLinearSetTheory1998 % == BibTeX quality report for shirahataLinearSetTheory1998: % ? unused Library catalog ("DOI.org (Crossref)") +@techreport{shiversBottomupVreductionUplinks2004, + title = {Bottom-up β-Reduction: Uplinks and λ-{{DAGs}} (Extended Version)}, + shorttitle = {Bottom-up β-Reduction}, + author = {Shivers, Olin and Wand, Mitchell}, + year = {2004}, + institution = {{Citeseer}} +} +% == BibTeX quality report for shiversBottomupVreductionUplinks2004: +% ? unused Library catalog ("Google Scholar") + @inproceedings{shiVirtualMachineShowdown2005, ids = {shiVirtualMachineShowdown,shiVirtualMachineShowdown2008}, title = {Virtual Machine Showdown: Stack versus Registers}, @@ -2727,8 +4133,29 @@ @phdthesis{shuttFexprsBasisLisp2010 % ? unused Number of pages ("416") % ? unused Type ("PhD Thesis") +@article{siegmundMeasuringModelingProgramming2014, + title = {Measuring and Modeling Programming Experience}, + author = {Siegmund, Janet and Kästner, Christian and Liebig, Jörg and Apel, Sven and Hanenberg, Stefan}, + year = {2014}, + month = oct, + journal = {Empirical Software Engineering}, + volume = {19}, + number = {5}, + pages = {1299--1334}, + issn = {1573-7616}, + doi = {10.1007/s10664-013-9286-4}, + url = {https://doi.org/10.1007/s10664-013-9286-4}, + urldate = {2023-02-13}, + abstract = {Programming experience is an important confounding parameter in controlled experiments regarding program comprehension. In literature, ways to measure or control programming experience vary. Often, researchers neglect it or do not specify how they controlled for it. We set out to find a well-defined understanding of programming experience and a way to measure it. From published comprehension experiments, we extracted questions that assess programming experience. In a controlled experiment, we compare the answers of computer-science students to these questions with their performance in solving program-comprehension tasks. We found that self estimation seems to be a reliable way to measure programming experience. Furthermore, we applied exploratory and confirmatory factor analyses to extract and evaluate a model of programming experience. 
With our analysis, we initiate a path toward validly and reliably measuring and describing programming experience to better understand and control its influence in program-comprehension experiments.}, + langid = {english}, + keywords = {Controlled experiments,Programming experience,Questionnaire} +} +% == BibTeX quality report for siegmundMeasuringModelingProgramming2014: +% ? unused Journal abbreviation ("Empir Software Eng") +% ? unused Library catalog ("Springer Link") + @inproceedings{simonsenWeakConvergenceUniform2010, - title = {Weak {{Convergence}} and {{Uniform Normalization}} in {{Infinitary Rewriting}}}, + title = {Weak Convergence and Uniform Normalization in Infinitary Rewriting}, booktitle = {Proceedings of the 21st {{International Conference}} on {{Rewriting Techniques}} and {{Applications}}}, author = {Simonsen, Jakob Grue}, editor = {Lynch, Christopher}, @@ -2756,14 +4183,13 @@ @inproceedings{simonsenWeakConvergenceUniform2010 As further corollaries, we derive a number of new results for weakly convergent rewriting: Systems with finitely many rules enjoy unique normal forms, and acyclic orthogonal systems are confluent. Our results suggest that it may be possible to recover some of the positive results for strongly convergent rewriting in the setting of weak convergence, if systems with finitely many rules are considered. Finally, we give a number of counterexamples showing failure of most of the results when infinite sets of rules are allowed.} } % == BibTeX quality report for simonsenWeakConvergenceUniform2010: -% ? Title looks like it was stored in title-case in Zotero % ? unused Artwork size ("14 pages") % ? unused Library catalog ("Dagstuhl Research Online Publication Server") % ? unused Medium ("application/pdf") @article{singerFunctionalBabyTalk2018, - title = {Functional {{Baby Talk}}: {{Analysis}} of {{Code Fragments}} from {{Novice Haskell Programmers}}}, - shorttitle = {Functional {{Baby Talk}}}, + title = {Functional Baby Talk: Analysis of Code Fragments from Novice {{Haskell}} Programmers}, + shorttitle = {Functional Baby Talk}, author = {Singer, Jeremy and Archibald, Blair}, year = {2018}, month = may, @@ -2777,10 +4203,26 @@ @article{singerFunctionalBabyTalk2018 langid = {english} } % == BibTeX quality report for singerFunctionalBabyTalk2018: -% ? Title looks like it was stored in title-case in Zotero % ? unused Journal abbreviation ("Electron. Proc. Theor. Comput. Sci.") % ? unused Library catalog ("DOI.org (Crossref)") +@inproceedings{snowdonAccurateRuntimePrediction2007, + title = {Accurate Run-Time Prediction of Performance Degradation under Frequency Scaling}, + booktitle = {2007 Workshop on Operating System Platforms for Embedded Real-Time Applications}, + author = {Snowdon, David and Van Der Linden, Godfrey and Petters, Stefan and Heiser, Gernot}, + year = {2007}, + publisher = {{NICTA}}, + issn = {1833-9646}, + doi = {10.26190/unsworks/517}, + url = {http://hdl.handle.net/1959.4/39905}, + urldate = {2023-08-07}, + abstract = {Dynamic voltage and frequency scaling is employed to minimise energy consumption in mobile devices. The energy required to execute a piece of software is highly depedent on its execution time, and devices are typically subject to timeliness or quality-of-service constraints. For both these reasons, the performance at a proposed frequency setpoint must be accurately estimated. 
The frequently-made assumption that performance scales linearly with core frequency has shown to be incorrect, and better performance models are required which take into account the effects, and frequency setting, of the memory architecture. This paper presents a methodology, based on off-line hardware characterisation and runtime workload characterisation, for the generation of an execution time model. Its evaluation shows that it provides a highly accurate (to within 2\% on average) prediction of performance at arbitrary frequency settings and that the models can be used to implement operating-system level dynamic voltage and frequency scaling schemes for embedded systems.}, + langid = {english} +} +% == BibTeX quality report for snowdonAccurateRuntimePrediction2007: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("unsworks.unsw.edu.au") + @article{snyderEncapsulationInheritanceObjectoriented1986, title = {Encapsulation and Inheritance in Object-Oriented Programming Languages}, author = {Snyder, Alan}, @@ -2802,7 +4244,7 @@ @article{snyderEncapsulationInheritanceObjectoriented1986 % ? unused Library catalog ("DOI.org (Crossref)") @inproceedings{sparksSuperstructuralReversibleLogic2014, - title = {Superstructural {{Reversible Logic}}}, + title = {Superstructural Reversible Logic}, booktitle = {3rd {{International Workshop}} on {{Linearity}}}, author = {Sparks, Z A and Sabry, Amr}, year = {2014}, @@ -2811,7 +4253,6 @@ @inproceedings{sparksSuperstructuralReversibleLogic2014 } % == BibTeX quality report for sparksSuperstructuralReversibleLogic2014: % ? Unsure about the formatting of the booktitle -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("Zotero") @article{sperberGenerationLRParsers2000, @@ -2833,6 +4274,26 @@ @article{sperberGenerationLRParsers2000 % ? unused Journal abbreviation ("ACM Trans. Program. Lang. Syst.") % ? unused Library catalog ("DOI.org (Crossref)") +@inproceedings{staplesFormalSpecificationsBetter2013, + title = {Formal Specifications Better than Function Points for Code Sizing}, + booktitle = {2013 35th {{International Conference}} on {{Software Engineering}} ({{ICSE}})}, + author = {Staples, Mark and Kolanski, Rafal and Klein, Gerwin and Lewis, Corey and Andronick, June and Murray, Toby and Jeffery, Ross and Bass, Len}, + year = {2013}, + month = may, + pages = {1257--1260}, + publisher = {{IEEE}}, + address = {{San Francisco, CA, USA}}, + doi = {10.1109/ICSE.2013.6606692}, + url = {http://ieeexplore.ieee.org/document/6606692/}, + urldate = {2023-12-27}, + abstract = {Size and effort estimation is a significant challenge for the management of large-scale formal verification projects. We report on an initial study of relationships between the sizes of artefacts from the development of seL4, a formally-verified embedded systems microkernel. For each API function we first determined its COSMIC Function Point (CFP) count (based on the seL4 user manual), then sliced the formal specifications and source code, and performed a normalised line count on these artefact slices. We found strong and significant relationships between the sizes of the artefact slices, but no significant relationships between them and the CFP counts. Our finding that CFP is poorly correlated with lines of code is based on just one system, but is largely consistent with prior literature. We find CFP is also poorly correlated with the size of formal specifications. 
Nonetheless, lines of formal specification correlate with lines of source code, and this may provide a basis for size prediction in future formal verification projects. In future work we will investigate proof sizing.}, + isbn = {978-1-4673-3076-3 978-1-4673-3073-2}, + langid = {english} +} +% == BibTeX quality report for staplesFormalSpecificationsBetter2013: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("DOI.org (Crossref)") + @inproceedings{stefikEmpiricalComparisonAccuracy2011, title = {An Empirical Comparison of the Accuracy Rates of Novices Using the Quorum, Perl, and Randomo Programming Languages}, booktitle = {Proceedings of the 3rd {{ACM SIGPLAN}} Workshop on {{Evaluation}} and Usability of Programming Languages and Tools - {{PLATEAU}} '11}, @@ -2853,6 +4314,26 @@ @inproceedings{stefikEmpiricalComparisonAccuracy2011 % ? unused Conference name ("the 3rd ACM SIGPLAN workshop") % ? unused Library catalog ("DOI.org (Crossref)") +@article{stefikEmpiricalInvestigationProgramming2013, + title = {An Empirical Investigation into Programming Language Syntax}, + author = {Stefik, Andreas and Siebert, Susanna}, + year = {2013}, + month = nov, + journal = {ACM Transactions on Computing Education}, + volume = {13}, + number = {4}, + pages = {1--40}, + issn = {1946-6226, 1946-6226}, + doi = {10.1145/2534973}, + url = {https://dl.acm.org/doi/10.1145/2534973}, + urldate = {2021-02-13}, + langid = {english}, + keywords = {Novice Programmers,Programming Languages,Syntax} +} +% == BibTeX quality report for stefikEmpiricalInvestigationProgramming2013: +% ? unused Journal abbreviation ("ACM Trans. Comput. Educ.") +% ? unused Library catalog ("DOI.org (Crossref)") + @inproceedings{stentIntelligibilityFastSynthesized2011, title = {On the Intelligibility of Fast Synthesized Speech for Individuals with Early-Onset Blindness}, booktitle = {The Proceedings of the 13th International {{ACM SIGACCESS}} Conference on {{Computers}} and Accessibility - {{ASSETS}} '11}, @@ -2873,8 +4354,29 @@ @inproceedings{stentIntelligibilityFastSynthesized2011 % ? unused Conference name ("The proceedings of the 13th international ACM SIGACCESS conference") % ? unused Library catalog ("DOI.org (Crossref)") -@phdthesis{strakaFunctionalDataStuctures2013, - title = {Functional {{Data Stuctures}} and {{Algorithms}}}, +@article{stirlingDecidabilityHigherorderMatching2009, + title = {Decidability of Higher-Order Matching}, + author = {Stirling, Colin}, + editor = {Sassone, Vladimiro}, + year = {2009}, + month = jul, + journal = {Logical Methods in Computer Science}, + volume = {5}, + number = {3}, + pages = {2}, + issn = {18605974}, + doi = {10.2168/LMCS-5(3:2)2009}, + url = {https://lmcs.episciences.org/757}, + urldate = {2021-11-15}, + abstract = {We show that the higher-order matching problem is decidable using a gametheoretic argument.}, + langid = {english} +} +% == BibTeX quality report for stirlingDecidabilityHigherorderMatching2009: +% ? unused Journal abbreviation ("Log.Meth.Comput.Sci.") +% ? unused Library catalog ("DOI.org (Crossref)") + +@phdthesis{strakaFunctionalDataStructures2013, + title = {Functional Data Structures and Algorithms}, author = {Straka, Milan}, year = {2013}, month = sep, @@ -2885,8 +4387,7 @@ @phdthesis{strakaFunctionalDataStuctures2013 school = {Computer Science Institute of Charles University}, annotation = {Accepted: 2018-11-30T13:00:49Z} } -% == BibTeX quality report for strakaFunctionalDataStuctures2013: -% ? 
Title looks like it was stored in title-case in Zotero +% == BibTeX quality report for strakaFunctionalDataStructures2013: % ? unused Library catalog ("dspace.cuni.cz") @article{strassburgerDeepInferenceExpansion2019, @@ -2909,8 +4410,50 @@ @article{strassburgerDeepInferenceExpansion2019 % ? unused Journal abbreviation ("Math. Struct. Comp. Sci.") % ? unused Library catalog ("DOI.org (Crossref)") +@inproceedings{stroustrupWhyNotJust1995, + title = {Why {{C}}++ Is Not Just an Object-Oriented Programming Language}, + booktitle = {Addendum to the Proceedings of the 10th Annual Conference on {{Object-oriented}} Programming Systems, Languages, and Applications}, + author = {Stroustrup, Bjarne}, + year = {1995}, + month = oct, + series = {{{OOPSLA}} '95}, + pages = {1--13}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + doi = {10.1145/260094.260207}, + url = {https://www.stroustrup.com/oopsla.pdf}, + urldate = {2023-07-22}, + isbn = {978-0-89791-721-6} +} +% == BibTeX quality report for stroustrupWhyNotJust1995: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("ACM Digital Library") + +@article{suhEMPExecutionTime2017, + ids = {suhEMPExecutionTime2017a}, + title = {{{EMP}}: Execution Time Measurement Protocol for Compute-Bound Programs}, + shorttitle = {{{EMP}}}, + author = {Suh, Young-Kyoon and Snodgrass, Richard T. and Kececioglu, John D. and Downey, Peter J. and Maier, Robert S. and Yi, Cheng}, + year = {2017}, + journal = {Software: Practice and Experience}, + volume = {47}, + number = {4}, + pages = {559--597}, + issn = {1097-024X}, + doi = {10.1002/spe.2476}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/spe.2476}, + urldate = {2023-08-07}, + abstract = {Measuring execution time is one of the most used performance evaluation techniques in computer science research. Inaccurate measurements cannot be used for a fair performance comparison between programs. Despite the prevalence of its use, the intrinsic variability in the time measurement makes it hard to obtain repeatable and accurate timing results of a program running on an operating system. We propose a novel execution time measurement protocol (termed EMP) for measuring the execution time of a compute-bound program on Linux, while minimizing that measurement's variability. During the development of execution time measurement protocol, we identified several factors that disturb execution time measurement. We introduce successive refinements to the protocol by addressing each of these factors, in concert, reducing variability by more than an order of magnitude. We also introduce a new visualization technique, what we term ‘dual-execution scatter plot’ that highlights infrequent, long-running daemons, differentiating them from frequent and/or short-running daemons. Our empirical results show that the proposed protocol successfully achieves three major aspects—precision, accuracy, and scalability—in execution time measurement that can work for open-source and proprietary software. Copyright © 2017 John Wiley \& Sons, Ltd.}, + copyright = {Copyright © 2017 John Wiley \& Sons, Ltd.}, + langid = {english}, + keywords = {compute-bound programs,execution time,measurement,protocol} +} +% == BibTeX quality report for suhEMPExecutionTime2017: +% ? unused extra: _eprint ("https://onlinelibrary.wiley.com/doi/pdf/10.1002/spe.2476") +% ? 
unused Library catalog ("Wiley Online Library") + @article{sutterFamilyLanguages2000, - title = {The {{C Family}} of {{Languages}}}, + title = {The {{C}} Family of Languages}, author = {Sutter, Herb}, year = {2000}, month = jul, @@ -2921,8 +4464,29 @@ @article{sutterFamilyLanguages2000 urldate = {2021-10-28}, collaborator = {Ritchie, Dennis and Stroustrup, Bjarne and Gosling, James} } -% == BibTeX quality report for sutterFamilyLanguages2000: -% ? Title looks like it was stored in title-case in Zotero + +@incollection{swierstraCombinatorParsingShort2009, + title = {Combinator Parsing: A Short Tutorial}, + shorttitle = {Combinator Parsing}, + booktitle = {Language {{Engineering}} and {{Rigorous Software Development}}: {{International LerNet ALFA Summer School}} 2008, {{Piriapolis}}, {{Uruguay}}, {{February}} 24 - {{March}} 1, 2008, {{Revised Tutorial Lectures}}}, + author = {Swierstra, S. Doaitse}, + editor = {Bove, Ana and Barbosa, Luís Soares and Pardo, Alberto and Pinto, Jorge Sousa}, + year = {2009}, + series = {Lecture {{Notes}} in {{Computer Science}}}, + volume = {5520}, + pages = {252--300}, + publisher = {{Springer}}, + address = {{Berlin, Heidelberg}}, + doi = {10.1007/978-3-642-03153-3_6}, + url = {http://www.cs.uu.nl/research/techreps/repo/CS-2008/2008-044.pdf}, + urldate = {2023-05-18}, + abstract = {There are numerous ways to implement a parser for a given syntax; using parser combinators is a powerful approach to parsing which derives much of its power and expressiveness from the type system and semantics of the host programming language. This tutorial begins with the construction of a small library of parsing combinators. This library introduces the basics of combinator parsing and, more generally, demonstrates how domain specific embedded languages are able to leverage the facilities of the host language. After having constructed our small combinator library, we investigate some shortcomings of the naïve implementation introduced in the first part, and incrementally develop an implementation without these problems. Finally we discuss some further extensions of the presented library and compare our approach with similar libraries.}, + isbn = {978-3-642-03152-6 978-3-642-03153-3}, + langid = {english}, + keywords = {Attribute Grammar,Error Message,Pocket Calculator,Sequential Composition,Terminal Symbol} +} +% == BibTeX quality report for swierstraCombinatorParsingShort2009: +% ? unused Library catalog ("Springer Link") @article{taivalsaariNotionInheritance1996, title = {On the Notion of Inheritance}, @@ -2946,7 +4510,7 @@ @article{taivalsaariNotionInheritance1996 @incollection{temperoWhatProgrammersInheritance2013, ids = {temperoWhatProgrammersInheritance2013a,temperoWhatProgrammersInheritance2013b}, - title = {What {{Programmers Do}} with {{Inheritance}} in {{Java}}}, + title = {What Programmers Do with Inheritance in {{Java}}}, booktitle = {{{ECOOP}} 2013 – {{Object-Oriented Programming}}}, author = {Tempero, Ewan and Yang, Hong Yul and Noble, James}, editor = {Hutchison, David and Kanade, Takeo and Kittler, Josef and Kleinberg, Jon M. and Mattern, Friedemann and Mitchell, John C. and Naor, Moni and Nierstrasz, Oscar and Pandu Rangan, C. and Steffen, Bernhard and Sudan, Madhu and Terzopoulos, Demetri and Tygar, Doug and Vardi, Moshe Y. and Weikum, Gerhard and Castagna, Giuseppe}, @@ -2963,7 +4527,6 @@ @incollection{temperoWhatProgrammersInheritance2013 langid = {english} } % == BibTeX quality report for temperoWhatProgrammersInheritance2013: -% ? 
Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("DOI.org (Crossref)") % ? unused Series title ("Lecture Notes in Computer Science") @@ -2990,8 +4553,85 @@ @article{teneC4ContinuouslyConcurrent2011 % ? unused Journal abbreviation ("SIGPLAN Not.") % ? unused Library catalog ("November 2011") +@inproceedings{tewDevelopingValidatedAssessment2010, + title = {Developing a Validated Assessment of Fundamental {{CS1}} Concepts}, + booktitle = {Proceedings of the 41st {{ACM}} Technical Symposium on {{Computer}} Science Education}, + author = {Tew, Allison Elliott and Guzdial, Mark}, + year = {2010}, + month = mar, + pages = {97--101}, + publisher = {{ACM}}, + address = {{Milwaukee Wisconsin USA}}, + doi = {10.1145/1734263.1734297}, + url = {https://dl.acm.org/doi/10.1145/1734263.1734297}, + urldate = {2023-02-13}, + abstract = {Previous studies of student programming ability have raised questions about students’ ability to problem solve, read and analyze code, and understand introductory computing concepts. However, it is unclear whether these results are the product of failures of student comprehension or our inability to accurately measure their performance. We propose a method for creating a language independent CS1 assessment instrument and present the results of our analysis used to define the common conceptual content that will serve as the framework for the exam. We conclude with a discussion of future work and our progress towards developing the assessment.}, + isbn = {978-1-4503-0006-3}, + langid = {english} +} +% == BibTeX quality report for tewDevelopingValidatedAssessment2010: +% ? Unsure about the formatting of the booktitle +% ? unused Conference name ("SIGCSE10: The 41st ACM Technical Symposium on Computer Science Education") +% ? unused Library catalog ("DOI.org (Crossref)") + +@inproceedings{tewFCS1LanguageIndependent2011, + title = {The {{FCS1}}: A Language Independent Assessment of {{CS1}} Knowledge}, + shorttitle = {The {{FCS1}}}, + booktitle = {Proceedings of the 42nd {{ACM}} Technical Symposium on {{Computer}} Science Education}, + author = {Tew, Allison Elliott and Guzdial, Mark}, + year = {2011}, + month = mar, + pages = {111--116}, + publisher = {{ACM}}, + address = {{Dallas TX USA}}, + doi = {10.1145/1953163.1953200}, + url = {https://dl.acm.org/doi/10.1145/1953163.1953200}, + urldate = {2023-02-13}, + abstract = {A primary goal of many CS education projects is to determine the extent to which a given intervention has had an impact on student learning. However, computing lacks valid assessments for pedagogical or research purposes. Without such valid assessments, it is difficult to accurately measure student learning or establish a relationship between the instructional setting and learning outcomes.}, + isbn = {978-1-4503-0500-6}, + langid = {english} +} +% == BibTeX quality report for tewFCS1LanguageIndependent2011: +% ? Unsure about the formatting of the booktitle +% ? unused Conference name ("SIGCSE '11: The 42nd ACM Technical Symposium on Computer Science Education") +% ? 
unused Library catalog ("DOI.org (Crossref)") + +@book{tomitaEfficientParsingNatural1986, + ids = {tomitaEfficientParsingNatural1986a}, + title = {Efficient Parsing for Natural Language}, + author = {Tomita, Masaru}, + year = {1986}, + publisher = {{Springer US}}, + address = {{Boston, MA}}, + doi = {10.1007/978-1-4757-1885-0}, + url = {http://link.springer.com/10.1007/978-1-4757-1885-0}, + urldate = {2020-06-15}, + isbn = {978-1-4419-5198-4 978-1-4757-1885-0}, + langid = {english}, + keywords = {algorithms,cognition,expert system,grammar,machine translation,natural language,natural language processing,Parsing,proving,speech recognition} +} +% == BibTeX quality report for tomitaEfficientParsingNatural1986: +% ? unused Library catalog ("DOI.org (Crossref)") + +@book{tremblayTheoryPracticeCompiler1985, + title = {The {{Theory}} and {{Practice}} of {{Compiler Writing}}}, + author = {Tremblay, Jean-Paul and Sorenson, Paul G.}, + year = {1985}, + month = jan, + edition = {First Edition}, + publisher = {{McGraw-Hill College}}, + address = {{New York}}, + abstract = {Hardcover}, + isbn = {978-0-07-065161-6}, + langid = {english} +} +% == BibTeX quality report for tremblayTheoryPracticeCompiler1985: +% ? Title looks like it was stored in title-case in Zotero +% ? unused Library catalog ("Amazon") +% ? unused Number of pages ("816") + @article{trinderAlgorithmStrategyParallelism1998, - title = {Algorithm + {{Strategy}} = {{Parallelism}}}, + title = {Algorithm + Strategy = Parallelism}, author = {Trinder, P. W. and Hammond, K. and Loidl, H.-W. and Jones, Simon Peyton}, year = {1998}, month = jan, @@ -3003,9 +4643,36 @@ @article{trinderAlgorithmStrategyParallelism1998 langid = {american} } % == BibTeX quality report for trinderAlgorithmStrategyParallelism1998: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("www.microsoft.com") +@article{uustaluComonadicNotionsComputation2008, + title = {Comonadic Notions of Computation}, + author = {Uustalu, Tarmo and Vene, Varmo}, + year = {2008}, + month = jun, + journal = {Electronic Notes in Theoretical Computer Science}, + volume = {203}, + number = {5}, + pages = {263--284}, + issn = {15710661}, + doi = {10.1016/j.entcs.2008.05.029}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S1571066108003435}, + urldate = {2023-01-17}, + abstract = {We argue that symmetric (semi)monoidal comonads provide a means to structure context-dependent notions of computation such as notions of dataflow computation (computation on streams) and of tree relabelling as in attribute evaluation. We propose a generic semantics for extensions of simply typed lambda calculus with context-dependent operations analogous to the Moggi-style semantics for effectful languages based on strong monads. This continues the work in the early 90s by Brookes, Geva and Van Stone on the use of computational comonads in intensional semantics.}, + langid = {english} +} +% == BibTeX quality report for uustaluComonadicNotionsComputation2008: +% ? unused Library catalog ("DOI.org (Crossref)") + +@phdthesis{vanoostromConfluenceAbstractHigherorder1994, + title = {Confluence for Abstract and Higher-Order Rewriting}, + author = {{van Oostrom}, V.}, + year = {1994}, + school = {Vrije Universiteit Amsterdam} +} +% == BibTeX quality report for vanoostromConfluenceAbstractHigherorder1994: +% ? 
unused Type ("PhD") + @article{venturinizilliReductionGraphsLambda1984, ids = {venturiniReductionGraphsLambda1983}, title = {Reduction Graphs in the Lambda Calculus}, @@ -3026,8 +4693,22 @@ @article{venturinizilliReductionGraphsLambda1984 % == BibTeX quality report for venturinizilliReductionGraphsLambda1984: % ? unused Library catalog ("ScienceDirect") +@book{visserSyntaxDefinitionLanguage1997, + title = {Syntax Definition for Language Prototyping}, + author = {Visser, Eelco}, + year = {1997}, + publisher = {{University}}, + address = {{Amsterdam}}, + isbn = {978-90-74795-75-3}, + langid = {english}, + note = {Zugl.: Amsterdam, Univ., Diss., 1997} +} +% == BibTeX quality report for visserSyntaxDefinitionLanguage1997: +% ? unused Library catalog ("K10plus ISBN") +% ? unused Number of pages ("383") + @incollection{voigtlanderAsymptoticImprovementComputations2008, - title = {Asymptotic {{Improvement}} of {{Computations}} over {{Free Monads}}}, + title = {Asymptotic Improvement of Computations over Free Monads}, booktitle = {Mathematics of {{Program Construction}}}, author = {Voigtländer, Janis}, editor = {Audebaud, Philippe and {Paulin-Mohring}, Christine}, @@ -3045,12 +4726,11 @@ @incollection{voigtlanderAsymptoticImprovementComputations2008 langid = {english} } % == BibTeX quality report for voigtlanderAsymptoticImprovementComputations2008: -% ? Title looks like it was stored in title-case in Zotero % ? unused Library catalog ("DOI.org (Crossref)") % ? unused Series title ("Lecture Notes in Computer Science") -@article{vukmirovicEfficientFullHigherOrder2021, - title = {Efficient {{Full Higher-Order Unification}}}, +@article{vukmirovicEfficientFullHigherorder2021, + title = {Efficient Full Higher-Order Unification}, author = {Vukmirović, Petar and Bentkamp, Alexander and Nummelin, Visa}, year = {2021}, month = dec, @@ -3062,8 +4742,7 @@ @article{vukmirovicEfficientFullHigherOrder2021 urldate = {2022-06-14}, abstract = {We developed a procedure to enumerate complete sets of higher-order unifiers based on work by Jensen and Pietrzykowski. Our procedure removes many redundant unifiers by carefully restricting the search space and tightly integrating decision procedures for fragments that admit a finite complete set of unifiers. We identify a new such fragment and describe a procedure for computing its unifiers. Our unification procedure, together with new higher-order term indexing data structures, is implemented in the Zipperposition theorem prover. Experimental evaluation shows a clear advantage over Jensen and Pietrzykowski's procedure.} } -% == BibTeX quality report for vukmirovicEfficientFullHigherOrder2021: -% ? Title looks like it was stored in title-case in Zotero +% == BibTeX quality report for vukmirovicEfficientFullHigherorder2021: % ? unused Library catalog ("lmcs.episciences.org") @inproceedings{wadlerCallbyvalueDualCallbyname2003, @@ -3128,6 +4807,73 @@ @inproceedings{wadlerHowMakeAdhoc1989 % ? unused Conference name ("the 16th ACM SIGPLAN-SIGACT symposium") % ? 
unused Library catalog ("DOI.org (Crossref)") +@article{walkerFontTuningReview2008, + title = {Font Tuning: {{A}} Review and New Experimental Evidence}, + shorttitle = {Font Tuning}, + author = {Walker, Peter}, + year = {2008}, + month = nov, + journal = {Visual Cognition}, + volume = {16}, + number = {8}, + pages = {1022--1058}, + issn = {1350-6285, 1464-0716}, + doi = {10.1080/13506280701535924}, + url = {http://www.tandfonline.com/doi/abs/10.1080/13506280701535924}, + urldate = {2023-01-13}, + abstract = {This paper reflects on the kinds of evidence able to confirm that letter and word identification in reading can be supported by encoding the underlying visual structure of the text, and specifically by deriving structural descriptions for letters. It is proposed that structure-driven processes are intimately linked to the implementation of font-specific rules for translating visual features into elements of a letter’s structural description. Evidence for such font tuning comes from studies exploring the impact of font-mixing on reading fluency, and from studies showing how the benefits of experience with a novel typeface can generalise to letters that have yet to be seen in the typeface. After reviewing this evidence, three new experiments are reported which explore font tuning in the context of the lexical decision task. The time course of font tuning, which is monitored by changing the time interval between successive test stimuli, is shown to be sensitive to the overall probability with which successive stimuli appear in the same typeface. In addition, font tuning is shown to reflect item-by-item fluctuations in this probability. Finally, the effects of font-switching are shown to generalise beyond the particular letters present in the text, and to be confined to 1-back transitions. It is concluded that font tuning reflects the implementation of a set of font-specific translation rules held in working memory, and is moderated by the reader’s implicit knowledge of the constraints present in the sequencing of successive portions of text.}, + langid = {english} +} +% == BibTeX quality report for walkerFontTuningReview2008: +% ? unused Library catalog ("DOI.org (Crossref)") + +@article{walstonMethodProgrammingMeasurement1977, + title = {A Method of Programming Measurement and Estimation}, + author = {Walston, C. E. and Felix, C. P.}, + year = {1977}, + journal = {IBM Systems Journal}, + volume = {16}, + number = {1}, + pages = {54--73}, + issn = {0018-8670}, + doi = {10.1147/sj.161.0054}, + url = {http://ieeexplore.ieee.org/document/5388069/}, + urldate = {2023-12-30}, + langid = {english} +} +% == BibTeX quality report for walstonMethodProgrammingMeasurement1977: +% ? unused Journal abbreviation ("IBM Syst. J.") +% ? unused Library catalog ("DOI.org (Crossref)") + +@misc{wankadiaRedgrepRegularExpression2013, + title = {Redgrep: From Regular Expression Derivatives to {{LLVM}}}, + author = {Wankadia, Paul}, + year = {2013}, + month = mar, + address = {{Linux.conf.au 2013}}, + url = {https://www.youtube.com/watch?v=ZJOgDovsIsg}, + urldate = {2021-02-09} +} +% == BibTeX quality report for wankadiaRedgrepRegularExpression2013: +% ? unused Library catalog ("YouTube") +% ? 
unused Running time ("36:25") + +@inproceedings{warthWorldsControllingScope2011, + title = {Worlds: Controlling the Scope of Side Effects}, + shorttitle = {Worlds}, + author = {Warth, Alessandro and Ohshima, Yoshiki and Kaehler, Ted and Kay, Alan}, + year = {2011}, + month = jul, + volume = {6813}, + pages = {179--203}, + doi = {10.1007/978-3-642-22655-7_9}, + abstract = {The state of an imperative program—e.g., the values stored in global and local variables, arrays, and objects’ instance variables—changes as its statements are executed. These changes, or side effects, are visible globally: when one part of the program modifies an object, every other part that holds a reference to the same object (either directly or indirectly) is also affected. This paper introduces worlds, a language construct that reifies the notion of program state and enables programmers to control the scope of side effects. We investigate this idea by extending both JavaScript and Squeak Smalltalk with support for worlds, provide examples of some of the interesting idioms this construct makes possible, and formalize the semantics of property/field lookup in the presence of worlds. We also describe an efficient implementation strategy (used in our Squeak-based prototype), and illustrate the practical benefits of worlds with two case studies.}, + isbn = {978-3-642-22654-0} +} +% == BibTeX quality report for warthWorldsControllingScope2011: +% Missing required field 'booktitle' +% ? unused Library catalog ("ResearchGate") + @article{weijlandSemanticsLogicPrograms1990, title = {Semantics for Logic Programs without Occur Check}, author = {Weijland, W.P.}, @@ -3146,6 +4892,26 @@ @article{weijlandSemanticsLogicPrograms1990 % == BibTeX quality report for weijlandSemanticsLogicPrograms1990: % ? unused Library catalog ("DOI.org (Crossref)") +@article{weissOxideEssenceRust2019, + title = {Oxide: The Essence of {{Rust}}}, + shorttitle = {Oxide}, + author = {Weiss, Aaron and Patterson, Daniel and Matsakis, Nicholas D. and Ahmed, Amal}, + year = {2019}, + month = mar, + journal = {arXiv:1903.00982 [cs]}, + eprint = {1903.00982}, + primaryclass = {cs}, + url = {http://arxiv.org/abs/1903.00982}, + urldate = {2020-05-17}, + abstract = {Rust is a major advancement in industrial programming languages due in large part to its success in bridging the gap between low-level systems programming and high-level application programming. This success has ultimately empowered programmers to more easily build reliable and efficient software, and at its heart lies a novel approach to ownership that balances type system expressivity with usability. In this work, we set out to capture the essence of this model of ownership by developing a type systems account of Rust's borrow checker. To that end, we present Oxide, a formalized programming language close to source-level Rust (but with fully-annotated types). This presentation takes a new view of lifetimes as approximate provenances of references, and our type system is able to automatically compute this information through a flow-sensitive substructural typing judgment for which we prove syntactic type safety using progress and preservation. 
The result is a simpler formulation of borrow checking - including recent features such as non-lexical lifetimes - that we hope researchers will be able to use as the basis for work on Rust.}, + archiveprefix = {arxiv}, + langid = {english}, + keywords = {Computer Science - Programming Languages}, + note = {Comment: In submission} +} +% == BibTeX quality report for weissOxideEssenceRust2019: +% ? Possibly abbreviated journal title arXiv:1903.00982 [cs] + @article{wheelerFullyCounteringTrusting2010, title = {Fully Countering Trusting Trust through Diverse Double-Compiling}, author = {Wheeler, David A.}, @@ -3192,6 +4958,68 @@ @article{wikipediaLogit2020 annotation = {Page Version ID: 964387041} } +@inproceedings{yangDeadStoreElimination2017, + title = {Dead Store Elimination (Still) Considered Harmful}, + booktitle = {26th {{USENIX Security Symposium}}}, + author = {Yang, Zhaomo and Johannesmeyer, Brian and Olesen, Anders Trier and Lerner, Sorin and Levchenko, Kirill}, + year = {2017}, + month = aug, + address = {{Vancouver, BC, Canada}}, + abstract = {Dead store elimination is a widely used compiler optimization that reduces code size and improves performance. However, it can also remove seemingly useless memory writes that the programmer intended to clear sensitive data after its last use. Security-savvy developers have long been aware of this phenomenon and have devised ways to prevent the compiler from eliminating these data scrubbing operations.}, + isbn = {978-1-931971-40-9}, + langid = {english} +} +% == BibTeX quality report for yangDeadStoreElimination2017: +% ? Unsure about the formatting of the booktitle +% ? unused Library catalog ("Zotero") + +@article{yaofeichenEmpiricalStudyProgramming2005, + title = {An Empirical Study of Programming Language Trends}, + author = {{Yaofei Chen} and Dios, R. and Mili, A. and {Lan Wu} and {Kefei Wang}}, + year = {2005}, + month = may, + journal = {IEEE Software}, + volume = {22}, + number = {3}, + pages = {72--78}, + issn = {0740-7459}, + doi = {10.1109/MS.2005.55}, + url = {http://ieeexplore.ieee.org/document/1438333/}, + urldate = {2023-06-17}, + langid = {english} +} +% == BibTeX quality report for yaofeichenEmpiricalStudyProgramming2005: +% ? unused Journal abbreviation ("IEEE Softw.") +% ? unused Library catalog ("DOI.org (Crossref)") + +@inproceedings{yodaikenHowISOBecame2021, + title = {How {{ISO C}} Became Unusable for Operating Systems Development}, + booktitle = {Proceedings of the 11th {{Workshop}} on {{Programming Languages}} and {{Operating Systems}}}, + author = {Yodaiken, Victor}, + year = {2021}, + month = oct, + eprint = {2201.07845}, + primaryclass = {cs}, + pages = {84--90}, + doi = {10.1145/3477113.3487274}, + url = {http://arxiv.org/abs/2201.07845}, + urldate = {2022-10-06}, + abstract = {The C programming language was developed in the 1970s as a fairly unconventional systems and operating systems development tool, but has, through the course of the ISO Standards process, added many attributes of more conventional programming languages and become less suitable for operating systems development. Operating system programming continues to be done in non-ISO dialects of C. 
The differences provide a glimpse of operating system requirements for programming languages.}, + archiveprefix = {arxiv}, + langid = {english}, + keywords = {Computer Science - Operating Systems,Computer Science - Programming Languages,D.3,D.4,{d.4, d.3}}, + note = {Comment: PLOS '21: Proceedings of the 11th Workshop on Programming Languages and Operating Systems October 2021} +} + +@misc{zeiglerComparingDevelopmentCosts1995, + title = {Comparing Development Costs of {{C}} and {{Ada}}}, + author = {Zeigler, Stephen F.}, + year = {1995}, + month = mar, + url = {http://archive.adaic.com/intro/ada-vs-c/cada_art.html}, + urldate = {2023-12-29} +} + @phdthesis{zeilbergerLogicalBasisEvaluation2009, title = {The Logical Basis of Evaluation Order and Pattern-Matching}, author = {Zeilberger, Noam},