commit 801fb5c2ef7995480287cf22a8865614edccc96e
parent 9c250a0d8c09be3f8f019b953a415f8c0f884d6f
Author: Wolfgang Corcoran-Mathe <wcm@sigwinch.xyz>
Date: Mon, 5 Feb 2024 19:49:17 -0500
Formal syntax: Texify, part 1.
Diffstat:
2 files changed, 160 insertions(+), 104 deletions(-)
diff --git a/doc/r7rs-small/formal-syntax.texinfo b/doc/r7rs-small/formal-syntax.texinfo
@@ -1,15 +1,19 @@
@node Formal syntax
@section Formal syntax
-This section provides a formal syntax for Scheme written in an extended BNF.
+This section provides a formal syntax for Scheme written in an extended
+BNF.
-All spaces in the grammar are for legibility. Case is not significant except in the definitions
-of @svar{letter}, @svar{character name} and @svar{mnemonic escape}; for example, #x1A and #X1a are
-equivalent, but foo and Foo and #\space and #\Space are distinct. @svar{empty} stands for the
-empty string.
+All spaces in the grammar are for legibility. Case is not significant
+except in the definitions of @svar{letter}, @svar{character name} and @svar{mnemonic escape}; for example, @code{#x1A}
+and @code{#X1a} are equivalent, but @code{foo} and @code{Foo}
+and @code{#\space} and @code{#\Space} are distinct.
+@svar{empty} stands for the empty string.
-The following extensions to BNF are used to make the description more concise: @svar{thing}*
-means zero or more occurrences of @svar{thing}; and @svar{thing}+ means at least one @svar{thing}.
+The following extensions to BNF are used to make the description more
+concise: @arbno{@svar{thing}} means zero or more occurrences of
+@svar{thing}; and @atleastone{@svar{thing}} means at least one
+@svar{thing}.
@menu
* Lexical structure::
@@ -24,108 +28,148 @@ means zero or more occurrences of @svar{thing}; and @svar{thing}+ means at least
@node Lexical structure
@subsection Lexical structure
-This section describes how individual tokens(identifiers, numbers, etc.) are formed from
-sequences of characters. The following sections describe how expressions and programs
-are formed from sequences of tokens.
-
-@svar{Intertoken space} can occur on either side of any token, but not within a token.
-
-Identifiers that do not begin with a vertical line are terminated by a @svar{delimiter} or by the
-end of the input. So are dot, numbers, characters, and booleans. Identifiers that begin
-with a vertical line are terminated by another vertical line.
-
-The following four characters from the ASCII repertoire are reserved for future
-extensions to the language: [ ] @{ @}
-
-In addition to the identifier characters of the ASCII repertoire specified below, Scheme
-implementations may permit any additional repertoire of non-ASCII Unicode characters to
-be employed in identifiers, provided that each such character has a Unicode general
-category of Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pd, Pc, Po, Sc, Sm, Sk, So, or Co, or is
-U+200C or U+200D (the zero-width non-joiner and joiner, respectively, which are needed
-for correct spelling in Persian, Hindi, and other languages). However, it is an error for the
-first character to have a general category of Nd, Mc, or Me. It is also an error to use a
-non-Unicode character in symbols or identifiers.
-
-All Scheme implementations must permit the escape sequence \x@svar{hexdigits}; to appear in
-Scheme identifiers that are enclosed in vertical lines. If the character with the given
-Unicode scalar value is supported by the implementation, identifiers containing such a
-sequence are equivalent to identifiers containing the corresponding character.
-
-@svar{token} ⟶@svar{identifier} ∣@svar{boolean} ∣@svar{number} ∣@svar{character} ∣@svar{string}
-∣( ∣) ∣#( ∣#u8( ∣' ∣` ∣, ∣,@ ∣.
-@svar{delimiter} ⟶@svar{whitespace} ∣@svar{vertical line}
-∣( ∣) ∣" ∣;
-@svar{intraline whitespace} ⟶@svar{space or tab}
-@svar{whitespace} ⟶@svar{intraline whitespace} ∣@svar{line ending}
-@svar{vertical line} ⟶|
-@svar{line ending} ⟶@svar{newline} ∣@svar{return} @svar{newline}
-∣@svar{return}
-@svar{comment} ⟶; ⟨all subsequent characters up to a
- line ending⟩ ∣@svar{nested comment}
-∣#; @svar{intertoken space} @svar{datum}
-@svar{nested comment} ⟶#| @svar{comment text}
-@svar{comment cont}* |#
-@svar{comment text} ⟶⟨character sequence not containing
- #| or |#⟩
-@svar{comment cont} ⟶@svar{nested comment} @svar{comment text}
-@svar{directive} ⟶#!fold-case ∣#!no-fold-case Note that it is ungrammatical to follow a
-@svar{directive} with anything but a @svar{delimiter} or the end of file.
-
-@svar{atmosphere} ⟶@svar{whitespace} ∣@svar{comment} ∣@svar{directive}
-@svar{intertoken space} ⟶@svar{atmosphere}*
-
-Note that +i, -i and @svar{infnan} below are exceptions to the @svar{peculiar identifier} rule; they are
-parsed as numbers, not identifiers.
-
-@svar{identifier} ⟶@svar{initial} @svar{subsequent}*
-∣@svar{vertical line} @svar{symbol element}* @svar{vertical line}
-∣@svar{peculiar identifier}
-@svar{initial} ⟶@svar{letter} ∣@svar{special initial}
-@svar{letter} ⟶a ∣b ∣c ∣... ∣z
-∣A ∣B ∣C ∣... ∣Z
-@svar{special initial} ⟶! ∣$ ∣% ∣& ∣* ∣/ ∣: ∣< ∣=
-∣> ∣? ∣@ ∣^ ∣_ ∣~
-@svar{subsequent} ⟶@svar{initial} ∣@svar{digit}
-∣@svar{special subsequent}
-@svar{digit} ⟶0 ∣1 ∣2 ∣3 ∣4 ∣5 ∣6 ∣7 ∣8 ∣9
-@svar{hex digit} ⟶@svar{digit} ∣a ∣b ∣c ∣d ∣e ∣f
-@svar{explicit sign} ⟶+ ∣-
-@svar{special subsequent} ⟶@svar{explicit sign} ∣. ∣@
-@svar{inline hex escape} ⟶\x@svar{hex scalar value};
-@svar{hex scalar value} ⟶@svar{hex digit}+
-@svar{mnemonic escape} ⟶\a ∣\b ∣\t ∣\n ∣\r
-@svar{peculiar identifier} ⟶@svar{explicit sign}
-∣@svar{explicit sign} @svar{sign subsequent} @svar{subsequent}*
-∣@svar{explicit sign} . @svar{dot subsequent} @svar{subsequent}*
-∣. @svar{dot subsequent} @svar{subsequent}*
-@svar{dot subsequent} ⟶@svar{sign subsequent} ∣.
-@svar{sign subsequent} ⟶@svar{initial} ∣@svar{explicit sign} ∣@
-@svar{symbol element} ⟶ @svar{any character other than <vertical line} or \>
-∣@svar{inline hex escape} ∣@svar{mnemonic escape} ∣\|
-@svar{boolean} ⟶#t ∣#f ∣#true ∣#false
-
-@svar{character} ⟶#\ @svar{any character}
-∣#\ @svar{character name}
-∣#\x@svar{hex scalar value}
-@svar{character name} ⟶alarm ∣backspace ∣delete
-∣escape ∣newline ∣null ∣return ∣space ∣tab
-
-@svar{string} ⟶" @svar{string element}* "
-@svar{string element} ⟶@svar{any character other than " or \\}
-∣@svar{mnemonic escape} ∣\" ∣\\ ∣\|
-∣\@svar{intraline whitespace}*@svar{line ending}
- @svar{intraline whitespace}*
-∣@svar{inline hex escape}
-@svar{bytevector} ⟶#u8(@svar{byte}*)
-@svar{byte} ⟶@svar{any exact integer between 0 and 255}
-
-@svar{number} ⟶@svar{num 2} ∣@svar{num 8}
-∣@svar{num 10} ∣@svar{num 16}
+This section describes how individual tokens (identifiers,
+numbers, etc.) are formed from sequences of characters. The following
+sections describe how expressions and programs are formed from sequences
+of tokens.
+
+@svar{Intertoken space} can occur on either side of any token, but not
+within a token.
+
+Identifiers that do not begin with a vertical line are
+terminated by a @svar{delimiter} or by the end of the input.
+So are dot, numbers, characters, and booleans.
+Identifiers that begin with a vertical line are terminated by another vertical line.
+
+The following four characters from the ASCII repertoire
+are reserved for future extensions to the
+language: @code{[ ] @{ @}}
+
+In addition to the identifier characters of the ASCII repertoire specified
+below, Scheme implementations may permit any additional repertoire of
+Unicode characters to be employed in identifiers,
+provided that each such character has a Unicode general category of Lu,
+Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pd, Pc, Po, Sc, Sm, Sk, So,
+or Co, or is U+200C or U+200D (the zero-width non-joiner and joiner,
+respectively, which are needed for correct spelling in Persian, Hindi,
+and other languages).
+However, it is an error for the first character to have a general category
+of Nd, Mc, or Me. It is also an error to use a non-Unicode character
+in symbols or identifiers.
+
+All Scheme implementations must permit the escape sequence
+@code{\x}@svar{hexdigits}@code{;}
+to appear in Scheme identifiers that are enclosed in vertical lines. If the character
+with the given Unicode scalar value is supported by the implementation,
+identifiers containing such a sequence are equivalent to identifiers
+containing the corresponding character.
+
+@c TODO: Alignment and brackets for non-variable text.
+@format
+@svar{token} @expansion{} @svar{identifier} | @svar{boolean}
+ | @svar{number} | @svar{character} | @svar{string}
+ | @code{(} | @code{)} | @code{#(} | @code{#u8(} | @code{'}
+ | @code{`} | @code{,} | @code{,@@} | @code{.}
+@svar{delimiter} @expansion{} @svar{whitespace} | @svar{vertical line}
+ | @code{(} | @code{)} | @code{"} | @code{;}
+@svar{intraline whitespace} @expansion{} @svar{space or tab}
+@svar{whitespace} @expansion{} @svar{intraline whitespace}
+ | @svar{line ending}
+@svar{vertical line} @expansion{} @code{|}
+@svar{line ending} @expansion{} @svar{newline}
+ | @svar{return} @svar{newline} | @svar{return}
+@svar{comment} @expansion{} @code{;} all subsequent characters
+ up to a line ending
+ | @svar{nested comment}
+ | @code{#;} @svar{intertoken space} @svar{datum}
+@svar{nested comment} @expansion{} @code{#|} @svar{comment text}
+ @arbno{@svar{comment cont}} @code{|#}
+@svar{comment text} @expansion{} character sequence not containing
+ @code{#|} or @code{|#}
+@svar{comment cont} @expansion{} @svar{nested comment}
+ @svar{comment text}
+@svar{directive} @expansion{} @code{#!fold-case} | @code{#!no-fold-case}
+@end format
+
+Note that it is ungrammatical to follow a @svar{directive} with anything
+but a @svar{delimiter} or the end of file.
+
+@format
+@svar{atmosphere} @expansion{} @svar{whitespace} | @svar{comment}
+ | @svar{directive}
+@svar{intertoken space} @expansion{} @arbno{@svar{atmosphere}}
+@end format
+
+Note that @code{+i}, @code{-i} and @svar{infnan} below are exceptions to the
+@svar{peculiar identifier} rule; they are parsed as numbers, not
+identifiers.
+
+@format
+@svar{identifier} @expansion{} @svar{initial} @arbno{@svar{subsequent}}
+ | @svar{vertical line} @arbno{@svar{symbol element}} @svar{vertical line}
+ | @svar{peculiar identifier}
+@svar{initial} @expansion{} @svar{letter} | @svar{special initial}
+@svar{letter} @expansion{} @code{a} | @code{b} | @code{c} | @dots{} | @code{z}
+ | @code{A} | @code{B} | @code{C} | @dots{} | @code{Z}
+@svar{special initial} @expansion{} @code{!} | @code{$} | @code{%}
+ | @code{&} | @code{*} | @code{/} | @code{:} | @code{<} | @code{=}
+ | @code{>} | @code{?} | @code{^} | @code{_} | @code{~}
+@svar{subsequent} @expansion{} @svar{initial} | @svar{digit}
+ | @svar{special subsequent}
+@svar{digit} @expansion{} @code{0} | @code{1} | @code{2} | @code{3}
+ | @code{4} | @code{5} | @code{6} | @code{7} | @code{8} | @code{9}
+@svar{hex digit} @expansion{} @svar{digit} | @code{a} | @code{b}
+ | @code{c} | @code{d} | @code{e} | @code{f}
+@svar{explicit sign} @expansion{} @code{+} | @code{-}
+@svar{special subsequent} @expansion{} @svar{explicit sign} | @code{.}
+ | @code{@@}
+@svar{inline hex escape} @expansion{} @code{\x}@svar{hex scalar value}@code{;}
+@svar{hex scalar value} @expansion{} @atleastone{@svar{hex digit}}
+@svar{mnemonic escape} @expansion{} @code{\a} | @code{\b} | @code{\t}
+ | @code{\n} | @code{\r}
+@svar{peculiar identifier} @expansion{} @svar{explicit sign}
+ | @svar{explicit sign} @svar{sign subsequent} @arbno{@svar{subsequent}}
+ | @svar{explicit sign} @code{.} @svar{dot subsequent}
+ @arbno{@svar{subsequent}}
+ | @code{.} @svar{dot subsequent} @arbno{@svar{subsequent}}
+@svar{dot subsequent} @expansion{} @svar{sign subsequent} | @code{.}
+@svar{sign subsequent} @expansion{} @svar{initial} | @svar{explicit sign}
+ | @code{@@}
+@svar{symbol element} @expansion{}
+ any character other than @svar{vertical line} or @code{\}
+ | @svar{inline hex escape} | @svar{mnemonic escape} | @code{\|}
+
+@svar{boolean} @expansion{} @code{#t} | @code{#f} | @code{#true}
+ | @code{#false}
+
+@svar{character} @expansion{} @code{#\} @svar{any character}
+ | @code{#\} @svar{character name}
+ | @code{#\x}@svar{hex scalar value}
+
+@svar{character name} @expansion{} @code{alarm} | @code{backspace}
+ | @code{delete} | @code{escape} | @code{newline} | @code{null}
+ | @code{return} | @code{space} | @code{tab}
+
+@svar{string} @expansion{} @code{"} @arbno{@svar{string element}} @code{"}
+
+@svar{string element} @expansion{}
+ any character other than @code{"} or @code{\}
+ | @svar{mnemonic escape} | @code{\"} | @code{\\}
+ | @code{\}@arbno{@svar{intraline whitespace}}@svar{line ending}
+ @arbno{@svar{intraline whitespace}}
+ | @svar{inline hex escape}
+
+@svar{bytevector} @expansion{} @code{#u8(}@arbno{@svar{byte}}@code{)}
+
+@svar{byte} @expansion{} any exact integer between 0 and 255
+@end format
+
The following rules for @svar{num R}, @svar{complex R}, @svar{real R}, @svar{ureal R}, @svar{uinteger R}, and
@svar{prefix R} are implicitly replicated for R = 2, 8, 10, and 16. There are no rules for <decimal
2>, @svar{decimal 8}, and @svar{decimal 16}, which means that numbers containing decimal points
or exponents are always in decimal radix. Although not shown below, all alphabetic
characters used in the grammar of numbers can appear in either upper or lower case.
+
@svar{num R} ⟶@svar{prefix R} @svar{complex R}
@svar{complex R} ⟶@svar{real R} ∣@svar{real R} @ @svar{real R}
∣@svar{real R} + @svar{ureal R} i ∣@svar{real R} - @svar{ureal R} i
diff --git a/doc/r7rs-small/r7rs-texinfo-macros.texinfo b/doc/r7rs-small/r7rs-texinfo-macros.texinfo
@@ -145,3 +145,15 @@ R@sup{6}RS
@macro rationale
@subheading Rationale:
@end macro
+
+@c --- EBNF macros
+
+@c Zero or more (Kleene star).
+@macro arbno {obj}
+\obj\@sup{*}
+@end macro
+
+@c One or more.
+@macro atleastone {obj}
+\obj\@sup{+}
+@end macro