commit 0518c5c57d3267ea1c9625e76e5e55be046d5ff3
parent 740a4c6af0e87ca35ccc1bcc7f063cfbc2f87333
Author: Yuval Langer <yuval.langer@gmail.com>
Date: Wed, 10 Jan 2024 05:10:32 +0200
Start work on useful error detection and error messages.
Diffstat:
2 files changed, 82 insertions(+), 33 deletions(-)
diff --git a/TODO.org b/TODO.org
@@ -1,7 +1,7 @@
Zipheir's points:
-- ALL-CAPS-NAMES are not very Schemely,
-- you can write hex literals, e.g. #xfd, #xff, etc.,
-- every 'match' expression should have a catch-all failure clause that
- raises an exception (probably an assertion-violation, in R6RS
+- [X] ALL-CAPS-NAMES are not very Schemely,
+- [ ] you can write hex literals, e.g. #xfd, #xff, etc.,
+- [ ] every 'match' expression should have a catch-all failure clause
+ that raises an exception (probably an assertion-violation, in R6RS
terms). i.e. a more *useful* exception than "no match".
diff --git a/rsv/arbitrary-null.scm b/rsv/arbitrary-null.scm
@@ -8,13 +8,13 @@
main)
(begin
- (define ROW-TERMINATOR-BYTE 253) ;; 0xFD
- (define VALUE-TERMINATOR-BYTE 255) ;; 0xFF
- (define NULL-VALUE-BYTE 254) ;; 0xFE
+ (define row-terminator-byte #xFD) ;; 253
+ (define value-terminator-byte #xFF) ;; 255
+ (define null-value-byte #xFE) ;; 254
- (define (row-terminator-byte? byte) (= ROW-TERMINATOR-BYTE byte))
- (define (value-terminator-byte? byte) (= VALUE-TERMINATOR-BYTE byte))
- (define (null-value-byte? byte) (= NULL-VALUE-BYTE byte))
+ (define (row-terminator-byte? byte) (= row-terminator-byte byte))
+ (define (value-terminator-byte? byte) (= value-terminator-byte byte))
+ (define (null-value-byte? byte) (= null-value-byte byte))
(define (null-value? null-value our-value)
(equal? null-value our-value))
@@ -22,7 +22,7 @@
(define (field->rsv field null-value? port)
(match field
((? null-value?)
- (write-u8 NULL-VALUE-BYTE
+ (write-u8 null-value-byte
port))
(field
(write-bytevector (string->utf8 field)
@@ -32,11 +32,11 @@
(let loop ((row row))
(match row
(()
- (write-u8 ROW-TERMINATOR-BYTE
+ (write-u8 row-terminator-byte
port))
((field fields ...)
(field->rsv field null-value? port)
- (write-u8 VALUE-TERMINATOR-BYTE port)
+ (write-u8 value-terminator-byte port)
(loop fields)))))
(define (scm->rsv scm null-value? port)
@@ -47,38 +47,87 @@
(row->rsv row null-value? port)
(loop rows)))))
- (define (string->scm null-value port)
+ (define (legal-utf8? byte)
+ ;; XXX: Replace with a better predicate. (Poor Bob...)
+ (number? byte))
+
+ (define (legal-rsv? byte)
+ ;; XXX: Same as for legal-utf8?
+ (number? byte))
+
+ (define (string->scm number-of-rows-read port)
+ "Read a string from PORT.
+
+Return the string just read, and the number of bytes just
+read."
(let ((output-field-port (open-output-bytevector)))
- (let loop ()
+ (let loop ((byte-offset-from-row-start 0))
(match (read-u8 port)
- ((? value-terminator-byte? byte)
- (utf8->string (get-output-bytevector output-field-port)))
- (byte
+ ((? eof-object?)
+ (error (string-append "Prematurely terminated RSV at row "
+ (number->string number-of-rows-read)
+ " at an offset of "
+ (number->string byte-offset-from-row-start)
+ " from the beginning of the last RSV row.")))
+ ((? value-terminator-byte?)
+ (values (utf8->string (get-output-bytevector output-field-port))
+ (+ byte-offset-from-row-start 1)))
+ ((? legal-utf8? byte)
(write-u8 byte
output-field-port)
- (loop))))))
+ (loop (+ byte-offset-from-row-start 1)))
+ (_
+ (error "Illegal value returned by RSV string reader:"))))))
- (define (row->scm null-value port)
- (let loop ((row '()))
+ (define (row->scm null-value number-of-rows-read port)
+ (let loop ((row '())
+ (byte-offset-from-row-start 0))
(match (peek-u8 port)
- ((? row-terminator-byte? byte) ;; End of row.
- (read-u8 port) ;; Remove row terminator.
+ ((? eof-object?)
+ (error (string-append "Prematurely terminated RSV at row "
+ (number->string number-of-rows-read)
+ " at an offset of "
+ (number->string byte-offset-from-row-start)
+ " from the beginning of the last RSV row.")))
+ ((? row-terminator-byte?) ;; End of row.
+ (read-u8 port) ;; Remove row terminator.
(reverse row))
((? null-value-byte?) ;; Field is null.
- (read-u8 port) ;; remove null byte.
- (read-u8 port) ;; remove value terminator byte.
+ (read-u8 port) ;; remove null byte.
+ (read-u8 port) ;; remove value terminator byte.
(loop (cons null-value
- row)))
- (byte ;; Field is a string.
- (loop (cons (string->scm null-value port)
- row))))))
+ row)
+ (+ byte-offset-from-row-start
+ 2)))
+ ((? legal-utf8? byte) ;; Field is a string.
+ (let-values (((field number-of-bytes-in-the-utf8-string)
+ (string->scm number-of-rows-read port)))
+ (loop (cons field
+ row)
+ (+ byte-offset-from-row-start
+ number-of-bytes-in-the-utf8-string))))
+ (illegal-value
+ (error (string-append "Illegal value returned when reading RSV row "
+ (number->string number-of-rows-read)
+ " at offset of "
+ (number->string byte-offset-from-row-start)
+ " bytes by row reader:")
+ illegal-value)))))
(define (rsv->scm null-value port)
- (let loop ((rows '()))
+ (let loop ((rows '())
+ (number-of-rows-read 0))
(match (peek-u8 port)
((? eof-object?)
(reverse rows))
- (_
- (loop (cons (row->scm null-value port)
- rows))))))))
+ ((? legal-rsv?)
+ (loop (cons (row->scm null-value number-of-rows-read port)
+ rows)
+ (+ number-of-rows-read 1)))
+
+ (illegal-value
+ (error (string-append "Illegal value returned when reading RSV row "
+ (number->string number-of-rows-read)
+ ":")
+ illegal-value)))))))