guile-rsv

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit 0518c5c57d3267ea1c9625e76e5e55be046d5ff3
parent 740a4c6af0e87ca35ccc1bcc7f063cfbc2f87333
Author: Yuval Langer <yuval.langer@gmail.com>
Date:   Wed, 10 Jan 2024 05:10:32 +0200

Start work on useful error detection and error messages.

Diffstat:
MTODO.org | 8++++----
Mrsv/arbitrary-null.scm | 107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
2 files changed, 82 insertions(+), 33 deletions(-)

diff --git a/TODO.org b/TODO.org @@ -1,7 +1,7 @@ Zipheir's points: -- ALL-CAPS-NAMES are not very Schemely, -- you can write hex literals, e.g. #xfd, #xff, etc., -- every 'match' expression should have a catch-all failure clause that - raises an exception (probably an assertion-violation, in R6RS +- [X] ALL-CAPS-NAMES are not very Schemely, +- [ ] you can write hex literals, e.g. #xfd, #xff, etc., +- [ ] every 'match' expression should have a catch-all failure clause + that raises an exception (probably an assertion-violation, in R6RS terms). i.e. a more *useful* exception than "no match". diff --git a/rsv/arbitrary-null.scm b/rsv/arbitrary-null.scm @@ -8,13 +8,13 @@ main) (begin - (define ROW-TERMINATOR-BYTE 253) ;; 0xFD - (define VALUE-TERMINATOR-BYTE 255) ;; 0xFF - (define NULL-VALUE-BYTE 254) ;; 0xFE + (define row-terminator-byte #xFD) ;; 253 + (define value-terminator-byte #xFF) ;; 255 + (define null-value-byte #xFE) ;; 254 - (define (row-terminator-byte? byte) (= ROW-TERMINATOR-BYTE byte)) - (define (value-terminator-byte? byte) (= VALUE-TERMINATOR-BYTE byte)) - (define (null-value-byte? byte) (= NULL-VALUE-BYTE byte)) + (define (row-terminator-byte? byte) (= row-terminator-byte byte)) + (define (value-terminator-byte? byte) (= value-terminator-byte byte)) + (define (null-value-byte? byte) (= null-value-byte byte)) (define (null-value? null-value our-value) (equal? null-value our-value)) @@ -22,7 +22,7 @@ (define (field->rsv field null-value? port) (match field ((? null-value?) - (write-u8 NULL-VALUE-BYTE + (write-u8 null-value-byte port)) (field (write-bytevector (string->utf8 field) @@ -32,11 +32,11 @@ (let loop ((row row)) (match row (() - (write-u8 ROW-TERMINATOR-BYTE + (write-u8 row-terminator-byte port)) ((field fields ...) (field->rsv field null-value? port) - (write-u8 VALUE-TERMINATOR-BYTE port) + (write-u8 value-terminator-byte port) (loop fields))))) (define (scm->rsv scm null-value? port) @@ -47,38 +47,87 @@ (row->rsv row null-value? port) (loop rows))))) - (define (string->scm null-value port) + (define (legal-utf8? byte) + ;; XXX: Replace with a better predicate. (Poor Bob...) + (number? byte)) + + (define (legal-rsv? byte) + ;; XXX: Same as for legal-utf8? + (number? byte)) + + (define (string->scm number-of-rows-read port) + "Read a string from PORT. + +Return the string just read, and the number of bytes just +read." (let ((output-field-port (open-output-bytevector))) - (let loop () + (let loop ((byte-offset-from-row-start 0)) (match (read-u8 port) - ((? value-terminator-byte? byte) - (utf8->string (get-output-bytevector output-field-port))) - (byte + ((? eof-object?) + (error (string-append "Prematurely terminated RSV at row " + (number->string number-of-rows-read) + " at an offset of " + (number->string byte-offset-from-row-start) + " from the beginning of the last RSV row."))) + ((? value-terminator-byte?) + (values (utf8->string (get-output-bytevector output-field-port)) + (+ byte-offset-from-row-start 1))) + ((? legal-utf8? byte) (write-u8 byte output-field-port) - (loop)))))) + (loop (+ byte-offset-from-row-start 1))) + (_ + (error "Illegal value returned by RSV string reader:")))))) - (define (row->scm null-value port) - (let loop ((row '())) + (define (row->scm null-value number-of-rows-read port) + (let loop ((row '()) + (byte-offset-from-row-start 0)) (match (peek-u8 port) - ((? row-terminator-byte? byte) ;; End of row. - (read-u8 port) ;; Remove row terminator. + ((? eof-object?) + (error (string-append "Prematurely terminated RSV at row " + (number->string number-of-rows-read) + " at an offset of " + (number->string byte-offset-from-row-start) + " from the beginning of the last RSV row."))) + ((? row-terminator-byte?) ;; End of row. + (read-u8 port) ;; Remove row terminator. (reverse row)) ((? null-value-byte?) ;; Field is null. - (read-u8 port) ;; remove null byte. - (read-u8 port) ;; remove value terminator byte. + (read-u8 port) ;; remove null byte. + (read-u8 port) ;; remove value terminator byte. (loop (cons null-value - row))) - (byte ;; Field is a string. - (loop (cons (string->scm null-value port) - row)))))) + row) + (+ byte-offset-from-row-start + 2))) + ((? legal-utf8? byte) ;; Field is a string. + (let-values (((field number-of-bytes-in-the-utf8-string) + (string->scm number-of-rows-read port))) + (loop (cons field + row) + (+ byte-offset-from-row-start + number-of-bytes-in-the-utf8-string)))) + (illegal-value + (error (string-append "Illegal value returned when reading RSV row " + (number->string number-of-rows-read) + " at offset of " + (number->string byte-offset-from-row-start) + " bytes by row reader:") + illegal-value))))) (define (rsv->scm null-value port) - (let loop ((rows '())) + (let loop ((rows '()) + (number-of-rows-read 0)) (match (peek-u8 port) ((? eof-object?) (reverse rows)) - (_ - (loop (cons (row->scm null-value port) - rows)))))))) + ((? legal-rsv?) + (loop (cons (row->scm null-value number-of-rows-read port) + rows) + (+ number-of-rows-read 1))) + + (illegal-value + (error (string-append "Illegal value returned when reading RSV row " + (number->string number-of-rows-read) + ":") + illegal-value)))))))