guile-rsv

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit 2d78ec17559f5eaf2cf772ae301bd7d210c70c34
parent bd35a5c9e3e4d0666e645bd3a8026f46c9663496
Author: Yuval Langer <yuval.langer@gmail.com>
Date:   Sat, 13 Jan 2024 07:38:57 +0200

Implement a stream of rows reader of RSV.

Also:
- Rename procedures to conventional names.
- Replace position record with row and column count variables, for error reporting.
- Add points to TODO.

Diffstat:
MREADME.md | 4+++-
MTODO.org | 6++++++
Mrsv/arbitrary-null.scm | 109++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Drsv/read-position.scm | 44--------------------------------------------
Arsv/rows-streams.scm | 27+++++++++++++++++++++++++++
Mtests.scm | 53++++++++++++++++++++++++++++++++++++++++-------------
6 files changed, 134 insertions(+), 109 deletions(-)

diff --git a/README.md b/README.md @@ -25,9 +25,11 @@ $ printf '(("moo" #f) (#f "poo"))' | ./scm2rsv | ./rsv2scm (("moo" #f) (#f "poo")) ``` +## Repositories: + Main repository is at [Codeberg.org][3]. -## Mirrors: +### Mirrors: - [Kaka Farm's Stagit][4] - [Github][5]. diff --git a/TODO.org b/TODO.org @@ -1,4 +1,6 @@ - [ ] Use a single object to track RSV position. +- [ ] I don't like the naming of the libraries. Rename things into + something proper. Zipheir's points: @@ -17,3 +19,7 @@ Zipheir's points: That way, people can easily extract them. So (error <message> row-num offset) You can then call error-object-irritants in R7RS to get them out. It's not important for the binary tools, of course. +- [ ] Oh yeah, if they're doing I/O they should definitely be called + read-something. +- [ ] Actually, X->Y procedures in Scheme generally return one + value. I don't think there are any exceptions. diff --git a/rsv/arbitrary-null.scm b/rsv/arbitrary-null.scm @@ -1,12 +1,12 @@ (define-library (rsv arbitrary-null) (import (scheme base) (scheme file) - (scheme write) - - (rsv read-position)) - (export scm->rsv - rsv->scm - main) + (scheme write)) + (export write-rsv + write-rsv-row + read-rsv + read-rsv-row + legal-rsv?) (begin (define row-terminator-byte #xFD) ;; 253 @@ -17,16 +17,16 @@ (define (value-terminator-byte? byte) (= value-terminator-byte byte)) (define (null-value-byte? byte) (= null-value-byte byte)) - (define (field->rsv field null-value port) + (define (write-rsv-field field port) (cond - ((equal? null-value field) + ((not field) (write-u8 null-value-byte port)) (else (write-bytevector (string->utf8 field) port)))) - (define (row->rsv row null-value port) + (define (write-rsv-row row port) (let loop ((row row)) (cond ((null? row) @@ -35,11 +35,11 @@ (else (let ((field (car row)) (rest-of-fields (cdr row))) - (field->rsv field null-value port) + (write-rsv-field field port) (write-u8 value-terminator-byte port) (loop rest-of-fields)))))) - (define (scm->rsv scm null-value port) + (define (write-rsv scm port) "Convert provided SCM, which is a list of lists of strings or null values, into RSV and write it into PORT. The SCM, the sexp representation of RSV, is defined as: @@ -47,14 +47,14 @@ The SCM, the sexp representation of RSV, is defined as: RSV := ( ROW* ) ROW := ( STRING-or-NULL-VALUE* ) STRING-or-NULL-VALUE := String | Null-Value -Null-Value := The value provided as NULL-VALUE" +Null-Value := #f" (let loop ((scm scm)) (cond ((null? scm) '()) (else (let ((row (car scm)) (rest-of-rows (cdr scm))) - (row->rsv row null-value port) + (write-rsv-row row port) (loop rest-of-rows)))))) (define (legal-utf8? byte) @@ -67,33 +67,34 @@ Null-Value := The value provided as NULL-VALUE" (and (integer? byte) (< -1 byte #x100))) - (define (string->scm position port) + (define (read-rsv-string row-count column-count port) "Read a string from PORT. Return the string just read, and the number of bytes just read." (let ((output-field-port (open-output-bytevector))) - (let loop ((position position)) + (let loop ((column-count column-count)) (let ((byte (read-u8 port))) (cond ((eof-object? byte) (error (string-append "Prematurely terminated RSV at row " - (number->string (position-row position)) + (number->string row-count) " at an offset of " - (number->string (position-column position)) + (number->string column-count) " from the beginning of the last RSV row."))) ((value-terminator-byte? byte) (values (utf8->string (get-output-bytevector output-field-port)) - (add-position-column position 1))) + row-count + column-count)) ((legal-utf8? byte) (write-u8 byte output-field-port) - (loop (add-position-column position 1))) + (loop (+ column-count 1))) (else (error "Illegal value returned by RSV string reader:" byte))))))) - (define (read-null-value position null-value port) + (define (read-null-value row-count column-count port) ;; `row->scm` had already `peek-u8`-ed and knows that the next ;; `read-u8` will be the Null-Value-Byte, a.k.a. #xFE, so we can ;; remove it without looking at it. @@ -102,88 +103,94 @@ read." (cond ((value-terminator-byte? byte) ;; Return successfully. - (values null-value - (add-position-column position 2))) + (values row-count + (+ column-count 2))) ((number? byte) ;; If we've read any other byte following a null byte, we ;; raise an error. (error (string-append "Expected a Value-Terminator-Byte (#xFF) after" " a Null Byte (#xFE) at row " - (number->string (position-row position)) + (number->string row-count) " at an offset of " - (number->string (position-column position)) + (number->string column-count) " from the beggining of the last RSV row, instead got: " (number->string byte)))) (else (error (string-append "Expected a Value-Terminator-Byte (#xFF) after" " a Null Byte (#xFE) at row " - (number->string (position-row position)) + (number->string row-count) " at an offset of " - (number->string (position-column position)) + (number->string column-count) " from the beggining of the last RSV row, instead got illegal value:") byte))))) - (define (row->scm null-value position port) + (define (read-rsv-row row-count column-count port) (let loop ((row '()) - (position position)) + (row-count row-count) + (column-count column-count)) (let ((byte (peek-u8 port))) (cond ((eof-object? byte) (error (string-append "Prematurely terminated RSV at row " - (number->string (position-row position)) + (number->string row-count) " at an offset of " - (number->string (position-column position)) + (number->string column-count) " from the beginning of the last RSV row."))) ((row-terminator-byte? byte) ;; End of row. (read-u8 port) ;; Remove row terminator. (values (reverse row) - (add-position-row (set-position-column position 0) - 1))) + (+ row-count 1) + 0)) ((null-value-byte? byte) ;; Field is null. - (let-values (((new-field-value new-position) - (read-null-value position null-value port))) - (loop (cons new-field-value - row) - new-position))) + (let-values (((new-row-count new-column-count) + (read-null-value row-count column-count port))) + (loop (cons #f row) + new-row-count + new-column-count))) ((value-terminator-byte? byte) (read-u8 port) (loop (cons "" row) - (add-position-column position 1))) + row-count + (+ column-count 1))) ((legal-utf8? byte) ;; Field is a string. - (let-values (((new-field-value new-position) - (string->scm position - port))) + (let-values (((new-field-value new-row-count new-column-count) + (read-rsv-string row-count + column-count + port))) (loop (cons new-field-value row) - new-position))) + new-row-count + new-column-count))) (else - (error (string-append "Illegal value returned in row->scm when reading RSV row " - (number->string (position-row position)) + (error (string-append "Illegal value returned in read-rsv-row when reading RSV row " + (number->string row-count) " at offset of " - (number->string (position-column position)) + (number->string column-count) " bytes by row reader:") byte)))))) - (define (rsv->scm null-value port) + (define (read-rsv port) (let loop ((rows '()) - (position (make-position 0 0))) + (row-count 0) + (column-count 0)) (let ((byte (peek-u8 port))) (cond ((eof-object? byte) (reverse rows)) ((legal-rsv? byte) - (let-values (((new-row new-position) - (row->scm null-value position port))) + (let-values (((new-row new-row-count new-column-count) + (read-rsv-row row-count column-count port))) (loop (cons new-row rows) - new-position))) + new-row-count + new-column-count))) (else (error (string-append "Illegal value returned when reading RSV row " - (number->string (position-row position)) + (number->string row-count) ":") byte)))))))) diff --git a/rsv/read-position.scm b/rsv/read-position.scm @@ -1,44 +0,0 @@ -(define-library (rsv read-position) - (import (scheme base) - (srfi srfi-9)) - (export make-position - position-row - position-column - add-position-row - add-position-column - add-position-row-column - set-position-column) - - (begin - (define-record-type <position> - (make-position row column) - position? - (row position-row) - (column position-column)) - - (define (set-position-row position n) - (make-position (+ (position-row position) - n) - (position-column position))) - - (define (set-position-column position n) - (make-position (position-row position) - (+ (position-column position) - n))) - - (define (add-position-row position n) - "Add N to POSITION's row count." - (set-position-row position - (+ (position-row position) - n))) - - (define (add-position-column position n) - "Add N to POSITION's column count." - (set-position-column position - (+ (position-column position) - n))) - - (define (add-position-row-column position n m) - "Add N to POSITION's row count and M to its column count." - (make-position (add-position-row position n) - (add-position-column position m))))) diff --git a/rsv/rows-streams.scm b/rsv/rows-streams.scm @@ -0,0 +1,27 @@ +(define-library (rsv rows-streams) + (import (scheme base) + + (srfi srfi-41) + + (rsv arbitrary-null)) + (export port->rsv-row-stream) + + (begin + (define-stream (port->rsv-row-stream port) + (let loop ((row-count 0) + (column-count 0)) + (let ((byte (peek-u8 port))) + (cond + ((eof-object? byte) stream-null) + + ((legal-rsv? byte) + (let-values (((new-row new-row-count new-column-count) + (read-rsv-row row-count column-count port))) + (stream-cons new-row + (loop new-row-count + new-column-count)))) + + (else + (error "Illegal value returned when reading RSV row: (row number, illegal value)" + row-count + byte)))))))) diff --git a/tests.scm b/tests.scm @@ -4,9 +4,13 @@ (scheme write) (srfi srfi-1) + (srfi srfi-41) (srfi srfi-64) - (rsv arbitrary-null)) + (statprof) + + (rsv arbitrary-null) + (rsv rows-streams)) (begin (define valid-filenames @@ -46,23 +50,46 @@ (else (error "Illegal value while reading file:" byte))))))) - (define (run-back-and-forth-test) - (let* ((file-contents (get-bytevector-all (current-input-port))) - (input-bytevector-port (open-input-bytevector file-contents)) - (scm (rsv->scm #f input-bytevector-port)) - (output-bytevector-port (open-output-bytevector))) - (scm->rsv scm #f output-bytevector-port) - (values file-contents (get-output-bytevector output-bytevector-port)))) + (define (run-back-and-forth-test original-rsv) + (statprof + (lambda () + (let* ((input-bytevector-port (open-input-bytevector original-rsv)) + (scm (read-rsv input-bytevector-port)) + (output-bytevector-port (open-output-bytevector))) + (write-rsv scm output-bytevector-port) + (test-equal original-rsv + (get-output-bytevector output-bytevector-port)))) + #:count-calls? #t)) + + (define (scm-rows-stream->rsv stream) + (let ((output-bytevector-port (open-output-bytevector))) + (let loop ((stream stream)) + (cond + ((stream-null? stream) + (get-output-bytevector output-bytevector-port)) + (else + (let* ((new-scm-row (stream-car stream))) + (write-rsv-row new-scm-row output-bytevector-port) + (loop (stream-cdr stream)))))))) + + (define (run-stream-back-and-forth-test original-rsv) + (let* ((input-bytevector-port (open-input-bytevector original-rsv)) + (rows-stream (port->rsv-row-stream input-bytevector-port))) + (let ((regenerated-rsv (scm-rows-stream->rsv rows-stream))) + (test-equal original-rsv regenerated-rsv)))) (test-begin "RSV") (for-each (lambda (filename) (write filename) (newline) - (test-group filename - (let-values (((original-rsv regenerated-rsv) - (with-input-from-file filename - run-back-and-forth-test))) - (test-equal original-rsv regenerated-rsv)))) + (call-with-input-file filename + (lambda (port) + (test-group filename + (let ((original-rsv (get-bytevector-all port))) + (display 'run-back-and-forth-test) (newline) + (run-back-and-forth-test original-rsv) + (display 'run-stream-back-and-forth-test) (newline) + (run-stream-back-and-forth-test original-rsv)))))) valid-filenames) (test-end "RSV")))