commit 2d78ec17559f5eaf2cf772ae301bd7d210c70c34
parent bd35a5c9e3e4d0666e645bd3a8026f46c9663496
Author: Yuval Langer <yuval.langer@gmail.com>
Date: Sat, 13 Jan 2024 07:38:57 +0200
Implement a stream of rows reader of RSV.
Also:
- Rename procedures to conventional names.
- Replace position record with row and column count variables, for error reporting.
- Add points to TODO.
Diffstat:
6 files changed, 134 insertions(+), 109 deletions(-)
diff --git a/README.md b/README.md
@@ -25,9 +25,11 @@ $ printf '(("moo" #f) (#f "poo"))' | ./scm2rsv | ./rsv2scm
(("moo" #f) (#f "poo"))
```
+## Repositories:
+
Main repository is at [Codeberg.org][3].
-## Mirrors:
+### Mirrors:
- [Kaka Farm's Stagit][4]
- [Github][5].
diff --git a/TODO.org b/TODO.org
@@ -1,4 +1,6 @@
- [ ] Use a single object to track RSV position.
+- [ ] I don't like the naming of the libraries. Rename things into
+ something proper.
Zipheir's points:
@@ -17,3 +19,7 @@ Zipheir's points:
That way, people can easily extract them. So (error <message>
row-num offset) You can then call error-object-irritants in R7RS to
get them out. It's not important for the binary tools, of course.
+- [ ] Oh yeah, if they're doing I/O they should definitely be called
+ read-something.
+- [ ] Actually, X->Y procedures in Scheme generally return one
+ value. I don't think there are any exceptions.
diff --git a/rsv/arbitrary-null.scm b/rsv/arbitrary-null.scm
@@ -1,12 +1,12 @@
(define-library (rsv arbitrary-null)
(import (scheme base)
(scheme file)
- (scheme write)
-
- (rsv read-position))
- (export scm->rsv
- rsv->scm
- main)
+ (scheme write))
+ (export write-rsv
+ write-rsv-row
+ read-rsv
+ read-rsv-row
+ legal-rsv?)
(begin
(define row-terminator-byte #xFD) ;; 253
@@ -17,16 +17,16 @@
(define (value-terminator-byte? byte) (= value-terminator-byte byte))
(define (null-value-byte? byte) (= null-value-byte byte))
- (define (field->rsv field null-value port)
+ (define (write-rsv-field field port)
(cond
- ((equal? null-value field)
+ ((not field)
(write-u8 null-value-byte
port))
(else
(write-bytevector (string->utf8 field)
port))))
- (define (row->rsv row null-value port)
+ (define (write-rsv-row row port)
(let loop ((row row))
(cond
((null? row)
@@ -35,11 +35,11 @@
(else
(let ((field (car row))
(rest-of-fields (cdr row)))
- (field->rsv field null-value port)
+ (write-rsv-field field port)
(write-u8 value-terminator-byte port)
(loop rest-of-fields))))))
- (define (scm->rsv scm null-value port)
+ (define (write-rsv scm port)
"Convert provided SCM, which is a list of lists of strings or null values, into RSV and write it into PORT.
The SCM, the sexp representation of RSV, is defined as:
@@ -47,14 +47,14 @@ The SCM, the sexp representation of RSV, is defined as:
RSV := ( ROW* )
ROW := ( STRING-or-NULL-VALUE* )
STRING-or-NULL-VALUE := String | Null-Value
-Null-Value := The value provided as NULL-VALUE"
+Null-Value := #f"
(let loop ((scm scm))
(cond
((null? scm) '())
(else
(let ((row (car scm))
(rest-of-rows (cdr scm)))
- (row->rsv row null-value port)
+ (write-rsv-row row port)
(loop rest-of-rows))))))
(define (legal-utf8? byte)
@@ -67,33 +67,34 @@ Null-Value := The value provided as NULL-VALUE"
(and (integer? byte)
(< -1 byte #x100)))
- (define (string->scm position port)
+ (define (read-rsv-string row-count column-count port)
"Read a string from PORT.
Return the string just read, and the number of bytes just
read."
(let ((output-field-port (open-output-bytevector)))
- (let loop ((position position))
+ (let loop ((column-count column-count))
(let ((byte (read-u8 port)))
(cond
((eof-object? byte)
(error (string-append "Prematurely terminated RSV at row "
- (number->string (position-row position))
+ (number->string row-count)
" at an offset of "
- (number->string (position-column position))
+ (number->string column-count)
" from the beginning of the last RSV row.")))
((value-terminator-byte? byte)
(values (utf8->string (get-output-bytevector output-field-port))
- (add-position-column position 1)))
+ row-count
+ column-count))
((legal-utf8? byte)
(write-u8 byte
output-field-port)
- (loop (add-position-column position 1)))
+ (loop (+ column-count 1)))
(else
(error "Illegal value returned by RSV string reader:"
byte)))))))
- (define (read-null-value position null-value port)
+ (define (read-null-value row-count column-count port)
;; `row->scm` had already `peek-u8`-ed and knows that the next
;; `read-u8` will be the Null-Value-Byte, a.k.a. #xFE, so we can
;; remove it without looking at it.
@@ -102,88 +103,94 @@ read."
(cond
((value-terminator-byte? byte)
;; Return successfully.
- (values null-value
- (add-position-column position 2)))
+ (values row-count
+ (+ column-count 2)))
((number? byte)
;; If we've read any other byte following a null byte, we
;; raise an error.
(error (string-append "Expected a Value-Terminator-Byte (#xFF) after"
" a Null Byte (#xFE) at row "
- (number->string (position-row position))
+ (number->string row-count)
" at an offset of "
- (number->string (position-column position))
+ (number->string column-count)
" from the beggining of the last RSV row, instead got: "
(number->string byte))))
(else
(error (string-append "Expected a Value-Terminator-Byte (#xFF) after"
" a Null Byte (#xFE) at row "
- (number->string (position-row position))
+ (number->string row-count)
" at an offset of "
- (number->string (position-column position))
+ (number->string column-count)
" from the beggining of the last RSV row, instead got illegal value:")
byte)))))
- (define (row->scm null-value position port)
+ (define (read-rsv-row row-count column-count port)
(let loop ((row '())
- (position position))
+ (row-count row-count)
+ (column-count column-count))
(let ((byte (peek-u8 port)))
(cond ((eof-object? byte)
(error (string-append "Prematurely terminated RSV at row "
- (number->string (position-row position))
+ (number->string row-count)
" at an offset of "
- (number->string (position-column position))
+ (number->string column-count)
" from the beginning of the last RSV row.")))
((row-terminator-byte? byte) ;; End of row.
(read-u8 port) ;; Remove row terminator.
(values (reverse row)
- (add-position-row (set-position-column position 0)
- 1)))
+ (+ row-count 1)
+ 0))
((null-value-byte? byte) ;; Field is null.
- (let-values (((new-field-value new-position)
- (read-null-value position null-value port)))
- (loop (cons new-field-value
- row)
- new-position)))
+ (let-values (((new-row-count new-column-count)
+ (read-null-value row-count column-count port)))
+ (loop (cons #f row)
+ new-row-count
+ new-column-count)))
((value-terminator-byte? byte)
(read-u8 port)
(loop (cons ""
row)
- (add-position-column position 1)))
+ row-count
+ (+ column-count 1)))
((legal-utf8? byte) ;; Field is a string.
- (let-values (((new-field-value new-position)
- (string->scm position
- port)))
+ (let-values (((new-field-value new-row-count new-column-count)
+ (read-rsv-string row-count
+ column-count
+ port)))
(loop (cons new-field-value
row)
- new-position)))
+ new-row-count
+ new-column-count)))
(else
- (error (string-append "Illegal value returned in row->scm when reading RSV row "
- (number->string (position-row position))
+ (error (string-append "Illegal value returned in read-rsv-row when reading RSV row "
+ (number->string row-count)
" at offset of "
- (number->string (position-column position))
+ (number->string column-count)
" bytes by row reader:")
byte))))))
- (define (rsv->scm null-value port)
+ (define (read-rsv port)
(let loop ((rows '())
- (position (make-position 0 0)))
+ (row-count 0)
+ (column-count 0))
(let ((byte (peek-u8 port)))
(cond
((eof-object? byte)
(reverse rows))
((legal-rsv? byte)
- (let-values (((new-row new-position)
- (row->scm null-value position port)))
+ (let-values (((new-row new-row-count new-column-count)
+ (read-rsv-row row-count column-count port)))
(loop (cons new-row rows)
- new-position)))
+ new-row-count
+ new-column-count)))
(else
(error (string-append "Illegal value returned when reading RSV row "
- (number->string (position-row position))
+ (number->string row-count)
":")
byte))))))))
diff --git a/rsv/read-position.scm b/rsv/read-position.scm
@@ -1,44 +0,0 @@
-(define-library (rsv read-position)
- (import (scheme base)
- (srfi srfi-9))
- (export make-position
- position-row
- position-column
- add-position-row
- add-position-column
- add-position-row-column
- set-position-column)
-
- (begin
- (define-record-type <position>
- (make-position row column)
- position?
- (row position-row)
- (column position-column))
-
- (define (set-position-row position n)
- (make-position (+ (position-row position)
- n)
- (position-column position)))
-
- (define (set-position-column position n)
- (make-position (position-row position)
- (+ (position-column position)
- n)))
-
- (define (add-position-row position n)
- "Add N to POSITION's row count."
- (set-position-row position
- (+ (position-row position)
- n)))
-
- (define (add-position-column position n)
- "Add N to POSITION's column count."
- (set-position-column position
- (+ (position-column position)
- n)))
-
- (define (add-position-row-column position n m)
- "Add N to POSITION's row count and M to its column count."
- (make-position (add-position-row position n)
- (add-position-column position m)))))
diff --git a/rsv/rows-streams.scm b/rsv/rows-streams.scm
@@ -0,0 +1,27 @@
+(define-library (rsv rows-streams)
+ (import (scheme base)
+
+ (srfi srfi-41)
+
+ (rsv arbitrary-null))
+ (export port->rsv-row-stream)
+
+ (begin
+ (define-stream (port->rsv-row-stream port)
+ (let loop ((row-count 0)
+ (column-count 0))
+ (let ((byte (peek-u8 port)))
+ (cond
+ ((eof-object? byte) stream-null)
+
+ ((legal-rsv? byte)
+ (let-values (((new-row new-row-count new-column-count)
+ (read-rsv-row row-count column-count port)))
+ (stream-cons new-row
+ (loop new-row-count
+ new-column-count))))
+
+ (else
+ (error "Illegal value returned when reading RSV row: (row number, illegal value)"
+ row-count
+ byte))))))))
diff --git a/tests.scm b/tests.scm
@@ -4,9 +4,13 @@
(scheme write)
(srfi srfi-1)
+ (srfi srfi-41)
(srfi srfi-64)
- (rsv arbitrary-null))
+ (statprof)
+
+ (rsv arbitrary-null)
+ (rsv rows-streams))
(begin
(define valid-filenames
@@ -46,23 +50,46 @@
(else
(error "Illegal value while reading file:" byte)))))))
- (define (run-back-and-forth-test)
- (let* ((file-contents (get-bytevector-all (current-input-port)))
- (input-bytevector-port (open-input-bytevector file-contents))
- (scm (rsv->scm #f input-bytevector-port))
- (output-bytevector-port (open-output-bytevector)))
- (scm->rsv scm #f output-bytevector-port)
- (values file-contents (get-output-bytevector output-bytevector-port))))
+ (define (run-back-and-forth-test original-rsv)
+ (statprof
+ (lambda ()
+ (let* ((input-bytevector-port (open-input-bytevector original-rsv))
+ (scm (read-rsv input-bytevector-port))
+ (output-bytevector-port (open-output-bytevector)))
+ (write-rsv scm output-bytevector-port)
+ (test-equal original-rsv
+ (get-output-bytevector output-bytevector-port))))
+ #:count-calls? #t))
+
+ (define (scm-rows-stream->rsv stream)
+ (let ((output-bytevector-port (open-output-bytevector)))
+ (let loop ((stream stream))
+ (cond
+ ((stream-null? stream)
+ (get-output-bytevector output-bytevector-port))
+ (else
+ (let* ((new-scm-row (stream-car stream)))
+ (write-rsv-row new-scm-row output-bytevector-port)
+ (loop (stream-cdr stream))))))))
+
+ (define (run-stream-back-and-forth-test original-rsv)
+ (let* ((input-bytevector-port (open-input-bytevector original-rsv))
+ (rows-stream (port->rsv-row-stream input-bytevector-port)))
+ (let ((regenerated-rsv (scm-rows-stream->rsv rows-stream)))
+ (test-equal original-rsv regenerated-rsv))))
(test-begin "RSV")
(for-each (lambda (filename)
(write filename) (newline)
- (test-group filename
- (let-values (((original-rsv regenerated-rsv)
- (with-input-from-file filename
- run-back-and-forth-test)))
- (test-equal original-rsv regenerated-rsv))))
+ (call-with-input-file filename
+ (lambda (port)
+ (test-group filename
+ (let ((original-rsv (get-bytevector-all port)))
+ (display 'run-back-and-forth-test) (newline)
+ (run-back-and-forth-test original-rsv)
+ (display 'run-stream-back-and-forth-test) (newline)
+ (run-stream-back-and-forth-test original-rsv))))))
valid-filenames)
(test-end "RSV")))