guile-rsv

R7RS Scheme library for reading and writing RSV (Rows of String Values) data format. Specified in https://github.com/Stenway/RSV-Specification and demonstrated in https://www.youtube.com/watch?v=tb_70o6ohMA
git clone https://kaka.farm/~stagit/guile-rsv.git
Log | Files | Refs | README | LICENSE

commit 2107f768bfd55c2423a8ec06453ed6815f6dfb5a
parent 68837a803097567f1739c045745aeb08c93ae0dd
Author: Yuval Langer <yuval.langer@gmail.com>
Date:   Mon, 22 Jan 2024 08:36:06 +0200

Move all the code into the (rsv internal) library, import it into (rsv) and (rsv rows-streams), and export only public stuff from (rsv) and (rsv rows-streams).  Also adapt tests.scm for this new library structure and move (rsv rows-streams) specific tests into tests-rows-streams.scm.

Diffstat:
Mrsv.scm | 235++-----------------------------------------------------------------------------
Arsv/internal.scm | 264+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mrsv/rows-streams.scm | 4+++-
Atests-rows-streams.scm | 123+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtests.scm | 25++-----------------------
5 files changed, 397 insertions(+), 254 deletions(-)

diff --git a/rsv.scm b/rsv.scm @@ -17,236 +17,11 @@ (define-library (rsv) (import (scheme base) (scheme file) - (scheme write)) - (export legal-rsv? - read-rsv - read-rsv-row + (scheme write) + + (rsv internal)) + (export read-rsv rsv-bytevector->scm scm->rsv-bytevector write-rsv - write-rsv-row) - - (begin - (define row-terminator-byte #xFD) ;; 253 - (define value-terminator-byte #xFF) ;; 255 - (define null-value-byte #xFE) ;; 254 - - (define (row-terminator-byte? byte) (= row-terminator-byte byte)) - (define (value-terminator-byte? byte) (= value-terminator-byte byte)) - (define (null-value-byte? byte) (= null-value-byte byte)) - - (define (write-rsv-field field port) - (cond - ((not field) - (write-u8 null-value-byte - port)) - (else - (write-bytevector (string->utf8 field) - port)))) - - (define (write-rsv-row row port) - (unless (binary-port? port) - (error "Provided port should be a binary port." - port)) - - (let loop ((row row)) - (cond - ((null? row) - (write-u8 row-terminator-byte - port)) - (else - (let ((field (car row)) - (rest-of-fields (cdr row))) - (write-rsv-field field port) - (write-u8 value-terminator-byte port) - (loop rest-of-fields)))))) - - (define (write-rsv scm port) - "Convert provided SCM, which is a list of lists of strings or null values, into RSV and write it into PORT. - -The SCM, the sexp representation of RSV, is defined as: - -RSV := ( ROW* ) -ROW := ( STRING-or-NULL-VALUE* ) -STRING-or-NULL-VALUE := String | Null-Value -Null-Value := #f" - (unless (binary-port? port) - (error "Expected binary port." port)) - - (let loop ((scm scm)) - (cond - ((null? scm) '()) - (else - (let ((row (car scm)) - (rest-of-rows (cdr scm))) - (write-rsv-row row port) - (loop rest-of-rows)))))) - - (define (scm->rsv-bytevector scm) - (let ((output-bytevector-port (open-output-bytevector))) - (write-rsv scm output-bytevector-port) - (get-output-bytevector output-bytevector-port))) - - (define (legal-utf8? byte) - ;; XXX: Replace with a better predicate. - (and (integer? byte) - (< -1 byte #xF8))) - - (define (legal-rsv? byte) - ;; XXX: Same as for legal-utf8? - (and (integer? byte) - (< -1 byte #x100))) - - (define (read-rsv-string row-count column-count port) - "Read a string from PORT. - -Return the string just read, and the number of bytes just -read." - (unless (binary-port? port) - (error "Expected binary port." port)) - - (let ((output-field-port (open-output-bytevector))) - (let loop ((column-count column-count)) - (let ((byte (read-u8 port))) - (cond - ((eof-object? byte) - (error "Prematurely terminated RSV at: (row, column)" - row-count - column-count)) - ((not (legal-rsv? byte)) - (error "Illegal value returned by RSV string reader: (illegal value, row, column)" - byte - row-count - column-count)) - ((value-terminator-byte? byte) - (values (utf8->string (get-output-bytevector output-field-port)) - row-count - column-count)) - ((legal-utf8? byte) - (write-u8 byte - output-field-port) - (loop (+ column-count 1)))))))) - - (define (read-null-value row-count column-count port) - ;; `row->scm` had already `peek-u8`-ed and knows that the next - ;; `read-u8` will be the Null-Value-Byte, a.k.a. #xFE, so we can - ;; remove it without looking at it. - (unless (binary-port? port) - (error "Expected binary port." port)) - - ;; Remove Null-Value-Byte. XXX: Already checked in the - ;; procedure calling read-null-value, so it should be okay not - ;; to check it, no? Can a time-of-check-time-of-use result when not checking? - (let ((byte (read-u8 port))) - (cond - ((eof-object? byte) - (error "Prematurely terminated RSV: (row, column)" - row-count - column-count)) - ((not (legal-rsv? byte)) - (error "Illegal value returned by RSV string reader: (illegal value, row, column)" - byte - row-count - column-count)) - ((not (null-value-byte? byte)) - (error "Expected a Null Byte (#xFE), instead got: (unexpected byte, row, column)" - byte - row-count - column-count)))) - - (let ((byte (read-u8 port))) ;; Remove value terminator byte. - (cond - ((eof-object? byte) - (error "Prematurely terminated RSV: (row, column)" - row-count - column-count)) - ((not (legal-rsv? byte)) - (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)" - byte - row-count - column-count)) - ((value-terminator-byte? byte) - ;; Return successfully. - (values row-count - (+ column-count 2))) - (else - (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)" - byte - row-count - column-count))))) - - (define (read-rsv-row row-count column-count port) - (unless (binary-port? port) - (error "Provided port should be a binary port." - port)) - (let loop ((row '()) - (row-count row-count) - (column-count column-count)) - (let ((byte (peek-u8 port))) - (cond ((eof-object? byte) - (error "Prematurely terminated RSV: (row, column)" - row-count - column-count)) - ((row-terminator-byte? byte) ;; End of row. - (read-u8 port) ;; Remove row terminator. - (values (reverse row) - (+ row-count 1) - 0)) - - ((null-value-byte? byte) ;; Field is null. - (let-values (((new-row-count new-column-count) - (read-null-value row-count column-count port))) - (loop (cons #f row) - new-row-count - new-column-count))) - - ((value-terminator-byte? byte) - (read-u8 port) - (loop (cons "" - row) - row-count - (+ column-count 1))) - - ((legal-utf8? byte) ;; Field is a string. - (let-values (((new-field-value new-row-count new-column-count) - (read-rsv-string row-count - column-count - port))) - (loop (cons new-field-value - row) - new-row-count - new-column-count))) - - (else - (error "Illegal value returned in read-rsv-row: (illegal value, row, column)" - byte - row-count - column-count)))))) - - (define (read-rsv port) - (unless (binary-port? port) - (error "Provided port should be a binary port." - port)) - (let loop ((rows '()) - (row-count 0) - (column-count 0)) - (let ((byte (peek-u8 port))) - (cond - ((eof-object? byte) - (reverse rows)) - - ((legal-rsv? byte) - (let-values (((new-row new-row-count new-column-count) - (read-rsv-row row-count column-count port))) - (loop (cons new-row rows) - new-row-count - new-column-count))) - - (else - (error "Illegal value returned when reading RSV row: (illegal value, row)" - byte - row-count)))))) - - (define (rsv-bytevector->scm bv) - (let ((bytevector-input-port (open-input-bytevector bv))) - (read-rsv bytevector-input-port))))) + port->rsv-row-stream)) diff --git a/rsv/internal.scm b/rsv/internal.scm @@ -0,0 +1,264 @@ +;;; Scheme implementation of RSV - Rows of String Values. +;;; Copyright (C) 2024 Yuval Langer. +;;; +;;; This program is free software: you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see <https://www.gnu.org/licenses/>. + +(define-library (rsv internal) + (import (scheme base) + (scheme file) + (scheme write) + + (srfi srfi-41)) + (export legal-rsv? + legal-utf8? + null-value-byte + null-value-byte? + read-null-value + read-rsv + read-rsv-row + read-rsv-string + row-terminator-byte + row-terminator-byte? + rsv-bytevector->scm + scm->rsv-bytevector + value-terminator-byte + value-terminator-byte? + write-rsv + write-rsv-field + write-rsv-row) + + (begin + (define row-terminator-byte #xFD) ;; 253 + (define value-terminator-byte #xFF) ;; 255 + (define null-value-byte #xFE) ;; 254 + + (define (row-terminator-byte? byte) (= row-terminator-byte byte)) + (define (value-terminator-byte? byte) (= value-terminator-byte byte)) + (define (null-value-byte? byte) (= null-value-byte byte)) + + (define (write-rsv-field field port) + (cond + ((not field) + (write-u8 null-value-byte + port)) + (else + (write-bytevector (string->utf8 field) + port)))) + + (define (write-rsv-row row port) + (unless (binary-port? port) + (error "Provided port should be a binary port." + port)) + + (let loop ((row row)) + (cond + ((null? row) + (write-u8 row-terminator-byte + port)) + (else + (let ((field (car row)) + (rest-of-fields (cdr row))) + (write-rsv-field field port) + (write-u8 value-terminator-byte port) + (loop rest-of-fields)))))) + + (define (write-rsv scm port) + "Convert provided SCM, which is a list of lists of strings or null values, into RSV and write it into PORT. + +The SCM, the sexp representation of RSV, is defined as: + +RSV := ( ROW* ) +ROW := ( STRING-or-NULL-VALUE* ) +STRING-or-NULL-VALUE := String | Null-Value +Null-Value := #f" + (unless (binary-port? port) + (error "Expected binary port." port)) + + (let loop ((scm scm)) + (cond + ((null? scm) '()) + (else + (let ((row (car scm)) + (rest-of-rows (cdr scm))) + (write-rsv-row row port) + (loop rest-of-rows)))))) + + (define (scm->rsv-bytevector scm) + (let ((output-bytevector-port (open-output-bytevector))) + (write-rsv scm output-bytevector-port) + (get-output-bytevector output-bytevector-port))) + + (define (legal-utf8? byte) + ;; XXX: Replace with a better predicate. + (and (integer? byte) + (< -1 byte #xF8))) + + (define (legal-rsv? byte) + ;; XXX: Same as for legal-utf8? + (and (integer? byte) + (< -1 byte #x100))) + + (define (read-rsv-string row-count column-count port) + "Read a string from PORT. + +Return the string just read, and the number of bytes just +read." + (unless (binary-port? port) + (error "Expected binary port." port)) + + (let ((output-field-port (open-output-bytevector))) + (let loop ((column-count column-count)) + (let ((byte (read-u8 port))) + (cond + ((eof-object? byte) + (error "Prematurely terminated RSV at: (row, column)" + row-count + column-count)) + ((not (legal-rsv? byte)) + (error "Illegal value returned by RSV string reader: (illegal value, row, column)" + byte + row-count + column-count)) + ((value-terminator-byte? byte) + (values (utf8->string (get-output-bytevector output-field-port)) + row-count + column-count)) + ((legal-utf8? byte) + (write-u8 byte + output-field-port) + (loop (+ column-count 1)))))))) + + (define (read-null-value row-count column-count port) + ;; `row->scm` had already `peek-u8`-ed and knows that the next + ;; `read-u8` will be the Null-Value-Byte, a.k.a. #xFE, so we can + ;; remove it without looking at it. + (unless (binary-port? port) + (error "Expected binary port." port)) + + ;; Remove Null-Value-Byte. XXX: Already checked in the + ;; procedure calling read-null-value, so it should be okay not + ;; to check it, no? Can a time-of-check-time-of-use result when not checking? + (let ((byte (read-u8 port))) + (cond + ((eof-object? byte) + (error "Prematurely terminated RSV: (row, column)" + row-count + column-count)) + ((not (legal-rsv? byte)) + (error "Illegal value returned by RSV string reader: (illegal value, row, column)" + byte + row-count + column-count)) + ((not (null-value-byte? byte)) + (error "Expected a Null Byte (#xFE), instead got: (unexpected byte, row, column)" + byte + row-count + column-count)))) + + (let ((byte (read-u8 port))) ;; Remove value terminator byte. + (cond + ((eof-object? byte) + (error "Prematurely terminated RSV: (row, column)" + row-count + column-count)) + ((not (legal-rsv? byte)) + (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)" + byte + row-count + column-count)) + ((value-terminator-byte? byte) + ;; Return successfully. + (values row-count + (+ column-count 2))) + (else + (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)" + byte + row-count + column-count))))) + + (define (read-rsv-row row-count column-count port) + (unless (binary-port? port) + (error "Provided port should be a binary port." + port)) + (let loop ((row '()) + (row-count row-count) + (column-count column-count)) + (let ((byte (peek-u8 port))) + (cond ((eof-object? byte) + (error "Prematurely terminated RSV: (row, column)" + row-count + column-count)) + ((row-terminator-byte? byte) ;; End of row. + (read-u8 port) ;; Remove row terminator. + (values (reverse row) + (+ row-count 1) + 0)) + + ((null-value-byte? byte) ;; Field is null. + (let-values (((new-row-count new-column-count) + (read-null-value row-count column-count port))) + (loop (cons #f row) + new-row-count + new-column-count))) + + ((value-terminator-byte? byte) + (read-u8 port) + (loop (cons "" + row) + row-count + (+ column-count 1))) + + ((legal-utf8? byte) ;; Field is a string. + (let-values (((new-field-value new-row-count new-column-count) + (read-rsv-string row-count + column-count + port))) + (loop (cons new-field-value + row) + new-row-count + new-column-count))) + + (else + (error "Illegal value returned in read-rsv-row: (illegal value, row, column)" + byte + row-count + column-count)))))) + + (define (read-rsv port) + (unless (binary-port? port) + (error "Provided port should be a binary port." + port)) + (let loop ((rows '()) + (row-count 0) + (column-count 0)) + (let ((byte (peek-u8 port))) + (cond + ((eof-object? byte) + (reverse rows)) + + ((legal-rsv? byte) + (let-values (((new-row new-row-count new-column-count) + (read-rsv-row row-count column-count port))) + (loop (cons new-row rows) + new-row-count + new-column-count))) + + (else + (error "Illegal value returned when reading RSV row: (illegal value, row)" + byte + row-count)))))) + + (define (rsv-bytevector->scm bv) + (let ((bytevector-input-port (open-input-bytevector bv))) + (read-rsv bytevector-input-port))))) diff --git a/rsv/rows-streams.scm b/rsv/rows-streams.scm @@ -16,10 +16,12 @@ (define-library (rsv rows-streams) (import (scheme base) + (scheme file) + (scheme write) (srfi srfi-41) - (rsv)) + (rsv internal)) (export port->rsv-row-stream) (begin diff --git a/tests-rows-streams.scm b/tests-rows-streams.scm @@ -0,0 +1,123 @@ +;;; Scheme implementation of RSV - Rows of String Values. +;;; Copyright (C) 2024 Yuval Langer. +;;; +;;; This program is free software: you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see <https://www.gnu.org/licenses/>. + +(define-library (rsv tests rows-streams) + (import (scheme base) + (scheme file) + (scheme write) + + (srfi srfi-1) + (srfi srfi-41) + (srfi srfi-64) + + (statprof) + + (rsv) + (rsv rows-streams)) + + (begin + (define valid-filenames + (append (map (lambda (n) + (string-append "TestFiles/Valid_00" + (number->string n) + ".rsv")) + (iota 9 1)) + (map (lambda (n) + (string-append "TestFiles/Valid_0" + (number->string n) + ".rsv")) + (iota 70 10)))) + + (define invalid-filenames + (append (map (lambda (n) + (string-append "TestFiles/Invalid_00" + (number->string n) + ".rsv")) + (iota 9 1)) + (map (lambda (n) + (string-append "TestFiles/Invalid_0" + (number->string n) + ".rsv")) + (iota 20 10)))) + + (define (get-bytevector-all port) + (let ((output-bytevector-port (open-output-bytevector))) + (let loop () + (let ((byte (read-u8 port))) + (cond + ((eof-object? byte) + (get-output-bytevector output-bytevector-port)) + ((number? byte) + (write-u8 byte output-bytevector-port) + (loop)) + (else + (error "Illegal value while reading file:" byte))))))) + + (define (scm-rows-stream->rsv stream) + (let ((output-bytevector-port (open-output-bytevector))) + (let loop ((stream stream)) + (cond + ((stream-null? stream) + (get-output-bytevector output-bytevector-port)) + (else + (let* ((new-scm-row (stream-car stream)) + (new-scm-row-pretending-to-be-a-table (list new-scm-row))) + (write-rsv new-scm-row-pretending-to-be-a-table + output-bytevector-port) + (loop (stream-cdr stream)))))))) + + (define (back-and-forth original-rsv) + (let* ((input-bytevector-port (open-input-bytevector original-rsv)) + (rows-stream (port->rsv-row-stream input-bytevector-port))) + (let ((regenerated-rsv (scm-rows-stream->rsv rows-stream))) + regenerated-rsv))) + + (define (run-stream-back-and-forth-failing-test original-rsv) + (statprof + (lambda () + (test-error (cons (back-and-forth original-rsv) + original-rsv))) + #:count-calls? #t)) + + (define (run-stream-back-and-forth-test original-rsv) + (statprof + (lambda () + (test-equal original-rsv (back-and-forth original-rsv))) + #:count-calls? #t)) + + (test-begin "rsv-rows-streams-tests") + + (for-each (lambda (filename) + (write filename) (newline) + (call-with-input-file filename + (lambda (port) + (test-group filename + (let ((original-rsv (get-bytevector-all port))) + (display 'run-stream-back-and-forth-failing-test) (newline) + (run-stream-back-and-forth-failing-test original-rsv)))))) + invalid-filenames) + + (for-each (lambda (filename) + (write filename) (newline) + (call-with-input-file filename + (lambda (port) + (test-group filename + (let ((original-rsv (get-bytevector-all port))) + (display 'run-stream-back-and-forth-test) (newline) + (run-stream-back-and-forth-test original-rsv)))))) + valid-filenames) + + (test-end "rsv-rows-streams-tests"))) diff --git a/tests.scm b/tests.scm @@ -20,13 +20,11 @@ (scheme write) (srfi srfi-1) - (srfi srfi-41) (srfi srfi-64) (statprof) - (rsv) - (rsv rows-streams)) + (rsv)) (begin (define valid-filenames @@ -95,23 +93,6 @@ (scm->rsv-bytevector (rsv-bytevector->scm original-rsv)))) #:count-calls? #t)) - (define (scm-rows-stream->rsv stream) - (let ((output-bytevector-port (open-output-bytevector))) - (let loop ((stream stream)) - (cond - ((stream-null? stream) - (get-output-bytevector output-bytevector-port)) - (else - (let* ((new-scm-row (stream-car stream))) - (write-rsv-row new-scm-row output-bytevector-port) - (loop (stream-cdr stream)))))))) - - (define (run-stream-back-and-forth-test original-rsv) - (let* ((input-bytevector-port (open-input-bytevector original-rsv)) - (rows-stream (port->rsv-row-stream input-bytevector-port))) - (let ((regenerated-rsv (scm-rows-stream->rsv rows-stream))) - (test-equal original-rsv regenerated-rsv)))) - (test-begin "RSV") (for-each (lambda (filename) @@ -131,9 +112,7 @@ (test-group filename (let ((original-rsv (get-bytevector-all port))) (display 'run-back-and-forth-test) (newline) - (run-back-and-forth-test original-rsv) - (display 'run-stream-back-and-forth-test) (newline) - (run-stream-back-and-forth-test original-rsv)))))) + (run-back-and-forth-test original-rsv)))))) valid-filenames) (test-end "RSV")))