commit 2107f768bfd55c2423a8ec06453ed6815f6dfb5a
parent 68837a803097567f1739c045745aeb08c93ae0dd
Author: Yuval Langer <yuval.langer@gmail.com>
Date: Mon, 22 Jan 2024 08:36:06 +0200
Move all the code into the (rsv internal) library, import it into (rsv) and (rsv rows-streams), and export only public stuff from (rsv) and (rsv rows-streams). Also adapt tests.scm for this new library structure and move (rsv rows-streams) specific tests into tests-rows-streams.scm.
Diffstat:
M | rsv.scm | | | 235 | ++----------------------------------------------------------------------------- |
A | rsv/internal.scm | | | 264 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | rsv/rows-streams.scm | | | 4 | +++- |
A | tests-rows-streams.scm | | | 123 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | tests.scm | | | 25 | ++----------------------- |
5 files changed, 397 insertions(+), 254 deletions(-)
diff --git a/rsv.scm b/rsv.scm
@@ -17,236 +17,11 @@
(define-library (rsv)
(import (scheme base)
(scheme file)
- (scheme write))
- (export legal-rsv?
- read-rsv
- read-rsv-row
+ (scheme write)
+
+ (rsv internal))
+ (export read-rsv
rsv-bytevector->scm
scm->rsv-bytevector
write-rsv
- write-rsv-row)
-
- (begin
- (define row-terminator-byte #xFD) ;; 253
- (define value-terminator-byte #xFF) ;; 255
- (define null-value-byte #xFE) ;; 254
-
- (define (row-terminator-byte? byte) (= row-terminator-byte byte))
- (define (value-terminator-byte? byte) (= value-terminator-byte byte))
- (define (null-value-byte? byte) (= null-value-byte byte))
-
- (define (write-rsv-field field port)
- (cond
- ((not field)
- (write-u8 null-value-byte
- port))
- (else
- (write-bytevector (string->utf8 field)
- port))))
-
- (define (write-rsv-row row port)
- (unless (binary-port? port)
- (error "Provided port should be a binary port."
- port))
-
- (let loop ((row row))
- (cond
- ((null? row)
- (write-u8 row-terminator-byte
- port))
- (else
- (let ((field (car row))
- (rest-of-fields (cdr row)))
- (write-rsv-field field port)
- (write-u8 value-terminator-byte port)
- (loop rest-of-fields))))))
-
- (define (write-rsv scm port)
- "Convert provided SCM, which is a list of lists of strings or null values, into RSV and write it into PORT.
-
-The SCM, the sexp representation of RSV, is defined as:
-
-RSV := ( ROW* )
-ROW := ( STRING-or-NULL-VALUE* )
-STRING-or-NULL-VALUE := String | Null-Value
-Null-Value := #f"
- (unless (binary-port? port)
- (error "Expected binary port." port))
-
- (let loop ((scm scm))
- (cond
- ((null? scm) '())
- (else
- (let ((row (car scm))
- (rest-of-rows (cdr scm)))
- (write-rsv-row row port)
- (loop rest-of-rows))))))
-
- (define (scm->rsv-bytevector scm)
- (let ((output-bytevector-port (open-output-bytevector)))
- (write-rsv scm output-bytevector-port)
- (get-output-bytevector output-bytevector-port)))
-
- (define (legal-utf8? byte)
- ;; XXX: Replace with a better predicate.
- (and (integer? byte)
- (< -1 byte #xF8)))
-
- (define (legal-rsv? byte)
- ;; XXX: Same as for legal-utf8?
- (and (integer? byte)
- (< -1 byte #x100)))
-
- (define (read-rsv-string row-count column-count port)
- "Read a string from PORT.
-
-Return the string just read, and the number of bytes just
-read."
- (unless (binary-port? port)
- (error "Expected binary port." port))
-
- (let ((output-field-port (open-output-bytevector)))
- (let loop ((column-count column-count))
- (let ((byte (read-u8 port)))
- (cond
- ((eof-object? byte)
- (error "Prematurely terminated RSV at: (row, column)"
- row-count
- column-count))
- ((not (legal-rsv? byte))
- (error "Illegal value returned by RSV string reader: (illegal value, row, column)"
- byte
- row-count
- column-count))
- ((value-terminator-byte? byte)
- (values (utf8->string (get-output-bytevector output-field-port))
- row-count
- column-count))
- ((legal-utf8? byte)
- (write-u8 byte
- output-field-port)
- (loop (+ column-count 1))))))))
-
- (define (read-null-value row-count column-count port)
- ;; `row->scm` had already `peek-u8`-ed and knows that the next
- ;; `read-u8` will be the Null-Value-Byte, a.k.a. #xFE, so we can
- ;; remove it without looking at it.
- (unless (binary-port? port)
- (error "Expected binary port." port))
-
- ;; Remove Null-Value-Byte. XXX: Already checked in the
- ;; procedure calling read-null-value, so it should be okay not
- ;; to check it, no? Can a time-of-check-time-of-use result when not checking?
- (let ((byte (read-u8 port)))
- (cond
- ((eof-object? byte)
- (error "Prematurely terminated RSV: (row, column)"
- row-count
- column-count))
- ((not (legal-rsv? byte))
- (error "Illegal value returned by RSV string reader: (illegal value, row, column)"
- byte
- row-count
- column-count))
- ((not (null-value-byte? byte))
- (error "Expected a Null Byte (#xFE), instead got: (unexpected byte, row, column)"
- byte
- row-count
- column-count))))
-
- (let ((byte (read-u8 port))) ;; Remove value terminator byte.
- (cond
- ((eof-object? byte)
- (error "Prematurely terminated RSV: (row, column)"
- row-count
- column-count))
- ((not (legal-rsv? byte))
- (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)"
- byte
- row-count
- column-count))
- ((value-terminator-byte? byte)
- ;; Return successfully.
- (values row-count
- (+ column-count 2)))
- (else
- (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)"
- byte
- row-count
- column-count)))))
-
- (define (read-rsv-row row-count column-count port)
- (unless (binary-port? port)
- (error "Provided port should be a binary port."
- port))
- (let loop ((row '())
- (row-count row-count)
- (column-count column-count))
- (let ((byte (peek-u8 port)))
- (cond ((eof-object? byte)
- (error "Prematurely terminated RSV: (row, column)"
- row-count
- column-count))
- ((row-terminator-byte? byte) ;; End of row.
- (read-u8 port) ;; Remove row terminator.
- (values (reverse row)
- (+ row-count 1)
- 0))
-
- ((null-value-byte? byte) ;; Field is null.
- (let-values (((new-row-count new-column-count)
- (read-null-value row-count column-count port)))
- (loop (cons #f row)
- new-row-count
- new-column-count)))
-
- ((value-terminator-byte? byte)
- (read-u8 port)
- (loop (cons ""
- row)
- row-count
- (+ column-count 1)))
-
- ((legal-utf8? byte) ;; Field is a string.
- (let-values (((new-field-value new-row-count new-column-count)
- (read-rsv-string row-count
- column-count
- port)))
- (loop (cons new-field-value
- row)
- new-row-count
- new-column-count)))
-
- (else
- (error "Illegal value returned in read-rsv-row: (illegal value, row, column)"
- byte
- row-count
- column-count))))))
-
- (define (read-rsv port)
- (unless (binary-port? port)
- (error "Provided port should be a binary port."
- port))
- (let loop ((rows '())
- (row-count 0)
- (column-count 0))
- (let ((byte (peek-u8 port)))
- (cond
- ((eof-object? byte)
- (reverse rows))
-
- ((legal-rsv? byte)
- (let-values (((new-row new-row-count new-column-count)
- (read-rsv-row row-count column-count port)))
- (loop (cons new-row rows)
- new-row-count
- new-column-count)))
-
- (else
- (error "Illegal value returned when reading RSV row: (illegal value, row)"
- byte
- row-count))))))
-
- (define (rsv-bytevector->scm bv)
- (let ((bytevector-input-port (open-input-bytevector bv)))
- (read-rsv bytevector-input-port)))))
+ port->rsv-row-stream))
diff --git a/rsv/internal.scm b/rsv/internal.scm
@@ -0,0 +1,264 @@
+;;; Scheme implementation of RSV - Rows of String Values.
+;;; Copyright (C) 2024 Yuval Langer.
+;;;
+;;; This program is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-library (rsv internal)
+ (import (scheme base)
+ (scheme file)
+ (scheme write)
+
+ (srfi srfi-41))
+ (export legal-rsv?
+ legal-utf8?
+ null-value-byte
+ null-value-byte?
+ read-null-value
+ read-rsv
+ read-rsv-row
+ read-rsv-string
+ row-terminator-byte
+ row-terminator-byte?
+ rsv-bytevector->scm
+ scm->rsv-bytevector
+ value-terminator-byte
+ value-terminator-byte?
+ write-rsv
+ write-rsv-field
+ write-rsv-row)
+
+ (begin
+ (define row-terminator-byte #xFD) ;; 253
+ (define value-terminator-byte #xFF) ;; 255
+ (define null-value-byte #xFE) ;; 254
+
+ (define (row-terminator-byte? byte) (= row-terminator-byte byte))
+ (define (value-terminator-byte? byte) (= value-terminator-byte byte))
+ (define (null-value-byte? byte) (= null-value-byte byte))
+
+ (define (write-rsv-field field port)
+ (cond
+ ((not field)
+ (write-u8 null-value-byte
+ port))
+ (else
+ (write-bytevector (string->utf8 field)
+ port))))
+
+ (define (write-rsv-row row port)
+ (unless (binary-port? port)
+ (error "Provided port should be a binary port."
+ port))
+
+ (let loop ((row row))
+ (cond
+ ((null? row)
+ (write-u8 row-terminator-byte
+ port))
+ (else
+ (let ((field (car row))
+ (rest-of-fields (cdr row)))
+ (write-rsv-field field port)
+ (write-u8 value-terminator-byte port)
+ (loop rest-of-fields))))))
+
+ (define (write-rsv scm port)
+ "Convert provided SCM, which is a list of lists of strings or null values, into RSV and write it into PORT.
+
+The SCM, the sexp representation of RSV, is defined as:
+
+RSV := ( ROW* )
+ROW := ( STRING-or-NULL-VALUE* )
+STRING-or-NULL-VALUE := String | Null-Value
+Null-Value := #f"
+ (unless (binary-port? port)
+ (error "Expected binary port." port))
+
+ (let loop ((scm scm))
+ (cond
+ ((null? scm) '())
+ (else
+ (let ((row (car scm))
+ (rest-of-rows (cdr scm)))
+ (write-rsv-row row port)
+ (loop rest-of-rows))))))
+
+ (define (scm->rsv-bytevector scm)
+ (let ((output-bytevector-port (open-output-bytevector)))
+ (write-rsv scm output-bytevector-port)
+ (get-output-bytevector output-bytevector-port)))
+
+ (define (legal-utf8? byte)
+ ;; XXX: Replace with a better predicate.
+ (and (integer? byte)
+ (< -1 byte #xF8)))
+
+ (define (legal-rsv? byte)
+ ;; XXX: Same as for legal-utf8?
+ (and (integer? byte)
+ (< -1 byte #x100)))
+
+ (define (read-rsv-string row-count column-count port)
+ "Read a string from PORT.
+
+Return the string just read, and the number of bytes just
+read."
+ (unless (binary-port? port)
+ (error "Expected binary port." port))
+
+ (let ((output-field-port (open-output-bytevector)))
+ (let loop ((column-count column-count))
+ (let ((byte (read-u8 port)))
+ (cond
+ ((eof-object? byte)
+ (error "Prematurely terminated RSV at: (row, column)"
+ row-count
+ column-count))
+ ((not (legal-rsv? byte))
+ (error "Illegal value returned by RSV string reader: (illegal value, row, column)"
+ byte
+ row-count
+ column-count))
+ ((value-terminator-byte? byte)
+ (values (utf8->string (get-output-bytevector output-field-port))
+ row-count
+ column-count))
+ ((legal-utf8? byte)
+ (write-u8 byte
+ output-field-port)
+ (loop (+ column-count 1))))))))
+
+ (define (read-null-value row-count column-count port)
+ ;; `row->scm` had already `peek-u8`-ed and knows that the next
+ ;; `read-u8` will be the Null-Value-Byte, a.k.a. #xFE, so we can
+ ;; remove it without looking at it.
+ (unless (binary-port? port)
+ (error "Expected binary port." port))
+
+ ;; Remove Null-Value-Byte. XXX: Already checked in the
+ ;; procedure calling read-null-value, so it should be okay not
+ ;; to check it, no? Can a time-of-check-time-of-use result when not checking?
+ (let ((byte (read-u8 port)))
+ (cond
+ ((eof-object? byte)
+ (error "Prematurely terminated RSV: (row, column)"
+ row-count
+ column-count))
+ ((not (legal-rsv? byte))
+ (error "Illegal value returned by RSV string reader: (illegal value, row, column)"
+ byte
+ row-count
+ column-count))
+ ((not (null-value-byte? byte))
+ (error "Expected a Null Byte (#xFE), instead got: (unexpected byte, row, column)"
+ byte
+ row-count
+ column-count))))
+
+ (let ((byte (read-u8 port))) ;; Remove value terminator byte.
+ (cond
+ ((eof-object? byte)
+ (error "Prematurely terminated RSV: (row, column)"
+ row-count
+ column-count))
+ ((not (legal-rsv? byte))
+ (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)"
+ byte
+ row-count
+ column-count))
+ ((value-terminator-byte? byte)
+ ;; Return successfully.
+ (values row-count
+ (+ column-count 2)))
+ (else
+ (error "Expected a Value-Terminator-Byte (#xFF) after a Null Byte (#xFE), instead got illegal value: (illegal value, row, column)"
+ byte
+ row-count
+ column-count)))))
+
+ (define (read-rsv-row row-count column-count port)
+ (unless (binary-port? port)
+ (error "Provided port should be a binary port."
+ port))
+ (let loop ((row '())
+ (row-count row-count)
+ (column-count column-count))
+ (let ((byte (peek-u8 port)))
+ (cond ((eof-object? byte)
+ (error "Prematurely terminated RSV: (row, column)"
+ row-count
+ column-count))
+ ((row-terminator-byte? byte) ;; End of row.
+ (read-u8 port) ;; Remove row terminator.
+ (values (reverse row)
+ (+ row-count 1)
+ 0))
+
+ ((null-value-byte? byte) ;; Field is null.
+ (let-values (((new-row-count new-column-count)
+ (read-null-value row-count column-count port)))
+ (loop (cons #f row)
+ new-row-count
+ new-column-count)))
+
+ ((value-terminator-byte? byte)
+ (read-u8 port)
+ (loop (cons ""
+ row)
+ row-count
+ (+ column-count 1)))
+
+ ((legal-utf8? byte) ;; Field is a string.
+ (let-values (((new-field-value new-row-count new-column-count)
+ (read-rsv-string row-count
+ column-count
+ port)))
+ (loop (cons new-field-value
+ row)
+ new-row-count
+ new-column-count)))
+
+ (else
+ (error "Illegal value returned in read-rsv-row: (illegal value, row, column)"
+ byte
+ row-count
+ column-count))))))
+
+ (define (read-rsv port)
+ (unless (binary-port? port)
+ (error "Provided port should be a binary port."
+ port))
+ (let loop ((rows '())
+ (row-count 0)
+ (column-count 0))
+ (let ((byte (peek-u8 port)))
+ (cond
+ ((eof-object? byte)
+ (reverse rows))
+
+ ((legal-rsv? byte)
+ (let-values (((new-row new-row-count new-column-count)
+ (read-rsv-row row-count column-count port)))
+ (loop (cons new-row rows)
+ new-row-count
+ new-column-count)))
+
+ (else
+ (error "Illegal value returned when reading RSV row: (illegal value, row)"
+ byte
+ row-count))))))
+
+ (define (rsv-bytevector->scm bv)
+ (let ((bytevector-input-port (open-input-bytevector bv)))
+ (read-rsv bytevector-input-port)))))
diff --git a/rsv/rows-streams.scm b/rsv/rows-streams.scm
@@ -16,10 +16,12 @@
(define-library (rsv rows-streams)
(import (scheme base)
+ (scheme file)
+ (scheme write)
(srfi srfi-41)
- (rsv))
+ (rsv internal))
(export port->rsv-row-stream)
(begin
diff --git a/tests-rows-streams.scm b/tests-rows-streams.scm
@@ -0,0 +1,123 @@
+;;; Scheme implementation of RSV - Rows of String Values.
+;;; Copyright (C) 2024 Yuval Langer.
+;;;
+;;; This program is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+(define-library (rsv tests rows-streams)
+ (import (scheme base)
+ (scheme file)
+ (scheme write)
+
+ (srfi srfi-1)
+ (srfi srfi-41)
+ (srfi srfi-64)
+
+ (statprof)
+
+ (rsv)
+ (rsv rows-streams))
+
+ (begin
+ (define valid-filenames
+ (append (map (lambda (n)
+ (string-append "TestFiles/Valid_00"
+ (number->string n)
+ ".rsv"))
+ (iota 9 1))
+ (map (lambda (n)
+ (string-append "TestFiles/Valid_0"
+ (number->string n)
+ ".rsv"))
+ (iota 70 10))))
+
+ (define invalid-filenames
+ (append (map (lambda (n)
+ (string-append "TestFiles/Invalid_00"
+ (number->string n)
+ ".rsv"))
+ (iota 9 1))
+ (map (lambda (n)
+ (string-append "TestFiles/Invalid_0"
+ (number->string n)
+ ".rsv"))
+ (iota 20 10))))
+
+ (define (get-bytevector-all port)
+ (let ((output-bytevector-port (open-output-bytevector)))
+ (let loop ()
+ (let ((byte (read-u8 port)))
+ (cond
+ ((eof-object? byte)
+ (get-output-bytevector output-bytevector-port))
+ ((number? byte)
+ (write-u8 byte output-bytevector-port)
+ (loop))
+ (else
+ (error "Illegal value while reading file:" byte)))))))
+
+ (define (scm-rows-stream->rsv stream)
+ (let ((output-bytevector-port (open-output-bytevector)))
+ (let loop ((stream stream))
+ (cond
+ ((stream-null? stream)
+ (get-output-bytevector output-bytevector-port))
+ (else
+ (let* ((new-scm-row (stream-car stream))
+ (new-scm-row-pretending-to-be-a-table (list new-scm-row)))
+ (write-rsv new-scm-row-pretending-to-be-a-table
+ output-bytevector-port)
+ (loop (stream-cdr stream))))))))
+
+ (define (back-and-forth original-rsv)
+ (let* ((input-bytevector-port (open-input-bytevector original-rsv))
+ (rows-stream (port->rsv-row-stream input-bytevector-port)))
+ (let ((regenerated-rsv (scm-rows-stream->rsv rows-stream)))
+ regenerated-rsv)))
+
+ (define (run-stream-back-and-forth-failing-test original-rsv)
+ (statprof
+ (lambda ()
+ (test-error (cons (back-and-forth original-rsv)
+ original-rsv)))
+ #:count-calls? #t))
+
+ (define (run-stream-back-and-forth-test original-rsv)
+ (statprof
+ (lambda ()
+ (test-equal original-rsv (back-and-forth original-rsv)))
+ #:count-calls? #t))
+
+ (test-begin "rsv-rows-streams-tests")
+
+ (for-each (lambda (filename)
+ (write filename) (newline)
+ (call-with-input-file filename
+ (lambda (port)
+ (test-group filename
+ (let ((original-rsv (get-bytevector-all port)))
+ (display 'run-stream-back-and-forth-failing-test) (newline)
+ (run-stream-back-and-forth-failing-test original-rsv))))))
+ invalid-filenames)
+
+ (for-each (lambda (filename)
+ (write filename) (newline)
+ (call-with-input-file filename
+ (lambda (port)
+ (test-group filename
+ (let ((original-rsv (get-bytevector-all port)))
+ (display 'run-stream-back-and-forth-test) (newline)
+ (run-stream-back-and-forth-test original-rsv))))))
+ valid-filenames)
+
+ (test-end "rsv-rows-streams-tests")))
diff --git a/tests.scm b/tests.scm
@@ -20,13 +20,11 @@
(scheme write)
(srfi srfi-1)
- (srfi srfi-41)
(srfi srfi-64)
(statprof)
- (rsv)
- (rsv rows-streams))
+ (rsv))
(begin
(define valid-filenames
@@ -95,23 +93,6 @@
(scm->rsv-bytevector (rsv-bytevector->scm original-rsv))))
#:count-calls? #t))
- (define (scm-rows-stream->rsv stream)
- (let ((output-bytevector-port (open-output-bytevector)))
- (let loop ((stream stream))
- (cond
- ((stream-null? stream)
- (get-output-bytevector output-bytevector-port))
- (else
- (let* ((new-scm-row (stream-car stream)))
- (write-rsv-row new-scm-row output-bytevector-port)
- (loop (stream-cdr stream))))))))
-
- (define (run-stream-back-and-forth-test original-rsv)
- (let* ((input-bytevector-port (open-input-bytevector original-rsv))
- (rows-stream (port->rsv-row-stream input-bytevector-port)))
- (let ((regenerated-rsv (scm-rows-stream->rsv rows-stream)))
- (test-equal original-rsv regenerated-rsv))))
-
(test-begin "RSV")
(for-each (lambda (filename)
@@ -131,9 +112,7 @@
(test-group filename
(let ((original-rsv (get-bytevector-all port)))
(display 'run-back-and-forth-test) (newline)
- (run-back-and-forth-test original-rsv)
- (display 'run-stream-back-and-forth-test) (newline)
- (run-stream-back-and-forth-test original-rsv))))))
+ (run-back-and-forth-test original-rsv))))))
valid-filenames)
(test-end "RSV")))