s c h e m e w i k i . o r g

/ WebHome / Sandbox.ScmToXexpr

This Web


WebHome 
WebChanges 
WebTopicList 
WebStatistics 

All Webs


Chicken
Cookbook
Erlang
Know
Main
Plugins
Sandbox
Scm
TWiki  

Schematics


Schematics Home
TWiki Shorthand

Offsite Links


Schemers.org
Lambda the Ultimate
twiki.org

Scm2Xexpr - Scheme code formatter

Overview

The program listed below converts Scheme source code, stored in a specified file or text string, into PLT's Xexpr representation of XML. This can be used to generate web pages containing formatted Scheme code. The program listing below is generated by a PLT web servlet which uses this code.

About the code

The code was adapted from Dorai Sitaram's TeX2page, with the following changes:

With the exception of side effects encapsulated in the input stream code, and the use of destructive reverse!, for performance reasons, the resulting program is side-effect free.

scm2xexpr.scm

#|
   Copyright (c) 1997-2003, Dorai Sitaram.
   Portions copyright (c) 2004, Anton van Straaten.
   All rights reserved.

   Permission to distribute and use this work for any
   purpose is hereby granted provided this copyright
   notice is included in the copy.  This work is provided
   as is, with no warranty of any kind.
|#

(module scm2xexpr mzscheme

  (provide scheme-text->xexpr
           scheme-file->xexpr
           scheme->xexpr)

  (require "scheme-names.scm")

  ; constants
  (define *invisible-space* (list '*invisible-space*))
  (define *return* (integer->char 13))
  (define *scm-token-delims*
    (list #\( #\) #\[ #\] #\{ #\} #\' #\` #\" #\; #\, #\|))

  (define *scm-variables* '())

  (define scheme-file->xexpr
    (lambda (filename . stylesheet-name)
      (apply scheme->xexpr filename 'file stylesheet-name)))

  (define scheme-text->xexpr
    (lambda (source-text . stylesheet-name)
      (apply scheme->xexpr source-text 'source stylesheet-name)))

  ; scheme->xexpr : string * symbol [* string] -> xexpr
  (define scheme->xexpr
    (lambda (input-source input-type . stylesheet-name)
      (apply maybe-wrap-in-page
             `(div ([class "scheme"]) ; use div so stylesheet can apply margin
                   (pre               ; can't use class here, twiki won't notice pre tag
                    ,@((make-input-stream input-source input-type) 
                       (lambda (get-char toss-back-char)
                         ((make-formatter get-char toss-back-char)
                          (lambda (snoop get-next-xexpr)
                            (let loop ((output-elements '()))
                              (let ((c (snoop)))
                                (if (eof-object? c) 
                                    (reverse! output-elements)
                                    (loop (cons (get-next-xexpr) 
                                                output-elements)))))))))))
             stylesheet-name)))

  (define-syntax define-interface
    (syntax-rules (done:)
      ((_ name (op ...))
       (define name
         (lambda (op ...)
           (lambda (user)
             (user op ...)))))

      ((_ name (op ...) done: finalizer)
       (define name
         (lambda (op ... finalizer)
           (lambda (user)
             (begin0
               (user op ...)
               (finalizer))))))))

  ; input-stream interface idea inspired by Mayer Goldberg:
  ; http://www.cs.bgu.ac.il/~elhadad/advpro/2001/input-streams.scm
  ; Used here to wrap TeX2page input stream code, to reduce inter-
  ; dependencies, and separate input stream code from formatting code.
  (define-interface ^input-stream (get unget) done: close-input)
  (define-interface ^formatter (snoop-actual-char get-next-xexpr))

  ; creates an ^input-stream bound to the specified input source.  Wraps TeX2page input mechanism.
  (define make-input-stream
    (let* ([make-stream 
            (lambda (current-input close-input)
              (let ((input-line-no 1))
                (^input-stream
                 (lambda ()
                   (let ((b (bport-buffer current-input)))
                     (if (null? b)
                         (let ((p (bport-port current-input)))
                           (if (not p)
                               eof
                               (let ((c (read-char p)))
                                 (cond
                                   ((eof-object? c) c)
                                   ((char=? c #\newline)
                                    (set! input-line-no (+ input-line-no 1))
                                    c)
                                   (else c)))))
                         (let ((c (car b)))
                           (set-bport-buffer! current-input (cdr b))
                           c))))
                 
                 (lambda (c)
                   (set-bport-buffer!
                    current-input
                    (cons c (bport-buffer current-input))))
                 
                 close-input)))]

           [make-string-stream 
            (lambda (s)
              (make-stream (make-bport 'buffer (string->list s)) (lambda () #t)))])

      (lambda (source source-type)
        (case source-type
          ((file)   (if (file-exists? source)
                        (let ((inp (open-input-file source)))
                          (make-stream (make-bport 'port inp) (lambda () (close-input-port inp) #t)))
                        (make-string-stream (format ";*** Error: file not found: ~a\n" source))))

          ((source) (make-string-stream source))))))

  ; create a ^formatter interface based on the specified ^input-stream interface
  (define make-formatter
    (lambda (get-char toss-back-char)
      (letrec 
          ([scm-output-hash
            (lambda ()
              (get-actual-char)
              (let ((c (snoop-actual-char)))
                (cond
                  ((eof-object? c)
                   '(span ([class "selfeval"]) "#"))
                  ((char=? c #\|) (scm-output-extended-comment))
                  (else (toss-back-char #\#) (scm-output-token (scm-get-token))))))]
           
           [scm-output-next-chunk
            (lambda ()
              (let ((c (snoop-actual-char)))
                (cond
                  ((char=? c #\;) (scm-output-comment))
                  ((char=? c #\") (scm-output-string))
                  ((char=? c #\#) (scm-output-hash))
                  ((char=? c #\,)
                   (get-actual-char)
                   `(span ([class "keyword"])
                          ,(let ((c (snoop-actual-char)))
                             (cond 
                               ((char=? c #\@) (get-actual-char) ",@")
                               (else ",")))))
                  ((or (char=? c #\') (char=? c #\`))
                   (get-actual-char)
                   `(span ([class "keyword"]) 
                          ,(scm-emit-html-char c)))
                  ((or (char-whitespace? c) (memv c *scm-token-delims*))
                   (get-actual-char)
                   (scm-emit-html-char c))
                  (else (scm-output-token (scm-get-token))))))]
           
           [snoop-actual-char
            (lambda ()
              (let ((c (snoop-char)))
                (cond
                  ((eof-object? c) c)
                  ((invisible-space? c) (get-char) (snoop-actual-char))
                  ((char=? c *return*)
                   (get-char)
                   (let ((c (snoop-actual-char)))
                     (if (and (not (eof-object? c)) (char=? c #\newline))
                         c
                         (begin (toss-back-char #\newline) #\newline))))
                  (else c))))]
           
           [scm-output-comment
            (lambda ()
              `(span ([class "comment"])
                     ,(list->string 
                       (reverse!
                        (let loop ((comment-chars '()))
                          (let ((c (get-actual-char)))
                            (cond
                              ((eof-object? c) comment-chars)
                              ((char=? c #\newline) (cons c comment-chars))  ; final newline included in comment
                              ((and (char-whitespace? c)
                                    (let ((c2 (snoop-actual-char)))
                                      (or (eof-object? c2) (char=? c2 #\newline))))
                               (get-actual-char)
                               (cons #\newline comment-chars))               ; ditto here
                              (else (loop (cons c comment-chars))))))))))]
           
           [scm-output-extended-comment
            (lambda ()
              (get-actual-char)
              `(span ([class "comment"])
                     ,(string-append
                       "#|"
                       (list->string
                        (reverse!
                         (let loop ((comment-chars '()))
                           (let ((c (get-actual-char)))
                             (cond
                               ((eof-object? c) comment-chars)
                               ((char=? c #\|)
                                (let ((c2 (snoop-actual-char)))
                                  (cond
                                    ((eof-object? c2) comment-chars)
                                    ((char=? c2 #\#) (get-actual-char) comment-chars)
                                    (else (loop (cons c comment-chars))))))
                               (else (loop (cons c comment-chars))))))))
                       "|#")))]
           
           [scm-get-token
            (lambda ()
              (list->string
               (reverse!
                (let loop ((s '()) (esc? #f))
                  (let ((c (snoop-actual-char)))
                    (cond
                      ((eof-object? c) s)
                      (esc? (get-actual-char) (loop (cons c s) #f))
                      ((char=? c #\\) (get-actual-char) (loop (cons c s) #t))
                      ((or (char-whitespace? c) (memv c *scm-token-delims*)) s)
                      (else (get-actual-char) (loop (cons c s) #f))))))))]
           
           [snoop-char (lambda () (let ((c (get-char))) (toss-back-char c) c))]
           
           [get-actual-char
            (lambda ()
              (let ((c (get-char)))
                (cond
                  ((eof-object? c) c)
                  ((invisible-space? c) (get-actual-char))
                  ((char=? c *return*)
                   (let ((c (snoop-actual-char)))
                     (if (and (not (eof-object? c)) (char=? c #\newline))
                         (get-actual-char)
                         #\newline)))
                  (else c))))]
           
           [scm-output-string
            (lambda ()
              (get-actual-char) 
              `(span ([class "selfeval"])
                     ,(string-append
                       "\""
                       (list->string
                        (reverse!
                         (let loop ((s '()) (esc? #f))
                           (let ((c (get-actual-char)))
                             (case c
                               ((#\") (if esc? (loop (cons c s) #f)
                                          s))
                               ((#\\) (loop (cons c s) (not esc?)))
                               (else  (loop (cons c s) #f)))))))
                       "\"")))])
        
        (^formatter snoop-actual-char scm-output-next-chunk))))
  
  ; todo: add support for title
  (define maybe-wrap-in-page
    (lambda (xexpr . stylesheet-name)
      (if (null? stylesheet-name)
          xexpr
          `(html
            (head
             (link ([rel "stylesheet"]
                    [type "text/css"]
                    [href ,(car stylesheet-name)])))
            (body ,xexpr)))))
  
  (define scm-emit-html-char
    (lambda (c)
      (if (eof-object? c) '()
          (list->string (list c)))))
  
  (define scm-output-token
    (lambda (s)
      (let ((type (scm-get-type s)))
        (if (eq? type 'background) s
            `(span ([class ,(symbol->string type)]) ,s)))))
  
  (define member/string-ci=?
    (lambda (s ss) (ormap (lambda (x) (string-ci=? x s)) ss)))
  
  (define string-is-flanked-by-stars?
    (lambda (s)
      (let ((n (string-length s)))
        (and (>= n 3)
             (char=? (string-ref s 0) #\*)
             (char=? (string-ref s (- n 1)) #\*)))))
  
  (define string-starts-with-hash? (lambda (s) (char=? (string-ref s 0) #\#)))
  
  (define scm-get-type
    (lambda (s)
      (cond
        ((member/string-ci=? s *scm-keywords*) 'keyword)
        ((member/string-ci=? s *scm-builtins*) 'builtin)
        ((member/string-ci=? s *scm-variables*) 'variable)
        ((string-is-flanked-by-stars? s) 'global)
        (else
         (let ((colon (string-index s #\:)))
           (cond
             (colon (if (= colon 0) 'selfeval 'variable))
             ((string-is-all-dots? s) 'background)
             ((string-starts-with-hash? s) 'selfeval)
             ((string->number s) 'selfeval)
             (else 'variable)))))))
  
  (define-struct bport (port buffer))
  
  ; wrap plt constructor to match tex2page requirements
  (set! make-bport
        (let ((make-bport-orig make-bport))
          (lambda (field value)
            (apply make-bport-orig
                   (case field
                     ((buffer) (list #f value))
                     ((port)   (list value '())))))))
  
  (define invisible-space? (lambda (x) (eq? x *invisible-space*)))
  
  (define string-index
    (lambda (s c)
      (let ((n (string-length s)))
        (let loop ((i 0))
          (cond
            ((>= i n) #f)
            ((char=? (string-ref s i) c) i)
            (else (loop (+ i 1))))))))
  
  (define string-is-all-dots?
    (lambda (s)
      (let ((n (string-length s)))
        (let loop ((i 0))
          (cond
            ((>= i n) #t)
            ((char=? (string-ref s i) #\.) (loop (+ i 1)))
            (else #f))))))
  )

scheme-names.scm

(module scheme-names mzscheme
  (provide *scm-builtins* *scm-keywords*)

  (define *scm-builtins* 
    '("abs"
      "acos"
      "angle"
      "append"
      "apply"
      "asin"
      "assq" "assv" "assoc"
      "atan"
      "boolean?"
      "car" "cdr"
      "caar"   "cadr"   "cdar"   "cddr"
      "caaar"  "caadr"  "cadar"  "caddr"  "cdaar"  "cdadr"  "cddar"  "cdddr"
      "caaaar" "caaadr" "caadar" "caaddr" "cadaar" "cadadr" "caddar" "cadddr" 
      "cdaaar" "cdaadr" "cdadar" "cdaddr" "cddaar" "cddadr" "cdddar" "cddddr"
      "call-with-current-continuation" "call/cc"
      "call-with-input-file"
      "call-with-output-file"
      "call-with-values"
      "char?"
      "char=?"    "char<?"    "char>?"    "char<=?"    "char>=?"
      "char-ci=?" "char-ci<?" "char-ci>?" "char-ci<=?" "char-ci>=?"
      "char-alphabetic?" "char-numeric?" "char-whitespace?" "char-upper-case?" "char-lower-case?"
      "char-ready?"
      "char->integer"
      "char-upcase" "char-downcase"
      "cons"
      "ceiling"
      "close-input=port"
      "close-output-port"
      "complex?"
      "cos"
      "current-input-port"
      "current-output-port"
      "denominator"
      "display"
      "dynamic-wind"
      "eof-object?"
      "eq?"
      "equal?"
      "eqv?"
      "eval"
      "even?"
      "exact?"
      "exact->inexact"
      "exp"
      "expt"
      "floor"
      "for-each"
      "force"
      "gcd"
      "imag-part"
      "inexact?"
      "inexact->exact"
      "input-port?"
      "integer?"
      "integer->char"
      "interaction-environment"
      "lcm"
      "length"
      "list"
      "list?"
      "list-ref"
      "list-tail"
      "list->string"
      "list->vector"
      "load"
      "log"
      "magnitude"
      "make-polar"
      "make-rectangular"
      "make-string"
      "make-vector"
      "map"
      "max" "min"
      "memq" "memv" "member"
      "modulo"
      "negative?"
      "newline"
      "not"
      "null?"
      "null-environment"
      "number?"
      "number->string"
      "numerator"
      "odd?"
      "open-input-file"
      "open-output-file"
      "output-port?"
      "pair?"
      "peek-char"
      "positive?"
      "procedure?"
      "quotient"
      "rational?"
      "rationalize"
      "read"
      "read-char"
      "real?"
      "real-part"
      "remainder"
      "reverse"
      "round"
      "scheme-report-environment"
      "sin"
      "set-car!"
      "set-cdr!"
      "sqrt"
      "string"
      "string?"
      "string-append"
      "string-copy"
      "string-fill!"
      "string-length"
      "string-ref"
      "string-set!"
      "string->list"
      "string->number"
      "string->symbol"
      "string=?"
      "string-ci=?"
      "string<?" "string>?" "string<=?" "string>=?"
      "string-ci<?" "string-ci>?" "string-ci<=?" "string-ci>=?"
      "substring"
      "symbol?"
      "symbol->string"
      "tan"
      "transcript-on"
      "transcript-off"
      "truncate"
      "values"
      "vector"
      "vector?"
      "vector-fill!"
      "vector-length"
      "vector-ref"
      "vector-set!"
      "vector->list"
      "with-input-from-file"
      "with-output-to-file"
      "write"
      "write-char"
      "zero?"
      "=" "<" ">" "<=" ">="
      "+" "*" "-" "/"
      ))
  
  (define *scm-keywords*
    '("=>"
      "and"
      "begin"
      "begin0"
      "case"
      "cond"
      "define"
      "define-macro"
      "define-syntax"
      "define-struct"
      "delay"
      "do"
      "else"
      "fluid-let"
      "if"
      "lambda"
      "let"
      "let-syntax"
      "let*"
      "letrec"
      "letrec-syntax"
      "module"
      "or"
      "provide"
      "quasiquote"
      "quote"
      "require"
      "require-for-syntax"
      "set!"
      "syntax-case"
      "syntax-rules"
      "unless"
      "unquote"
      "unquote-splicing"
      "when"
      "with-handlers")))

pretty.scm

The following is a PLT web servlet which invokes the above code:

(require (lib "unitsig.ss")
         (lib "servlet-sig.ss" "web-server")
         (lib "servlet-helpers.ss" "web-server")
         "scm2xexpr.scm")

(unit/sig () (import servlet^)
  (let ((env (request-bindings initial-request)))
    (let ((sym (cond
                 ((exists-binding? 'source env) 'source)
                 ; WARNING: following would be a security risk on a public machine
                 ((exists-binding? 'file env)   'file)))) ; todo: else error
      (scheme->xexpr (extract-binding/single sym env) sym "/bluescheme.css"))))

 
 
© 2004 by the contributing authors. / You are Main.guest