;;; psgml-parse.el --- Parser for SGML-editing mode with parsing support -*- lexical-binding:t -*- ;; $Id: psgml-parse.el,v 2.105 2008/06/21 16:13:51 lenst Exp $ ;; Copyright (C) 1994-1998, 2016-2017 Free Software Foundation, Inc. ;; Author: Lennart Staflin ;; Acknowledgment: ;; The catalog and XML parsing code was contributed by ;; David Megginson ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License ;; as published by the Free Software Foundation; either version 3 ;; of the License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with this program. If not, see . ;;; Commentary: ;; Part of major mode for editing the SGML document-markup language. ;;; Code: (require 'psgml) (require (if (featurep 'xemacs) 'psgml-lucid 'psgml-other)) ;;; Interface to psgml-dtd (eval-and-compile (autoload 'sgml-do-usemap-element "psgml-dtd") (autoload 'sgml-write-dtd "psgml-dtd") (autoload 'sgml-check-dtd-subset "psgml-dtd") ) (require 'cl-lib) ;;;; Advise to do-auto-fill (defvar sgml-auto-fill-inhibit-function nil "If non-nil, it should be a function of no arguments. The functions is evaluated before the standard auto-fill function, `do-auto-fill', tries to fill a line. If the function returns a true value the auto-fill is inhibited.") ;;(defadvice do-auto-fill (around disable-auto-fill-hook activate) ;; (or (and sgml-auto-fill-inhibit-function ;; (funcall sgml-auto-fill-inhibit-function)) ;; ad-do-it)) ;;;; Variables (defvar sgml-psgml-pi-enable-outside-dtd nil) ;;; Hooks (defvar sgml-open-element-hook nil "The hook run by `sgml-open-element'. Theses functions are called with two arguments, the first argument is the opened element and the second argument is the attribute specification list. It is probably best not to refer to the content or the end-tag of the element.") (defvar sgml-close-element-hook nil "The hook run by `sgml-close-element'. These functions are invoked with `sgml-current-tree' bound to the element just parsed.") (defvar sgml-doctype-parsed-hook nil "This hook is called after the doctype has been parsed. It can be used to load any additional information into the DTD structure.") (defvar sgml-sysid-resolve-functions nil "A list of functions for resolving sysids. Each function should take one argument, the system identifier of an entity. If the function can handle that identifier, it should insert the text of the entity into the current buffer at point and return t. If the system identifier is not handled the function should return nil.") ;;; Internal variables (defconst sgml-pcdata-token (intern "#PCDATA")) (defvar sgml-computed-map nil "Internal representation of entity search map.") (defvar sgml-used-entity-map nil "Value of `sgml-current-entity-map' used to compute the map in `sgml-compute-map'.") (defvar sgml-last-element nil "Used to keep information about position in element structure between commands.") (defconst sgml-users-of-last-element '(sgml-beginning-of-element sgml-end-of-element sgml-up-element sgml-backward-up-element sgml-backward-element sgml-forward-element sgml-down-element sgml-show-context sgml-next-data-field ) "List of commands that set the variable `sgml-last-element'.") (defvar sgml-parser-syntax nil "Syntax table used during parsing.") (defvar sgml-ecat-assoc nil "Assoc list caching parsed ecats.") (defvar sgml-catalog-assoc nil "Assoc list caching parsed catalogs.") ;;; Variables dynamically bound to affect parsing (defvar sgml-throw-on-warning nil "Set to a symbol other than nil to make `sgml-log-warning' throw to that symbol.") (defvar sgml-throw-on-error nil "Set to a symbol other than nil to make `sgml-error' throw to that symbol.") (defvar sgml-show-warnings nil "Set to t to show warnings.") (defvar sgml-close-element-trap nil "Can be nil for no trap, an element or t for any element. Tested by `sgml-close-element' to see if the parse should be ended.") (defvar sgml-goal 0 "Point in buffer to parse up to.") (defvar sgml-shortref-handler (function sgml-handle-shortref) "Function called by parser to handle a short reference. Called with the entity as argument. The start and end of the short reference is `sgml-markup-start' and point.") (defvar sgml-data-function nil "Function called with parsed data.") (defvar sgml-entity-function nil "Function called with entity referenced at current point in parse.") (defvar sgml-pi-function nil "Function called with parsed processing instruction.") (defvar sgml-signal-data-function nil "Called when some data characters are conceptually parsed. E.g. a data entity reference.") (defvar sgml-throw-on-element-change nil "Throw tag.") ;;; Global variables active during parsing (defvar sgml-parsing-dtd nil "This variable is bound to t while parsing a DTD (subset).") (defvar sgml-rs-ignore-pos nil "Set to position of last parsing start in current buffer.") (make-variable-buffer-local 'sgml-rs-ignore-pos) (defvar sgml-dtd-info nil "Holds the `sgml-dtd' structure describing the current DTD.") (defvar sgml-current-namecase-general t "Value of `sgml-namecase-general' in main buffer. Valid during parsing.") (defvar sgml-current-omittag nil "Value of `sgml-omittag' in main buffer. Valid during parsing.") (defvar sgml-current-shorttag nil "Value of `sgml-shorttag' in main buffer. Valid during parsing.") (defvar sgml-current-localcat nil "Value of `sgml-local-catalogs' in main buffer. Valid during parsing.") (defvar sgml-current-local-ecat nil "Value of `sgml-local-ecat-files' in main buffer. Valid during parsing.") (defvar sgml-current-top-buffer nil "The buffer of the document entity, the main buffer. Valid during parsing. This is used to find current directory for catalogs.") (defvar sgml-current-state nil "Current state in content model or model type if CDATA, RCDATA or ANY.") (defvar sgml-current-shortmap nil "The current active short reference map.") (defvar sgml-current-tree nil "Current parse tree node, identifies open element.") (defvar sgml-previous-tree nil "Previous tree node in current tree. This is nil if no previous node.") (defvar sgml-last-buffer nil "Buffer where last parse was ended. Used for restarting parser at the point where it left of.") (defvar sgml-markup-type nil "Contains the type of markup parsed last. The value is a symbol: nil - pcdata or space CDATA - CDATA or RCDATA comment - comment declaration doctype - doctype declaration end-tag ignored - ignored marked section ms-end - marked section start, if not ignored ms-start - marked section end, if not ignored pi - processing instruction sgml - SGML declaration start-tag entity - general entity reference param - parameter reference shortref- short reference mdecl - markup declaration") (defvar sgml-top-tree nil "Root node of parse tree during parsing.") (defvar sgml-markup-tree nil "Tree node of markup parsed. In case markup closed element this is different from `sgml-current-tree'. Only valid after `sgml-parse-to'.") (defvar sgml-markup-start nil "Start point of markup being parsed.") (defvar sgml-conref-flag nil "Set by `sgml-parse-attribute-specification-list' if a CONREF attribute is parsed.") (defvar sgml-no-elements nil "Number of declared elements.") ;;; Vars used in *param* buffers (defvar sgml-previous-buffer nil) (defvar sgml-current-eref nil "This is the entity reference used to enter current entity. If this is nil, then current entity is main buffer.") (defvar sgml-current-file nil "This is the file name of the current entity.") (defvar sgml-scratch-buffer nil "The global value of this variable is the first scratch buffer for entities. The entity buffers can have a buffer local value for this variable to point to the next scratch buffer.") (put 'sgml-scratch-buffer 'permanent-local t) (defvar sgml-last-entity-buffer nil) ;;; For loading DTD (eval-and-compile (defconst sgml-max-single-octet-number 250 "Octets greater than this is the first of a two octet coding.")) (defvar sgml-read-token-vector nil) ; Vector of symbols used to decode ; token numbers. (defvar sgml-read-nodes nil) ; Vector of nodes used when reading ; a finite automaton. ;; Buffer local variables (defvar sgml-loaded-dtd nil "File name corresponding to current DTD.") (make-variable-buffer-local 'sgml-loaded-dtd) (defvar sgml-current-element-name nil "Name of current element for mode line display.") (make-variable-buffer-local 'sgml-current-element-name) (defvar sgml-dtd-less nil "Non-nil means the document doesn't have a DTD. Applicable to XML.") (make-variable-buffer-local 'sgml-dtd-less) ;;;; Build parser syntax table (defconst sgml-parser-syntax (let ((st (make-syntax-table))) (dotimes (i 256) ;FIXME: Why 256 here and 128 for xml? (modify-syntax-entry i " " st)) ;;http://list-archive.xemacs.org/xemacs-beta/200011/msg00117.html (mapc (lambda (c) (modify-syntax-entry c "w" st)) ":ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrtsuvwxyz") (mapc (lambda (c) (modify-syntax-entry c "_" st)) "-.0123456789") st)) (defconst xml-parser-syntax (let ((tab (make-syntax-table))) (dotimes (i 128) ;FIXME: Why 128 here and 256 for sgml? (modify-syntax-entry i " " tab)) (mapc (lambda (c) (modify-syntax-entry c "w" tab)) "_:ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrtsuvwxyz") (mapc (lambda (c) (modify-syntax-entry c "_" tab)) ;; Fixme: what's the non-ASCII character doing here? -- fx "-.0123456789ยท") tab)) (defmacro sgml-with-parser-syntax (&rest body) (declare (debug t)) `(let ((cb (current-buffer))) (with-syntax-table (if sgml-xml-p xml-parser-syntax sgml-parser-syntax) (unwind-protect (progn ,@body) (setq sgml-last-buffer (current-buffer)) (set-buffer cb))))) (defmacro sgml-with-parser-syntax-ro (&rest body) (declare (debug t)) ;; Should only be used for parsing .... ;; FIXME: Use `with-silent-modifications'? `(let ((cb (current-buffer)) (buffer-modified (buffer-modified-p))) (with-syntax-table (if sgml-xml-p xml-parser-syntax sgml-parser-syntax) (unwind-protect (progn ,@body) (setq sgml-last-buffer (current-buffer)) (set-buffer cb) (unless buffer-modified (restore-buffer-modified-p buffer-modified)) (sgml-debug "Restoring buffer mod: %s" buffer-modified))))) (defvar mc-flag) (defun sgml-set-buffer-multibyte (flag) (cond ((featurep 'xemacs) flag) (t (set-buffer-multibyte (if (eq flag 'default) (default-value 'enable-multibyte-characters) flag))))) ;; Probably better. -- fx ;; (eval-and-compile ;; (if (fboundp 'set-buffer-multibyte) ;; (defalias 'sgml-set-buffer-multibyte ;; (if (fboundp 'set-buffer-multibyte) ;; 'set-buffer-multibyte ;; 'identity)))) ;;;; State machine ;; From the parsers POV a state is a mapping from tokens (in sgml it ;; is primitive state tokens) to states. The pairs of the mapping is ;; called moves. ;; DFAs are always represented by the start state, which is a ;; normal state. Normal states contain moves of two types: ;; 1. moves for required tokens, 2. moves for optional tokens. ;; By design these are keept in two different sets. ;; [Alt: they could perhaps have been keept in one set but ;; marked in different ways.] ;; The and-model groups creates too big state machines, therefor ;; there is a datastruture called and-node. ;; An and-node is a specification for a dfa that has not been computed. ;; It contains a set of dfas that all have to be traversed before going ;; to the next state. The and-nodes are only stored in moves and are ;; not seen by the parser. When a move is taken the and-node is converted ;; to an and-state. ;; An and-state keeps track of which dfas still need to be ;; traversed and the state of the current dfa. ;; move = ;; node = normal-state | and-node ;; and-node = ;; where: dfas is a set of normal-state ;; next is a normal-state ;; State = normal-state | and-state ;; The parser only knows about the state type. ;; normal-state = ;; where: opts is a set of moves for optional tokens ;; reqs is a set of moves for required tokens ;; and-state = ;; where: substate is a normal-state ;; dfas is a set of states ;; next is the next state ;; The and-state is only used during the parsing. ;; Primitiv functions to get data from parse state need ;; to know both normal-state and and-state. ;;; Representations: ;;move: (token . node) (defmacro sgml-make-move (token node) `(cons ,token ,node)) (defmacro sgml-move-token (x) `(car ,x)) (defmacro sgml-move-dest (x) `(cdr ,x)) ;; set of moves: list of moves (defmacro sgml-add-move-to-set (token node set) `(cons (cons ,token ,node) ,set)) (defmacro sgml-moves-lookup (token set) `(assq ,token ,set)) ;; normal-state: ('normal-state opts . reqs) (defsubst sgml-make-state () (cons 'normal-state (cons nil nil))) (defmacro sgml-normal-state-p (s) `(eq (car ,s) 'normal-state)) (defmacro sgml-state-opts (s) `(cadr ,s)) (defmacro sgml-state-reqs (s) `(cddr ,s)) (defmacro sgml-state-final-p (s) `(null (sgml-state-reqs ,s))) ;; adding moves ;; *** Should these functions check for ambiguity? ;; What if adding a optional move for a token that has a ;; required move? ;; What about the other way? (defsubst sgml-add-opt-move (s token dest) (or (sgml-moves-lookup token (sgml-state-opts s)) (setf (sgml-state-opts s) (sgml-add-move-to-set token dest (sgml-state-opts s))))) (defsubst sgml-add-req-move (s token dest) (or (sgml-moves-lookup token (sgml-state-reqs s)) (setf (sgml-state-reqs s) (sgml-add-move-to-set token dest (sgml-state-reqs s))))) (defsubst sgml-make-primitive-content-token (token) (let ((s1 (sgml-make-state)) (s2 (sgml-make-state))) (sgml-add-req-move s1 token s2) s1)) ;;and-state: (state next . dfas) (defsubst sgml-make-and-state (state dfas next) (cons state (cons next dfas))) (defsubst sgml-step-and-state (state and-state) (cons state (cdr and-state))) (defsubst sgml-and-state-substate (s) (car s)) (defsubst sgml-and-state-dfas (s) (cddr s)) (defsubst sgml-and-state-next (s) (cadr s)) ;;and-node: (next . dfas) (defsubst sgml-make-and-node (dfas next) (cons next dfas)) (defmacro sgml-and-node-next (n) `(car ,n)) (defmacro sgml-and-node-dfas (n) `(cdr ,n)) ;;; Using states (defsubst sgml-final (state) (if (sgml-normal-state-p state) (sgml-state-final-p state) (sgml-final-and state))) (defun sgml-final-and (state) (and (sgml-final (sgml-and-state-substate state)) (cl-loop for s in (sgml-and-state-dfas state) always (sgml-state-final-p s)) (sgml-state-final-p (sgml-and-state-next state)))) ;; get-move: State x Token --> State|nil (defsubst sgml-get-move (state token) "Return a new state or nil, after traversing TOKEN from STATE." (cond ((symbolp state) nil) ;if EMPTY slips thru... ((sgml-normal-state-p state) (let ((c (or (sgml-moves-lookup token (sgml-state-opts state)) (sgml-moves-lookup token (sgml-state-reqs state))))) (if c (let ((dest (sgml-move-dest c))) (if (sgml-normal-state-p dest) dest ;; dest is a and-node (sgml-next-sub-and (sgml-and-node-dfas dest) token (sgml-and-node-next dest))))))) (t ;state is a and-state (sgml-get-and-move state token)))) (defun sgml-get-and-move (state token) ;; state is a and-state (let ((m (sgml-get-move (sgml-and-state-substate state) token))) (cond (m (cons m (cdr state))) ((sgml-final (sgml-and-state-substate state)) (sgml-next-sub-and (sgml-and-state-dfas state) token (sgml-and-state-next state)))))) (defun sgml-next-sub-and (dfas token next) "Compute the next state, choosing from DFAS and moving by TOKEN. If this is not possible, but all DFAS are final, move by TOKEN in NEXT." (let ((allfinal t) (l dfas) (res nil) s1 s2) (while (and l (not res)) (setq s1 (car l) allfinal (and allfinal (sgml-state-final-p s1)) s2 (sgml-get-move s1 token) res (and s2 (sgml-make-and-state s2 (remq s1 dfas) next)) l (cdr l))) (cond (res) (allfinal (sgml-get-move next token))))) (defsubst sgml-tokens-of-moves (moves) (mapcar (function (lambda (m) (sgml-move-token m))) moves)) (defun sgml-required-tokens (state) (if (sgml-normal-state-p state) (sgml-tokens-of-moves (sgml-state-reqs state)) (or (sgml-required-tokens (sgml-and-state-substate state)) (cl-loop for s in (sgml-and-state-dfas state) nconc (sgml-tokens-of-moves (sgml-state-reqs s))) (sgml-tokens-of-moves (sgml-state-reqs (sgml-and-state-next state)))))) (defun sgml-optional-tokens (state) (if (sgml-normal-state-p state) (sgml-tokens-of-moves (sgml-state-opts state)) (nconc (sgml-optional-tokens (sgml-and-state-substate state)) (if (sgml-final (sgml-and-state-substate state)) (cl-loop for s in (sgml-and-state-dfas state) nconc (sgml-tokens-of-moves (sgml-state-opts s)))) (if (cl-loop for s in (sgml-and-state-dfas state) always (sgml-state-final-p s)) (sgml-tokens-of-moves (sgml-state-opts (sgml-and-state-next state))))))) ;;;; Attribute Types ;;; Basic Types ;; name = string attribute names are lisp strings ;; attval = string attribute values are lisp strings ;;; Attribute Declaration Type ;; attdecl = ;; This is the result of the ATTLIST declarations in the DTD. ;; All attribute declarations for an element is the elements ;; attlist. ;;; Attribute Declaration Operations ;; sgml-make-attdecl: name declared-value default-value -> attdecl ;; sgml-attdecl-name: attdecl -> name ;; sgml-attdecl-declared-value: attdecl -> declared-value ;; sgml-attdecl-default-value: attdecl -> default-value ;;; Attribute Declaration List Type ;; attlist = attdecl* ;;; Attribute Declaration List Operations ;; sgml-lookup-attdecl: name x attlist -> attdecl ;;; Declared Value Type ;; declared-value = (token-group | notation | simpel) ;; token-group = nametoken+ ;; notation = nametoken+ ;; simple = symbol lisp symbol corresponding to SGML type ;;; Declared Value Operations ;; sgml-declared-value-token-group: declared-value -> list of symbols ;; sgml-declared-value-notation: declared-value -> list of symbols ;; (empty list if not token-group/notation) ;;; Default Value Type ;; default-value = (required | implied | conref | specified ) ;; implied, conref = constant symbol ;; specified = (fixed | normal) ;; fixed, normal = attval ;;; Default Value Operations ;; sgml-default-value-attval: default-value -> (attval | nil) ;; sgml-default-value-type-p: type x default-value -> cond ;;; Attribute Specification Type ;; attspec = ;; This is the result of parsing an attribute specification. ;; sgml-make-attspec: name x attval -> attspec ;; sgml-attspec-name: attspec -> name ;; sgml-attspec-attval: attspec -> attval ;;; Attribute Specification List Type ;; asl = attspec* ;; aka. attribute value list ;;; Code ;;; attdecl representation = (name declared-value default-value) (defun sgml-make-attdecl (name dcl-value default-value) (list name dcl-value default-value)) (defun sgml-attdecl-name (attdecl) (car attdecl)) (defun sgml-attdecl-declared-value (attdecl) "The declared value of ATTDECL. It may be a symbol or (name-token-group (NAME1 ... NAMEn)) or (notation (NOT1 ... NOTn))" (cadr attdecl)) (defun sgml-attdecl-default-value (attdecl) "The default value of ATTDECL. The default value is either a symbol (REQUIRED | IMPLIED | CURRENT | CONREF) or a list with first element nil or symbol `FIXED' and second element the value." (car (cddr attdecl))) ;;; attlist representation = (attspec*) (defun sgml-lookup-attdecl (name attlist) "Return the attribute declaration for NAME in ATTLIST." (assoc name attlist)) (defun sgml-attribute-with-declared-value (attlist declared-value) "Find the first attribute in ATTLIST that has DECLARED-VALUE." (let ((found nil)) (while (and attlist (not found)) (when (equal declared-value (sgml-attdecl-declared-value (car attlist))) (setq found (car attlist))) (setq attlist (cdr attlist))) found)) ;;; declared-value representation ;; token-group = (name-token (symbol+)) ;; notation = (notation (symbol+)) ;; simple = symbol lisp symbol correspoinding to SGML type (defun sgml-make-declared-value (type &optional names) "Make a declared-value of TYPE. TYPE should be a symbol. If TYPE is name-token-group or notation NAMES should be a list of symbols." (if (consp names) (list type names) type)) (defun sgml-declared-value-token-group (declared-value) "Return the name token group for the DECLARED-VALUE. This applies to name token groups. For other declared values nil is returned." (and (consp declared-value) (eq 'name-token-group (car declared-value)) (cadr declared-value))) (defun sgml-declared-value-notation (declared-value) "Return the list of notation names for the DECLARED-VALUE. This applies to notation declared value. For other declared values nil is returned." (and (consp declared-value) (eq 'NOTATION (car declared-value)) (cadr declared-value))) ;;; default-value representation = symbol | ((nil | 'fixed) attval) (defun sgml-make-default-value (type &optional attval) (if attval (list type attval) type)) (defun sgml-default-value-attval (default-value) "Return the actual default value of the declared DEFAULT-VALUE. The actual value is a string. Return nil if no actual value." (and (consp default-value) (cadr default-value))) (defun sgml-default-value-type-p (type default-value) "Return true if DEFAULT-VALUE is of TYPE. Where TYPE is a symbol, one of REQUIRED, IMPLIED, CONREF, or FIXED." (or (eq type default-value) (and (consp default-value) (eq type (car default-value))))) ;;; attspec representation = (symbol . string) (defun sgml-make-attspec (name attval) "Create an attspec from NAME and ATTVAL. Special case, if ATTVAL is nil this is an implied attribute." (cons name attval)) ;; sgml-attspec-name: attspec -> name (defun sgml-attspec-name (attspec) (car attspec)) ;; sgml-attspec-attval: attspec -> attval (defun sgml-attspec-attval (attspec) "Return the value of attribute specification ATTSPEC. If ATTSPEC is nil, nil is returned." (cdr attspec)) ;;; asl representaion = (attspec*) (defun sgml-lookup-attspec (name asl) (assoc name asl)) ;;;; Element content types ;; The content of an element is defined as ;; (125 declared content | 126 content model), ;; 125 declared content = "CDATA" | "RCDATA" | "EMPTY" ;; 126 content model = (127 model group | "ANY"), ;; (65 ps+, 138 exceptions)? ;; I represent a model group with the first state of a corresponding finite ;; automaton (this is a cons). Exceptions are handled separately. ;; The other content types are represented by symbols. (defsubst sgml-model-group-p (model) (consp model)) (defconst sgml-cdata 'CDATA) (defconst sgml-rcdata 'RCDATA) (defconst sgml-empty 'EMPTY) (defconst sgml-any 'ANY) ;;;; External identifier ;; extid = (pubid? sysid? dir) ;; Representation as (pubid sysid . dir) ;; where pubid = nil | string ;; sysid = nil | string ;; dir = string (defun sgml-make-extid (pubid sysid &optional pubid-ok) (and sgml-xml-p (not pubid-ok) pubid (not sysid) (sgml-error "XML requires a system ID after a public ID")) (cons pubid (cons sysid default-directory))) (defun sgml-extid-pubid (extid) (car extid)) (defun sgml-extid-sysid (extid) (if (consp (cdr extid)) (cadr extid) (cdr extid))) (defun sgml-extid-dir (extid) "Directory where EXTID was declared." (if (consp (cdr extid)) (cddr extid) nil)) (defun sgml-extid-expand (file extid) "Expand file name FILE in the context of EXTID." (let ((sgml-system-path (cons (sgml-extid-dir extid) sgml-system-path))) (or (sgml-extid-expand-2 file sgml-system-path) (expand-file-name file (sgml-extid-dir extid))))) (defun sgml-extid-expand-2 (file directories) (cond ((null directories) nil) (t (let ((f (expand-file-name file (car directories)))) (if (file-exists-p f) f (sgml-extid-expand-2 file (cdr directories))))))) ;;;; DTD ;; DTD = (doctype, eltypes, parameters, entities, shortmaps, ;; notations, dependencies, merged) ;; DTDsubset ~=~ DTD, but doctype is unused ;; ;; doctype = name ;; eltypes = oblist ;; parameters = entity* ;; entities = entity* ;; shortmaps = (name, shortmap)* ;; dependencies = file* ;; merged = Compiled-DTD? where Compiled-DTD = (file, DTD) (cl-defstruct (sgml-dtd (:type vector) (:constructor sgml-make-dtd (doctype))) doctype ; STRING, name of doctype (eltypes ; OBLIST, element types defined (sgml-make-eltype-table)) (parameters ; ALIST (sgml-make-entity-table)) (entities ; ALIST (sgml-make-entity-table)) (shortmaps ; ALIST (sgml-make-shortref-table)) (notations ; ?? nil) (dependencies ; LIST nil) (merged ; (file . DTD) nil) (undef-entities ; LIST of entity names nil)) ;;;; Element type objects ;; An element type object contains the information about an element type ;; obtained from parsing the DTD. ;; An element type object is represented by a symbol in a special oblist. ;; A table of element type objects is represented by a oblist. ;;; Element type objects (defsubst sgml-eltype-name (et) (symbol-name et)) (defsubst sgml-eltype-defined (et) (fboundp et)) (defsubst sgml-eltype-token (et) "Return a token for the element type." et) (defsubst sgml-token-eltype (token) "Return the element type corresponding to TOKEN." token) (defmacro sgml-prop-fields (&rest names) (cons 'progn (cl-loop for n in names collect `(defmacro ,(intern (format "sgml-eltype-%s" n)) (et) (list 'get et '',n))))) (sgml-prop-fields ;;flags ; optional tags and mixed ; (perhaps in value field) ;;model ; Content type ; (perhaps in function field) attlist ; List of defined attributes includes ; List of included elements excludes ; List of excluded elements shortmap ; Associated shortref map ; nil if none and 'empty if #empty ) (defmacro sgml-eltype-flags (et) `(symbol-value ,et)) (defun sgml-eltype-model (et) (declare (gv-setter fset)) (if (fboundp et) (symbol-function et) sgml-any)) (defun sgml-eltype-stag-optional (et) (declare (gv-setter (lambda (f) (list 'sgml-set-eltype-flag et 1 f)))) (= 1 (logand (sgml-eltype-flags et) 1))) (defun sgml-eltype-etag-optional (et) (declare (gv-setter (lambda (f) (list 'sgml-set-eltype-flag et 2 f)))) (/= 0 (logand 2 (sgml-eltype-flags et)))) (defsubst sgml-eltype-mixed (et) (declare (gv-setter (lambda (f) (list 'sgml-set-eltype-flag et 4 f)))) (< 3 (sgml-eltype-flags et))) (defun sgml-set-eltype-flag (et mask f) (setf (sgml-eltype-flags et) (logior (logand (if (boundp et) (sgml-eltype-flags et) 0) (lognot mask)) (if f mask 0)))) (defun sgml-maybe-put (sym prop val) (when val (put sym prop val))) ;; FIXME: These are somewhat redundant, since setf will automatically ;; use `put' for those by default anyway. (gv-define-setter sgml-eltype-includes (l et) (list 'sgml-maybe-put et ''includes l)) (gv-define-setter sgml-eltype-excludes (l et) (list 'sgml-maybe-put et ''excludes l)) (defmacro sgml-eltype-appdata (et prop) "Get application data from element type ET with name PROP. PROP should be a symbol, reserved names are: flags, model, attlist, includes, excludes, conref-regexp, mixed, stag-optional, etag-optional." `(get ,et ,prop)) (defun sgml-eltype-all-miscdata (et) (cl-loop for p on (symbol-plist et) by (function cddr) unless (memq (car p) '(model flags includes excludes)) nconc (list (car p) (cadr p)))) (defun sgml-eltype-set-all-miscdata (et miscdata) (setf (symbol-plist et) (nconc (symbol-plist et) miscdata))) (defun sgml-make-eltype (name) (let ((et (make-symbol name))) (setf (sgml-eltype-flags et) 0) et)) ;;; Element type tables (defun sgml-make-eltype-table () "Make an empty table of element types." (make-vector 73 0)) (defun sgml-eltype-table-empty (eltype-table) (cl-loop for x across eltype-table always (eq x 0))) (defun sgml-merge-eltypes (eltypes1 eltypes2) "Return the merge of two element type tables ELTYPES1 and ELTYPES2. This may change ELTYPES1, ELTYPES2 is unchanged. Returns the new table." (if (sgml-eltype-table-empty eltypes1) eltypes2 (progn (mapatoms (function (lambda (sym) (let ((et (intern (symbol-name sym) eltypes1))) (unless (fboundp et) ; not yet defined by (sgml-read-peek) sgml-max-single-octet-number) (+ (* (- (sgml-read-octet) (eval-when-compile (1+ sgml-max-single-octet-number))) 256) (sgml-read-octet) sgml-max-single-octet-number) (sgml-read-octet))) (defun sgml-read-sexp () (prog1 (let ((standard-input (current-buffer))) (read)) (skip-chars-forward " \t") (forward-char 1))) (defsubst sgml-read-token () (aref sgml-read-token-vector (sgml-read-number))) (defsubst sgml-read-node-ref () (aref sgml-read-nodes (sgml-read-octet))) (defun sgml-read-model-seq () (cl-loop repeat (sgml-read-number) collect (sgml-read-model))) (defun sgml-read-token-seq () (cl-loop repeat (sgml-read-number) collect (sgml-read-token))) (defun sgml-read-moves () (cl-loop repeat (sgml-read-number) collect (sgml-make-move (sgml-read-token) (sgml-read-node-ref)))) (defun sgml-read-model () (let* ((n (sgml-read-number)) (sgml-read-nodes (make-vector n nil))) (cl-loop for i below n do (aset sgml-read-nodes i (sgml-make-state))) (cl-loop for e across sgml-read-nodes do (cond ((eq 255 (sgml-read-peek)) ; a and-node (sgml-read-octet) ; skip (setf (sgml-and-node-next e) (sgml-read-node-ref)) (setf (sgml-and-node-dfas e) (sgml-read-model-seq))) (t ; a normal-state (setf (sgml-state-opts e) (sgml-read-moves)) (setf (sgml-state-reqs e) (sgml-read-moves))))) (aref sgml-read-nodes 0))) (defun sgml-read-content () (let ((c (sgml-read-octet))) (cond ((eq c 0) sgml-cdata) ((eq c 1) sgml-rcdata) ((eq c 2) sgml-empty) ((eq c 3) sgml-any) ((eq c 4) nil) ((eq c 128) (sgml-read-model))))) (defun sgml-read-decode-flag (flag mask) (not (zerop (logand flag mask)))) (defun sgml-read-element (et) (sgml-eltype-set-all-miscdata et (sgml-read-sexp)) (let ((flags (sgml-read-octet))) (unless (= flags 128) (setf (sgml-eltype-flags et) flags (sgml-eltype-model et) (sgml-read-content) (sgml-eltype-includes et) (sgml-read-token-seq) (sgml-eltype-excludes et) (sgml-read-token-seq))))) (defun sgml-read-dtd () "Decode the saved DTD in current buffer, return the DTD." (let ((gc-cons-threshold (max gc-cons-threshold 500000)) (file-version (sgml-read-sexp)) dtd) (cond ((equal file-version '(sgml-saved-dtd-version 7)) (setq dtd (sgml-bdtd-read-dtd))) ;; Something else (t (error "Unknown file format for saved DTD: %s" file-version))) dtd)) (defun sgml-load-dtd (file) "Load a saved DTD from FILE." (interactive (let ((tem (expand-file-name (or sgml-default-dtd-file (sgml-default-dtd-file))))) (list (read-file-name "Load DTD from: " (file-name-directory tem) tem t (file-name-nondirectory tem))))) (setq sgml-loaded-dtd nil) ; Allow reloading of DTD ;; Search for 'file' on the sgml-system-path [ndw] (let ((real-file (car (apply #'nconc (mapcar (lambda (dir) (let ((f (expand-file-name file dir))) (if (file-exists-p f) (list f)))) (cons "." sgml-system-path)))))) (or real-file (error "Saved DTD file %s not found" file)) (let ((cb (current-buffer)) (tem nil) (dtd nil) (l (buffer-list))) ;; Search loaded buffer for a already loaded DTD (while (and l (null tem)) (set-buffer (car l)) (if (equal sgml-loaded-dtd real-file) (setq tem (current-buffer))) (setq l (cdr l))) (cond (tem ; loaded DTD found (setq dtd (sgml-pstate-dtd sgml-buffer-parse-state))) (t ; load DTD from file (set-buffer cb) (sgml-push-to-entity real-file) (message "Loading DTD from %s..." real-file) (setq dtd (sgml-read-dtd)) (message "Loading DTD from %s...done" real-file) (sgml-pop-entity))) (set-buffer cb) (sgml-set-initial-state dtd) (setq sgml-default-dtd-file file) (setq sgml-loaded-dtd real-file)))) ;;;; Binary coded DTD module ;; Works on the binary coded compiled DTD (bdtd) ;; bdtd-load: cfile dtdfile ents -> bdtd ;; bdtd-merge: bdtd dtd -> dtd? ;; bdtd-read-dtd: bdtd -> dtd ;; Implement by letting bdtd be implicitly the current buffer and ;; dtd implicit in sgml-dtd-info. (defun sgml-bdtd-load (cfile dtdfile ents) "Load the compiled dtd from CFILE into the current buffer. If this file does not exist, is of an old version or out of date, a new compiled dtd will be created from file DTDFILE and parameter entity settings in ENTS." ;;(Assume the current buffer is a scratch buffer and is empty) (sgml-debug "Trying to load compiled DTD from %s..." cfile) (sgml-set-buffer-multibyte nil) (or (and (file-readable-p cfile) (let ((coding-system-for-read 'binary)) ;; fifth arg to insert-file-contents is not available in early ;; v19. (insert-file-contents cfile nil nil nil)) (equal '(sgml-saved-dtd-version 7) (sgml-read-sexp)) (or (sgml-up-to-date-p cfile (sgml-read-sexp)) (if (eq 'ask sgml-recompile-out-of-date-cdtd) (not (y-or-n-p "Compiled DTD is out of date, recompile? ")) (not sgml-recompile-out-of-date-cdtd)))) (sgml-compile-dtd dtdfile cfile ents))) (defun sgml-up-to-date-p (file dependencies) "Check if FILE is newer than all files in the list DEPENDENCIES. If DEPENDENCIES contains the symbol t, FILE is not considered newer." (if (memq t dependencies) nil (cl-loop for f in dependencies always (file-newer-than-file-p file f)))) (defun sgml-compile-dtd (dtd-file to-file ents) "Construct a binary code compiled dtd from DTD-FILE and write it to TO-FILE. The dtd will be constructed with the parameter entities set according to ENTS. The bdtd will be left in the current buffer. The current buffer is assumed to be empty to start with." (message "Recompiling DTD file %s..." dtd-file) (let* ((sgml-dtd-info (sgml-make-dtd nil)) (parameters (sgml-dtd-parameters sgml-dtd-info)) (sgml-parsing-dtd t)) (push dtd-file (sgml-dtd-dependencies sgml-dtd-info)) (cl-loop for (name . val) in ents do (sgml-entity-declare name parameters 'text val)) (sgml-push-to-entity dtd-file) (sgml-check-dtd-subset) (sgml-debug "sgml-compile-dtd: poping entity") (sgml-pop-entity) (erase-buffer) (sgml-write-dtd sgml-dtd-info to-file) t)) (defun sgml-check-entities (params1 params2) "Check that PARAMS1 is compatible with PARAMS2." (cl-block check-entities (sgml-map-entities (function (lambda (entity) (let ((other (sgml-lookup-entity (sgml-entity-name entity) params2))) (unless (or (null other) (equal entity other)) (message "Parameter %s in compiled DTD has wrong value;\ is '%s' should be '%s'" (sgml-entity-name entity) (sgml-entity-text other) (sgml-entity-text entity)) (cl-return-from check-entities nil))))) params1) t)) (defun sgml-bdtd-merge () "Merge the binary coded dtd in the current buffer with the current dtd. The current dtd is the variable `sgml-dtd-info'. Return t if the merge was successful or nil if failed." (goto-char (point-min)) (sgml-read-sexp) ; skip filev (let ((dependencies (sgml-read-sexp)) (parameters (sgml-read-sexp)) (gc-cons-threshold (max gc-cons-threshold 500000)) temp) ;; Check compatibility of parameters (and (sgml-check-entities (sgml-dtd-parameters sgml-dtd-info) parameters) (progn ;; Do the merger (sgml-message "Reading compiled DTD...") (sgml-merge-entity-tables (sgml-dtd-parameters sgml-dtd-info) parameters) (setf (sgml-dtd-dependencies sgml-dtd-info) (nconc (sgml-dtd-dependencies sgml-dtd-info) dependencies)) ;; Doctype (setq temp (sgml-read-sexp)) (when (and temp (null (sgml-dtd-doctype sgml-dtd-info))) (setf (sgml-dtd-doctype sgml-dtd-info) temp)) ;; Element type names -- read and create token vector (setq temp (sgml-read-number)) ; # eltypes (setq sgml-read-token-vector (make-vector (1+ temp) nil)) (aset sgml-read-token-vector 0 sgml-pcdata-token) (cl-loop for i from 1 to temp do (aset sgml-read-token-vector i (sgml-lookup-eltype (sgml-read-sexp)))) ;; Element type descriptions (cl-loop for i from 1 to (sgml-read-number) do (sgml-read-element (aref sgml-read-token-vector i))) (sgml-merge-entity-tables (sgml-dtd-entities sgml-dtd-info) (sgml-read-sexp)) (sgml-merge-shortmaps (sgml-dtd-shortmaps sgml-dtd-info) (sgml-read-sexp)) (setf (sgml-dtd-notations sgml-dtd-info) (sgml-read-sexp)) t)))) (defun sgml-bdtd-read-dtd () "Create and return a dtd from the binary coded dtd in the current buffer." (let ((sgml-dtd-info (sgml-make-dtd nil))) (sgml-bdtd-merge) sgml-dtd-info)) ;;;; Set markup type (defsubst sgml-set-markup-type (type) "Set the type of the markup parsed to TYPE. The markup starts at position given by variable `sgml-markup-start' and ends at point." (when (and sgml-set-face (null sgml-current-eref)) (sgml-set-face-for sgml-markup-start (point) type)) (setq sgml-markup-type type)) ;;;; Parsing delimiters (eval-and-compile (defvar sgml-delimiters '("AND" "&" "COM" "--" "CRO" "&#" "DSC" "]" "DSO" "[" "DTGC" "]" "DTGO" "[" "ERO" "&" "ETAGO" "" "MDO" "" "PIO" "" "VI" "=" ;; Some combinations "MS-START" "" ; MSC MDC ;; XML stuff "XML-ECOM" "-->" ; end an XML comment "XML-PIC" "?>" ; end an XML processing instruction "XML-SCOM" "