;;; parseclj-parser.el --- Clojure/EDN parser -*- lexical-binding: t; -*- ;; Copyright (C) 2017-2018 Arne Brasseur ;; Author: Arne Brasseur ;; Keywords: lisp ;; Package-Requires: ((emacs "25") (a "0.1.0alpha4")) ;; Version: 0.1.0 ;; This file is not part of GNU Emacs. ;; This file is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; This file is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ;; Boston, MA 02110-1301, USA. ;;; Commentary: ;; A shift/reduce parser for Clojure source. ;;; Code: (require 'cl-lib) (require 'subr-x) (require 'a) (require 'parseclj-lex) (define-error 'parseclj-parser-error "parseclj: Syntax error") (defun parseclj--error (format &rest args) "Signal a parse error. Takes a FORMAT string and optional ARGS to be passed to `format-message'. Signals a 'parseclj-parser-error signal, which can be handled with `condition-case'." (signal 'parseclj-parser-error (list (apply #'format-message format args)))) (defun parseclj--find-opening-token (stack closing-token) "Scan STACK for an opening-token matching CLOSING-TOKEN." (cl-case (parseclj-lex-token-type closing-token) (:rparen (parseclj-lex-token-type (seq-find (lambda (token) (member (parseclj-lex-token-type token) '(:lparen :lambda))) stack))) (:rbracket :lbracket) (:rbrace (parseclj-lex-token-type (seq-find (lambda (token) (member (parseclj-lex-token-type token) '(:lbrace :set))) stack))))) (defun parseclj--reduce-coll (stack closing-token reduce-branch options) "Reduce collection based on the top of the STACK and a CLOSING-TOKEN. REDUCE-BRANCH is a function to be applied to the collection of tokens found from the top of the stack until an opening token that matches CLOSING-TOKEN. This function should return an AST token representing such collection. OPTIONS is an association list. This list is also passed down to the REDUCE-BRANCH function. See `parseclj-parser' for more information on available options." (let ((opening-token-type (parseclj--find-opening-token stack closing-token)) (fail-fast (a-get options :fail-fast t)) (collection nil)) (if (not opening-token-type) (if fail-fast (parseclj--error "At position %s, unmatched %S" (a-get closing-token :pos) (parseclj-lex-token-type closing-token)) stack) (progn ;; unwind the stack until opening-token-type is found, adding to collection (while (and stack (not (eq (parseclj-lex-token-type (car stack)) opening-token-type))) (push (pop stack) collection)) ;; did we find the right token? (if (eq (parseclj-lex-token-type (car stack)) opening-token-type) (progn (when fail-fast ;; any unreduced tokens left: bail early (when-let ((token (seq-find #'parseclj-lex-token-p collection))) (parseclj--error "At position %s, unmatched %S" (a-get token :pos) (parseclj-lex-token-type token)))) ;; all good, call the reducer so it can return an updated stack with a ;; new node at the top. (let ((opening-token (pop stack))) (funcall reduce-branch stack opening-token collection options))) ;; Unwound the stack without finding a matching paren: either bail early ;; or return the original stack and continue parsing (if fail-fast (parseclj--error "At position %s, unmatched %S" (a-get closing-token :pos) (parseclj-lex-token-type closing-token)) (reverse collection))))))) (defun parseclj--take-value (stack value-p) "Scan STACK until a value is found. Return everything up to the value in reversed order (meaning the value comes first in the result). STACK is the current parse stack to scan. VALUE-P a predicate to distinguish reduced values from non-values (tokens and whitespace)." (let ((result nil)) (cl-block nil (while stack (cond ((parseclj-lex-token-p (car stack)) (cl-return nil)) ((funcall value-p (car stack)) (cl-return (cons (car stack) result))) (t (push (pop stack) result))))))) (defun parseclj--take-token (stack value-p token-types) "Scan STACK until a token of a certain type is found. Returns nil if a value is encountered before a matching token is found. Return everything up to the token in reversed order (meaning the token comes first in the result). STACK is the current parse stack to scan. VALUE-P a predicate to distinguish reduced values from non-values (tokens and whitespace). TOKEN-TYPES are the token types to look for." (let ((result nil)) (cl-block nil (while stack (cond ((member (parseclj-lex-token-type (car stack)) token-types) (cl-return (cons (car stack) result))) ((funcall value-p (car stack)) (cl-return nil)) ((parseclj-lex-token-p (car stack)) (cl-return nil)) (t (push (pop stack) result))))))) (defun parseclj-single-value-p (stack value-p) "Return t if STACK only has a single node for which VALUE-P is true. This checks if the stack contains a single, fully reduced value, and no dangling unmatched tokens. When parsing with `:read-one' this indicates a form can be returned." (and (not (cl-reduce (lambda (bool node) (or bool (parseclj-lex-token-p node))) stack :initial-value nil)) (parseclj--take-value stack value-p))) (defun parseclj-parser (reduce-leaf reduce-branch &optional options) "Clojure/EDN stack-based shift-reduce parser. REDUCE-LEAF does reductions for leaf nodes. It is a function that takes the current value of the stack and a token, and either returns an updated stack, with a new leaf node at the top (front), or returns the stack unmodified. REDUCE-BRANCH does reductions for branch nodes. It is a function that takes the current value of the stack, the type of branch node to create, and a list of child nodes, and returns an updated stack, with the new node at the top (front). What \"node\" means in this case is up to the reducing functions, it could be AST nodes (as in the case of `parseclj-parser-clojure'), or plain values/sexps (as in the case of `parseedn-read'), or something else. The only requirement is that they should not put raw tokens back on the stack, as the parser relies on the presence or absence of these to detect parse errors. OPTIONS is an association list which is passed on to the reducing functions. Additionally the following options are recognized - `:fail-fast' Raise an error when a parse error is encountered, rather than continuing with a partial result. - `:value-p' A predicate function to differentiate values from tokens and whitespace. This is needed when scanning the stack to see if any reductions can be performed. By default anything that isn't a token is considered a value. This can be problematic when parsing with `:lexical-preservation', and which case you should provide an implementation that also returns falsy for :whitespace, :comment, and :discard AST nodes. - `:tag-readers' An association list that describes tag handler functions for any possible tag. This options in only available in `parseedn-read', for more information, please refer to its documentation. - `:read-one' Return as soon as a single complete value has been read." (let ((fail-fast (a-get options :fail-fast t)) (read-one (a-get options :read-one)) (value-p (a-get options :value-p (lambda (e) (not (parseclj-lex-token-p e))))) (stack nil) (token (parseclj-lex-next))) (while (not (or (and read-one (parseclj-single-value-p stack value-p)) (eq (parseclj-lex-token-type token) :eof))) ;; (message "STACK: %S" stack) ;; (message "TOKEN: %S\n" token) (when (and fail-fast (parseclj-lex-error-p token)) (parseclj--error "Invalid token at %s: %S" (a-get token :pos) (parseclj-lex-token-form token))) ;; Reduce based on the top item on the stack (collections) (cond ((parseclj-lex-leaf-token-p token) (setf stack (funcall reduce-leaf stack token options))) ((parseclj-lex-closing-token-p token) (setf stack (parseclj--reduce-coll stack token reduce-branch options))) (t (push token stack))) ;; Reduce based on top two items on the stack (special prefixed elements) (let* ((top-value (parseclj--take-value stack value-p)) (opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens)) new-stack) (while (and top-value opening-token) ;; (message "Reducing...") ;; (message " - STACK %S" stack) ;; (message " - OPENING-TOKEN %S" opening-token) ;; (message " - TOP-VALUE %S" top-value) (setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack)) (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options)) ;; recur (setq top-value (parseclj--take-value stack value-p)) (setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens)))) ;; Reduce based on top three items on the stack (metadata, namespaced maps) (let* ((top-value-1 (parseclj--take-value stack value-p)) (top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p)) (opening-token (parseclj--take-token (nthcdr (+ (length top-value-1) (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens)) new-stack) (while (and top-value-1 top-value-2 opening-token) (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack)) (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options)) ;; recur (setq top-value-1 (parseclj--take-value stack value-p)) (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p)) (setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1) (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens)))) (setq token (parseclj-lex-next))) ;; reduce root (when fail-fast (when-let ((token (seq-find #'parseclj-lex-token-p stack))) (parseclj--error "At position %s, unmatched %S" (a-get token :pos) (parseclj-lex-token-type token)))) (if read-one (car (parseclj--take-value stack value-p)) (car (funcall reduce-branch nil (parseclj-lex-token :root "" 1) (reverse stack) options))))) (provide 'parseclj-parser) ;;; parseclj-parser.el ends here