/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: jan@swi.psy.uva.nl WWW: http://www.swi-prolog.org Copyright (C): 1985-2004, University of Amsterdam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA As a special exception, if you link this library with other files, compiled with a Free Software compiler, to produce an executable, this library does not by itself cause the resulting executable to be covered by the GNU General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU General Public License. */ :- module(serql, [ serql_query/2, % +Query, -Result serql_query/3, % +Query, -Result, +Options serql_compile/3, % +Query, -Compiled, +Options serql_run/2 % +Compiled, -Reply ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdf_optimise)). :- use_module(library(lists)). :- use_module(library(option)). :- use_module(library(debug)). :- use_module(library(settings)). :- use_module(rdfql_util). :- include(entailment(load)). :- meta_predicate select_results(+,-,0). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - A Prolog path expression is a conjunction of rdf/3 statements. Parts may be wrapped in opt/1 to indicate they are optional and nodes may be of the form set(List) to indicate a conjunction of distinct values. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ %% serql_query(+Query, -Reply, +Module) % % Where Query is either a SeRQL query text or a parsed query. % Reply is, similar to the ODBC interface a term of the form % row(Col1, Col2, ...) for SELECT statements or a term rdf(S,P,O). % The predicate is non-deterministic, returning the rows or RDF % statements one-by-one. serql_query(Query, Result) :- serql_query(Query, Result, [ entailment(rdf) ]). serql_query(Query, Result, Options) :- serql_compile(Query, Compiled, Options), serql_run(Compiled, Result). %% serql_compile(+Query, -Compiled, +Options) % % Compile a SeRQL query, returning the result in Compiled. Options: % % * entailment(Entailment) % Entailment module to use. % % * type(-Type) % Return one of select(VarNames) or construct serql_compile(Text, Compiled, Options) :- atom(Options), Options \== [], !, % compatibility serql_compile(Text, Compiled, [entailment(Options)]). serql_compile(Text, serql_query(Goal, ReplyTempl, Module), Options) :- serql_parse(Text, Query), compile(Query, Goal, ReplyTempl, Module, Options). compile(select(Row0, VarNames, Path, Where, Distinct, Limit, Offset), select(Final, Solutions), Row, Module, Options) :- option(entailment(Entailment), Options, rdfs), entailment_module(Entailment, Module), mk_solutions(Distinct, Limit, Offset, Solutions), set_type(select(VarNames), Options), where_constraints(Where, Annotations), serql_compile_path(Path, select, Goal), remove_annotations(Annotations, where), projection_functions(Row0, Row, Select), ( setting(cliopatria:optimise_query, Def), option(optimise(Opt), Options, Def), Opt == true -> rdf_optimise((Goal,Where,Select), Optimised) ; Optimised = (Goal,Where,Select) ), serql_select_bind_null(Optimised, Final), debug(serql(compiled), '~@', [ portray_clause((q(Row) :- Final)) ]). compile(construct(RPath, Path, Where, Distinct, Limit, Offset), construct(Final, Solutions), RDF, Module, Options) :- option(entailment(Entailment), Options, rdfs), entailment_module(Entailment, Module), mk_solutions(Distinct, Limit, Offset, Solutions), set_type(construct, Options), where_constraints(Where, Annotations), serql_compile_path(Path, construct, Goal), remove_annotations(Annotations, where), statements(RPath, Statements), entailment_module(Entailment, Module), ( setting(cliopatria:optimise_query, Def), option(optimise(Opt), Options, Def), Opt == true -> rdf_optimise((Goal,Where), Optimised) ; Optimised = (Goal,Where) ), Final = (Optimised, serql_member_statement(RDF, Statements)), debug(serql(compiled), '~@', [ portray_clause((q(RDF) :- Final)) ]). %% mk_solutions(+Distinct, +Limit, +Offset, -Term) % % Create a solutions-selecting term compatible to SPARQL. mk_solutions(distinct, Limit, Offset, distinct(solutions(unsorted, Limit, Offset))) :- !. mk_solutions(_, Limit, Offset, solutions(unsorted, Limit, Offset)). %% set_type(+Type, +Options) % % Fill option type(X) set_type(Type, Options) :- memberchk(type(T), Options), !, ( T = Type -> true ; functor(T, Expected, _), functor(Type, Found, _), throw(error(type_error(query_type(Expected), Found), _)) ). set_type(_, _). %% serql_run(+Term, -Result) serql_run(serql_query(Parsed, Reply, Module), Reply) :- serql_run(Parsed, Reply, Module). serql_run(select(Goal, Solutions), Reply, Module) :- select_results(Solutions, Reply, Module:Goal). serql_run(construct(Goal, Solutions), Reply, Module) :- select_results(Solutions, Reply, Module:Goal). %% select_results(+Spec, -Reply, :Goal) % % Apply ordering and limits on result-set. select_results(distinct(solutions(Order, Limit, Offset)), Reply, Goal) :- !, select_results(distinct, Offset, Limit, Order, Reply, Goal). select_results(solutions(Order, Limit, Offset), Reply, Goal) :- select_results(all, Offset, Limit, Order, Reply, Goal). /******************************* * COMPILER * *******************************/ %% serql_compile_path(+PathExpr, +Type, -PrologGoal) % % Compile a Serql path expression into a plain Prolog goal. Type % is one of 'select' or 'construct'. serql_compile_path(rdf(S,P,O), Type, Conj) :- set(S, Set), !, make_set_subj_conj(Set, [], P, O, Type, Conj). serql_compile_path(rdf(S,P,O), Type, Conj) :- set(O, Set), !, make_set_obj_conj(Set, [], S, P, Type, Conj). serql_compile_path(rdf(S0, P, O), Type, Goal) :- reified(S0, S, GS), !, serql_compile_path(rdf(S, P, O), Type, G0), Goal = (G0, GS). serql_compile_path(rdf(S, P, O0), Type, Goal) :- reified(O0, O, GS), !, serql_compile_path(rdf(S, P, O), Type, G0), Goal = (G0, GS). serql_compile_path((A0,B0), Type, (A,B)) :- !, serql_compile_path(A0, Type, A), serql_compile_path(B0, Type, B). serql_compile_path(optional(Id, A0), construct, (A *-> Id=true ; Id=false)) :- !, serql_compile_path(A0, construct, A). serql_compile_path(optional(_, A0), select, (A *-> true ; true)) :- !, serql_compile_path(A0, select, A). serql_compile_path(rdf(S,P,O0), _, Goal) :- !, resource_annotations(S, GS), resource_annotations(P, GP), object_annotations(O0, O, GO), clean_conj((GS, GP, rdf(S,P,O), GO), Goal). serql_compile_path(G, _, G). reified(0, _, _) :- % catch variables !, fail. reified(rdf(S,P,O), StatementId, ( rdf(StatementId, Type, Statement), rdf(StatementId, Subject, S), rdf(StatementId, Predicate, P), rdf(StatementId, Object, O) )) :- rdf_equal(Type, rdf:type), rdf_equal(Subject, rdf:subject), rdf_equal(Predicate, rdf:predicate), rdf_equal(Object, rdf:object), rdf_equal(Statement, rdf:'Statement'). make_set_subj_conj([], _, _, _, _, true). % should not happen make_set_subj_conj([Last], [], P, O, Type, Goal) :- !, serql_compile_path(rdf(Last, P, O), Type, Goal). make_set_subj_conj([Last], Diff, P, O, Type, (Goal, Diffs)) :- !, serql_compile_path(rdf(Last, P, O), Type, Goal), make_diff(Diff, Last, Diffs). make_set_subj_conj([H|T], Diff, P, O, Type, (Goal, Diffs, More)) :- !, serql_compile_path(rdf(H, P, O), Type, Goal), make_diff(Diff, H, Diffs), make_set_subj_conj(T, [H|Diff], P, O, Type, More). make_set_obj_conj([], _, _, _, _, true). % should not happen make_set_obj_conj([Last], [], S, P, Type, Goal) :- !, serql_compile_path(rdf(S, P, Last), Type, Goal). make_set_obj_conj([Last], Diff, S, P, Type, (Goal, Diffs)) :- !, serql_compile_path(rdf(S, P, Last), Type, Goal), make_diff(Diff, Last, Diffs). make_set_obj_conj([H|T], Diff, S, P, Type, (Goal, Diffs, More)) :- !, serql_compile_path(rdf(S, P, H), Type, Goal), make_diff(Diff, H, Diffs), make_set_obj_conj(T, [H|Diff], S, P, Type, More). make_diff([], _, true). make_diff([Last], To, (Last \== To)) :- !. make_diff([H|T], To, (H \== To, More)) :- make_diff(T, To, More). %% statements(+Graph, -ListOfTriples) % % Extract a plain list of triples from an CONSTRUCT % path-expression. Optional parts of the tree are represented as %% optional(Bool, ListOfTriples). Using CONSTRUCT * (i.e. when the % executed path is the result path) the goal generated by the % compiler will unify Bool with true or false. See also % member_statement/2. statements(Graph, Statements) :- phrase(statements(Graph), Statements). statements(rdf(S,P,O)) --> { set(S, Set) }, !, subj_statements(Set, P, O). statements(rdf(S,P0,O)) --> !, { nonvar(P0), map_builtin(P0, P) -> true ; P = P0 }, [ rdf(S,P,O) ]. statements((A,B)) --> !, statements(A), statements(B). statements(optional(Id, A)) --> !, { phrase(statements(A), OptionalStatements) }, [ optional(Id, OptionalStatements) ]. statements(_) --> []. term_expansion(map_builtin(B0, P0), map_builtin(B, P)) :- rdf_global_id(B0, B), rdf_global_id(P0, P). map_builtin(serql:directSubClassOf, rdfs:subClassOf). map_builtin(serql:directSubPropertyOf, rdfs:subPropertyOf). map_builtin(serql:directType, rdf:type). subj_statements([], _, _) --> []. subj_statements([H|T], P, O) --> ( { set(O, Set) } -> obj_statements(Set, H, P) ; [ rdf(H, P, O) ] ), subj_statements(T, P, O). obj_statements([], _, _) --> []. obj_statements([H|T], S, P) --> [ rdf(S, P, H) ], obj_statements(T, S, P). set(Node, Set) :- nonvar(Node), Node = set(Set). /******************************* * SELECT FUNCTIONS * *******************************/ projection_functions(Row0, Row, Map) :- functor(Row0, Functor, Arity), functor(Row, Functor, Arity), projection_functions(0, Arity, Row0, Row, true, Map). projection_functions(Arity, Arity, _, _, Map, Map) :- !. projection_functions(I0, Arity, Row0, Row, Map0, Map) :- I is I0 + 1, arg(I, Row0, A0), ( var(A0) -> arg(I, Row, A0), projection_functions(I, Arity, Row0, Row, Map0, Map) ; arg(I, Row, A), add_conj(Map0, serql_eval(A0, A), Map1), projection_functions(I, Arity, Row0, Row, Map1, Map) ). add_conj(true, X, X) :- !. add_conj(C0, G, (C0,G)). /******************************* * WHERE CONSTRAINTS * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The idea of this step is to turn where clauses into constraints on variables. Supported annotations (in standard order of terms): any literal resource eq(Value) like(Pattern) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ %% where_constraints(+Goal, -Annotations) % % Each annotation is either a plain annotation or a term % or(ListOfAlternatives). The latter is used if different paths % through the control-structure yields different annotations. where_constraints(Goal, Annotations) :- bagof(Annot, where_constraint_list(Goal, Annot), AltAnnots), sort_lol(AltAnnots, AltAnnots1), join_alt_annots(AltAnnots1, Annotations). %% where_constraint_list(+Goal, -Annotations) % % Interpret Goal, making annotations on the variables. % Backtracking yields alternative annotations due to choicepoints % in Goal. where_constraint_list(Goal, Annotations) :- where_constraints(Goal, AttrVars, []), attrs_to_terms(AttrVars, Annotations). where_constraints((A,B)) --> !, where_constraints(A), where_constraints(B). where_constraints((A;B)) --> !, ( where_constraints(A) ; where_constraints(B) ). where_constraints(serql_compare(like, Var, Pattern)) --> !, constrain(Var, like(Pattern)). where_constraints(serql_compare(=, Var, Value)) --> !, constrain(Var, eq(Value)). where_constraints(rdf_is_literal(V)) --> !, constrain(V, literal). where_constraints(rdf_is_resource(V)) --> !, constrain(V, resource). where_constraints(rdf(S,P,_)) --> !, constrain(S, resource), constrain(P, resource). where_constraints(_) --> []. constrain(Var, Cond) --> { var(Var) }, !, ( { get_attr(Var, where, C0) } -> { put_attr(Var, where, (Cond, C0)) }, [] ; { put_attr(Var, where, Cond) }, [ Var ] ). constrain(label(X), Cond) --> !, constrain(X, (literal, Cond)). constrain(lang(X), Cond) --> !, constrain(X, (literal, Cond)). constrain(datatype(X), Cond) --> !, constrain(X, (literal, Cond)). constrain(_, _) --> []. %% join_alt_annots(+ListOfAnnotLists, -AnnotatedVars) % % ListOfAnnotLists is a list of alternative annotations due to % choicepoints. Each annotation list represents annotations in the % form Var = Annotation. AnnotatedVars is a list of variables with % attributes representing their annotations. join_alt_annots(LoL, Annotated) :- smallest_var(LoL, Var), !, var_annotations(Var, LoL, LoL1, Annotations0), sort(Annotations0, Annotations), % remove duplicates ( empty_annotations(Annotations) -> join_alt_annots(LoL1, Annotated) ; put_annotations(Annotations, Var), Annotated = [Var|T], join_alt_annots(LoL1, T) ). join_alt_annots(LoL, []) :- assertion(maplist(=([]), LoL)). %% normalise_annotation(+A0, -A) % % Create a normalised version of an annotation for easy % processing. Currently only deals with annotations that are a % conjunction. normalise_annotation(A0, A) :- conj_to_list(A0, L0, []), sort(L0, L), list_do_conj(L, A). conj_to_list((A,B)) --> !, conj_to_list(A), conj_to_list(B). conj_to_list(A) --> [A]. list_do_conj([], any). list_do_conj([H], H) :- !. list_do_conj([H|T0], (H,T)) :- list_do_conj(T0, T). %% empty_annotations(+List) % % True if there is no sensible conclusion we can draw using the % annotations found. This is often the case if multiple paths in a % disjunction do not deal with all variables. Note that this is % not necessarily the end of the story. We could rewrite % % A,(C1;C2) into (A,C1);(A,C2) % % And apply optimisation on both branches. empty_annotations([]) :- !. empty_annotations(List) :- memberchk(any, List). put_annotations([], _). put_annotations([One], Var) :- !, put_attr(Var, where, One). put_annotations(More, Var) :- put_attr(Var, where, or(More)). %% smallest_var(+ListOfList, -Smallest) % % Get the smallest (in standard order of terms) annotated % variable. smallest_var([[S0=_|_]|T], Smallest) :- smallest_var(T, S0, Smallest). smallest_var([[]|T], Smallest) :- smallest_var(T, Smallest). smallest_var([], S, S). smallest_var([[S1=_|_]|T], S0, S) :- !, smallest(S1, S0, S2), smallest_var(T, S2, S). smallest_var([[]|T], S0, S) :- smallest_var(T, S0, S). smallest(A, B, S) :- ( A @< B -> S = A ; S = B ). %% var_annotations(+Var, +LoL0, -LoL, -Annotations) % % Get all Annotation for Var. Note that the annotation is either % the head of the list or not in the list. var_annotations(_, [], [], []) :- !. var_annotations(Var, [[Var=A|TA0]|TL0], LoL, [A|TA]) :- !, ( TA0 == [] -> LoL = TL ; LoL = [TA0|TL] ), var_annotations(Var, TL0, TL, TA). var_annotations(Var, [A0|TL0], [A0|TL], [any|A]) :- var_annotations(Var, TL0, TL, A). where:attr_unify_hook(_,_) :- fail. where:attr_portray_hook(Val, _Var) :- print(Val). %% attrs_to_terms(AttrsVars, List) % % Convert X{where=A} into X=A terms. Without this we cannot use % bagof/3 and maintain the variables. Not sure this is a bug in % bagof or not. attrs_to_terms([], []). attrs_to_terms([H|T0], [H=A|T]) :- get_attr(H, where, A0), del_attr(H, where), normalise_annotation(A0, A), attrs_to_terms(T0, T). %% sort_lol(+ListOfList, ListOfSortedLists) sort_lol([], []). sort_lol([H0|T0], [H|T]) :- sort(H0, H), sort_lol(T0, T). %% remove_annotations(+List, +Attr) remove_annotations([], _). remove_annotations([H|T], A) :- del_attr(H, A), remove_annotations(T, A). %% object_annotations(+In, -Out, -Goal) object_annotations(O0, O, G) :- get_attr(O0, where, Annotations), object_annot(Annotations, O0, O, G), !. object_annotations(O, O, true). object_annot((literal, like(Pattern)), O, literal(like(Pattern), L), O = literal(L)). %% resource_annotations(R, G) resource_annotations(R, Goal) :- get_attr(R, where, Annotations), resource_annot(Annotations, R, Goal), !. resource_annotations(_, true). resource_annot(eq(R1), R, true) :- % where A = B var(R1), !, del_attr(R, where), R = R1. resource_annot(eq(query(String)), R, true) :- !, del_attr(R, where), R = String. resource_annot(or(List), R, Goal) :- eq_list(List, Resources), !, Goal = member(R, Resources). eq_list([], []). eq_list([eq(query(R))|T0], [R|T]) :- eq_list(T0, T). %% clean_conj(+Goal0, -Goal) % % Remove redundant true statements from a conjunction clean_conj((true, G0), G) :- !, clean_conj(G0, G). clean_conj((G0, true), G) :- !, clean_conj(G0, G). clean_conj(G, G). /******************************* * PARSER * *******************************/ %% serql_parse(+Input, -ParseTree) % % Parse the SeRQL statement Input into a Prolog representation. serql_parse(Codes, Query) :- is_list(Codes), !, ( phrase(tokens(Tokens), Codes), phrase(query(Query0, NameSpaces), Tokens), expand_vars(Query0, Query1), expand_uris(Query1, NameSpaces, Query) -> true ; syntax_error(unknown) ). serql_parse(Atomic, Query) :- atomic(Atomic), !, atom_codes(Atomic, Codes), serql_parse(Codes, Query). serql_parse(Input, _) :- throw(error(type_error(text, Input), _)). /******************************* * ERRORS * *******************************/ syntax_error(What) :- throw(error(syntax_error(What), context(_, 'in SeRQL query'))). /******************************* * NAMESPACES * *******************************/ expand_uris(Var, _, Var) :- var(Var), !. expand_uris(uri(URI), _, URI) :- !. % <!foo:bar> expand_uris(uri(NS, URI0), Map, URI) :- !, % foo:bar ( memberchk(NS=Prefix, Map) -> true ; ns(NS, Prefix) -> true ; throw(error(existence_error(namespace, NS), _)) ), atom_concat(Prefix, URI0, URI). expand_uris(old_uri(NS, URI0), Map, URI) :- !, % <foo:bar> ( ( memberchk(NS=Prefix, Map) ; ns(NS, Prefix) ) -> atom_concat(Prefix, URI0, URI) ; concat_atom([NS, :, URI0], URI) ). expand_uris(Q0, Map, Q) :- compound(Q0), !, functor(Q0, Name, Arity), functor(Q, Name, Arity), expand_uris(0, Arity, Q0, Map, Q). expand_uris(Q, _, Q). expand_uris(Arity, Arity, _, _, _) :- !. expand_uris(I0, Arity, Q0, Map, Q) :- I is I0 + 1, arg(I, Q0, A0), arg(I, Q, A), expand_uris(A0, Map, A), expand_uris(I, Arity, Q0, Map, Q). %% ns(?Id, ?URI) % % Translate between namespace id and URI. If the flag % rdf_db_namespaces is true, we share the namespace declarations % with the SeRQL store. ns(NS, URI) :- setting(cliopatria:rdf_db_namespaces, true), !, rdf_db:ns(NS, URI). ns(NS, URI) :- serql_ns(NS, URI). serql_ns(rdf, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'). serql_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#'). serql_ns(owl, 'http://www.w3.org/2002/7/owl#'). serql_ns(xsd, 'http://www.w3.org/2001/XMLSchema#'). % Wrong in SeRQL docs! serql_ns(serql,'http://rdf4j.org/schema/serql#'). /******************************* * VARIABLES * *******************************/ % TBD: Check that projection variables actually appear in the % query! expand_vars(select(*, Path0, Where0, Distinct, Limit, Offset), select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :- !, var_names(Path0-Where0, Path-Where, VarNames), vars(VarNames, Vars, Names), Row =.. [row | Vars], VNames =.. [names|Names]. expand_vars(select(Projection, Path0, Where0, Distinct, Limit, Offset), select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :- !, var_names(x(Projection,Path0,Where0), x(Vars,Path,Where), _VarNames), Row =.. [row | Vars], proj_names(Projection, Names), VNames =.. [names|Names]. expand_vars(construct(*, Path0, Where0, Distinct, Limit, Offset), construct(Path, Path, Where, Distinct, Limit, Offset)) :- !, var_names(x(Path0,Where0), x(Path,Where), _VarNames). expand_vars(construct(Ret0, Path0, Where0, Distinct, Limit, Offset), construct(Ret, Path, Where, Distinct, Limit, Offset)) :- !, var_names(x(Ret0,Path0,Where0), x(Ret,Path,Where), _VarNames). var_names(var(-(V)), V, _) :- !. % bnodes, mapped from {} var_names(var(Name), Var, Map) :- member(Name=Var, Map), !. var_names(Q0, Q, Map) :- compound(Q0), !, functor(Q0, Name, Arity), functor(Q, Name, Arity), var_names(0, Arity, Q0, Q, Map). var_names(Q, Q, _). var_names(Arity, Arity, _, _, _) :- !. var_names(I0, Arity, Q0, Q, Map) :- I is I0 + 1, arg(I, Q0, A0), arg(I, Q, A), var_names(A0, A, Map), var_names(I, Arity, Q0, Q, Map). vars([], [], []) :- !. % also closes list! vars([Name=Var|T0], [Var|TV], [Name|TN]) :- vars(T0, TV, TN). proj_names([], []). proj_names([var(Var)|T0], [Var|T]) :- !, proj_names(T0, T). proj_names([_|T0], [-|T]) :- proj_names(T0, T). /******************************* * ERROR LOCATIONS * *******************************/ syntax_error(What, In, []) :- throw(error(syntax_error(What), context(_, left(In)))). add_error_location(error(syntax_error(What), context(_, left(After))), Tokens) :- append(Before, After, Tokens), length(Before, BL), ( BL =< 5 -> BC = Before ; length(BC0, 5), append(_, BC0, Before), BC = ['...'|BC0] ), length(After, AL), ( AL =< 5 -> AC = After ; length(AC0, 5), append(AC0, _, After), append(AC0, ['...'], AC) ), append(BC, ['**here**'|AC], ContextTokens0), maplist(token_to_atom, ContextTokens0, ContextTokens), concat_atom(ContextTokens, ' ', Context), throw(error(syntax_error(What), context(serql_parse/2, Context))). token_to_atom(Token, Token) :- atom(Token), !. token_to_atom(id(X), X) :- !. token_to_atom(string(X), X) :- !. token_to_atom(uri(URI), X) :- !, concat_atom([<, URI, >], X). token_to_atom(uri(NS,Local), X) :- !, concat_atom([NS, Local], :, X). token_to_atom(old_uri(NS,Local), X) :- !, concat_atom([<, NS, :, Local, >], X). token_to_atom(cmp(X), X) :- !. token_to_atom(rest(X), X) :- !. token_to_atom(Token, Atom) :- term_to_atom(Token, Atom). query(Query, NameSpaces, In, Out) :- catch(compilation_unit(Query, NameSpaces, In, Out), E, add_error_location(E, In)). must_see(Token) --> [Token], !. must_see(Token) --> syntax_error(expected(Token)). must_see(Token, _) --> [Token], !. must_see(_, UserName) --> syntax_error(expected(UserName)). /******************************* * HIGH LEVEL PARSER * *******************************/ compilation_unit(Query, NameSpaces) --> query(Query), namespace_list(NameSpaces). %% namespace_list(-NSList:list)// is det. % % @param NSList List of Prefix=URI for each defined namespace namespace_list([H|T]) --> [ using ], !, must_see(namespace), !, must_see_namespace(H), namespaces(T). namespace_list([]) --> []. must_see_namespace(Decl) --> namespace(Decl), !. must_see_namespace(_) --> syntax_error(expected(namespace_declaration)). namespace(NS=URI) --> must_see(id(NS), identifier), must_see(cmp(=), =), namespace_uri(URI). namespace_uri(URI) --> [ uri(URI) ], !. namespace_uri(URI) --> [ old_uri(Protocol, Local) ], !, % New style <foo:bar> { concat_atom([Protocol, :, Local], URI) }. namespace_uri(_) --> syntax_error(expected(absolute_uri)). namespaces([H|T]) --> [ ',' ], !, must_see_namespace(H), namespaces(T). namespaces([]) --> []. query(select(Projection, Path, Where, Distinct, Limit, Offset)) --> [ select ], !, distinct(Distinct), projection(Projection), must_see(from), path_expr_list(Path), query_tail(Where, Limit, Offset). query(construct(Construct, Path, Where, Distinct, Limit, Offset)) --> [ construct ], !, distinct(Distinct), construct_clause(Construct), must_see(from), path_expr_list(Path), query_tail(Where, Limit, Offset). query(_) --> syntax_error(no_select_or_construct). distinct(distinct) --> [ distinct ], !. distinct(false) --> []. query_tail(Where, Limit, Offset) --> ( [ where ] -> ( boolean_query(Where) -> [] ; syntax_error(illegal_where_clause) ) ; {Where = true} ), ( [ limit ] -> ( pos_int(Limit) -> [] ; syntax_error(illegal_limit) ) ; {Limit = inf} ), ( [ offset ] -> ( pos_int(Offset) -> [] ; syntax_error(illegal_offset) ) ; {Offset = 0} ). projection(*) --> [ * ], !. projection([H|T]) --> var_or_value(H), !, var_or_value_list(T). projection(_) --> syntax_error(expected(projection)). construct_clause(*) --> [ * ], !. construct_clause(Path) --> path_expr_list(Path), !. construct_clause(_) --> syntax_error(expected(construct_clause)). path_expr_list(Expr) --> must_see_path_expr(E0), ( [ ',' ] -> path_expr_list(Es), { Expr = (E0, Es) } ; { Expr = E0 } ). must_see_path_expr(E) --> path_expr(E), !. must_see_path_expr(_) --> syntax_error(expected(path_expression)). path_expr(optional(_, Path)) --> [ '[' ], !, path_expr_list(Path), must_see(']'). path_expr(Expr) --> path_expr0(Expr). path_expr0(Expr) --> path_expr_head(Head), ( ( [ ';' ] -> { arg(1, Head, H) } ; { arg(3, Head, H) } ), path_expr_tail(H, Tail) -> { Expr = (Head, Tail) } ; { Expr = Head } ). path_expr_head(rdf(S, P, O)) --> must_see_node(S), must_see_edge(P), must_see_node(O). path_expr_tail(S, Expr) --> [ '[' ], path_expr_tail0(S, Expr1), [ ']' ], { Expr0 = optional(_, Expr1) }, ( [ ';' ] -> path_expr_tail(S, Tail), { Expr = (Expr0, Tail) } ; { Expr = Expr0 } ). path_expr_tail(S, Expr) --> path_expr_tail0(S, Expr). % path_expr_tail0 <=> Edge Node ((";")? Path_expr_tail)? path_expr_tail0(S, Expr) --> edge(P), must_see_node(O), { Statement = rdf(S, P, O) }, ( ( [ ';' ] -> path_expr_tail(S, Tail) ; path_expr_tail(O, Tail) ) -> { Expr = (Statement, Tail) } ; { Expr = Statement } ). must_see_edge(Edge) --> edge(Edge), !. must_see_edge(_) --> syntax_error(expected(edge)). edge(var(Var)) --> [ id(Var) ], !. edge(uri(URI)) --> [ uri(URI) ], !. % <!foo:bar> edge(uri(NS, URI)) --> [ uri(NS, URI) ], !. % foo:bar edge(old_uri(NS, URI)) --> [ old_uri(NS, URI) ], !. % <foo:bar> must_see_node(Node) --> node(Node), !. must_see_node(_) --> syntax_error(expected(node)). node(Node) --> [ '{' ], node_elem(E0), !, node_elem_list(Es), [ '}' ], ( {Es == []} -> {Node = E0} ; {Node = set([E0|Es])} ). node(var(-(_))) --> % the _ is the variable that will [ '{', '}' ]. % be shared node_elem_list([H|T]) --> [ ',' ], !, must_see_node_elem(H), node_elem_list(T). node_elem_list([]) --> []. must_see_node_elem(Elem) --> node_elem(Elem), !. must_see_node_elem(_) --> syntax_error(expected(node_element)). node_elem(Elem) --> ( var(Elem) ; uri(Elem) ; literal(Elem) ; reified_stat(Elem) ), !. reified_stat(rdf(S,P,O)) --> node(S), must_see_edge(P), must_see_node(O). /******************************* * WHERE ... * *******************************/ boolean_query(Query) --> and_expr(And), ( [ or ], boolean_query(Or) -> {Query = (And ; Or)} ; {Query = And} ). and_expr(Query) --> boolean_query0(Q0), ( [ and ], and_expr(And) -> {Query = (Q0, And)} ; {Query = Q0} ). boolean_query0(Query) --> [ '(' ], !, boolean_query(Query), must_see(')'). boolean_query0(true) --> [ true ], !. boolean_query0(fail) --> [ false ], !. boolean_query0(\+(Q)) --> [ not ], !, boolean_query0(Q). boolean_query0(serql_compare(Cmp, L, R)) --> var_or_query_value(L), [ cmp(Cmp) ], !, var_or_query_value(R). boolean_query0(serql_compare(like, Var, String)) --> var_or_value(Var), % must be var? [ like ], !, must_see_string(String). boolean_query0(rdf_is_literal(V)) --> [ isliteral, '(' ], !, var(V), must_see(')'). boolean_query0(rdf_is_resource(V)) --> [ isresource, '(' ], !, var(V), must_see(')'). boolean_query0(_) --> syntax_error(expected(boolean_test)). must_see_string(String) --> [ string(String) ], !. must_see_string(_) --> syntax_error(expected(string)). var_or_value_list([H|T]) --> [ ',' ], !, must_see_var_or_value(H), var_or_value_list(T). var_or_value_list([]) --> []. must_see_var_or_value(X) --> var_or_value(X), !. must_see_var_or_value(_) --> syntax_error(expected(var_or_value)). var_or_value(X) --> var(X), !. var_or_value(X) --> value(X). var_or_query_value(X) --> ( literal_value(Value) -> { X = query(Value) } ; var_or_value(X) ). var(var(Var)) --> [ id(Var) ], !. value(URI) --> uri(URI). value('$null$') --> [ null ]. value(Literal) --> literal(Literal), !. value(datatype(var(Var))) --> [ datatype, '(', id(Var), ')' ]. value(lang(var(Var))) --> [ lang, '(', id(Var), ')' ]. value(label(var(Var))) --> [ label, '(', id(Var), ')' ]. uri(uri(URI)) --> [uri(URI)]. uri(uri(NS, URI)) --> [uri(NS, URI)]. uri(old_uri(NS, URI)) --> [old_uri(NS, URI)]. literal(Literal) --> literal_value(Value), { Literal = literal(Value) }. literal_value(Lit) --> [ string(String) ], ( [@, id(Lang)] -> { Lit = lang(Lang, String) } ; [^^, URI] -> { Lit = type(URI, String) } ; { Lit = String } ). pos_int(I) --> [ int(I) ], { I >= 0 }. % bit weird not to have >0, but this % is the Sesame spec /******************************* * TOKENISER * *******************************/ tokens([H|T]) --> blank, token(H), !, tokens(T). tokens([]) --> blank. token(uri(URI)) --> % Old style absolute URI "<!", uri_codes(Codes), ">", { atom_codes(URI, Codes) }. token(old_uri(NS, Local)) --> % Old style local, new style absolute "<", identifier(NS), ":", uri_codes(Codes), ">", { atom_codes(Local, Codes) }. token(string(String)) --> "\"", string_codes(Codes), "\"", { atom_codes(String, Codes) }. token(Token) --> identifier(Id), !, ( ":", identifier(Local) % new style URI -> { Token = uri(Id, Local) } ; { downcase_atom(Id, Keyword), serql_keyword(Keyword) -> Token = Keyword ; Token = id(Id) } ). token(int(Int)) --> digit(D0), !, digits(Digits), { number_codes(Int, [D0|Digits]) }. token(cmp(Cmp)) --> cmp(Cmp), !. token(^^) --> "^^", !. token(Char) --> [C], { single(C), char_code(Char, C) }. token(rest(Rest), In, []) :- % catch syntax errors. In \== [], atom_codes(Rest, In). single(0'*). single(0'=). single(0'(). single(0')). single(0'{). single(0'}). single(0'[). single(0']). single(0'@). single(0',). single(0';). % cmp//1 % % Returns Prolog comparison operators from the SeRQL ones. cmp(=<) --> "<=". cmp(\=) --> "!=". cmp(>=) --> ">=". cmp(=) --> "=". cmp(<) --> "<". cmp(>) --> ">". %% uri_codes(-Codes) % % Get a URI string. Does not check for otherwise valid syntax. % This could be done using library(url). uri_codes([C0|Cs]) --> [C0], { uri_code(C0) }, !, uri_codes(Cs). uri_codes([]) --> []. uri_code(C) :- code_type(C, csym), !. uri_code(0'$). uri_code(0'-). uri_code(0'@). uri_code(0'&). uri_code(0'+). uri_code(0'.). uri_code(0'/). uri_code(0'?). uri_code(0'#). uri_code(0'=). uri_code(0':). uri_code(0'~). % officially not uri_code(0';). uri_code(0'{). uri_code(0'}). %% string_codes(-Codes) % % Chars between "...", Can contain \" and \\ string_codes([C0|Cs]) --> "\"", [C0], { C0 == 0'\\ ; C0 = 0'" }, !, string_codes(Cs). string_codes([]) --> peek(0'"). string_codes([C0|Cs]) --> [C0], string_codes(Cs). %% identifier(-Id) % % An SeRQL must start with a letter or an underscore ('_') and can % be followed by zero or more letters, numbers, underscores, % dashes ('-') or dots ('.'). identifier(Id) --> [C0], { code_type(C0, csymf) }, id_chars(Cs), { atom_codes(Id, [C0|Cs]) }. id_chars([C0|Cs]) --> [C0], { code_type(C0, csym) ; C0 == 0'. ; C0 == 0'- }, !, id_chars(Cs). id_chars([]) --> []. digit(D) --> [D], { code_type(D, digit) }. digits([D0|Ds]) --> digit(D0), !, digits(Ds). digits([]) --> []. blank --> [C], { code_type(C, space) }, !, blank. blank --> []. %% serql_keyword(?Keyword) % % True if Keyword is the lowercase version if a keyword serql_keyword(select). serql_keyword(construct). serql_keyword(from). serql_keyword(where). serql_keyword(using). serql_keyword(namespace). serql_keyword(true). serql_keyword(false). serql_keyword(not). serql_keyword(and). serql_keyword(or). serql_keyword(like). serql_keyword(label). serql_keyword(lang). serql_keyword(datatype). serql_keyword(null). serql_keyword(isresource). serql_keyword(isliteral). serql_keyword(sort). serql_keyword(in). serql_keyword(union). serql_keyword(intersect). serql_keyword(minus). serql_keyword(exists). serql_keyword(forall). serql_keyword(distinct). % SPEC: not in grammar serql_keyword(limit). % SPEC: not in grammar serql_keyword(offset). % SPEC: not in grammar /******************************* * DCG BASICS * *******************************/ peek(C, L, L) :- L = [C|_]. /******************************* * HUMAN READABLE MESSAGES * *******************************/ :- multifile prolog:message/3. prolog:message(error(syntax_error(What), context(serql_parse/2, Location))) --> [ 'Syntax error in SeRQL query: ' ], explain(What), [ ' at **here** in', nl, nl], ['~w'-[Location] ]. explain(expected(X)) --> [ '"~w" expected'-[X] ].