:- module(xml_reader,[fileToLineInfoElements/3]). /** <module> Utility LOGICMOO XML READER Allows you to read xml files from prolog. - @author Douglas R. Miles - @license LGPL */ :- use_module(library(sgml)). %atrace:-trace. useCateID:- fail. :- dynamic(xmlCateSig/1). :- dynamic(xmlCate/13). prolog_mostly_ground(Out):-ground(Out),!. prolog_mostly_ground(Out):-var(Out),!,atrace. prolog_mostly_ground([H|_Out]):-!,prolog_must(prolog_mostly_ground1(H)),!. prolog_mostly_ground(Out):- ((arg(_N,Out,Arg),prolog_must(prolog_mostly_ground1(Arg)),fail));true. prolog_mostly_ground1(Out):-prolog_must(nonvar(Out)). xmlCateSig(_). %% get_sgml_parser_defs(PARSER_DEFAULTS,PARSER_CALLBACKS) /*shorttag(false),*/ get_sgml_parser_defs(PARSER_DEFAULTS,PARSER_CALLBACKS):- current_prolog_flag(sgml_parser_defaults,PARSER_DEFAULTS), current_prolog_flag(sgml_parser_callbacks,PARSER_CALLBACKS),!. get_sgml_parser_defs( [defaults(false), space(remove),/*number(integer),*/ qualify_attributes(false), %call(decl, on_decl), %call(pi, on_pi),call(xmlns, on_xmlns),call(urlns, xmlns), %%call(error,xml_error), dialect(xml) ], [max_errors(0),call(begin, on_begin),call(end, on_end)]). % % ?- string_to_structure('<?xml version="1.0" encoding="ISO-8859-1"?>\n<xml><p>hi</p></xml>',X). % % ?- string_to_structure('<category><pattern>_ PLANETS</pattern></category>',X). on_end('xml', _) :- !, ignore(retract(in_xml_tag(_))). on_begin('xml', Attribs, _) :- !, asserta(in_xml_tag(Attribs)). on_begin(Tag, Attr, Parser) :- skipOver(not(inLineNum)), get_sgml_parser(Parser,context(Context)), Context=[Tag,xml|_], skipOver(debugFmt(on_begin(Tag, Attr, Context))), skipOver(retract(in_xml_tag(XmlAttr))), % skipOver(get_sgml_parser_defs(PARSER_DEFAULTS, PARSER_CALLBACKS)), get_sgml_parser(Parser,line(Line)), get_sgml_parser(Parser,charpos(Offset)), get_sgml_parser(Parser,file(File)), global_pathname(File,Pathname), % get_sgml_parser(Parser,source(Stream)), skipOver(asserta(inLineNum)), % load_structure(Stream,Content,[line(Line)|PARSER_DEFAULTS]),!, % skipOver( sgml_parse(Parser,[ document(Content),parse(input)])), NEW = t_l:lineInfoElem(Pathname,Line:Offset, Context, element(Tag, Attr, no_content_yet)), %%debugFmt(NEW), skipOver(ignore(retract(inLineNum))), skipOver(asserta(in_xml_tag(XmlAttr))), assertz(NEW),!. on_begin(_Tag, _Attr, _Parser) :-!. %%get_sgml_parser(Parser,context(Context)),!. %%,debugFmt(on_begin_Context(Tag, Attr, Context)). % %on_begin_ctx(TAG, URL, Parser, Context) :-!, debugFmt(on_begin_ctx(URL, TAG, Parser,Context)),!. on_begin_ctx(_TAG, _URL, _Parser, _Context) :- !. %%, debugFmt(on_begin_ctx(URL, TAG, Parser,Context)),!. :- thread_local xmlns/3. on_xmlns(rdf, URL, _Parser) :- !,debugFmt(on_xmlns(URL, rdf)),asserta(xmlns(URL, rdf, _)). on_xmlns(TAG, URL, _Parser) :- sub_atom(URL, _, _, _, 'rdf-syntax'), !, debugFmt('rdf-syntax'(URL, TAG)), immediateCall(_Ctx,asserta(xmlns(URL, rdf, _))). on_xmlns(TAG, URL, _Parser) :- debugFmt(on_xmlns(URL, TAG)). on_decl(URL, _Parser) :- debugFmt(on_decl(URL)). on_pi(URL, _Parser) :- debugFmt(on_pi(URL)). xml_error(TAG, URL, Parser) :- !, debugFmt(xml_error(URL, TAG, Parser)). % ============================================ % Loading content % ============================================ load_xml_structure_lineno(Attributes,Ctx,L):-must_maplist(load_inner_xml_lineno(Attributes,Ctx),L),!. :-thread_local(t_l:lineInfoElem/4). load_inner_xml_lineno(Attributes,Ctx,element(Tag,Attribs,ContentIn)):- appendAttributes(Ctx,Attributes,Attribs,RightAttribs), load_xml_structure(Ctx,element(Tag,RightAttribs,ContentIn)),!. /* %% offset load_inner_xml_lineno(Attributes,Ctx,element(Tag,Attribs,ContentIn)):- appendAttributes(Ctx,Attributes,Attribs,RightAttribs), prolog_must(attributeValue(Ctx,RightAttribs,[srcfile,srcdir],File,'$error')), MATCH = t_l:lineInfoElem(File,Line:Offset, Context, element(Tag, Attribs, no_content_yet)), ignore(MATCH), Context=[_Tag0,xml|_More], ignore(Line = nonfile), ignore(Offset = nonfile), NewAttribs = [srcfile=File,lineno=Line:Offset|RightAttribs], ignore(retract(MATCH)), load_xml_structure(Ctx,element(Tag,NewAttribs,ContentIn)),!. */ /* load_inner_xml_lineno(Attributes,Ctx,element(Tag,Attribs,ContentIn)):- prolog_must(current_value(Ctx,srcfile,File)), retract((t_l:lineInfoElem(File0,Line0:Offset0,graph, element(_Tag0, _Attr0, _Content0)))), prolog_must(call(OLD)), MATCH = t_l:lineInfoElem(File,Line:Offset,Context, element(Tag, Attribs, _ContentIn)),!, prolog_must((call(MATCH),!,not(not((Line:Offset)==(Line0:Offset0))),retract(OLD), load_xml_structure(Ctx,element(Tag,[srcinfo=File0:Line0-Offset0|Attribs],ContentIn)), NEW = t_l:lineInfoElem(File,Line:Offset,Attributes, element(Tag, Attribs, ContentIn)), assertz(NEW))),!. */ tls :- string_to_structure('<xml><p>hi</p></xml>',X),debugFmt(X). tls2 :- string_to_structure('<?xml version="1.0" encoding="ISO-8859-1"?>\n<xml><p>hi</p></xml>\n\n',X),debugFmt(X). string_to_structure(String,XMLSTRUCTURESIN):- fail, sformat(Strin0,'<pre>~s</pre>',[String]),string_to_structure0(Strin0,XMLSTRUCTURES),!, prolog_must([element(pre,[],XMLSTRUCTURESIN)]=XMLSTRUCTURES). string_to_structure(String,XMLSTRUCTURES):- string_to_structure0(String,XMLSTRUCTURES),!. string_to_structure0(String,XMLSTRUCTURES):- %%get_sgml_parser_defs(PARSER_DEFAULTS,_PARSER_CALLBACKS), PARSER_DEFAULTS = [defaults(false), space(remove),/*number(integer),*/ qualify_attributes(false),dialect(xml)], string_to_structure0(String,PARSER_DEFAULTS,XMLSTRUCTURES),!. string_to_structure(String,PARSER_DEFAULTS0,XMLSTRUCTURES):-string_to_structure0(String,PARSER_DEFAULTS0,XMLSTRUCTURES). string_to_structure0(String,PARSER_DEFAULTS0,XMLSTRUCTURESIN):- setup_call_cleanup(((string_to_stream(String,In),new_sgml_parser(Parser, []))), prolog_must(( atom_length(String,Len), append(PARSER_DEFAULTS0,[],PARSER_DEFAULTS), must_maplist(set_sgml_parser(Parser),PARSER_DEFAULTS), string_parse_structure(Len, Parser, user:PARSER_DEFAULTS, XMLSTRUCTURES, In) )), (free_sgml_parser(Parser),close(In))),!,prolog_must(XMLSTRUCTURESIN=XMLSTRUCTURES). string_parse_structure(Len,Parser, M:Options, Document, In) :- quietly((catch(call(call,string_parse_structure_opts_547(Parser),In,M,Options,Options2),_,string_parse_structure_opts(Parser,In,M,Options,Options2)))), % quietly((string_parse_structure_opts(Parser,In,M,Options,Options2))), sgml:sgml_parse(Parser, [ document(Document), source(In), parse(input), content_length(Len) | Options2 ]). /* string_parse_structure_opts_547(Parser, _In, _M, Options,Options2):- sgml:set_parser_options(Parser, Options, Options1), Options2=Options1. */ string_parse_structure_opts(Parser,In,M,Options,Options2):- sgml:set_parser_options(Options, Parser, In, Options1), sgml:parser_meta_options(Options1, M, Options2). fileToLineInfoElements(Ctx,File,XMLSTRUCTURES):- atom_concat(File,'.term',Elis), ((fail,file_newer(Elis,File)) -> termFileContents(Ctx,Elis,XMLSTRUCTURES) ; fileToLineInfoElements0(Ctx,File,XMLSTRUCTURES)). termFileContents(_Ctx,File,termFileContents(File)):-!. %%,atrace. termFileContents(_Ctx,File,element(xml,[],XMLSTRUCTURES)):- %% another way to fileToLineInfoElements setup_call_cleanup((open(File, read, In, [])), findall(Elem,((repeat,read(In,Elem),((Elem\=end_of_file)->true;!))),XMLSTRUCTURES), close(In)),!. % gather line numbers fileToLineInfoElements0(Ctx,F0,XMLSTRUCTURES):- global_pathname(F0,File), retractall(t_l:lineInfoElem(File,_,_,_)), setup_call_cleanup((open(File, read, In, [type(binary)]),new_sgml_parser(Parser, [])), prolog_must(( get_sgml_parser_defs(PARSER_DEFAULTS,PARSER_CALLBACKS), must_maplist(set_sgml_parser(Parser),[file(File)|PARSER_DEFAULTS]), %% todo offset(Offset) sgml_parse(Parser,[source(In)|PARSER_CALLBACKS]))), (free_sgml_parser(Parser),close(In))),!, fileToLineInfoElements2(Ctx,File,XMLSTRUCTURES). % gather line contents fileToLineInfoElements2(Ctx,File,XMLSTRUCTURES):-!, get_sgml_parser_defs(PARSER_DEFAULTS,_PARSER_CALLBACKS), setup_call_cleanup(open(File, read, In, [type(binary)]),(load_structure(In,Whole, [file(File)|PARSER_DEFAULTS]),!, load_inner_xml_w_lineno(File,[],[],[],Ctx,Whole,XMLSTRUCTURES)),close(In)),!. load_inner_xml_w_lineno(_SrcFile,_OuterTag,_Parent,_Attributes,_Ctx,Atom,Atom):-(atomic(Atom);var(Atom)),!. load_inner_xml_w_lineno(SrcFile,OuterTag,Parent,Attributes,Ctx,[H|T],LL):-!, must_maplist(load_inner_xml_w_lineno(SrcFile,OuterTag,Parent,Attributes,Ctx),[H|T],LL),!. % % offset load_inner_xml_w_lineno(SrcFile,[OuterTag|PREV],Parent,Attributes,Ctx,element(Tag,Attribs,ContentIn),element(Tag,NewAttribs,ContentOut)):- Context=[Tag,OuterTag|_], MATCH = t_l:lineInfoElem(SrcFile,Line:Offset, Context, element(Tag, Attribs, no_content_yet)), MATCH,!, ignore(Line = nonfile), ignore(Offset = nonfile), appendAttributes(Ctx,Attributes,Attribs,RightAttribs), % % Src = element(Tag,Attribs,ContentIn), Src = nosrc, appendAttributes(Ctx,[srcfile=SrcFile:Line-Offset,srcinfo=Src],RightAttribs,NewAttribs), ignore(retract(MATCH)), (member(Tag,[xml,topic]) -> NextAttribs = NewAttribs ; NextAttribs = []), must_maplist(load_inner_xml_w_lineno(SrcFile,[Tag,OuterTag|PREV],Parent,NextAttribs,Ctx),ContentIn,ContentOut),!. load_inner_xml_w_lineno(SrcFile,MORE,Parent,Attributes,Ctx,element(Tag,Attribs,ContentIn),element(Tag,RightAttribs,ContentOut)):- appendAttributes(Ctx,Attributes,Attribs,RightAttribs), load_inner_xml_w_lineno(SrcFile,[Tag|MORE],Parent,[],Ctx,ContentIn,ContentOut),!. load_inner_xml_w_lineno(SrcFile,OuterTag,Parent,Attributes,_Ctx,L,L):- xml_error(load_inner_xml_w_lineno(SrcFile,OuterTag,Parent,Attributes,L)). addAttribsToXML(Attribs,element(Tag,Pre,Content),element(Tag,Post,Content)):-appendAttributes(_Ctx,Pre,Attribs,Post),!. addAttribsToXML(Attribs,[H|T],OUT):-must_maplist(addAttribsToXML(Attribs),[H|T],OUT),!. addAttribsToXML(Attribs,OUT,OUT):-!,debugFmt(addAttribsToXML(Attribs,OUT,OUT)),!. appendAttributes(_Ctx,L,R,AA):-hotrace((mergeAppend0(L,R,A),list_to_set_safe(A,AA))),!. mergeAppend0(L,R,R):-var(L),!,var(R),!. mergeAppend0(L,R,A):-var(R),append(L,R,A),!. mergeAppend0(L,R,A):-var(L),append(L,R,A),!. mergeAppend0(L,[R|RR],A):-eqmember(R,L),mergeAppend0(L,RR,A). mergeAppend0([L|LL],R,A):-eqmember(L,R),mergeAppend0(LL,R,A). mergeAppend0(L,R,A):-append(L,R,A). eqmember(E,List):-copy_term_numvars(E:List,E0:List0),member(E0,List0). list_to_set_safe(A,A):-(var(A);atomic(A)),!. list_to_set_safe([A|AA],BB):- (not(not(lastMember(A,AA))) -> list_to_set_safe(AA,BB) ; (list_to_set_safe(AA,NB),BB=[A|NB])),!. lastMember(E,List):-hotrace(lastMember0(E,List)). lastMember0(_E,List):-var(List),!,fail. lastMember0(E,[H|List]):-lastMember0(E,List);E=H. lastMember(E,List,Rest):-hotrace(lastMember0(E,List,Rest)). lastMember0(E,List,Rest):-lastMember0(E,List),!,delete_safe(List,E,Rest),!. lastMember0(E,List,Rest):-lastMember0(EE,List),!,lastMember0(E,EE,Rest),!,atrace. %%delete_safe(List,EE,Rest),!. delete_safe(List,_E,Rest):-var(List),!,Rest=List. delete_safe(List,E,Rest):-is_list(List),!,delete(List,E,Rest). delete_safe([H|List],E,Rest):- H==E,!,delete_safe(List,E,Rest). delete_safe([H|List],E,[H|Rest]):-delete_safe(List,E,Rest). getKeyValue(FullList,N=V):-lastMember(N=V,FullList),!. %%addKeyValue(FullList,N=V):-nonvar(N),!,append(_Closed,[N=V|_],FullList),!. addKeyValue(FullList,NV):- prolog_must((not(ground(FullList)),nonvar(NV))),append(_Closed,[NV|_],FullList),!. lastMember2(E,List):-to_open_list(_,Closed,_Open,List),reverse(Closed,Rev),member(E,Rev). %lastMember(End,List) :- append(_,[End|_],List). to_open_list(FullList,Closed,Open,FullList) :- append(Closed,Open,FullList),var(Open),!. to_open_list(Closed,Closed,Open,FullList) :- append(Closed,Open,FullList),!. copy_term_numvars(OLD,NEW):-copy_term(OLD,NEW),numbervars(NEW,0,_). error_catch(C,E,F):-E=error(E1,E2),!,catch(C,error(E1,E2),F). error_catch(C,E,F):-nonvar(E),!,catch(C,E,F). error_catch(C,E,F):-catch(C,E,(needs_rethrown(E),F)). needs_rethrown(E):- functor(xml_goto,E,_),!,throw(E). needs_rethrown(E):- functor(xml_novalue,E,_),!,throw(E). needs_rethrown(_). hotrace(G):- quietly(G). :-thread_local(in_xml_tag/1). :-thread_local(inLineNum/0). skipOver(_). xml_error(G):- wdmsg(xml_error(G)),!. immediateCall(_,G):- trace,call(G). :- use_module(library(memfile)). %% Use a memory-file. The resulting handling is closed using close/1. string_to_stream(String,InStream):- string(String),string_to_atom(String,Atom),!,string_to_stream(Atom,InStream). string_to_stream(Atom,InStream):- atom_to_memory_file(Atom, Handle),open_memory_file(Handle,read,InStream). % \n\n\n load_xml_structure(Ctx,O):-atomic(O),!,debugFmt(load_xml_structure(Ctx,O)),!. :- fixup_exports.