% This file is part of the Attempto Parsing Engine (APE). % Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). % % The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it % under the terms of the GNU Lesser General Public License as published by the Free Software % Foundation, either version 3 of the License, or (at your option) any later version. % % The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR % PURPOSE. See the GNU Lesser General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License along with the Attempto % Parsing Engine (APE). If not, see http://www.gnu.org/licenses/. :- module(ape, [ get_ape_results_timelimit/3, get_ape_results_timelimit/4, get_ape_results/2, get_ape_results/3 ]). /** Interface for the ACE tools (ACE parser, DRS verbalizer, ...) @author Kaarel Kaljurand @author Tobias Kuhn @version 2009-05-13 Usage with multiple results returned (i.e. multi-mode): == get_ape_results([text='Every man waits.', cparaphrase1=on], ContentType, Content). get_ape_results([text='Every man waits.', cparaphrase=on, cparaphrase1=on], ContentType, Content). get_ape_results([text='A man waits.', cinput=on, cdrs=on, cdrspp=on, cparaphrase=on, cparaphrase1=on, cparaphrase2=on, ctokens=on, csyntax=on, csyntaxpp=on, cfol=on], ContentType, Content). == Usage with a single result returned (i.e. solo-mode): == get_ape_results([text='A man sees a dog.', solo=drs], ContentType, Content). get_ape_results([text='Every man owns a dog.', solo=owlrdf], ContentType, Content). get_ape_results([text='Peeter likes Mary.', solo=owlfss], ContentType, Content). == @tbd: provide all outputs (1) as serialized Prolog terms, (2) as pretty-printed terms, (3) as XML, JSON, HTML, etc. */ % Default encoding used for opening files in text mode. :- set_prolog_flag(encoding, utf8). % Note: The following line fixes the testcase: 3.1415926536 approximates Pi. % Note that using 'f' instead of 'g' does not drop the trailing zeros. % @bug: sometimes weird digits are attached to the end. :- set_prolog_flag(float_format, '%.11g'). % Richard A. O'Keefe: to see full precision for IEEE doubles, do % :- set_prolog_flag(float_format, '%.18g'). % So it seems that values above 18 do not make sense. :- assert(user:file_search_path(ape, '.')). :- prolog_load_context(directory, Dir), asserta(user:file_search_path(ape, Dir)). :- use_module(ape('utils/morphgen'), [ acesentencelist_pp/2 ]). :- use_module(ape('utils/ace_niceace'), [ tokens_to_sentences/2 ]). :- use_module(ape('utils/drs_to_xml')). :- use_module(ape('utils/drs_to_fol_to_prenex')). :- use_module(ape('utils/drs_to_ascii')). :- use_module(ape('utils/drs_to_ace')). :- use_module(ape('utils/drs_to_coreace')). :- use_module(ape('utils/drs_to_npace')). :- use_module(ape('utils/drs_to_html')). :- use_module(ape('utils/drs_to_ruleml')). :- use_module(ape('utils/tree_utils')). :- use_module(ape('utils/trees_to_ascii')). :- use_module(ape('utils/drs_to_tptp')). :- use_module(ape('lexicon/clex')). :- use_module(ape('lexicon/ulex')). :- use_module(ape('parser/ace_to_drs')). :- use_module(ape('logger/error_logger')). :- use_module(ape('utils/xmlterm_to_xmlatom'), [ xmlterm_to_xmlatom/2, xmlterm_to_xmlatom/3 ]). :- use_module(ape('utils/serialize_term'), [ serialize_term_into_atom/2 ]). :- use_module(ape('utils/owlswrl/get_owl_output'), [ get_owl_output/6 ]). %% get_ape_results_timelimit(+Input:list, -Content:atom, +TimeLimit) is det. %% get_ape_results_timelimit(+Input:list, -ContentType:atom, -Content:atom, +TimeLimit) is det. % % There is call_with_time_limit(+Time, :Goal) defined in library(time), % part of clib package. On timeout this throws the exception time_limit_exceeded. % But we catch other exceptions as well... % % @param Input is a list of input parameters of the form Key=Value % @param ContentType is one of {text/plain, text/xml} % @param Content is the returned result % @param TimeLimit the timelimit in seconds % get_ape_results_timelimit(Input, Content, TimeLimit) :- get_ape_results_timelimit(Input, _ContentType, Content, TimeLimit). get_ape_results_timelimit(Input, ContentType, Content, TimeLimit) :- catch( call_with_time_limit( TimeLimit, get_ape_results(Input, ContentType, Content) ), CatchType, catchtype_errormessage(CatchType, ContentType, Content) ). %% catchtype_errormessage(+CatchType:atom, -ContentType:atom, -ErrorMessage:atom) is det. % % Returns an error message as XML. The error message is set by catch/3, % this predicate just formats the message. % This is a very toplevel error message indicating either that the time limit % was exceeded or that there were resource errors (out of stack, etc.) or programmer % errors (undefined predicate called, etc.). % % @param CatchType is one of {time_limit_exceeded, ...} % @param ContentType is always 'text/xml' % @param ErrorMessage is an end-user-level message that corresponds to the CatchType % catchtype_errormessage( time_limit_exceeded, 'text/xml', '' ) :- !. catchtype_errormessage(CatchType, 'text/xml', ErrorMessage) :- format(atom(CatchTypeAsAtom), "~w", [CatchType]), xmlterm_to_xmlatom(element(apeResult, [], [ element(messages, [], [ element(message, [ importance='error', type='ws', sentence='', token='', value=CatchTypeAsAtom, repair='Fatal error. Please send screenshot to APE developers.' ], []) ]) ]), ErrorMessage). %% get_ape_results(+Input:list, -Content:atom) is det. %% get_ape_results(+Input:list, -ContentType:atom, -Content:atom) is det. % % @param Input is a list of input parameters of the form Key=Value % @param ContentType is one of {text/plain, text/xml} % @param Content is the returned XML % get_ape_results(Input, Content) :- get_ape_results(Input, _ContentType, Content). get_ape_results(Input, ContentType, Content) :- clear_messages, init_clex(Input), load_ulex(Input), get_value(Input, text, ACEText), get_value(Input, guess, GuessOnOff), acetext_to_drs(ACEText, GuessOnOff, on, Tokens, Syntax, Drs, _Messages, [DT, DP, DR]), get_value(Input, uri, Uri, 'http://attempto.ifi.uzh.ch/ontologies/owlswrl/test'), TempResult = [ time=[DT, DP, DR], acetext=ACEText, tokens=Tokens, drs=Drs, syntax=Syntax, uri=Uri ], get_content(Input, TempResult, ContentType, Content), !. % @bug: This is sometimes called, but we should know why. % It must be here, just to catch errors. get_ape_results(_, 'text/plain', ''). %% output_type(?Type:atom) % % This predicate defines the supported output types and the order of the outputs for the multi mode. % output_type(input). output_type(tokens). output_type(sentences). output_type(drs). output_type(drsrt). output_type(syntax). output_type(syntaxpp). output_type(syntaxd). output_type(syntaxdpp). output_type(drspp). output_type(drsxml). output_type(drshtml). output_type(paraphrase). output_type(paraphrase1). output_type(paraphrase2). output_type(owlrdf). output_type(owlfss). output_type(owlfsspp). output_type(owlxml). output_type(ruleml). output_type(fol). output_type(pnf). output_type(tptp). %% get_content(+Input:list, +TempResult:list, -ContentType:atom, -Content:atom) is det. % % get_content(Input, TempResult, ContentType, Content) :- member(solo=SoloType, Input), !, get_solo_content(SoloType, TempResult, ContentType, Content). get_content(Input, TempResult, ContentType, Content) :- get_multi_content(Input, TempResult, ContentType, Content). %% get_solo_content(+SoloType:atom, +TempResult:list, -ContentType:atom, -Content:atom) is det. % % @param SoloType defines which output should be returned % @param TempResult is a list of outputs from the parser % @param ContentType is one of {text/xml, text/plain} % @param Content is the result (e.g. a syntax tree or a paraphrase) in XML or plain text % If there are APE error messages % then we print the messages and not the solo output. % Note: we do not care about the warning messages. get_solo_content(_, _TempResult, 'text/xml', Content) :- get_error_messages([M | ErrorMessages]), !, messages_xmlmessages([M | ErrorMessages], XmlErrorMessages), xmlterm_to_xmlatom(element(messages, [], XmlErrorMessages), Content). get_solo_content(SoloType, TempResult, ContentType, Content) :- output_type(SoloType), get_output(SoloType, TempResult, OutputContentType, OutputContent), ( get_error_messages([M | ErrorMessages]) -> messages_xmlmessages([M | ErrorMessages], XmlErrorMessages), xmlterm_to_xmlatom(element(messages, [], XmlErrorMessages), Content), ContentType = 'text/xml' ; Content = OutputContent, ContentType = OutputContentType ), !. get_solo_content(SoloType, _, 'text/plain', 'ERROR: Unexpected error.') :- output_type(SoloType), !. get_solo_content(_, _, 'text/plain', 'ERROR: Wrong solo type.'). %% get_multi_content(+Input:list, +TempResult:list, -ContentType:atom, -Content:atom) is det. % % @bug: we ignore the parser+refres messages, and get a fresh list % of messages from the messages repository (it will include the parser+refres % messages anyway). % get_multi_content(Input, TempResult, 'text/xml', Content) :- get_value(TempResult, time, [DTokenizer, DParser, DRefres]), with_output_to(atom(DT), format("~3f", [DTokenizer])), with_output_to(atom(DP), format("~3f", [DParser])), with_output_to(atom(DR), format("~3f", [DRefres])), findall(OutputElement, get_multi_output_element(Input, TempResult, OutputElement), OutputElements), get_messages_in_xml(Messages), append([element(duration, [tokenizer=DT, parser=DP, refres=DR], [])|OutputElements], [element(messages, [], Messages)], Elements), xmlterm_to_xmlatom(element(apeResult, [], Elements), [header(true)], Content). %% get_multi_output_element(+Input:list, +TempResult:list, -Content:element) % % Returns one output element if it is required by the input in multi mode. It succeeds as many times as there % are such multi typed outputs. % % In case an exception is thrown during the generation of the output, % then an empty atom is returned as the output. % get_multi_output_element(Input, TempResult, element(Type, [], [Output])) :- output_type(Type), atom_concat('c', Type, Key), memberchk(Key=on, Input), catch( get_output(Type, TempResult, _, Output), _Catcher, Output = '' ). %% get_output(+OutputType:atom, +TempResult:list, -ContentType:atom, -Content:atom) % % Returns the required output as an atom. This is used for both modes, solo and multi. % get_output(input, TempResult, 'text/plain', Output) :- get_value(TempResult, acetext, Output), !. get_output(tokens, TempResult, 'text/plain', Output) :- get_value(TempResult, tokens, Tokens), serialize_term_into_atom(Tokens, Output), !. get_output(sentences, TempResult, 'text/plain', Output) :- get_value(TempResult, tokens, Tokens), tokens_to_sentences(Tokens, Sentences), serialize_term_into_atom(Sentences, Output), !. get_output(syntax, TempResult, 'text/plain', Output) :- get_value(TempResult, syntax, Syntax1), unsplit_pronouns_in_tree(Syntax1, Syntax2), remove_gaps_in_tree(Syntax2, Syntax3), unify_coords_in_tree(Syntax3, Syntax), serialize_term_into_atom(Syntax, Output), !. get_output(syntaxpp, TempResult, 'text/plain', Output) :- get_value(TempResult, syntax, Syntax1), unsplit_pronouns_in_tree(Syntax1, Syntax2), remove_gaps_in_tree(Syntax2, Syntax3), unify_coords_in_tree(Syntax3, Syntax), trees_to_ascii:trees_to_ascii(Syntax, Output), !. get_output(syntaxd, TempResult, 'text/plain', Output) :- get_value(TempResult, syntax, Syntax), serialize_term_into_atom(Syntax, Output), !. get_output(syntaxdpp, TempResult, 'text/plain', Output) :- get_value(TempResult, syntax, Syntax), trees_to_ascii:trees_to_ascii(Syntax, Output), !. get_output(fol, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), drs_fol_pnf:drs_fol(Drs, Fol), serialize_term_into_atom(Fol, Output), !. get_output(pnf, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), drs_fol_pnf:drs_pnf(Drs, Pnf), serialize_term_into_atom(Pnf, Output), !. get_output(paraphrase, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), drs_to_ace:drs_to_ace(Drs, Sentences), acesentencelist_pp(Sentences, Output), !. get_output(paraphrase1, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), drs_to_coreace:bigdrs_to_coreace(Drs, Sentences), acesentencelist_pp(Sentences, Output), !. get_output(paraphrase2, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), drs_to_npace:drs_to_npace(Drs, Sentences), acesentencelist_pp(Sentences, Output), !. get_output(ruleml, TempResult, 'text/xml', Output) :- get_value(TempResult, drs, Drs), drs_to_ruleml:drs_to_ruleml(Drs, Ruleml), xmlterm_to_xmlatom([Ruleml], Output), !. get_output(drs, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), serialize_term_into_atom(Drs, Output), !. get_output(drspp, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), drs_to_ascii:drs_to_ascii(Drs, Output), !. get_output(tptp, TempResult, 'text/plain', Output) :- get_value(TempResult, drs, Drs), drs_to_tptp:drs_to_tptplist(Drs, TptpList), with_output_to(atom(Output), tptplist_pp(TptpList)), !. get_output(drsxml, TempResult, 'text/xml', Output) :- get_value(TempResult, drs, Drs), drs_to_xmlatom(Drs, Output), !. get_output(drshtml, TempResult, 'text/xml', Output) :- get_value(TempResult, drs, Drs), drs_to_html:drs_to_html(Drs, Output), !. get_output(OwlOutputType, TempResult, ContentType, Output) :- get_value(TempResult, acetext, AceText), get_value(TempResult, drs, Drs), get_value(TempResult, uri, Uri), get_owl_output(OwlOutputType, AceText, Drs, Uri, ContentType, Output), !. %% init_clex(+Input:list) is det. % % init_clex(Input) :- get_value(Input, noclex, on), !, set_clex_switch(off). init_clex(_) :- set_clex_switch(on). %% load_ulex(+Input:list) is det. % % load_ulex(Input) :- get_value(Input, ulextext, Ulex), !, discard_ulex, setup_call_cleanup( open_string(Ulex, UlexStream), read_ulex(UlexStream), close(UlexStream)). load_ulex(_). %% get_value(+Map:list, +Key:atom, -Value:atom, +Default:atom) is det. % % Returns the value for a key from a map of key/value pairs. The default value is returned if % the key is not found. % % @param Map is a list key/value pairs of the form Key=Value % @param Key is the key to look for % @param Value is the value that is returned % @param Default is the default value for the case that the key is not found % get_value(Map, Key, Value, _Default) :- memberchk(Key=Value, Map), !. get_value(_, _, Default, Default). %% get_value(+Map:list, +Key:atom, -Value:atom) is det. % % The same as get_value/4 with '' as default value % get_value(Map, Key, Value) :- get_value(Map, Key, Value, '').