% This LaTeX document was generated using the LaTeX backend of PlDoc,
% The SWI-Prolog documentation system


\section{library(semweb/rdfa): Extract RDF from an HTML or XML DOM}

\label{sec:rdfa}

\begin{tags}
\mtag{See also}- \url{http://www.w3.org/TR/2013/REC-rdfa-core-20130822/} \\- \url{http://www.w3.org/TR/html-rdfa/}
\end{tags}

This module implements extraction of RDFa triples from parsed XML or
HTML documents. It has two interfaces: \predref{read_rdfa}{3} to read triples from
some input (stream, file, URL) and \predref{xml_rdfa}{3} to extract triples from an
HTML or XML document that is already parsed with \predref{load_html}{3} or
\predref{load_xml}{3}.\vspace{0.7cm}

\begin{description}
    \predicate[det]{read_rdfa}{3}{+Input, -Triples, +Options}
True when \arg{Triples} is a list of \verb$rdf(S,P,O)$ triples extracted from
\arg{Input}. \arg{Input} is either a stream, a file name, a URL referencing
a file name or a URL that is valid for \predref{http_open}{3}. \arg{Options} are
passed to \predref{open}{4}, \predref{http_open}{3} and \predref{xml_rdfa}{3}. If no base is
provided in \arg{Options}, a base is deduced from \arg{Input}.

    \predicate{xml_rdfa}{3}{+DOM, -RDF, +Options}
True when \arg{RDF} is a list of \verb$rdf(S,P,O)$ terms extracted from \arg{DOM}
according to the RDFa specification. \arg{Options} processed:

\begin{description}
    \termitem{base}{+BaseURI}
URI to use for \verb$''$. Normally set to the document URI.
    \termitem{anon_prefix}{+AnonPrefix}
Prefix for blank nodes.
    \termitem{lang}{+Lang}
Default for \const{lang}.
    \termitem{vocab}{+Vocab}
Default for \const{vocab}.
    \termitem{markup}{+Markup}
\arg{Markup} language processed (xhtml, xml, \ldots).
\end{description}

\qpredicate[multifile]{rdf_db}{rdf_load_stream}{3}{+Format, +Stream, :Options}
Register \file{library(semweb/rdfa)} as the loader for HTML RDFa files.

\begin{tags}
    \tag{To be done}
Which options need to be forwarded to \predref{read_rdfa}{3}?
\end{tags}
\end{description}