1 package net.sourceforge.turtlescript.validator;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.StringReader;
6
7 import org.apache.log4j.Logger;
8 import org.apache.xmlbeans.XmlCursor;
9 import org.apache.xmlbeans.XmlException;
10 import org.apache.xmlbeans.XmlObject;
11
12 import com.hp.hpl.jena.rdf.model.Model;
13 import com.hp.hpl.jena.rdf.model.ModelFactory;
14 import org.cyberneko.html.parsers.DOMFragmentParser;
15 import org.apache.html.dom.HTMLDocumentImpl;
16 import org.w3c.dom.DocumentFragment;
17 import org.w3c.dom.html.HTMLDocument;
18 import org.xml.sax.InputSource;
19 import org.xml.sax.SAXException;
20
21 public class Validator {
22 public static Logger logger = Logger.getLogger(Validator.class);
23
24 public Model validate(InputStream html) {
25 Model model = ModelFactory.createDefaultModel();
26
27 DOMFragmentParser parser = new DOMFragmentParser();
28 HTMLDocument document = new HTMLDocumentImpl();
29 DocumentFragment fragment = document.createDocumentFragment();
30 try {
31 parser.parse(new InputSource(html),fragment);
32 XmlObject xmlBean = XmlObject.Factory.parse(fragment.getFirstChild());
33 XmlCursor cursor = xmlBean.newCursor();
34 cursor.toFirstContentToken();
35 cursor.selectPath("//SCRIPT[@type = 'application/rdf+n3']");
36 while(cursor.hasNextSelection()) {
37 cursor.toNextSelection();
38 model.read(new StringReader(cursor.getTextValue()), "", "N3");
39 }
40 cursor.toFirstContentToken();
41 cursor.selectPath("//SCRIPT[@type = 'application/rdf+ttl']");
42 while(cursor.hasNextSelection()) {
43 cursor.toNextSelection();
44 model.read(new StringReader(cursor.getTextValue()), "", "TTL");
45 }
46 cursor.dispose();
47 } catch (SAXException e) {
48 logger.error(e);
49 } catch (IOException e) {
50 logger.error(e);
51 } catch (XmlException e) {
52 logger.error(e);
53 }
54
55 return model;
56 }
57 }