package net.sourceforge.turtlescript.validator;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;

import org.apache.log4j.Logger;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.apache.html.dom.HTMLDocumentImpl;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.html.HTMLDocument;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Extracts RDF embedded in the {@code SCRIPT} elements of an HTML document
 * and loads it into a Jena {@link Model}.
 *
 * <p>Two script types are recognised: {@code application/rdf+n3} (read with
 * the Jena syntax name {@code "N3"}) and {@code application/rdf+ttl} (read
 * with {@code "TTL"}).
 */
public class Validator {
    public static Logger logger = Logger.getLogger(Validator.class);

    /**
     * Parses {@code html} as an HTML fragment and reads the text of every
     * matching {@code SCRIPT} element into a freshly created model.
     *
     * <p>{@link SAXException}, {@link IOException} and {@link XmlException}
     * raised while parsing are logged and not rethrown; whatever was read
     * before the failure is still returned. Unchecked exceptions thrown by
     * {@code Model.read} are not caught here and propagate to the caller.
     *
     * @param html the HTML input to scan; the stream is not closed by this method
     * @return a new model holding all statements read from the embedded
     *         scripts; empty when nothing could be parsed or no script matched
     */
    public Model validate(InputStream html) {
        Model model = ModelFactory.createDefaultModel();

        DOMFragmentParser parser = new DOMFragmentParser();
        HTMLDocument document = new HTMLDocumentImpl();
        DocumentFragment fragment = document.createDocumentFragment();
        try {
            parser.parse(new InputSource(html), fragment);

            // Empty input leaves the fragment without children; there is
            // nothing to hand to XmlBeans, so return the empty model.
            if (!fragment.hasChildNodes()) {
                return model;
            }

            // NOTE(review): only the first top-level node of the fragment is
            // inspected; scripts under later siblings would be missed --
            // confirm this is intended.
            XmlObject xmlBean = XmlObject.Factory.parse(fragment.getFirstChild());
            readScripts(xmlBean, "application/rdf+n3", "N3", model);
            readScripts(xmlBean, "application/rdf+ttl", "TTL", model);
        } catch (SAXException e) {
            // Pass the exception as the Throwable argument so the stack
            // trace is logged, not just its toString().
            logger.error("Failed to parse HTML input", e);
        } catch (IOException e) {
            logger.error("Failed to read HTML input", e);
        } catch (XmlException e) {
            logger.error("Failed to convert parsed HTML to an XmlObject", e);
        }

        return model;
    }

    /**
     * Reads the text of every {@code SCRIPT} element whose {@code type}
     * attribute equals {@code mimeType} into {@code model}.
     *
     * <p>A fresh cursor is used for each call so that every query starts
     * from the same document position, and the cursor is always disposed,
     * even when reading a script fails.
     *
     * @param xmlBean  the parsed document to search
     * @param mimeType the {@code type} attribute value to match
     * @param syntax   the Jena syntax name passed to {@code Model.read}
     * @param model    the model that receives the statements
     */
    private static void readScripts(XmlObject xmlBean, String mimeType, String syntax, Model model) {
        XmlCursor cursor = xmlBean.newCursor();
        try {
            cursor.toFirstContentToken();
            // The element name is matched in upper case -- presumably because
            // NekoHTML upper-cases element names by default; confirm against
            // the parser configuration.
            cursor.selectPath("//SCRIPT[@type = '" + mimeType + "']");
            while (cursor.toNextSelection()) {
                model.read(new StringReader(cursor.getTextValue()), "", syntax);
            }
        } finally {
            cursor.dispose();
        }
    }
}