Java XML教程 - Java DOM简介
DOM是标准的树结构,其中每个节点包含来自XML结构的一个组件。
XML文档中两种最常见的节点类型是元素节点和文本节点。
使用Java DOM API,我们可以创建节点,删除节点,更改其内容,并遍历节点层次结构。
何时使用DOM
文档对象模型标准是为XML文档操作而设计的。
DOM在设计上是与语言无关的,因此Java的DOM API并没有充分利用Java的面向对象特性。
混合内容模型
文本和元素在DOM层次结构中混合。这种结构在DOM模型中称为混合内容。
例如,我们有以下xml结构:
<yourTag>This is an <bold>important</bold> test.</yourTag>
DOM节点的层级如下,其中每行代表一个节点:
ELEMENT: yourTag
  + TEXT: This is an
  + ELEMENT: bold
    + TEXT: important
  + TEXT: test.
yourTag 元素包含文本,后跟一个子元素,后跟另外的文本。
节点类型
为了支持混合内容,DOM节点本身非常简单。节点的“内容”只是标识它属于哪种类型的节点。
例如,<yourTag> 节点内容是元素 yourTag的名称。
DOM节点API定义了 getNodeValue()、getNodeType() 和 getNodeName() 方法。
对于元素节点 <yourTag>,getNodeName() 返回 yourTag,而 getNodeValue() 返回 null。
对于文本节点“+ TEXT: This is an”,getNodeName() 返回 #text,getNodeValue() 返回“This is an”。
例子
以下代码显示了如何使用DOM解析器来解析xml文件并获取一个 org.w3c.dom.Document 对象。
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
/**
 * Minimal DOM example: loads {@code games.xml} into an in-memory
 * {@link org.w3c.dom.Document}.
 */
public class Main {
  /**
   * Creates a parser from a default factory and parses the file.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the file cannot be read or parsed
   */
  public static void main(String[] args) throws Exception {
    DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    File source = new File("games.xml");
    // The parsed tree is the end result of this example; nothing is done with it afterwards.
    Document doc = parser.parse(source);
  }
}
例2
以下代码显示如何执行DOM转储。
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * DOM dump example: parses {@code input.xml} with a validating, namespace-aware
 * parser and prints the node-type tree through {@code TreeDumper}.
 */
public class Main {
  /**
   * Configures the parser, installs {@code MyErrorHandler}, parses the input
   * and dumps the resulting document.
   *
   * @param arg command-line arguments (not used)
   * @throws Exception if the parser cannot be created or parsing fails
   */
  public static void main(String[] arg) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setNamespaceAware(true);
    factory.setIgnoringElementContentWhitespace(true);

    DocumentBuilder parser = factory.newDocumentBuilder();
    parser.setErrorHandler(new MyErrorHandler());

    Document tree = parser.parse(new InputSource("input.xml"));
    new TreeDumper().dump(tree);
  }
}
/**
 * Prints the node-type name of every node in a DOM tree to standard output,
 * one node per line, with one extra space of indentation per nesting level.
 */
class TreeDumper {
  /**
   * Dumps the whole document, starting at the document node with no indentation.
   *
   * @param doc the document to print
   */
  public void dump(Document doc) {
    dumpLoop(doc, "");
  }

  /**
   * Prints the label for {@code node}, then recurses into each of its children
   * with the indentation extended by one space.
   */
  private void dumpLoop(Node node, String indent) {
    System.out.println(indent + labelFor(node.getNodeType()));
    String childIndent = indent + " ";
    for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
      dumpLoop(child, childIndent);
    }
  }

  /** Maps a DOM node-type code to the text printed for it. */
  private static String labelFor(short nodeType) {
    switch (nodeType) {
      case Node.CDATA_SECTION_NODE:
        return "CDATA_SECTION_NODE";
      case Node.COMMENT_NODE:
        return "COMMENT_NODE";
      case Node.DOCUMENT_FRAGMENT_NODE:
        return "DOCUMENT_FRAGMENT_NODE";
      case Node.DOCUMENT_NODE:
        return "DOCUMENT_NODE";
      case Node.DOCUMENT_TYPE_NODE:
        return "DOCUMENT_TYPE_NODE";
      case Node.ELEMENT_NODE:
        return "ELEMENT_NODE";
      case Node.ENTITY_NODE:
        return "ENTITY_NODE";
      case Node.ENTITY_REFERENCE_NODE:
        return "ENTITY_REFERENCE_NODE";
      case Node.NOTATION_NODE:
        return "NOTATION_NODE";
      case Node.PROCESSING_INSTRUCTION_NODE:
        return "PROCESSING_INSTRUCTION_NODE";
      case Node.TEXT_NODE:
        return "TEXT_NODE";
      default:
        return "Unknown node";
    }
  }
}
/**
 * SAX error handler that prints every reported problem to standard output and
 * then rethrows it, so warnings, errors and fatal errors all abort the parse.
 */
class MyErrorHandler implements ErrorHandler {
  @Override
  public void warning(SAXParseException e) throws SAXException {
    reportAndRethrow("Warning", e);
  }

  @Override
  public void error(SAXParseException e) throws SAXException {
    reportAndRethrow("Error", e);
  }

  @Override
  public void fatalError(SAXParseException e) throws SAXException {
    reportAndRethrow("Fatal Error", e);
  }

  /**
   * Prints the severity label, message, position and system ID of the problem,
   * then throws the same exception instance back to the parser.
   */
  private void reportAndRethrow(String severity, SAXParseException problem) throws SAXException {
    System.out.println(severity + ": " + problem.getMessage());
    System.out.println("Line " + problem.getLineNumber() + " Column "
        + problem.getColumnNumber());
    System.out.println("System ID: " + problem.getSystemId());
    throw problem;
  }
}
错误处理程序
以下代码显示了如何在使用DOM解析器解析XML时处理错误。
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Error-handling example: parses {@code person.xml} with a validating,
 * namespace-aware parser and reports any failure instead of propagating it.
 */
public class DOMCheck {
  /**
   * Parses the file with {@code MyErrorHandler} installed. A {@link SAXException}
   * is printed to standard output; configuration and I/O failures go to standard error.
   *
   * @param arg command-line arguments (not used)
   */
  public static void main(String[] arg) {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setNamespaceAware(true);
    try {
      DocumentBuilder parser = factory.newDocumentBuilder();
      parser.setErrorHandler(new MyErrorHandler());
      Document doc = parser.parse(new InputSource("person.xml"));
    } catch (ParserConfigurationException | IOException setupOrIoProblem) {
      System.err.println(setupOrIoProblem);
    } catch (SAXException parseProblem) {
      System.out.println(parseProblem);
    }
  }
}
/**
 * SAX error handler used by the error-handling example. Each callback prints
 * the problem to standard output and then rethrows the exception it was given.
 */
class MyErrorHandler implements ErrorHandler {
  @Override
  public void warning(SAXParseException e) throws SAXException {
    describe("Warning", e);
    throw e;
  }

  @Override
  public void error(SAXParseException e) throws SAXException {
    describe("Error", e);
    throw e;
  }

  @Override
  public void fatalError(SAXParseException e) throws SAXException {
    describe("Fatal Error", e);
    throw e;
  }

  /** Prints the severity label with the message, the line/column, and the system ID. */
  private void describe(String severity, SAXParseException problem) {
    String headline = severity + ": " + problem.getMessage();
    String position = "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
    String origin = "System ID: " + problem.getSystemId();
    System.out.println(headline);
    System.out.println(position);
    System.out.println(origin);
  }
}
例3
以下代码显示了如何递归访问DOM树中的所有节点。
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Traversal example: parses {@code file.xml} and recursively visits every node
 * of the resulting DOM tree.
 */
public class Main {
  /**
   * Parses the file with validation on and entity-reference expansion off,
   * then starts the traversal at the document node with level 0.
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the file cannot be read or parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setValidating(true);
    builderFactory.setExpandEntityReferences(false);
    File input = new File("file.xml");
    Document root = builderFactory.newDocumentBuilder().parse(input);
    visit(root, 0);
  }

  /**
   * Visits every descendant of {@code node} depth-first. The method is a
   * traversal skeleton: it performs no action on the nodes it reaches.
   *
   * @param node  the node whose children are visited
   * @param level the depth of {@code node}; children are visited with {@code level + 1}
   */
  public static void visit(Node node, int level) {
    for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
      visit(child, level + 1);
    }
  }
}
例4
下面的代码显示了如何将XML片段转换为DOM片段。
import java.io.File;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
/**
 * Fragment example: parses an XML snippet held in a string and appends its
 * content to the root element of a document loaded from {@code infilename.xml}.
 */
public class Main {
  /**
   * Loads the target document, parses the snippet as its own document, imports
   * the snippet's root element into the target, moves that element's children
   * into a {@link DocumentFragment}, and appends the fragment to the target's root.
   *
   * @param argv command-line arguments (not used)
   * @throws Exception if a parser cannot be created or either input cannot be parsed
   */
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory validatingFactory = DocumentBuilderFactory.newInstance();
    validatingFactory.setValidating(true);
    Document target = validatingFactory.newDocumentBuilder().parse(new File("infilename.xml"));

    // The snippet is parsed with a fresh factory that does not have validation enabled.
    String fragment = "<fragment>aaa</fragment>";
    DocumentBuilderFactory snippetFactory = DocumentBuilderFactory.newInstance();
    InputSource snippetSource = new InputSource(new StringReader(fragment));
    Document snippetDoc = snippetFactory.newDocumentBuilder().parse(snippetSource);

    // Deep-copy the wrapper element into the target document.
    Node wrapper = target.importNode(snippetDoc.getDocumentElement(), true);

    // Move the wrapper's children, first to last, into the fragment.
    DocumentFragment holder = target.createDocumentFragment();
    for (Node child = wrapper.getFirstChild(); child != null; child = wrapper.getFirstChild()) {
      holder.appendChild(wrapper.removeChild(child));
    }

    Element rootElement = target.getDocumentElement();
    rootElement.appendChild(holder);
  }
}
例5
下面的代码显示了如何解析XML字符串:使用DOM和StringReader。
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
 * String-parsing example: parses an XML document held in a string and prints
 * the name and title of every {@code employee} element.
 */
public class Main {
  /**
   * Parses the inline XML through a {@link StringReader} and prints one
   * "Name" line and one "Title" line per employee.
   *
   * @param arg command-line arguments (not used)
   * @throws Exception if the parser cannot be created or the XML cannot be parsed
   */
  public static void main(String[] arg) throws Exception {
    String xmlRecords = "<data><employee><name>A</name>"
        + "<title>Manager</title></employee></data>";
    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    InputSource is = new InputSource();
    is.setCharacterStream(new StringReader(xmlRecords));
    Document doc = db.parse(is);

    NodeList nodes = doc.getElementsByTagName("employee");
    for (int i = 0; i < nodes.getLength(); i++) {
      Element element = (Element) nodes.item(i);

      // item(0) is null when the employee has no such child; the helper accepts null.
      Element name = (Element) element.getElementsByTagName("name").item(0);
      System.out.println("Name: " + getCharacterDataFromElement(name));

      Element title = (Element) element.getElementsByTagName("title").item(0);
      System.out.println("Title: " + getCharacterDataFromElement(title));
    }
  }

  /**
   * Returns the character data held by the first child of an element.
   *
   * @param e the element to read; may be {@code null}
   * @return the data of the first child when that child is a {@link CharacterData}
   *     node; otherwise the empty string (also when {@code e} is {@code null} or
   *     has no children)
   */
  public static String getCharacterDataFromElement(Element e) {
    // A missing element is treated like an empty one instead of throwing NullPointerException.
    if (e == null) {
      return "";
    }
    Node child = e.getFirstChild();
    if (child instanceof CharacterData) {
      return ((CharacterData) child).getData();
    }
    return "";
  }
}
上面的代码生成以下结果。
Name: A
Title: Manager




















