import java.io.IOException;
import java.net.MalformedURLException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequestSettings;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.util.FalsifyingWebConnection;
import com.gargoylesoftware.htmlunit.xml.XmlPage;
/**
 * Fetches the RSS document at {@code http://localhost/rss.xml} through HtmlUnit and
 * prints one {@code title,link} line per {@code <item>} element found in it.
 */
public class HtmlUnitParse {

    /**
     * Downloads the feed, collects the title/link pair of every {@code <item>} and
     * prints the pairs to standard output in the order they were first inserted.
     *
     * <p>Titles are used as map keys, so a later item with the same title replaces
     * the link of the earlier one. Failures while fetching are reported via
     * {@code printStackTrace()} and whatever was collected so far is still printed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Map<String, String> map = new LinkedHashMap<String, String>();
        final WebClient webClient = new WebClient();
        try {
            webClient.setWebConnection(new FalsifyingWebConnection(webClient) {
                @Override
                public WebResponse getResponse(WebRequestSettings settings) throws IOException {
                    // Re-wrap the real response body with content type "text/xml";
                    // presumably so HtmlUnit builds an XmlPage regardless of the
                    // Content-Type the server actually sent.
                    WebResponse original = super.getResponse(settings);
                    return createWebResponse(settings, original.getContentAsString(), "text/xml");
                }
            });
            final XmlPage page = webClient.getPage("http://localhost/rss.xml");
            // Cast element by element instead of an unchecked cast of the whole list.
            for (Object item : page.getByXPath("//item")) {
                collectItem((Node) item, map);
            }
        } catch (FailingHttpStatusCodeException e) {
            e.printStackTrace();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the windows held by the client, even when fetching failed.
            webClient.closeAllWindows();
        }
        // Print the collected pairs as "title,link" lines.
        for (Map.Entry<String, String> entry : map.entrySet()) {
            System.out.println(entry.getKey() + "," + entry.getValue());
        }
    }

    /**
     * Reads the {@code title} and {@code link} children of one item node and, when
     * both are present, stores them in {@code target} (title as key, link as value).
     * If a child name occurs more than once, the last occurrence wins.
     *
     * @param item   the node whose direct children are inspected
     * @param target the map receiving the title/link pair
     */
    private static void collectItem(Node item, Map<String, String> target) {
        NodeList children = item.getChildNodes();
        String title = null;
        String url = null;
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            String name = child.getNodeName();
            if ("title".equals(name)) {
                title = child.getTextContent().trim();
            } else if ("link".equals(name)) {
                url = child.getTextContent().trim();
            }
        }
        // Store once per item, after all children have been examined.
        if (title != null && url != null) {
            target.put(title, url);
        }
    }
}