This Java example shows how to get the contents of a webpage programmatically as a string using straight up Java, Java 7, Guava, and Apache Commons IOUtils. A similar example shows how to read the content of a webpage using Groovy.
Straight up Java
/**
 * Downloads a web page using only core JDK classes and checks that its
 * content was collected into a string.
 *
 * The page is read line by line and the lines are accumulated, so the value
 * that is logged and asserted is the page text rather than the loop cursor
 * (which is always {@code null} once the stream is exhausted).
 *
 * @throws IOException if the URL is malformed, the connection cannot be
 *         opened, or reading from the stream fails
 */
@Test
public void web_page_contents_java() throws IOException {

    URL getUrlContent = new URL("http://www.example.com/");

    StringBuilder pageContent = new StringBuilder();

    // try-with-resources closes the reader (and the wrapped stream) even when
    // a read fails; the charset is explicit so decoding does not depend on
    // the platform default.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
            getUrlContent.openStream(),
            java.nio.charset.StandardCharsets.UTF_8))) {

        String line;
        while ((line = in.readLine()) != null) {
            // readLine() strips the line terminator, so put one back
            pageContent.append(line).append(System.lineSeparator());
        }
    }

    String webpageAsString = pageContent.toString();

    logger.info(webpageAsString);

    assertNotNull(webpageAsString);
}
Java 7 try-with-resources (with Guava CharStreams)
/**
 * Reads a web page into a string using a Java 7 try-with-resources block.
 *
 * The URL stream is wrapped in a UTF-8 {@code InputStreamReader}, which is
 * passed to {@code CharStreams.toString}; the reader is closed automatically
 * when the try block exits.
 *
 * @throws IOException if the URL is malformed, or opening or reading the
 *         stream fails
 */
@Test
public void web_page_contents_java7() throws IOException {

    String pageText;

    try (InputStreamReader pageReader = new InputStreamReader(
            new URL("http://www.example.com/").openStream(),
            Charsets.UTF_8)) {
        pageText = CharStreams.toString(pageReader);
    }

    logger.info(pageText);

    assertNotNull(pageText);
}
Google Guava
/**
 * Reads a web page into a string with Guava's {@code Resources} helper,
 * decoding the content as UTF-8.
 *
 * @throws IOException if the URL is malformed or the content cannot be read
 */
@Test
public void web_page_contents_guava() throws IOException {

    URL pageUrl = new URL("http://www.example.com/");

    // Resources.asCharSource(url, charset) is the shortcut for
    // Resources.asByteSource(url).asCharSource(charset)
    String pageText = Resources.asCharSource(pageUrl, Charsets.UTF_8).read();

    logger.info(pageText);

    assertNotNull(pageText);
}
Apache Commons
/**
 * Reads a web page into a string with {@code IOUtils.toString}, first from a
 * {@code URL} and then from a {@code URI} for the same address, decoding the
 * content as UTF-8 both times.
 *
 * @throws IOException if the URL is malformed or the content cannot be read
 * @throws URISyntaxException if the address is not a valid URI
 */
@Test
public void web_page_contents_apache() throws IOException,
        URISyntaxException {

    // Variant 1: fetch from a java.net.URL
    URL pageUrl = new URL("http://www.example.com/");
    String textFromUrl = IOUtils.toString(pageUrl, Charsets.UTF_8);

    logger.info(textFromUrl);
    assertNotNull(textFromUrl);

    // Variant 2: fetch from a java.net.URI
    URI pageUri = new URI("http://www.example.com/");
    String textFromUri = IOUtils.toString(pageUri, Charsets.UTF_8);

    logger.info(textFromUri);
    assertNotNull(textFromUri);
}