Download web page as string

This Java example shows how to get the contents of a web page programmatically as a string using plain Java, Java 7, Guava, and Apache Commons IOUtils. A similar example shows how to read the contents of a web page using Groovy.

Straight up Java

/**
 * Downloads a web page using only JDK classes and collects it into one string.
 *
 * <p>The page is read line by line and every line is appended to a buffer, so
 * the resulting string holds the whole page rather than the loop's final
 * {@code null} sentinel.
 *
 * @throws IOException if the URL cannot be opened or read
 */
@Test
public void web_page_contents_java() throws IOException {

    URL getUrlContent = new URL("http://www.example.com/");

    StringBuilder contents = new StringBuilder();

    // try-with-resources closes the reader (and the underlying stream) even
    // when reading fails part-way through. The charset is stated explicitly
    // so decoding does not depend on the platform default.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
            getUrlContent.openStream(),
            java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = in.readLine()) != null) {
            // readLine() strips the line terminator, so put one back.
            contents.append(line).append('\n');
        }
    }

    String webpageAsString = contents.toString();

    logger.info(webpageAsString);

    assertNotNull(webpageAsString);
}

Java 7 try-with-resources (with Guava CharStreams)

/**
 * Reads a web page into a string using a Java 7 try-with-resources block.
 *
 * <p>The URL's stream is wrapped in a UTF-8 reader and drained with
 * {@code CharStreams.toString}; the reader is closed automatically when the
 * try block exits.
 *
 * @throws IOException if the URL cannot be opened or read
 */
@Test
public void web_page_contents_java7() throws IOException {

    final URL pageLocation = new URL("http://www.example.com/");
    final String pageText;

    try (InputStreamReader source =
            new InputStreamReader(pageLocation.openStream(), Charsets.UTF_8)) {
        // Drain the whole reader into memory in one call.
        pageText = CharStreams.toString(source);
    }

    logger.info(pageText);
    assertNotNull(pageText);
}

Google Guava

/**
 * Reads a web page into a string with Guava's {@code Resources} helper.
 *
 * <p>The content behind the URL is decoded as UTF-8 and returned as a single
 * string.
 *
 * @throws IOException if the URL cannot be opened or read
 */
@Test
public void web_page_contents_guava() throws IOException {

    final URL pageLocation = new URL("http://www.example.com/");

    // Resources.toString is the one-call form of
    // asByteSource(url).asCharSource(charset).read().
    final String pageText = Resources.toString(pageLocation, Charsets.UTF_8);

    logger.info(pageText);
    assertNotNull(pageText);
}

Apache Commons

/**
 * Reads a web page into a string with Apache Commons {@code IOUtils},
 * first from a {@link URL} and then from a {@link URI}.
 *
 * <p>Both variants decode the response as UTF-8.
 *
 * @throws IOException if the page cannot be opened or read
 * @throws URISyntaxException if the address is not a valid URI
 */
@Test
public void web_page_contents_apache() throws IOException,
        URISyntaxException {

    final String address = "http://www.example.com/";

    // Variant 1: fetch through a java.net.URL.
    final URL pageUrl = new URL(address);
    final String fromUrl = IOUtils.toString(pageUrl, Charsets.UTF_8);
    logger.info(fromUrl);
    assertNotNull(fromUrl);

    // Variant 2: the same fetch, addressed by a java.net.URI instead.
    final URI pageUri = new URI(address);
    final String fromUri = IOUtils.toString(pageUri, Charsets.UTF_8);
    logger.info(fromUri);
    assertNotNull(fromUri);
}