Friday, April 11, 2008

Retrieving website content with Java

A simple program that retrieves the content of any website and saves it to a file.


import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Downloads the content of a URL and saves it, line by line, to a file
 * named "out.txt" in the current working directory.
 *
 * <p>All work happens in the constructor; errors are reported to stdout
 * rather than propagated, matching the original best-effort behavior.
 */
public class WebContent
{
URL url = null;
URLConnection urlConn = null;
BufferedReader br = null;

/**
 * Opens a connection to {@code urlStr}, reads the response line by line,
 * and writes each line (followed by '\n') to "out.txt".
 *
 * @param urlStr the URL to fetch; if {@code null}, nothing is done
 */
public WebContent(String urlStr)
{
FileWriter fw = null;
try
{
if (urlStr != null)
{
url = new URL(urlStr);
urlConn = url.openConnection();
// NOTE(review): uses the platform-default charset; the server's
// declared encoding is ignored — confirm this is acceptable.
br = new BufferedReader(new InputStreamReader(urlConn.getInputStream()));
fw = new FileWriter(new File("out.txt"));
String line = null;
while ((line = br.readLine()) != null)
{
fw.write(line);
fw.write("\n");
}
}
}
catch (MalformedURLException mue)
{
System.out.println("mue = " + mue.getMessage());
}
catch (IOException ioe)
{
System.out.println("ioe = " + ioe.getMessage());
}
finally
{
// Close the writer first so buffered output is flushed to out.txt.
// The original leaked fw, which could leave the file empty/truncated.
if (fw != null)
{
try
{
fw.close();
}
catch (IOException ignored)
{
// Best-effort cleanup: nothing useful to do if close fails.
}
}
if (br != null)
{
try
{
br.close();
}
catch (IOException ignored)
{
// Best-effort cleanup: nothing useful to do if close fails.
}
}
}
}

/**
 * Demo entry point: fetches http://www.yahoo.com into out.txt.
 */
public static void main(String args[])
{
try
{
WebContent webObj = new WebContent("http://www.yahoo.com");
}
catch (Exception e)
{
e.printStackTrace();
}
}

}

No comments: