前言:整理一些以前用Java编写的实用小程序。本篇是一个简单的网络爬虫,用于获取网页中的Email地址。
package com.lianggzone.demo; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 网络爬虫获取Email地址 * @author LiangGzone */ public class EmailSpider { public static void main(String[] args){ URL url = null; String regex = "[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+"; String tempStr = null; Pattern pattern = Pattern.compile(regex); Matcher matcher = null; try { // 获取URL地址 url = new URL("http://zhidao.baidu.com/question/92607106.html"); // 连接URL地址 HttpURLConnection urlConnection = (HttpURLConnection)url.openConnection(); urlConnection.connect(); // 定义输入流 InputStream in = urlConnection.getInputStream(); byte[] buf = new byte[4096]; while (in.read(buf) > 0) { tempStr = new String(buf); matcher = pattern.matcher(tempStr); while(matcher.find()) { System.out.println(matcher.group()); } } }catch (IOException e) { e.printStackTrace(); } } }备注:这里木有链接数据库,不是GUI界面,然后咧,修改url = new URL("http://zhidao.baidu.com/question/92607106.html");改成你想要的地址即可。
转自:CSDN
作者:LiangGzone
链接:http://blog.csdn.net/lianggzone/article/details/8871437