| 注册
请输入搜索内容

热门搜索

Java Linux MySQL PHP JavaScript Hibernate jQuery Nginx
admin
10年前发布

利用 HtmlUnit 下载网页上的文件

import java.io.FileOutputStream;  import java.io.InputStream;  import java.util.regex.Matcher;  import java.util.regex.Pattern;     import org.apache.commons.io.IOUtils;     import com.gargoylesoftware.htmlunit.Page;  import com.gargoylesoftware.htmlunit.WebClient;     public class DownloadFile {      public static void main(String[] args) throws Exception {          String baseUrl = "<a href="http://hanyu.iciba.com/hanzi/1.shtml";" target="_blank">http://hanyu.iciba.com/hanzi/1.shtml";</a>          String bihuaRegex = "class=\"guanggao\"[^<]*<[^<]*<param\\s*name=\"movie\"\\s*value=\"([^\"]*)";          String aSoundRegex = "class=\"js12\">ā.*?name=\"FlashVars\"\\s*value=\"f=([^\"]*)";          String eSoundRegex = "class=\"js12\">ē.*?name=\"FlashVars\"\\s*value=\"f=([^\"]*)";          WebClient client = new WebClient();          client.getOptions().setCssEnabled(false);          client.getOptions().setJavaScriptEnabled(false);          client.getOptions().setThrowExceptionOnFailingStatusCode(false);          client.getOptions().setThrowExceptionOnScriptError(false);          Page page = client.getPage(baseUrl);          String source = page.getWebResponse().getContentAsString();          Matcher mBihuan = Regex(source, bihuaRegex);          Matcher mA = Regex(source, aSoundRegex);          Matcher mE = Regex(source, eSoundRegex);          while(mBihuan.find()) {              String url = "<a href="http://hanyu.iciba.com/" + mBihuan.group" target="_blank">http://hanyu.iciba.com/" + mBihuan.group</a>(1);              page = client.getPage(url);              saveFile(page, "d:/testDownload/bihua.swf");          }          while(mA.find()) {              String url = mA.group(1);              page = client.getPage(url);              saveFile(page, "d:/testDownload/a.mp3");          }          while(mE.find()) {              String url = mE.group(1);              page = client.getPage(url);              saveFile(page, "d:/testDownload/e.mp3");          }      }         
    public static Matcher Regex(String source, String regex) {          Pattern p = Pattern.compile(regex, Pattern.DOTALL);          return p.matcher(source);      }             public static void saveFile(Page page, String file) throws Exception {          InputStream is = page.getWebResponse().getContentAsStream();          FileOutputStream output = new FileOutputStream(file);          IOUtils.copy(is, output);          output.close();      }  }