java|java正则表达式实战(获取网站中显示的超链接)

【java|java正则表达式实战(获取网站中显示的超链接)】部分网站设有反爬机制,下面的程序对这些网站可能无法正常抓取;本文的主要目的是练习正则表达式。

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * FileName: WebSpider
 * Author: Jerry
 * Date: 2020/4/24 16:40
 * Description: a small crawler used as a regular-expression exercise. It downloads
 * one page as text and prints every absolute {@code http}/{@code https} link found
 * in a double-quoted {@code href} attribute.
 */
public class WebSpider {

    /**
     * Matches a double-quoted {@code href} attribute whose value starts with
     * {@code http}; capture group 1 is the link itself (lazy, so it stops at the
     * first closing quote).
     */
    private static final String LINK_REGEX = "href=\"(http[\\w\\s./:]+?)\"";

    /**
     * Downloads the resource at {@code urlStr} and returns it as text.
     *
     * <p>Every line read is followed by a single {@code "\n"}, regardless of the
     * line terminator used by the server.
     *
     * <p>This is best-effort: an {@link IOException} (malformed URL, connection
     * failure, read error) is reported on standard error and whatever was read up
     * to that point is returned, which may be the empty string.
     *
     * @param urlStr  the URL to fetch
     * @param charset the charset used to decode the response bytes
     * @return the decoded content read so far; never {@code null}
     */
    public static String getURLContent(String urlStr, Charset charset) {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the reader (and the underlying URL stream)
        // on both the normal and the exceptional path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new URL(urlStr).openStream(), charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
        } catch (IOException e) {
            // Deliberately not rethrown: callers get the partial content instead.
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Collects capture group 1 of every match of {@code regexStr} in {@code destStr}.
     *
     * @param destStr  the text to search
     * @param regexStr a regular expression that declares at least one capturing
     *                 group; {@code m.group(1)} is read for each match, so a pattern
     *                 without a group fails with {@link IndexOutOfBoundsException}
     * @return the group-1 text of each match, in order of appearance; empty when
     *         nothing matches
     */
    public static List<String> getMatherSubstrs(String destStr, String regexStr) {
        Pattern p = Pattern.compile(regexStr);
        List<String> result = new ArrayList<>();
        Matcher m = p.matcher(destStr);
        while (m.find()) {
            result.add(m.group(1));
        }
        return result;
    }

    /**
     * Fetches the 163.com front page and prints each extracted link on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String urlStr = "https://www.163.com/";
        // Check the page's source first: the relevant declaration appears near the
        // top (presumably the charset to decode with - confirm before changing "gbk").
        String destStr = getURLContent(urlStr, Charset.forName("gbk"));
        List<String> list = getMatherSubstrs(destStr, LINK_REGEX);
        for (String string : list) {
            System.out.println(string);
        }
    }
}

    推荐阅读