Python 通过正则表达式获取网页上的全部链接
# Fetch a web page and print the raw href value of every anchor the
# pattern below recognises. Runs on both Python 2 and Python 3.
import re
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2, as used by the original snippet

PAGE_URL = "http://www.sharejs.com"
# Upper bound on how much of the page is downloaded and scanned.
MAX_BYTES = 200000
# Captures everything between ``<a href=`` and the next ``>``. The capture
# therefore keeps any surrounding quotes and any attributes that follow the
# href. Anchors whose first attribute is not ``href``, or whose text spans
# several lines ('.' does not match a newline here), are not matched.
LINK_PATTERN = re.compile('<a href=(.*?)>.*?</a>')


def extract_links(html):
    """Return the raw href text of each ``<a href=...>...</a>`` in *html*.

    Args:
        html: Page markup as text.

    Returns:
        A list of captured strings in document order; empty when nothing
        matches.
    """
    return LINK_PATTERN.findall(html)


def main():
    """Download the first MAX_BYTES of PAGE_URL and print each link found."""
    # closing() guarantees the connection is released on both Python 2
    # (whose response object is not a context manager) and Python 3.
    with closing(urlopen(PAGE_URL)) as response:
        html_source = response.read(MAX_BYTES)
    # Python 3 returns bytes, which a text pattern cannot search.
    # NOTE(review): UTF-8 is assumed here; undecodable bytes are replaced
    # instead of aborting the run -- confirm the page's real charset.
    if not isinstance(html_source, str):
        html_source = html_source.decode("utf-8", "replace")
    for link in extract_links(html_source):
        print(link)


if __name__ == "__main__":
    main()
.*?',htmlSource) for link in linksList: print link">
# Fetch a web page and print the raw href value of every anchor the
# pattern below recognises. Runs on both Python 2 and Python 3.
import re
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2, as used by the original snippet

PAGE_URL = "http://www.sharejs.com"
# Upper bound on how much of the page is downloaded and scanned.
MAX_BYTES = 200000
# Captures everything between ``<a href=`` and the next ``>``. The capture
# therefore keeps any surrounding quotes and any attributes that follow the
# href. Anchors whose first attribute is not ``href``, or whose text spans
# several lines ('.' does not match a newline here), are not matched.
LINK_PATTERN = re.compile('<a href=(.*?)>.*?</a>')


def extract_links(html):
    """Return the raw href text of each ``<a href=...>...</a>`` in *html*.

    Args:
        html: Page markup as text.

    Returns:
        A list of captured strings in document order; empty when nothing
        matches.
    """
    return LINK_PATTERN.findall(html)


def main():
    """Download the first MAX_BYTES of PAGE_URL and print each link found."""
    # closing() guarantees the connection is released on both Python 2
    # (whose response object is not a context manager) and Python 3.
    with closing(urlopen(PAGE_URL)) as response:
        html_source = response.read(MAX_BYTES)
    # Python 3 returns bytes, which a text pattern cannot search.
    # NOTE(review): UTF-8 is assumed here; undecodable bytes are replaced
    # instead of aborting the run -- confirm the page's real charset.
    if not isinstance(html_source, str):
        html_source = html_source.decode("utf-8", "replace")
    for link in extract_links(html_source):
        print(link)


if __name__ == "__main__":
    main()