网络数据
import urllib.request
# Download the site's home page and print its HTML source.
# The context manager closes the HTTP response even if read() raises.
with urllib.request.urlopen("http://www.njcie.com") as r:
    page = r.read()
# 'UTF-8-sig' decodes UTF-8 and drops a leading byte-order mark if present.
page = page.decode('UTF-8-sig')
print(page)
抓取并剥离其中的论文链接
import urllib.request
import re
# Download the home page and print the absolute URL of every link whose
# href starts with "papers/".
# The context manager closes the HTTP response even if read() raises.
with urllib.request.urlopen("http://www.njcie.com") as r:
    page = r.read()
page = page.decode('utf-8')
# [^"]+ stops at the href's closing quote, so trailing attributes or further
# links on the same line are not swallowed into the match. The capture group
# makes findall return only the path that follows "papers/".
results = re.findall(r'href="papers/([^"]+)"', page)
for result in results:
    # Turn the site-relative path into an absolute URL.
    result = "http://www.njcie.com/papers/" + result
    print(result)