1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
import requests from lxml import etree
# One HTTP session shared by the whole script, so cookies received on one
# request are sent along with the following ones.
se = requests.session()

# Headers attached to every request issued through ``se`` in this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0",
}
class HBK:
    """Log the shared module-level session ``se`` in to the site.

    The login form carries a hidden ``csrfmiddlewaretoken`` input; the token
    is scraped from the login page first and then posted back together with
    the credentials.

    Args:
        username: Account name submitted in the login form.
        password: Password submitted in the login form.
        login_url: Address of the login page (used for both GET and POST).

    The defaults reproduce the values that used to be hard-coded, so
    ``HBK()`` behaves exactly as before.
    """

    # NOTE(review): the default credentials live in source control; consider
    # supplying them from the environment or a config file instead.
    def __init__(self, username="somebody", password="201627",
                 login_url="http://www.heibanke.com/accounts/login"):
        self.login_url = login_url
        self.username = username
        self.password = password

    def getCsrf(self):
        """Fetch the login page and store its CSRF token in ``self.csrf``.

        Raises:
            IndexError: If the page has no ``csrfmiddlewaretoken`` input at
                the expected location.
        """
        res = se.get(url=self.login_url, headers=headers, timeout=30).text
        tree = etree.HTML(res)
        # Absolute path to the hidden token input inside the login form.
        self.csrf = tree.xpath(
            '/html/body/div/div/div[2]/form/input[@name="csrfmiddlewaretoken"]/@value'
        )[0]

    def login(self):
        """Submit the login form through the shared session ``se``."""
        self.getCsrf()
        data = {
            "csrfmiddlewaretoken": self.csrf,
            "username": self.username,
            "password": self.password,
        }
        se.post(url=self.login_url, headers=headers, data=data, timeout=30)
        # NOTE(review): the response is not inspected, so this message is
        # printed even when the server rejected the credentials.
        print('登陆成功')
print('开始闯关 - 第三关')

# Authenticate the shared session before touching the challenge page.
spider = HBK()
spider.login()

# Fetch the challenge page and parse it.
url = 'http://www.heibanke.com/lesson/crawler_ex02/'
res = se.get(url=url, headers=headers, timeout=30).text
tree = etree.HTML(res)

# Token from the hidden form input on the challenge page; raises IndexError
# if the input is not found at this path.
csrf = tree.xpath(
    '/html/body/div/div/div[2]/form/input[@name="csrfmiddlewaretoken"]/@value'
)[0]
def guess(num=1, max_attempts=1000):
    """Brute-force the numeric password of the challenge form.

    Candidates are tried in increasing order starting at ``num``.  Each one
    is POSTed to the module-level ``url`` through the shared session ``se``
    along with the module-level ``csrf`` token, and the text of the ``<h3>``
    element of the response is inspected.  A candidate is accepted as soon
    as that text does not contain u'错误'.

    The earlier recursive version called itself without ``return``, so the
    found password was lost for every call but the innermost one; this
    version loops and returns the value directly.

    Args:
        num: First candidate to try.
        max_attempts: Maximum number of candidates to try.  Replaces the
            implicit bound the recursive version got from the interpreter's
            recursion limit.

    Returns:
        The first candidate that the server did not flag as wrong.

    Raises:
        RuntimeError: If ``max_attempts`` candidates were all rejected.
        IndexError: If a response contains no ``<h3>`` at the expected path.
    """
    for candidate in range(num, num + max_attempts):
        print('guess', candidate)
        data = {
            "csrfmiddlewaretoken": csrf,
            "username": "somebody",
            "password": str(candidate),
        }
        res = se.post(url, headers=headers, data=data, timeout=30).text
        tree = etree.HTML(res)
        h3 = tree.xpath('/html/body/div/div/div[2]/h3/text()')[0]
        if u'错误' not in h3:
            print('猜测到正确的密码为%d' % candidate)
            return candidate
    raise RuntimeError(
        'no password accepted after %d attempts starting at %d'
        % (max_attempts, num)
    )
# Start the search with the default first candidate; guess() prints the
# password itself once a candidate is accepted.
guess()

print('success')
|