Python bs4: find() returns None -


I am learning Python web scraping, and I am practicing on http://search.proquest.com/. The site requires a paid subscription, but I think most universities have access to it.

"""Fetch one monthly issue listing from ProQuest and print the text of one element.

The script opens a MySQL connection, defines ``store`` for saving scraped
articles, POSTs to the ProQuest issue-browse endpoint with browser-like
headers, parses the response with BeautifulSoup and prints the text of a
single element (or "attributes missing" when that element is not found).
"""
from urllib.request import urlopen
from urllib.request import urlretrieve
from bs4 import BeautifulSoup
import urllib.error
import http.cookiejar
import requests
import pymysql

conn = pymysql.connect(host='localhost', user='root', passwd='gaojia', db='mysql')
cur = conn.cursor()
# One-time schema setup, kept for reference:
# cur.execute("create database wsj")
cur.execute("use wsj")
# cur.execute("create table wsj.list (id int not null auto_increment, url varchar(255) not null, title varchar(1000) , abstract varchar(20000),created timestamp not null default current_timestamp, primary key (id))")

issues = set()    # store url of desired issues
articles = set()  # store url of articles of given issue


def store(title, abstract, url):
    """Insert one article row into the ``pages`` table and commit.

    Args:
        title: Article title.
        abstract: Article abstract text.
        url: Article URL.
    """
    # Values are passed as query parameters, never interpolated into the SQL.
    cur.execute(
        "insert pages (title,abstract,url) values (%s,%s,%s)",
        (title, abstract, url),
    )
    cur.connection.commit()


# NOTE(review): the original called find(parentid) with a name that is never
# defined, which raises NameError. Presumably the element id was intended --
# confirm the real tag/id against the returned HTML before relying on this.
TARGET_ID = "parentid"

# Cookie header captured from a browser session, reproduced as found.
# NOTE(review): presumably tied to a session that expires -- verify that the
# response really is the issue page and not a login/redirect page.
_COOKIE_PARTS = (
    'availability-zone=us-east-1c',
    'mwtbid=830706ae-9389-4bb4-812d-b597683b812e',
    '_ga=ga1.2.1201070524.1446763952',
    'fsr.r=%7b%22d%22%3a90%2c%22i%22%3a%22de07553-78769885-bcc1-4823-67c96%22%2c%22e%22%3a1467984529571%7d',
    'fulltextshowall=yes',
    'jsessionid=752eda8ba4c3b6791cde9abfeb2bb3a1.i-c1aa5150',
    'authenticatedby=ip',
    'os_vwo_country=cn',
    'os_vwo_institution=13818',
    'os_vwo_language=zho',
    'os_vwo_my_research=false',
    'os_vwo_referring_url="http://ourex.lib.sjtu.edu.cn/primo_library/libweb/action/display.do',
    'os_vwo_requested_url="http://search.proquest.com/"',
    'os_persistent="wrpztfjdrh0wiwt5czzs+cwlaauhjmhd++vls3rvx5e="',
    'os_vwo_visitor_type=returning',
    'awselb=c393a78d02ca3ee2799cf8894b23627240e8cace66375056e6d341d7da668019371e729bf574df4c7b461b13fcaae8a127ca655e3affda10d2742b23fd55f3b713f0a97e539c751ac7bd616c8d55def2cccf1762b2',
    'onesearchtz=480',
    'appversion=r20161.6.0.834.574',
    'availability-zone=us-east-1c',
    '_vwo_uuid_v2=0308785c38305f47209e7ec8811ac0a2|3ec2dd2ac5e7bfcc195a554e24406f22',
    'ostimestamp=1472007458.508',
    'wt_fpc=id=202.120.14.195-2899434048.30480412:lv=1471960659244:ss=1471959777563',
    'fsr.s=%7b%22cp%22%3a%7b%22usage_session%22%3a%2220160824004842532%3a222017%22%2c%22cxreplayaws%22%3a%22true%22%2c%22error_page%22%3a%22no%22%2c%22no_results%22%3a%22no%22%2c%22my_research%22%3a%22no%22%2c%22advanced%22%3a%22no%22%2c%22professional%22%3a%22no%22%2c%22user_ip%22%3a%22202.120.19.182%22%2c%22session_id%22%3a%22752eda8ba4c3b6791cde9abfeb2bb3a1.i-c1aa5150%22%2c%22account_id%22%3a%2213818%22%7d%2c%22v1%22%3a-2%2c%22v2%22%3a-2%2c%22rid%22%3a%22de07553-78727480-20ae-90fb-1186c%22%2c%22ru%22%3a%22http%3a%2f%2fourex.lib.sjtu.edu.cn%2fprimo_library%2flibweb%2faction%2fdisplay.do%3bjsessionid%3d81b5c8f2b4e21e549adb7e9bac4c3c04%3ftabs%3ddetailstab%26ct%3ddisplay%26fn%3dsearch%26doc%3dsjtulibxw000061822%26indx%3d1%26recids%3dsjtulibxw000061822%26recidxs%3d0%26elementid%3d0%26rendermode%3dpoppedout%26displaymode%3dfull%26frbrversion%3d%26dscnt%3d0%26scp.scps%3dscope%253a%2528sjt%2529%252cscope%253a%2528sjtu_metadata%2529%252cscope%253a%2528sjtu_sfx%2529%252cscope%253a%2528sjtulibzw%2529%252cscope%253a%2528sjtulibxw%2529%252cduxiubook%26tab%3ddefault_tab%26dstmp%3d1471999665891%26vl(freetext0)%3dproquest%26vid%3dchinese%22%2c%22r%22%3a%22ourex.lib.sjtu.edu.cn%22%2c%22st%22%3a%22%22%2c%22to%22%3a5%2c%22pv%22%3a26%2c%22lc%22%3a%7b%22d0%22%3a%7b%22v%22%3a26%2c%22s%22%3atrue%7d%7d%2c%22cd%22%3a0%2c%22f%22%3a1472007439637%2c%22pn%22%3a0%2c%22sd%22%3a0%7d',
    '_ga=ga1.3.1201070524.1446763952',
    '_gat_ua-61126923-3=1',
)

try:
    session = requests.session()
    # Point at the monthly issue listing.
    url = 'http://search.proquest.com/publication.publicationissuebrowse:drilldown/month/%e5%85%ab%e6%9c%88/08/year/2016/parentmonth082016'
    payload = {"site": "news", "t:ac": "publications_105983"}
    headers = {
        'user-agent': 'mozilla/5.0 (windows nt 6.1; wow64) applewebkit/537.36 (khtml, gecko) chrome/35.0.1916.153 safari/537.36 se 2.x metasr 1.0',
        'accept': 'text/javascript, text/html,application/xml, text/xml, */*',
        'accept-encoding': 'gzip, deflate',
        'accept-language': 'zh-cn,zh;q=0.8',
        'host': 'search.proquest.com',
        'content-type': 'application/x-www-form-urlencoded; charset=utf-8',
        'connection': 'keep-alive',
        'content-length': '0',
        'origin': 'http://search.proquest.com',
        'referer': 'http://search.proquest.com/news/publication/105983/citation/99d2c84d41804033pq/2?accountid=13818',
        'x-prototype-version': '1.7',
        'x-requested-with': 'xmlhttprequest',
        'cookie': '; '.join(_COOKIE_PARTS),
    }
    req = session.post(url, data=payload, headers=headers)
    # Turn 4xx/5xx responses into exceptions so they reach the handler below
    # instead of being parsed as if they were the expected page.
    req.raise_for_status()
    bs0bj = BeautifulSoup(req.text, "html.parser")
    # find() returns None when nothing matches, so check before get_text().
    node = bs0bj.find(id=TARGET_ID)
    if node is None:
        print("attributes missing")
    else:
        print(node.get_text())  # .encode("gb18030")
except requests.exceptions.RequestException as reason:
    # requests raises its own exception hierarchy, not urllib.error.HTTPError.
    print(reason)

With the code above, if I use bs0bj.find("something").get_text(), it returns None, even though there is a lot of content in "bs0bj". What is wrong here?


Comments

Popular posts from this blog

mysql - Dreamhost PyCharm Django Python 3 Launching a Site -

java - Sending SMS with SMSLib and Web Services -

java - How to resolve The method toString() in the type Object is not applicable for the arguments (InputStream) -