web scraping - Using python to scrape contents of jsp webpage -
Using Python and the requests library, I have a list of zip codes, and from these I want to compile a list of nearby CVS store addresses for each one. I can extract the address field without a problem, but I cannot dynamically generate the next page since there is no "&zip=77098" (or equivalent) in the URL. Each time I visit the page there is a seemingly random "requestid" value.
http://www.cvs.com/store-locator/store-locator-landing.jsp?_requestid=1003175
If I copy this link and paste it into a browser, it routes me to the default CVS location. Is there a way to send the zip code in the URL, or otherwise dynamically set the location to search for?
This is the (not working) code for one zip code. It returns the "default" locations, not the locations specific to the zip code sent with the request:
# The question's original attempt: POST the zip code straight to the
# store-locator landing page and scrape the returned markup. Per the question,
# this yields the "default" locations rather than zip-specific ones.
#
# NOTE(review): string literals are reproduced exactly as found here; the
# surrounding text appears to have been lowercased, so confirm the casing of
# URLs and header values before relying on them.
import requests
from bs4 import BeautifulSoup

data = {"search": "77098"}
urlx = 'http://www.cvs.com/store-locator/store-locator-landing.jsp'

# Fetch the page once so its cookies can be replayed on the POST below.
cookies = requests.get(urlx).cookies

rx = requests.post(
    urlx,
    cookies=cookies,
    data=data,
    headers={
        'user-agent': 'mozilla/5.0 (windows nt 6.1) applewebkit/537.36 '
                      '(khtml, gecko) chrome/41.0.2228.0 safari/537.36',
    },
)

soupx = BeautifulSoup(rx.content, "lxml-xml")

# One <div class="address-wrap"> / <span class="store-miles"> is collected per
# match in the parsed response.
addresslist = soupx.find_all("div", {"class": "address-wrap"})
distancelist = soupx.find_all("span", {"class": "store-miles"})
There is quite a bit going on here. First you need to get the coordinates for the zip code you enter, so that you can use them later in a POST to the URL that gives you the search results:
# Look up CVS stores near a zip code in two steps:
#   1. ask the geocoding endpoint for the zip code's latitude/longitude;
#   2. POST those coordinates to the store-search endpoint and print the JSON.
#
# NOTE(review): string literals (URLs, API key, filter expression, JSON keys)
# are reproduced exactly as found here; the surrounding text appears to have
# been lowercased, so confirm their casing against the live services.
from pprint import pprint as pp

import requests

urlx = 'http://www.cvs.com/store-locator/store-locator-landing.jsp'

# NOTE: this dict is defined but not passed to any request in this snippet.
headers = {
    'user-agent': 'mozilla/5.0 (windows nt 6.1) applewebkit/537.36 '
                  '(khtml, gecko) chrome/41.0.2228.0 safari/537.36',
}

# Query parameters for the GET request that returns the coordinates used in
# the later POST. Consider substituting your own API key for "key".
coord_params = {
    "output": "json",
    "key": "akezkydo-i6crmr6nw9y0ce_72t-osa8swddbgvfmsrkl47fvwqopjbrgw_on5aq",
    "$filter": "cvs_store_flag eq 'y'",
}

# Endpoint that provides the coordinates.
coords_url = "https://dev.virtualearth.net/rest/v1/locations"

# Endpoint to POST to for the actual search results.
post = "https://www.cvs.com/rest/bean/cvs/store/cvsstorelocatorservices/getsearchstore"

zipcode = "77098"

# Query template; format it with each zip code when running this in a loop.
template = "{zipcode},us"

with requests.Session() as s:
    # Visit the landing page first so the session holds whatever cookies it sets.
    s.get(urlx)

    # Add the query parameter for the zip code being looked up.
    coord_params["query"] = template.format(zipcode=zipcode)
    js = s.get(coords_url, params=coord_params).json()

    # Pull the coordinate pair out of the first resource of the first result set.
    latitude, longitude = js['resourcesets'][0]['resources'][0]['point']['coordinates']

    # Fetch and print the search results for those coordinates.
    results = s.post(
        post,
        data={"latitude": latitude, "longitude": longitude},
    ).json()
    pp(results)
output:
{u'atgresponse': {u'sc': u'00', u'sd': [{u'ad': u'2111 w. alabama street', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'0.35', u'id': u'10554', u'nv': u'i', u'nz': u'', u'ph': u'713-874-1085', u'sn': u'myweeklyadstore10554', u'st': u'tx', u'wf': u'y', u'zp': u'77098'}, {u'ad': u'4700 kirby drive', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'0.43', u'id': u'5685', u'nv': u'i', u'nz': u'', u'ph': u'713-533-2200', u'sn': u'myweeklyadstore5685', u'st': u'tx', u'wf': u'y', u'zp': u'77098'}, {u'ad': u'2910 westheimer road', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'0.69', u'id': u'7126', u'nv': u'i', u'nz': u'', u'ph': u'713-526-0062', u'sn': u'myweeklyadstore7126', u'st': u'tx', u'wf': u'y', u'zp': u'77098'}, {u'ad': u'6011 kirby drive', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'1.17', u'id': u'7479', u'nv': u'i', u'nz': u'', u'ph': u'713-522-3983', u'sn': u'myweeklyadstore7479', u'st': u'tx', u'wf': u'y', u'zp': u'77005'}, {u'ad': u'1003 richmond avenue', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'1.41', u'id': u'7162', u'nv': u'i', u'nz': u'', u'ph': u'713-807-8491', u'sn': u'myweeklyadstore7162', u'st': u'tx', u'wf': u'y', u'zp': u'77006'}, {u'ad': u'1001 waugh drive, corner of dallas avenue', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'1.87', u'id': u'5840', u'nv': u'i', u'nz': u'', u'ph': u'713-807-7859', u'sn': u'myweeklyadstore5840', u'st': u'tx', u'wf': u'y', u'zp': u'77019'}, {u'ad': u'2266 west holcombe boulevard', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'1.93', u'id': u'5833', u'nv': u'i', u'nz': u'', u'ph': u'713-218-2180', u'sn': u'myweeklyadstore5833', u'st': u'tx', u'wf': u'y', u'zp': u'77030'}, {u'ad': u'4323 san felipe st', u'ci': u'houston', u'cv': u'', u'cz': u'', u'dt': u'2.23', u'id': u'16368', u'nv': u'', u'nz': u'', u'ph': u'713-331-0166', u'sn': u'myweeklyadstore16368', u'st': u'tx', u'wf': u'n', u'zp': u'77027'}, {u'ad': u'1000 elgin street', u'ci': u'houston', u'cv': u'i', 
u'cz': u'', u'dt': u'2.32', u'id': u'784', u'nv': u'i', u'nz': u'', u'ph': u'713-526-4478', u'sn': u'myweeklyadstore784', u'st': u'tx', u'wf': u'y', u'zp': u'77004'}, {u'ad': u'5401 washington avenue', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'2.46', u'id': u'7476', u'nv': u'i', u'nz': u'', u'ph': u'713-861-3883', u'sn': u'myweeklyadstore7476', u'st': u'tx', u'wf': u'y', u'zp': u'77007'}, {u'ad': u'3939 bellaire boulevard', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'2.54', u'id': u'6240', u'nv': u'i', u'nz': u'', u'ph': u'832-778-9025', u'sn': u'myweeklyadstore6240', u'st': u'tx', u'wf': u'y', u'zp': u'77025'}, {u'ad': u'4755 westhiemer road', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'2.58', u'id': u'2988', u'nv': u'i', u'nz': u'', u'ph': u'713-386-1091', u'sn': u'myweeklyadstore2988', u'st': u'tx', u'wf': u'y', u'zp': u'77027'}, {u'ad': u'402 gray street', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'2.59', u'id': u'5968', u'nv': u'i', u'nz': u'', u'ph': u'713-982-5527', u'sn': u'myweeklyadstore5968', u'st': u'tx', u'wf': u'y', u'zp': u'77002'}, {u'ad': u'7900 south main', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'2.77', u'id': u'7402', u'nv': u'i', u'nz': u'', u'ph': u'713-660-8934', u'sn': u'myweeklyadstore7402', u'st': u'tx', u'wf': u'y', u'zp': u'77030'}, {u'ad': u'8500 main st', u'ci': u'houston', u'cv': u'', u'cz': u'', u'dt': u'2.9', u'id': u'16676', u'nv': u'', u'nz': u'', u'ph': u'713-661-8213', u'sn': u'myweeklyadstore16676', u'st': u'tx', u'wf': u'n', u'zp': u'77025'}, {u'ad': u'5204 richmond avenue', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'3.14', u'id': u'3177', u'nv': u'i', u'nz': u'', u'ph': u'713-961-0874', u'sn': u'myweeklyadstore3177', u'st': u'tx', u'wf': u'y', u'zp': u'77056'}, {u'ad': u'2580 shearn st', u'ci': u'houston', u'cv': u'', u'cz': u'', u'dt': u'3.26', u'id': u'17181', u'nv': u'', u'nz': u'', u'ph': u'713-331-0377', u'sn': u'myweeklyadstore17181', u'st': u'tx', u'wf': 
u'n', u'zp': u'77007'}, {u'ad': u'5402 westheimer road', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'3.39', u'id': u'7753', u'nv': u'i', u'nz': u'', u'ph': u'713-877-1479', u'sn': u'myweeklyadstore7753', u'st': u'tx', u'wf': u'y', u'zp': u'77056'}, {u'ad': u'917 main street', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'3.4', u'id': u'6834', u'nv': u'i', u'nz': u'', u'ph': u'713-982-5565', u'sn': u'myweeklyadstore6834', u'st': u'tx', u'wf': u'y', u'zp': u'77002'}, {u'ad': u'9838 buffalo speedway', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'3.9', u'id': u'5258', u'nv': u'i', u'nz': u'', u'ph': u'713-218-4491', u'sn': u'myweeklyadstore5258', u'st': u'tx', u'wf': u'y', u'zp': u'77025'}, {u'ad': u'3811 old spanish trail', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'3.95', u'id': u'7198', u'nv': u'i', u'nz': u'', u'ph': u'713-741-7900', u'sn': u'myweeklyadstore7198', u'st': u'tx', u'wf': u'y', u'zp': u'77021'}, {u'ad': u'5430 bissonnet street, corner of chimney rock, corner of chimney rock', u'ci': u'bellaire', u'cv': u'i', u'cz': u'', u'dt': u'4.2', u'id': u'5273', u'nv': u'i', u'nz': u'', u'ph': u'713-218-2291', u'sn': u'myweeklyadstore5273', u'st': u'tx', u'wf': u'y', u'zp': u'77401'}, {u'ad': u'300 meyerland plaza mall', u'ci': u'houston', u'cv': u'', u'cz': u'', u'dt': u'4.33', u'id': u'17091', u'nv': u'', u'nz': u'', u'ph': u'713-292-0031', u'sn': u'myweeklyadstore17091', u'st': u'tx', u'wf': u'n', u'zp': u'77096'}, {u'ad': u'110 west 20th street', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'4.87', u'id': u'7092', u'nv': u'i', u'nz': u'', u'ph': u'832-673-7131', u'sn': u'myweeklyadstore7092', u'st': u'tx', u'wf': u'y', u'zp': u'77008'}, {u'ad': u'6545 westheimer road', u'ci': u'houston', u'cv': u'i', u'cz': u'', u'dt': u'5.12', u'id': u'5895', u'nv': u'i', u'nz': u'', u'ph': u'713-243-8050', u'sn': u'myweeklyadstore5895', u'st': u'tx', u'wf': u'y', u'zp': u'77057'}]}}
The call to https://dev.virtualearth.net/rest/v1/locations goes to the Bing Maps API from Microsoft. I suggest you set up an account and create your own application, which will give you a key; it took me literally two minutes to do it. As far as I know it is free, with a limit of 30k requests per day, which should be more than enough.
Comments
Post a Comment