This post requires you to click the Likes button to read this content. http://a.pomf.se/pjmwvx.png """ OLX.ro scraper Gets name, phone no., Yahoo! & Skype addresses, where applicable http://a.pomf.se/pjmwvx.png """ import re import json import requests from bs4 import BeautifulSoup as b pages = 1 # How many pages should be scraped # Category URL, a.k.a. where to get the ads from catURL = "http://olx.ro/electronice-si-electrocasnice/laptop-calculator/" # Links to the Ajax requests ajaxNum = "http://olx.ro/ajax/misc/contact/phone/" ajaxYah = "http://olx.ro/ajax/misc/contact/communicator/" ajaxSky = "http://olx.ro/ajax/misc/contact/skype/" def getName(link): # Get the name from the ad page = requests.get(link) soup = b(page.text) match = soup.find(attrs={"class": "block color-5 brkword xx-large"}) name = re.search(">(.+)<", str(match)).group(1) return name def getPhoneNum(aID): # Get the phone number resp = requests.get("%s%s/" % (ajaxNum, aID)).text try: resp = json.loads(resp).get("value") except ValueError: return # No phone number if "span" in resp: # Multiple phone numbers nums = b(resp).find_all(text=True) for num in nums: if num != " ": return num else: return resp def getYahoo(aID): # Get the Yahoo! ID resp = requests.get("%s%s/" % (ajaxYah, aID)).text try: resp = json.loads(resp).get("value") except ValueError: return # No Yahoo! ID else: return resp def getSkype(aID): # Get the Skype ID resp = requests.get("%s%s/" % (ajaxSky, aID)).text try: resp = json.loads(resp).get("value") except ValueError: return # No Skype ID else: return resp def main(): for pageNum in range(1, pages+1): print("Page %d." % pageNum) page = requests.get(catURL + "?page=" + str(pageNum)) soup = b(page.text) links = soup.findAll(attrs={"class": "marginright5 link linkWithHash \ detailsLink"}) for a in links: aID = re.search('ID(.+)\.', a['href']).group(1) print("ID: %s" % aID) print("\tName: %s" % getName(a['href'])) if getPhoneNum(aID) != None: print("\tPhone: %s" % getPhoneNum(aID)) if getYahoo(aID) != None: print("\tYahoo: %s" % getYahoo(aID)) if getSkype(aID) != None: print("\tSkype: %s" % getSkype(aID)) if __name__ == "__main__": main() Tocmai scraper: https://rstforums.com/forum/98245-tocmai-ro-scraper-nume-oras-numar-telefon.rst