Jump to content
pr00f

OLX.ro Scraper (nume + nr. telefon + adrese yahoo/skype)

Recommended Posts

This post requires you to click the Likes button to read this content.

http://a.pomf.se/pjmwvx.png

"""
OLX.ro scraper
Gets name, phone no., Yahoo! & Skype addresses, where applicable
http://a.pomf.se/pjmwvx.png
"""

import re
import json
import requests
from bs4 import BeautifulSoup as b

# Number of listing pages to scrape; main() requests pages 1..pages inclusive.
pages = 1

# Category listing URL the ads are collected from. main() appends
# "?page=<n>" to it, so it must not already carry a query string.
catURL = "http://olx.ro/electronice-si-electrocasnice/laptop-calculator/"

# Base URLs of the Ajax contact endpoints. Each getter appends "<ad ID>/"
# and reads the "value" entry of the JSON reply.
ajaxNum = "http://olx.ro/ajax/misc/contact/phone/"  # phone number
ajaxYah = "http://olx.ro/ajax/misc/contact/communicator/"  # Yahoo! ID
ajaxSky = "http://olx.ro/ajax/misc/contact/skype/"  # Skype ID


def getName(link):
    """Return the advertiser name shown on an ad page.

    Args:
        link: URL of the ad page to download.

    Returns:
        The text of the first element whose class attribute is
        "block color-5 brkword xx-large", with surrounding whitespace
        removed, or None when the page has no such element or the element
        holds no text.
    """
    page = requests.get(link)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed alongside Beautiful Soup.
    soup = b(page.text, "html.parser")
    match = soup.find(attrs={"class": "block color-5 brkword xx-large"})
    if match is None:
        # Element not present: report "no name" instead of crashing.
        return None
    # Read the text from the parsed node rather than running a regex over
    # the serialized HTML, which returned markup for nested tags and failed
    # outright when the content spanned several lines.
    name = match.get_text(strip=True)
    return name or None


def getPhoneNum(aID):
    """Return the first phone number published for an ad, if any.

    Args:
        aID: Ad identifier; appended to the phone Ajax URL (ajaxNum).

    Returns:
        The phone number text, or None when the reply is not valid JSON,
        carries no "value" entry, or contains only whitespace.
    """
    resp = requests.get("%s%s/" % (ajaxNum, aID)).text
    try:
        value = json.loads(resp).get("value")
    except ValueError:
        return None  # No phone number
    if not value:
        # "value" missing, null or empty: the membership test below would
        # raise TypeError on None, so treat it as "no phone number".
        return None
    if "span" in value:  # Multiple phone numbers wrapped in markup
        # stripped_strings skips every whitespace-only text node, not just
        # the single-space one, and yields the numbers already trimmed.
        return next(b(value, "html.parser").stripped_strings, None)
    return value


def getYahoo(aID):
    """Return the Yahoo! ID published for an ad.

    Args:
        aID: Ad identifier; appended to the Yahoo! Ajax URL (ajaxYah).

    Returns:
        The "value" entry of the JSON reply, or None when the reply is not
        valid JSON (no Yahoo! ID available).
    """
    url = "%s%s/" % (ajaxYah, aID)
    body = requests.get(url).text
    try:
        payload = json.loads(body)
    except ValueError:
        return None  # No Yahoo! ID
    return payload.get("value")


def getSkype(aID):
    """Return the Skype ID published for an ad.

    Args:
        aID: Ad identifier; appended to the Skype Ajax URL (ajaxSky).

    Returns:
        The "value" entry of the JSON reply, or None when the reply is not
        valid JSON (no Skype ID available).
    """
    url = "%s%s/" % (ajaxSky, aID)
    body = requests.get(url).text
    try:
        payload = json.loads(body)
    except ValueError:
        return None  # No Skype ID
    return payload.get("value")


def main():
    """Print the contact details found for every ad on the listing pages.

    Walks listing pages 1..pages of catURL, and for each ad link prints its
    ID and name, followed by the phone number, Yahoo! ID and Skype ID when
    the matching getter returns a value.
    """
    # Kept as one literal: a backslash continuation inside the string would
    # fold the next line's indentation into the class value being matched.
    link_class = "marginright5 link linkWithHash detailsLink"
    # Raw string, so "\." reaches the regex engine as an escaped dot instead
    # of being an invalid string escape. Compiled once, outside the loops.
    id_pattern = re.compile(r'ID(.+)\.')
    contact_getters = (
        ("Phone", getPhoneNum),
        ("Yahoo", getYahoo),
        ("Skype", getSkype),
    )

    for pageNum in range(1, pages + 1):
        print("Page %d." % pageNum)
        page = requests.get(catURL + "?page=" + str(pageNum))
        soup = b(page.text, "html.parser")

        for a in soup.find_all(attrs={"class": link_class}):
            href = a.get('href', '')
            found = id_pattern.search(href)
            if found is None:
                # Link without a recognizable ad ID: skip it, don't crash.
                continue
            aID = found.group(1)
            print("ID: %s" % aID)
            print("\tName: %s" % getName(href))
            for label, getter in contact_getters:
                # Call each getter once and reuse the result; testing and
                # then printing with two calls doubled the HTTP requests.
                value = getter(aID)
                if value is not None:
                    print("\t%s: %s" % (label, value))


if __name__ == "__main__":
    main()

Tocmai scraper: https://rstforums.com/forum/98245-tocmai-ro-scraper-nume-oras-numar-telefon.rst

Edited by pr00f
  • Like 6
  • Upvote 2
Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...