Jump to content
pr00f

Tocmai.ro scraper (nume, oras, numar telefon)

Recommended Posts

This post does not require you to click the Likes button to read this content.

http://a.pomf.se/usqyao.png

"""
tocmai.ro scraper
Gets name, city, phone no.
http://a.pomf.se/usqyao.png
"""

import re
import json
import requests
from bs4 import BeautifulSoup as b

# Number of listing pages main() walks through.
pages = 1

# Category listing URL; main() appends "incepedela-<offset>" to it.
catURL = "http://www.tocmai.ro/anunturi/electronice-si-electrocasnice/"

# Template for the phone-number AJAX endpoint; getPhoneNum() fills in the
# ad id and the phone hash, in that order.
ajaxNum = "http://www.tocmai.ro/ajax_ad/call/%s/1/%s/"


def getName(link, timeout=10):
    """Return the advertiser name shown on an ad page.

    Args:
        link: URL of the ad page to download.
        timeout: Seconds to wait for the HTTP response; without a timeout a
            stalled server would block the scraper indefinitely.

    Returns:
        The text of the first ``<a class="name">`` element, or ``None`` when
        the page contains no such element.
    """
    html = requests.get(link, timeout=timeout).text
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning, so the parsed tree
    # can differ from one machine to another.
    soup = b(html, "html.parser")
    tag = soup.find("a", attrs={"class": "name"})
    # find() returns None when nothing matches; report that as None instead
    # of failing with an AttributeError on ``.text``.
    return tag.text if tag is not None else None


def getLoc(link, timeout=10):
    """Return the city shown on an ad page.

    Args:
        link: URL of the ad page to download.
        timeout: Seconds to wait for the HTTP response; without a timeout a
            stalled server would block the scraper indefinitely.

    Returns:
        The text of the first ``<small itemprop="itemLocalitate">`` element,
        or ``None`` when the page contains no such element.
    """
    html = requests.get(link, timeout=timeout).text
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning, so the parsed tree
    # can differ from one machine to another.
    soup = b(html, "html.parser")
    tag = soup.find("small", attrs={"itemprop": "itemLocalitate"})
    # find() returns None when nothing matches; report that as None instead
    # of failing with an AttributeError on ``.text``.
    return tag.text if tag is not None else None


def getPhoneNum(link, aID, timeout=10):
    """Look up the phone entry for an ad via the site's AJAX endpoint.

    The ad page is downloaded and searched for the quoted hash that appears
    after ``Ad.phone.show``; that hash and the ad id are then substituted
    into the ``ajaxNum`` URL template and the JSON reply is decoded.

    Args:
        link: URL of the ad page.
        aID: Ad identifier, used as the first value in the ``ajaxNum`` URL.
        timeout: Seconds to wait for each of the two HTTP responses.

    Returns:
        The value stored under the ``"img"`` key of the JSON reply (``None``
        if the key is absent), or ``None`` when the ad page contains no
        phone hash.
    """
    html = requests.get(link, timeout=timeout).text
    # Explicit parser keeps the re-serialised markup stable across machines.
    soup = b(html, "html.parser")

    # Raw string: "\." in a plain string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    match = re.search(r"Ad\.phone\.show.*'(.+)'", str(soup))
    if match is None:
        # The page exposes no phone hash, so there is nothing to request.
        return None

    pHash = match.group(1)
    resp = requests.get(ajaxNum % (aID, pHash), timeout=timeout).text
    return json.loads(resp).get("img")


def main():
    """Walk the category listing and print details for every ad found.

    For each of the ``pages`` listing pages, the page is downloaded from
    ``catURL`` with an ``incepedela-<offset>`` suffix (offset advances by 20
    per page), every ``<a class="record_title">`` link is collected, and the
    ad id, name, city and (when available) phone entry are printed.
    """
    for pageNum in range(pages):
        print("Page %d\n" % (pageNum+1,))
        page = requests.get(catURL + "incepedela-" + str(pageNum*20),
                            timeout=10)
        # Explicit parser avoids BeautifulSoup's "no parser specified"
        # warning and machine-dependent parsing.
        soup = b(page.text, "html.parser")

        # find_all is the current spelling of the deprecated findAll alias.
        for item in soup.find_all("a", attrs={"class": "record_title"}):
            url = item['href']
            # Raw string: "\d" in a plain literal is an invalid escape.
            idMatch = re.search(r".*-(\d+)\.html", url)
            if idMatch is None:
                # Link does not end in "-<digits>.html"; skip it rather than
                # crash on ``None.group``.
                continue
            aID = idMatch.group(1)

            print("%s" % aID)
            print("\tName: %s" % (getName(url),))
            print("\tCity: %s" % (getLoc(url),))

            # Fetch once and reuse: each lookup costs two HTTP requests.
            phone = getPhoneNum(url, aID)
            if phone is not None:
                print("\tPhone: %s" % (phone,))


if __name__ == "__main__":
    main()

OLX scraper: https://rstforums.com/forum/97868-olx-ro-scraper-nume-nr-telefon-adrese-yahoo-skype.rst

Edited by pr00f
  • Upvote 1
  • Downvote 1
Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...