Jump to content

Leaderboard

Popular Content

Showing content with the highest reputation on 03/05/15 in all areas

  # 1. This post does not require you to click the Likes button to read this content.
  # http://a.pomf.se/usqyao.png
"""
tocmai.ro scraper

Gets name, city, phone no.
http://a.pomf.se/usqyao.png
"""

import json
import re

import requests
from bs4 import BeautifulSoup as b

# Number of category listing pages to walk.
pages = 1
# Category listing URL; the pagination suffix is appended in main().
catURL = "http://www.tocmai.ro/anunturi/electronice-si-electrocasnice/"
# AJAX endpoint template, filled with (ad id, phone hash).
ajaxNum = "http://www.tocmai.ro/ajax_ad/call/%s/1/%s/"

# requests waits forever by default; bound every call so one stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 10
# Name the parser explicitly so results do not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser" warning).
HTML_PARSER = "html.parser"

# Raw strings: "\." and "\d" are invalid escapes in ordinary string literals.
_PHONE_HASH_RE = re.compile(r"Ad\.phone\.show.*'(.+)'")
_AD_ID_RE = re.compile(r".*-(\d+)\.html")


def _fetchSoup(link):
    """Download *link* and return its parsed BeautifulSoup document."""
    return b(requests.get(link, timeout=REQUEST_TIMEOUT).text, HTML_PARSER)


def getName(link, soup=None):
    """Return the text of the ad's ``<a class="name">`` element.

    Args:
        link: URL of the ad page.
        soup: Optional already-parsed ad page. When omitted the page is
            downloaded from *link*.

    Returns:
        The element's text, or ``None`` when the page has no such element.
    """
    if soup is None:
        soup = _fetchSoup(link)
    tag = soup.find("a", attrs={"class": "name"})
    return tag.text if tag is not None else None


def getLoc(link, soup=None):
    """Return the text of the ad's ``<small itemprop="itemLocalitate">`` element.

    Args:
        link: URL of the ad page.
        soup: Optional already-parsed ad page. When omitted the page is
            downloaded from *link*.

    Returns:
        The element's text (the city), or ``None`` when the element is absent.
    """
    if soup is None:
        soup = _fetchSoup(link)
    tag = soup.find("small", attrs={"itemprop": "itemLocalitate"})
    return tag.text if tag is not None else None


def getPhoneNum(link, aID, soup=None):
    """Look up the phone entry of an ad through the site's AJAX endpoint.

    The ad page is searched for an ``Ad.phone.show(...'<hash>')`` call; the
    quoted hash and the ad id are then substituted into ``ajaxNum``.

    Args:
        link: URL of the ad page.
        aID: Ad id as extracted from the ad URL.
        soup: Optional already-parsed ad page. When omitted the page is
            downloaded from *link*.

    Returns:
        The value stored under ``"img"`` in the JSON response (presumably an
        image of the number -- confirm against the live endpoint), or ``None``
        when the page exposes no phone hash or the key is missing.
    """
    if soup is None:
        soup = _fetchSoup(link)
    match = _PHONE_HASH_RE.search(str(soup))
    if match is None:
        # The ad does not expose a phone number.
        return None
    resp = requests.get(
        ajaxNum % (aID, match.group(1)), timeout=REQUEST_TIMEOUT
    ).text
    return json.loads(resp).get("img")


def main():
    """Walk the category pages and print id, name, city and phone of each ad."""
    for pageNum in range(pages):
        print("Page %d\n" % (pageNum + 1,))
        # Each listing page starts 20 ads after the previous one.
        page = requests.get(
            catURL + "incepedela-" + str(pageNum * 20), timeout=REQUEST_TIMEOUT
        )
        soup = b(page.text, HTML_PARSER)
        for item in soup.find_all("a", attrs={"class": "record_title"}):
            url = item.get("href")
            match = _AD_ID_RE.search(url) if url else None
            if match is None:
                # Not an ad link of the expected "...-<id>.html" form; skip it
                # instead of crashing on a missing match.
                continue
            aID = match.group(1)
            print("%s" % aID)
            # Fetch and parse the ad page once and share it between the three
            # extractors instead of downloading it again for every field.
            adSoup = _fetchSoup(url)
            print("\tName: %s" % (getName(url, adSoup),))
            print("\tCity: %s" % (getLoc(url, adSoup),))
            phone = getPhoneNum(url, aID, adSoup)
            if phone is not None:
                print("\tPhone: %s" % phone)


if __name__ == "__main__":
    main()

# OLX scraper: https://rstforums.com/forum/97868-olx-ro-scraper-nume-nr-telefon-adrese-yahoo-skype.rst
    0 points
  2. lifedj97@yahoo.it - Merci
    -1 points
  3. Salut si bine ai venit! ( corecteaza prezentarea )
    -1 points
×
×
  • Create New...