Jump to content
UnixDevel

PHP Scraper anuntul.ro

Recommended Posts

<?php
// Use the full opening tag: the short form "<?" only works when the
// short_open_tag ini setting is enabled.

// Base listing URL; the page loop further down reassigns $url for every
// page it scrapes.
$url = "http://m.anuntul.ro/anunturi/19";

// simple_html_dom provides str_get_html(), used by the scraping functions.
include_once 'simple_html_dom.php';
/**
 * Downloads one ad page and returns the text of its title link.
 *
 * The page is fetched with cURL and parsed with simple_html_dom. The result
 * is the plaintext of the first <a> inside the first "div.txt" element.
 *
 * @param string $url Absolute URL of the ad page to fetch.
 *
 * @return string|null The link text, or null when the download fails, the
 *                     markup cannot be parsed, or the expected elements are
 *                     not present.
 */
function extrage_info($url){
    $ch = curl_init();
    if ($ch === false) {
        return null;
    }

    $timeout = 30;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    $data = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on a transfer error; there is nothing to parse.
    if ($data === false || $data === '') {
        return null;
    }

    // str_get_html() yields false when it refuses the input.
    $html = str_get_html($data);
    if (!$html) {
        return null;
    }

    // Only the first "div.txt" matters to the caller, so look it up directly
    // instead of collecting every match.
    $title = null;
    $article = $html->find('div.txt', 0);
    if ($article) {
        $link = $article->find('a', 0);
        if ($link) {
            $title = $link->plaintext;
        }
    }

    // Release the DOM before returning; placed after the return statement
    // this cleanup would never run.
    $html->clear();
    unset($html);

    return $title;
}
/**
 * Scrapes one listing page and prints every ad found on it.
 *
 * For each "div.of" element a record is built with:
 *   - title:    plaintext of "div.of_tit"
 *   - intro:    plaintext of "div.flr"
 *   - lk:       the path segment following '<a href="/anunt/' inside "div.of_end"
 *   - continut: result of extrage_info() for that ad's page
 * All records are then dumped with print_r().
 *
 * @param string $url Absolute URL of the listing page to fetch.
 *
 * @return void Output is printed; nothing is returned. A page that cannot be
 *              downloaded or parsed prints nothing.
 */
function get_data($url){
    $ch = curl_init();
    if ($ch === false) {
        return;
    }

    $timeout = 30;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    $data = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on a transfer error; there is nothing to parse.
    if ($data === false || $data === '') {
        return;
    }

    // str_get_html() yields false when it refuses the input.
    $html = str_get_html($data);
    if (!$html) {
        return;
    }

    // Start from an empty list so the print loop is safe when nothing matches.
    $articles = array();

    foreach ($html->find('div.of') as $article) {
        $titleNode = $article->find('div.of_tit', 0);
        $introNode = $article->find('div.flr', 0);
        $endNode   = $article->find('div.of_end', 0);

        $item = array();
        $item['title'] = $titleNode ? $titleNode->plaintext : null;
        $item['intro'] = $introNode ? $introNode->plaintext : null;

        // The ad id is the path segment right after '/anunt/' in the footer link.
        $lk = $endNode ? $endNode->innertext : '';
        $ex = explode('<a href="/anunt/', $lk);

        if (isset($ex[1])) {
            $ex2 = explode('/', $ex[1]);
            $item['lk'] = $ex2[0];
            $item['continut'] = extrage_info('http://m.anuntul.ro/anunt/'.$ex2[0].'');
        } else {
            // No ad link in this entry: skip the detail request rather than
            // fetching a URL with an empty id.
            $item['lk'] = '';
            $item['continut'] = null;
        }

        $articles[] = $item;
    }

    foreach ($articles as $articole) {
        print_r($articole);
    }

    $html->clear();
    unset($html);
}









// Listing pages to scrape: pages 1 through 5 of category 19, requested with
// the ord=date and sor=asc parameters. Only the page number differs between
// the URLs, so they are generated rather than spelled out.
$urls = array();
for ($page = 1; $page <= 5; $page++) {
    $urls[] = 'http://m.anuntul.ro/anunturi/19&ord=date&sor=asc&p=' . $page;
}

foreach ($urls as $url) {
    // get_data() prints the scraped records itself and returns nothing, so
    // there is no result to echo here.
    get_data($url);
}

?>

Un mic scraper PHP facut de mine, sper sa va fie de folos. Foloseste simple_html_dom, care poate fi luat de pe SourceForge.

Aplicatia merge combinata cu Scrappy PHP class.

Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...