paxnWo Posted August 13, 2010 Report Posted August 13, 2010 (edited) E scris cu picioarele, n-am mai lucrat la el, trebuia sa fie asynchronous, optional ii setai tu smtpuri, trebuia sa trimita mail cum extragea unul, trebuia sa scot ce e inutil de prin el, dar isi face treaba si daca aveti host se misca repede. Daca vreti lucrati pe el, va dati seama singur ce face, ce afiseaza, cum lucreaza.
<?php
// Small crawler: given a craigslist sub-domain ("stat") and a paging index,
// it downloads the /cta/ index pages, collects every link containing the
// sub-domain name, fetches each link and prints the e-mail addresses found.

set_time_limit(0);

/**
 * Download the body of a URL.
 *
 * Uses cURL when the extension is loaded, otherwise falls back to fopen().
 *
 * @param string $url URL to fetch.
 * @param string $ref Value sent as the Referer header (cURL path only).
 * @return string Response body, or an empty string when the fetch failed.
 */
function fread_url($url, $ref = "")
{
    $html = "";

    if (function_exists("curl_init")) {
        $user_agent = "Mozilla/4.0 (compatible; MSIE 5.01; " . "Windows NT 5.0)";

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_REFERER, $ref);
        curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
        $body = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on failure; callers only ever treat the
        // result as text, so normalise to a string.
        if (is_string($body)) {
            $html = $body;
        }
    } else {
        $hfile = fopen($url, "r");
        if ($hfile) {
            while (!feof($hfile)) {
                $chunk = fgets($hfile, 1024);
                if ($chunk === false) {
                    break;
                }
                $html .= $chunk;
            }
            fclose($hfile);
        }
    }

    return $html;
}

/**
 * Return the distinct e-mail addresses found in a piece of text.
 *
 * @param string $html Text to scan.
 * @return array Unique addresses; keys are the positions of the first
 *               occurrence among all matches (not re-indexed).
 */
function pax_extract_emails($html)
{
    $found = preg_match_all(
        "/[a-z0-9]+([_\\.-][a-z0-9]+)*@([a-z0-9]+([\.-][a-z0-9]+)*)+\\.[a-z]{2,}/i",
        $html,
        $matches
    );

    // 0 matches or a PCRE error (false): nothing to report.
    if (!$found) {
        return array();
    }

    return array_unique($matches[0]);
}

/**
 * Return the href values of the anchors in $html that contain $needle.
 *
 * @param string $html   Markup to scan.
 * @param string $needle Substring every returned href must contain.
 * @return array List of matching href values, in document order.
 */
function pax_extract_links($html, $needle)
{
    $links = array();

    // $matches is filled by reference through the function signature;
    // call-time "&$matches" is a fatal error since PHP 5.4.
    $found = preg_match_all(
        "/a[\s]+[^>]*?href[\s]?=[\s\"\']+".
        "(.*?)[\"\']+.*?>"."([^<]+|.*?)?<\/a>/",
        $html,
        $matches
    );
    if (!$found) {
        return $links;
    }

    foreach ($matches[1] as $href) {
        if (strpos($href, $needle) !== false) {
            $links[] = $href;
        }
    }

    return $links;
}

/**
 * Fetch a page and return the distinct e-mail addresses it contains.
 *
 * @param string $link URL of the page to scan.
 * @return array Unique e-mail addresses found on the page.
 */
function getEmail($link)
{
    return pax_extract_emails(fread_url($link));
}

if (isset($_POST['submit'])) {
    $start = time();

    $index = (isset($_POST['index']) && is_string($_POST['index'])) ? trim($_POST['index']) : '';
    $stat = (isset($_POST['stat']) && is_string($_POST['stat'])) ? trim($_POST['stat']) : '';

    // $stat becomes part of the host name that is requested and is echoed
    // back to the page, so only a plain host label is accepted. $index is
    // used in arithmetic, so only digits (or nothing) are accepted.
    $validStat = preg_match('/^[a-z0-9]+(-[a-z0-9]+)*$/Di', $stat) === 1;
    $validIndex = preg_match('/^[0-9]*$/D', $index) === 1;

    if (!$validStat || !$validIndex) {
        echo "Parametri invalizi.<br>";
        echo "<FORM><INPUT type=button value=' Back ' onClick='history.back();'></FORM>";
    } else {
        // Paginated index pages are named index100.html, index200.html, ...
        // for a 3-digit index and index1000.html, ... for a 4-digit one.
        // Any other length means only the first index page is crawled.
        $digits = strlen($index);
        if ($digits == 3) {
            $step = 100;
        } elseif ($digits == 4) {
            $step = 1000;
        } else {
            $step = 0;
        }
        $pages = ($step > 0) ? (int) floor((int) $index / $step) : 0;

        $array1 = array();
        for ($i = 1; $i <= $pages; $i++) {
            $url = "http://" . $stat . ".craigslist.org/cta/index" . ($i * $step) . ".html";
            $array1 = array_merge($array1, pax_extract_links(fread_url($url), $stat));
        }

        // First index page. The original assigned this URL to $urlx but then
        // fetched $url, so this page was never actually read.
        $firstUrl = "http://" . $stat . ".craigslist.org/cta/index.html";
        $array2 = pax_extract_links(fread_url($firstUrl), $stat);

        echo "<br>";

        $x = 0; // links visited
        $c = 0; // e-mail addresses printed
        $unq = array_unique(array_merge($array1, $array2));
        foreach ($unq as $link) {
            $x++;
            foreach (getEmail($link) as $mails) {
                $c++;
                echo $mails . "<br>";
                // Push each address to the browser as soon as it is found.
                // ob_flush() raises a notice when no output buffer is active.
                if (ob_get_level() > 0) {
                    ob_flush();
                }
                flush();
            }
        }

        echo "Total linkuri sortate : " . $x . "<br>";
        echo "Total mailuri : " . $c . "<br>";
        echo "Din : " . htmlspecialchars($stat, ENT_QUOTES) . "<br>";
        echo "<FORM><INPUT type=button value=' Back ' onClick='history.back();'></FORM>";

        $end = time();
        echo "<br>";
        echo "Timp :";
        echo $end - $start;
    }
} else {
    echo '<center><h4>Pax Craigawler</h4>';
    echo '<form method="POST" action="';
    echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES);
    echo '" enctype="multipart/form-data"> Index:<input type="text" name="index"> Stat:<input type="text" name="stat"> <input type="submit" name="submit" value="Crawl !"> </form>';
    echo '</center>';
}
?> Edited August 13, 2010 by paxnWo Quote
napoletanii Posted October 24, 2010 Report Posted October 24, 2010 (permite-mi sa fac un post aiurea) Quote