Jump to content

Recommended Posts

pentru a extrage pana la ultima pagina poti proceda asa :


' Signature of the log-writer callback: the text to append and the colour to draw it in.
Delegate Sub SetTextDelegate(ByVal sText As String, ByVal c As Color)

' Ready-made delegate instance bound to SetTextBoxText; the rest of the file
' passes it to Invoke(...) to write to the log.
Private SetText As New SetTextDelegate(AddressOf SetTextBoxText)

''' <summary>
''' Appends one line of text to RichTextBox1 in the requested colour and
''' scrolls the box so the caret is visible.
''' </summary>
''' <param name="sText">Text to append; a line break is added after it.</param>
''' <param name="color">Colour applied to the appended text only.</param>
Private Sub SetTextBoxText(ByVal sText As String, ByVal color As Color)
    ' Move the (empty) selection to the end so the colour applies to the new text.
    RichTextBox1.SelectionStart = RichTextBox1.TextLength
    RichTextBox1.SelectionColor = color
    RichTextBox1.AppendText(sText & Environment.NewLine)

    ' Restore the default colour for anything appended afterwards.
    RichTextBox1.SelectionColor = RichTextBox1.ForeColor
    RichTextBox1.ScrollToCaret()
End Sub

1. functia


''' <summary>
''' Collects every substring of <paramref name="strSource"/> that lies between an
''' occurrence of <paramref name="strStart"/> and the next occurrence of
''' <paramref name="strEnd"/> after it.
''' </summary>
''' <param name="strSource">Text to scan. Nothing or empty yields an empty list.</param>
''' <param name="strStart">Opening marker. Nothing or empty yields an empty list.</param>
''' <param name="strEnd">Closing marker. Nothing yields an empty list.</param>
''' <param name="startPos">
''' Index at which scanning starts. Because it is ByRef, on return it holds the
''' index just past the last start marker that produced a result.
''' A negative value raises ArgumentOutOfRangeException (from String.IndexOf).
''' </param>
''' <returns>The extracted substrings in order of appearance; never Nothing.</returns>
Public Function GBA_GOOGLE(ByRef strSource As String, ByRef strStart As String, ByRef strEnd As String, Optional ByRef startPos As Integer = 0) As List(Of String)
    Dim results As New List(Of String)

    ' An empty start marker matches at every position without advancing, which
    ' previously caused an endless loop; treat unusable input as "no matches".
    If String.IsNullOrEmpty(strSource) OrElse String.IsNullOrEmpty(strStart) OrElse strEnd Is Nothing Then
        Return results
    End If

    Dim lenStart As Integer = strStart.Length

    Do While startPos <= strSource.Length
        ' Ordinal search keeps the match length equal to strStart.Length, which
        ' the offset arithmetic below relies on.
        Dim iPos As Integer = strSource.IndexOf(strStart, startPos, StringComparison.Ordinal)
        If iPos = -1 Then Exit Do

        Dim contentStart As Integer = iPos + lenStart
        Dim iEnd As Integer = strSource.IndexOf(strEnd, contentStart, StringComparison.Ordinal)

        ' No closing marker after this start marker means none can follow any
        ' later start marker either, so stop instead of spinning forever.
        If iEnd = -1 Then Exit Do

        results.Add(strSource.Substring(contentStart, iEnd - contentStart))

        ' Resume right after the start marker (not after the end marker), as the
        ' original implementation did.
        startPos = contentStart
    Loop

    Return results
End Function

2. functia

''' <summary>
''' Returns the space-trimmed text found between the first occurrence of
''' <paramref name="sStart"/> and the next occurrence of <paramref name="sStop"/>.
''' </summary>
''' <param name="sSearch">Text to search.</param>
''' <param name="sStart">Marker that precedes the wanted text.</param>
''' <param name="sStop">Marker that follows the wanted text.</param>
''' <param name="lSearch">1-based position at which the search for sStart begins.</param>
''' <returns>
''' The trimmed text, or Nothing when either marker is missing or when the stop
''' marker immediately follows the start marker.
''' </returns>
Private Function GetBetween(ByVal sSearch As String, ByVal sStart As String, ByVal sStop As String, Optional ByVal lSearch As Integer = 1) As String
    ' Positions are 1-based because InStr/Mid are used.
    Dim markerPos As Integer = InStr(lSearch, sSearch, sStart)
    If markerPos <= 0 Then
        Return Nothing
    End If

    Dim contentPos As Integer = markerPos + Len(sStart)
    Dim stopPos As Integer = InStr(contentPos, sSearch, sStop)

    ' Covers both "stop marker not found" (0) and "nothing between the markers".
    If stopPos <= contentPos Then
        Return Nothing
    End If

    Return Trim(Mid(sSearch, contentPos, stopPos - contentPos))
End Function

3. asta il lansezi intr-un thread nou, dar trebuie sa controlezi numarul de thread-uri daca faci un for each

    ''' <summary>
    ''' Starts one background Gcrawl thread per entry in Dorks, substituting the
    ''' entry for the "%dork%" placeholder in TextBox2.Text, and pauses between
    ''' launches while CurrentThreads is at or above MaxThreads.
    ''' Entries are skipped while CheckBox1 is unchecked.
    ''' </summary>
    Public Sub Crwl()
        For Each Dork As String In Dorks
            ' Re-read on every iteration so unchecking the box stops further launches.
            If Not CheckBox1.Checked Then Continue For

            ' Build the URL once so the started thread and the log line always agree.
            Dim searchUrl As String = TextBox2.Text.Replace("%dork%", Dork)

            Dim worker As New System.Threading.Thread(AddressOf Gcrawl)
            worker.IsBackground = True

            ' The counter is also modified by worker threads; a plain "+= 1" is a
            ' non-atomic read-modify-write and can lose updates.
            System.Threading.Interlocked.Increment(CurrentThreads)
            worker.Start(searchUrl)

            Invoke(SetText, New Object() {"Thread started for : " & searchUrl, Color.Lime})

            ' Throttle: wait until a worker has released a slot.
            Do While CurrentThreads >= MaxThreads
                System.Threading.Thread.Sleep(1000)
            Loop
        Next
    End Sub

' Threshold for the launch throttle: Crwl and GetGoogleResult sleep in a loop
' while CurrentThreads >= MaxThreads before continuing.
Public MaxThreads as integer = 5

CurrentThreads : Public CurrentThreads as integer = 0

cand lansezi un thread faci CurrentThreads +=1

in thread , inainte de END SUB pui CurrentThreads -=1

4. crawl thread


''' <summary>
''' Thread entry point: hands the URL to a new background GetGoogleResult
''' thread, waits three seconds, then decrements CurrentThreads.
''' </summary>
''' <param name="URL">The URL to fetch; passed through unchanged to GetGoogleResult.</param>
Public Sub Gcrawl(ByVal URL As Object)
    Dim fetcher As New System.Threading.Thread(AddressOf GetGoogleResult)
    fetcher.IsBackground = True
    fetcher.Start(URL)

    ' The slot is released after a fixed delay, not when the fetch completes.
    System.Threading.Thread.Sleep(3000)

    ' Several threads modify this counter; a plain "-= 1" is a non-atomic
    ' read-modify-write and can lose updates.
    System.Threading.Interlocked.Decrement(CurrentThreads)
End Sub

5. crawl extract thread


''' <summary>
''' Downloads one result page, records every result link that is neither
''' ignored nor already known, and, when the page has a "next" link, starts a
''' new Gcrawl thread for it.
''' </summary>
''' <param name="URL">Address of the result page to download.</param>
''' <remarks>
''' Progress and errors are reported through Invoke(SetText, ...). New links are
''' appended to the found collection and written to the o writer.
''' </remarks>
Public Sub GetGoogleResult(ByVal URL As String)
    Dim Http As New Chilkat.Http
    Try
        If Not Http.UnlockComponent("Anything for 30-day trial") Then
            Invoke(SetText, New Object() {"Failed to activate Chilkat Component.", Color.Red})
            Invoke(SetText, New Object() {"Thread exit.", Color.Red})
            Exit Sub
        End If

        Http.UserAgent = "Mozilla/5.0 (Windows NT 5.1; WOW86; rv:26.0) Gecko/21100101 Firefox/25.0"
        Http.FollowRedirects = True

        Dim html As String = Http.QuickGetStr(URL)
        If String.IsNullOrEmpty(html) Then
            Invoke(SetText, New Object() {"Google has detected our crawl.", Color.Red})
            GooGL = True
            Exit Sub
        End If

        ' Normalise double quotes to single quotes so the markers below can be
        ' written without escaping.
        html = html.Replace(Chr(34), "'")

        ' GetBetween returns Nothing when the "next" link is absent; handle that
        ' explicitly instead of relying on a NullReferenceException.
        Dim nextq As String = GetBetween(html, "</a></td><td class='b navend'><a href='", "' class='pn' id='pnnext' style='text-decoration:none;text-align:left'>")
        If nextq Is Nothing Then
            nextq = String.Empty
        Else
            ' The href is taken from HTML source, so ampersands arrive entity-encoded.
            nextq = nextq.Replace("&amp;", "&")
        End If

        If nextq.Length > 0 Then
            Invoke(SetText, New Object() {"Next URL : http://" & Http.GetDomain(URL) & nextq, Color.Lime})
        End If

        For Each result As String In GBA_GOOGLE(html, "<h3 class='r'><a href='", "' onmousedown='return")
            Dim link As String = result
            If Not link.Contains("http") Then
                link = "http://" & link
            End If

            ' A link is skipped when it contains ANY ignore token. The previous
            ' per-token test accepted a link as soon as one token was absent and
            ' accepted nothing at all when the ignore list was empty.
            Dim ignored As Boolean = False
            For Each ign As String In Ignore
                If link.Contains(ign) Then
                    ignored = True
                    Exit For
                End If
            Next

            If ignored OrElse found.Contains(link) Then Continue For

            Invoke(SetText, New Object() {"Found " & link, Color.Blue})
            found.Add(link)
            o.WriteLine(link)
            o.Flush()
        Next

        If html.Contains("may be sending automated queries") Then
            ' NOTE(review): the empty-response branch above also sets GooGL = True;
            ' this branch never did. Confirm whether that difference is intended.
            Invoke(SetText, New Object() {"Google has detected our crawl.", Color.Red})
            Exit Sub
        End If

        If nextq.Length = 0 Then
            ' CurrentThreads is deliberately not decremented here: nothing in this
            ' routine incremented it for the current page (Gcrawl performs its own
            ' decrement), so the extra decrement drove the counter negative and
            ' progressively disabled the MaxThreads throttle.
            Invoke(SetText, New Object() {"Search ended !", Color.Lime})
            Exit Sub
        End If

        ' Throttle before following the "next" link.
        Do While CurrentThreads >= MaxThreads
            System.Threading.Thread.Sleep(5000)
        Loop
        System.Threading.Thread.Sleep(3000)

        Dim nextUrl As String = "http://" & Http.GetDomain(URL) & nextq
        Dim worker As New System.Threading.Thread(AddressOf Gcrawl)
        worker.IsBackground = True

        ' Atomic update: the counter is shared with other threads. Paired with
        ' the decrement performed by the Gcrawl thread started below.
        System.Threading.Interlocked.Increment(CurrentThreads)
        worker.Start(nextUrl)
    Finally
        ' Runs on every exit path, including the early Exit Sub statements that
        ' previously skipped disposal.
        Http.Dispose()
    End Try
End Sub

asta intra pe google, ia linkurile, extrage url-ul de sub NEXT (adica url-ul cu pagina urmatoare) si lanseaza un thread nou de crawl pentru linkul NEXT (se face un loop, ca sa zic asa, care se opreste cand url-ul NEXT nu mai este gasit)

bine eu ti-am dat un exemplu gen thread -> thread -> thread(crawl) , un thread este lasat cam degeaba dar isi face treaba (Tu il poti simplifica)

eu am folosit chilkat pentru .NET Chilkat Zip Component, .NET Email Component, SMTP Component, Encryption, Compression, IMAP, POP3, SSH, SFTP Component, FTP, Zip, S/MIME, XML, ActiveX, Assembly, HTTP, C#, VB.NET, ASP.NET, C++, Delphi, Library, Upload , crack aici : https://rstforums.com/forum/73805-rst-chilkat-net-component-crack.rst

recomand chilkat pentru ca poti seta proxy cu usurinta.incearca sa folosesti tor onion la crawl ; dupa fiecare crawl faci un sleep , te conectezi la tor (am uitat portu pentru semnale tor) , trimiti "NEWNYM" (new identity) dupa care treci la urmatorul dork .. si tot asa.

Spor

Edited by Maximus
Link to comment
Share on other sites

Guest
This topic is now closed to further replies.


×
×
  • Create New...