Jump to content
Vhaerun

Primii pasi spre crearea unui browser

Recommended Posts

Posted

Nu am mai postat de ceva timp un tutorial de programare, so here goes. Asta va fi in Java, because. O sa va arat doua variante de a accesa site-uri din codul vostru: prima foloseste clasele puse la dispozitie de Sun, care ascund detaliile de utilizatorul de rand, iar a doua, mai low-level, este facuta prin socket-uri. Prima varianta ar fi urmatoarea:


import java.io.*;
import java.net.*;
import java.util.*;

/**
 * Minimal web client built on {@link URL} and {@link URLConnection}.
 *
 * <p>The class keeps no state between calls: every request opens its own
 * connection, reads the response body line by line and closes the reader
 * before returning.
 */
public class WebClient {

    public WebClient() {

    }

    /**
     * Fetches {@code site} and prints the response body to standard output,
     * one line per {@code println} call.
     *
     * @param site absolute URL to fetch, e.g. {@code "http://www.google.ro"}
     * @throws Exception if the URL is malformed or the connection/read fails
     */
    public void getPrint(String site) throws Exception {
        BufferedReader reader = open(site);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            // Always release the underlying stream, even when reading fails.
            reader.close();
        }
    }

    /**
     * Fetches {@code site} and returns the response body as a single string.
     *
     * <p>Lines are concatenated <em>without</em> line terminators, exactly as
     * the reader hands them over. Each call starts from an empty buffer, so
     * the result never contains text from a previous request.
     *
     * @param site absolute URL to fetch
     * @return the body of the response with line terminators removed
     * @throws Exception if the URL is malformed or the connection/read fails
     */
    public String get(String site) throws Exception {
        // Local buffer: a shared field would leak the previous page into this one.
        StringBuilder body = new StringBuilder();
        BufferedReader reader = open(site);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        } finally {
            reader.close();
        }
        return body.toString();
    }

    /**
     * Opens a connection to {@code site} and wraps its input stream in a
     * buffered reader (platform default charset). The caller must close
     * the returned reader.
     */
    private static BufferedReader open(String site) throws IOException {
        URLConnection connection = new URL(site).openConnection();
        return new BufferedReader(new InputStreamReader(connection.getInputStream()));
    }

    public static void main(String[] args) throws Exception {

        WebClient wc = new WebClient();
        //wc.getPrint("http://www.google.ro");
        System.out.println(wc.get("http://www.google.ro"));
    }

}

Avantajul acestei abordari ar fi :

- aceste clase sunt testate si rastestate , deci vor functiona

- clasa URL seteaza singura host si document , deci nu mai trebuie sa te chinui cu expresii regulate / substring / whatever , pentru a obtine ce te intereseaza dintr-un url

- clasa URLConnection se conecteaza singura , si cere paginile , in conformitate cu standardul HTTP , deci nici aici nu trebuie sa stii cum functioneaza HTTP

Dezavantajul ar fi :

- un utilizator de rand nu stie ce se intampla in spate

A doua abordare ar fi ceva scris de la zero .


import java.io.*;
import java.util.StringTokenizer;
import java.net.*;


/**
 * Bare-bones HTTP/1.0 client written directly on top of a {@link Socket}.
 *
 * <p>It sends a single {@code GET} request to port 80, stores the raw
 * response (status line, headers and body) and exposes the numeric status
 * code. Redirects are not followed and only plain {@code http://} URLs
 * (or URLs with no scheme at all) are understood.
 */
public class SimpleClient {

    private static final String HTTP_PREFIX = "http://";
    private static final String WWW_PREFIX = "www.";
    private static final String CRLF = "\r\n";
    private static final int HTTP_PORT = 80;

    /** Status code of the last response, or 0 when none was parsed. */
    private int responseCode;
    /** Raw text of the last response, lines joined with {@code '\n'}. */
    private String content = "";

    public SimpleClient() {

    }

    /**
     * Reports whether the last request ended with a 2xx or 3xx status code.
     *
     * @return {@code true} if the stored response code is in [200, 400)
     */
    public boolean isSuccess() {
        return (responseCode >= 200 && responseCode < 400);
    }

    /**
     * Extracts the document (path) part of a URL.
     *
     * @param url URL with or without a leading {@code http://}
     * @return everything from the first {@code '/'} after the host onward,
     *         or {@code "/"} when the URL has no path
     */
    public String getDoc(String url) {
        String rest = stripScheme(url);
        int slash = rest.indexOf('/');
        return (slash == -1) ? "/" : rest.substring(slash);
    }

    /**
     * Extracts the host part of a URL.
     *
     * <p>A leading {@code "www."} is removed from the host, so
     * {@code "http://www.example.com/x"} yields {@code "example.com"}.
     * NOTE(review): this stripping is kept for backward compatibility;
     * {@code www.example.com} and {@code example.com} are not guaranteed
     * to be the same server - confirm callers really want it.
     *
     * @param url URL with or without a leading {@code http://}
     * @return the host name without scheme, path or leading {@code "www."}
     */
    public String getHost(String url) {
        String rest = stripScheme(url);
        int slash = rest.indexOf('/');
        String host = (slash == -1) ? rest : rest.substring(0, slash);
        // Only a prefix counts: "www." inside the path must not affect the host.
        if (host.startsWith(WWW_PREFIX)) {
            host = host.substring(WWW_PREFIX.length());
        }
        return host;
    }

    /** Returns {@code url} without a leading {@code "http://"}, if present. */
    private static String stripScheme(String url) {
        return url.startsWith(HTTP_PREFIX) ? url.substring(HTTP_PREFIX.length()) : url;
    }

    /**
     * Requests {@code site} with an HTTP/1.0 {@code GET} and stores the raw
     * response and its status code in this object.
     *
     * <p>State from a previous call is discarded first. Failures are not
     * propagated: the stack trace is printed and whatever was read before
     * the failure is kept as the content.
     *
     * @param site URL to fetch, with or without a leading {@code http://}
     */
    public void get(String site) {
        // Start clean so a failed or repeated request never reports stale data.
        content = "";
        responseCode = 0;

        StringBuilder response = new StringBuilder();
        Socket socket = null;
        try {
            String host = getHost(site);
            socket = new Socket(host, HTTP_PORT);
            PrintWriter out = new PrintWriter(socket.getOutputStream());
            BufferedReader in = new BufferedReader(new InputStreamReader(socket.getInputStream()));

            // HTTP mandates CRLF line endings; println() would use the platform separator.
            out.print("GET " + getDoc(site) + " HTTP/1.0" + CRLF);
            out.print("Host: " + host + CRLF);
            out.print(CRLF);
            out.flush();

            String line;
            while ((line = in.readLine()) != null) {
                response.append(line).append('\n');
            }
            parseResponseCode(response.toString());
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            content = response.toString();
            if (socket != null) {
                try {
                    socket.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Parses the status code out of the first line of {@code content}
     * (expected shape: {@code HTTP/1.x <code> <reason>}).
     *
     * @throws IOException if the status line has fewer than two tokens or
     *         the second token is not an integer
     */
    private void parseResponseCode(String content) throws IOException {
        int eol = content.indexOf('\n');
        String statusLine = (eol == -1) ? content : content.substring(0, eol);

        StringTokenizer tokens = new StringTokenizer(statusLine);
        if (tokens.countTokens() < 2) {
            throw new IOException("Malformed HTTP status line: \"" + statusLine + "\"");
        }
        tokens.nextToken(); // protocol version, e.g. "HTTP/1.0"
        try {
            responseCode = Integer.parseInt(tokens.nextToken());
        }
        catch (NumberFormatException e) {
            throw new IOException("Malformed HTTP status line: \"" + statusLine + "\"", e);
        }
    }

    /**
     * Returns the text found strictly between the first occurrence of
     * {@code start} and the first occurrence of {@code end} that follows it.
     *
     * @param text  text to search
     * @param start opening delimiter (not included in the result)
     * @param end   closing delimiter (not included in the result)
     * @return the enclosed text, or an empty string when either delimiter
     *         cannot be found in that order
     */
    public String extractTextBetween(String text, String start, String end) {
        int startPos = text.indexOf(start);
        if (startPos == -1) {
            return "";
        }
        int from = startPos + start.length();
        // Search for the end delimiter only after the start delimiter.
        int to = text.indexOf(end, from);
        if (to == -1) {
            return "";
        }
        return text.substring(from, to);
    }

    private void setContent(String content) {
        this.content = content;
    }

    private String getContent() {
        return content;
    }

    private void setResponseCode(int responseCode) {
        this.responseCode = responseCode;
    }

    private int getResponseCode() {
        return responseCode;
    }

    public static void main(String[] args) {
        SimpleClient sc = new SimpleClient();
        sc.get("s2.bitefight.ro/bite/uebersicht.php");
        System.out.println(sc.getContent() + "\n" + sc.getResponseCode());

    }

}

Codul in sine nu e greu de inteles; poate partea cea mai complicata ar fi functiile getDoc si getHost. Am evitat sa folosesc expresiile regulate din Java, pentru ca nu prea sunt PCRE. Acest cod, scris cu socket-uri, nu urmeaza redirecturi. URLConnection (mai exact HttpURLConnection) le urmeaza implicit, dar numai in cadrul aceluiasi protocol (nu si de la http la https).

Pentru a afla mai multe informatii despre HTTP si browsere in general , eu recomand cartea "Web Client programming in Perl" , care poate fi gasita aici : http://www.oreilly.com/openbook/webclient/ . Daca nu esti interesat de perl , poti citi doar primele 2 capitole , care iti vor explica tot ce ai nevoie sa stii :)

Have fun

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...