Jump to content
Vhaerun

Primii pasi spre crearea unui browser

Recommended Posts

Nu am mai postat de ceva timp un tutorial de programare, so here goes. Asta va fi in Java, just because. O sa va arat doua variante de a accesa site-uri din codul vostru: prima foloseste clasele puse la dispozitie de Sun, care ascund detaliile de utilizatorul de rand, iar a doua este una mai low-level, facuta prin socket-uri. Prima varianta ar fi urmatoarea:


import java.io.*;
import java.net.*;
import java.util.*;

/**
 * Minimal web client built on {@link URL} and {@link URLConnection}.
 *
 * <p>The class keeps no state between calls: every request opens its own
 * connection and reader, and closes the reader when it is done.
 *
 * <p>The response is decoded with the platform default charset, because
 * {@link InputStreamReader} is created without an explicit one.
 */
public class WebClient {

    public WebClient() {
    }

    /**
     * Fetches the resource at {@code site} and prints it to standard output,
     * one line at a time.
     *
     * @param site absolute URL of the resource, e.g. {@code http://www.google.ro}
     * @throws IOException if the URL is malformed or the resource cannot be read
     */
    public void getPrint(String site) throws IOException {
        BufferedReader reader = open(site);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            // Closing the reader also closes the underlying connection stream.
            reader.close();
        }
    }

    /**
     * Fetches the resource at {@code site} and returns its text.
     *
     * <p>Lines are concatenated without their line terminators. The result
     * contains only the resource fetched by this call; nothing is carried
     * over from earlier calls.
     *
     * @param site absolute URL of the resource
     * @return the text of the resource with line terminators removed
     * @throws IOException if the URL is malformed or the resource cannot be read
     */
    public String get(String site) throws IOException {
        StringBuilder content = new StringBuilder();
        BufferedReader reader = open(site);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        } finally {
            reader.close();
        }
        return content.toString();
    }

    /** Opens a buffered, line-oriented reader over the resource at {@code site}. */
    private static BufferedReader open(String site) throws IOException {
        URLConnection connection = new URL(site).openConnection();
        return new BufferedReader(new InputStreamReader(connection.getInputStream()));
    }

    public static void main(String[] args) throws Exception {
        WebClient wc = new WebClient();
        // To stream the page straight to stdout instead, call wc.getPrint(...) with the same URL.
        System.out.println(wc.get("http://www.google.ro"));
    }

}

Avantajele acestei abordari ar fi:

- aceste clase sunt testate si rastestate , deci vor functiona

- clasa URL seteaza singura host si document , deci nu mai trebuie sa te chinui cu expresii regulate / substring / whatever , pentru a obtine ce te intereseaza dintr-un url

- clasa URLConnection se conecteaza singura , si cere paginile , in conformitate cu standardul HTTP , deci nici aici nu trebuie sa stii cum functioneaza HTTP

Dezavantajul ar fi :

- un utilizator de rand nu stie ce se intampla in spate

A doua abordare ar fi ceva scris de la zero .


import java.io.*;
import java.util.StringTokenizer;
import java.net.*;


/**
 * Bare-bones HTTP/1.0 client written directly on top of a {@link Socket}.
 *
 * <p>Usage: call {@link #get(String)}, then read the outcome through
 * {@link #getContent()}, {@link #getResponseCode()} and {@link #isSuccess()}.
 * Redirects are not followed, and only plain {@code http://} on port 80 is
 * supported.
 */
public class SimpleClient {

    private static final String HTTP_PREFIX = "http://";
    private static final String WWW_PREFIX = "www.";
    private static final int HTTP_PORT = 80;
    /** HTTP header lines end with CRLF regardless of the platform line separator. */
    private static final String CRLF = "\r\n";

    private int responseCode;
    private String content = "";

    public SimpleClient() {
    }

    /**
     * Tells whether the last request ended with a 2xx or 3xx status code.
     *
     * @return {@code true} if the stored response code is in {@code [200, 400)}
     */
    public boolean isSuccess() {
        return (responseCode >= 200 && responseCode < 400);
    }

    /**
     * Extracts the document part (path and anything after it) of a URL.
     *
     * @param url a URL with or without a leading {@code http://}
     * @return everything from the first {@code /} after the host onwards,
     *         or {@code "/"} when the URL has no path
     */
    public String getDoc(String url) {
        String rest = stripScheme(url);
        int slash = rest.indexOf('/');
        return slash == -1 ? "/" : rest.substring(slash);
    }

    /**
     * Extracts the host part of a URL.
     *
     * <p>A leading {@code www.} is removed from the host, so
     * {@code http://www.example.com/a} yields {@code example.com}.
     *
     * @param url a URL with or without a leading {@code http://}
     * @return the host name without a leading {@code www.}
     */
    public String getHost(String url) {
        String rest = stripScheme(url);
        int slash = rest.indexOf('/');
        String host = slash == -1 ? rest : rest.substring(0, slash);
        // Only a "www." at the very start of the host is a prefix; the same
        // text appearing later (e.g. in the path) must be left alone.
        if (host.startsWith(WWW_PREFIX)) {
            host = host.substring(WWW_PREFIX.length());
        }
        return host;
    }

    /** Returns {@code url} without a leading {@code http://}, if it has one. */
    private static String stripScheme(String url) {
        return url.startsWith(HTTP_PREFIX) ? url.substring(HTTP_PREFIX.length()) : url;
    }

    /**
     * Sends a {@code GET} request for {@code site} and stores the raw
     * response (status line, headers and body, one {@code \n} per line) and
     * its status code in this object.
     *
     * <p>Results of a previous call are discarded first. I/O failures are
     * reported on standard error and leave the response code at 0, so
     * {@link #isSuccess()} returns {@code false}.
     *
     * @param site a URL with or without a leading {@code http://}
     */
    public void get(String site) {
        // Start from a clean slate so results of earlier calls never leak in.
        content = "";
        responseCode = 0;

        String host = getHost(site);
        Socket socket = null;
        try {
            socket = new Socket(host, HTTP_PORT);
            PrintWriter out = new PrintWriter(socket.getOutputStream());
            BufferedReader in = new BufferedReader(new InputStreamReader(socket.getInputStream()));

            out.print("GET " + getDoc(site) + " HTTP/1.0" + CRLF);
            out.print("Host: " + host + CRLF);
            out.print(CRLF); // a single empty line terminates the request headers
            out.flush();

            StringBuilder response = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                response.append(line).append('\n');
            }
            content = response.toString();
            parseResponseCode(content);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (socket != null) {
                try {
                    socket.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Reads the status code from the first line of a raw response, expected
     * to look like {@code HTTP/1.0 200 OK}. Sets the response code to 0 when
     * the line does not contain a numeric second token.
     */
    private void parseResponseCode(String content) {
        int eol = content.indexOf('\n');
        String statusLine = eol == -1 ? content : content.substring(0, eol);
        StringTokenizer st = new StringTokenizer(statusLine);
        if (st.countTokens() < 2) {
            responseCode = 0;
            return;
        }
        st.nextToken(); // skip the protocol version
        try {
            responseCode = Integer.parseInt(st.nextToken());
        } catch (NumberFormatException e) {
            responseCode = 0;
        }
    }

    /**
     * Returns the text found between the first occurrence of {@code start}
     * and the next occurrence of {@code end} after it, excluding both markers.
     *
     * @param text  the text to search
     * @param start marker preceding the wanted text
     * @param end   marker following the wanted text
     * @return the enclosed text, or an empty string if either marker is missing
     */
    public String extractTextBetween(String text, String start, String end) {
        int startPos = text.indexOf(start);
        if (startPos == -1) {
            return "";
        }
        int from = startPos + start.length();
        int to = text.indexOf(end, from);
        if (to == -1) {
            return "";
        }
        return text.substring(from, to);
    }

    private void setContent(String content) {
        this.content = content;
    }

    /**
     * @return the raw response stored by the last {@link #get(String)} call,
     *         or an empty string if there is none
     */
    public String getContent() {
        return content;
    }

    private void setResponseCode(int responseCode) {
        this.responseCode = responseCode;
    }

    /**
     * @return the status code stored by the last {@link #get(String)} call,
     *         or 0 if there is none
     */
    public int getResponseCode() {
        return responseCode;
    }

    public static void main(String[] args) {
        SimpleClient sc = new SimpleClient();
        sc.get("s2.bitefight.ro/bite/uebersicht.php");
        System.out.println(sc.getContent() + "\n" + sc.getResponseCode());
    }

}

Codul in sine nu e greu de inteles; poate partea cea mai complicata ar fi functiile getDoc si getHost. Am evitat sa folosesc expresiile regulate din Java, pentru ca nu prea sunt PCRE. Acest cod, scris cu socket-uri, nu urmeaza redirecturi; in schimb, HttpURLConnection (clasa din spatele lui URLConnection pentru adrese http) le urmeaza implicit, atata timp cat redirectul ramane pe acelasi protocol.

Pentru a afla mai multe informatii despre HTTP si browsere in general , eu recomand cartea "Web Client programming in Perl" , care poate fi gasita aici : http://www.oreilly.com/openbook/webclient/ . Daca nu esti interesat de perl , poti citi doar primele 2 capitole , care iti vor explica tot ce ai nevoie sa stii :)

Have fun

Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...