Using a blocking TClientSocket Connection for HTML
Transfer
The hardest thing about creating your own socket
connection for use with the web is simply getting started. From the
help files, one can quickly learn how to connect a TClientSocket (from
the Internet group) to a given server. But once you've achieved a
connection, what do you do with it?
After connecting to your server, you must create
a TWinSocketStream. The TWinSocketStream is the stream that you will
use to actually read from and write to the server. Once the TWinSocketStream
is in place, you can begin making your commands to the server to retrieve
the html that you want to process.
Using the Read and Write methods, you can get all
of the information you need. The only problem with a blocking connection
is that there is no way to tell when the server is done sending information.
This can be handled by only taking a certain number of lines at a time,
or by parsing the text for an end html tag.
To begin reading from a server, a command of this
form must be sent:
GET path/file.html \r\n\r\n
This command, sent with your TWinSocketStream's
Write method, tells the server to begin sending the file to you.
It is your job to know when the file is complete.
//---------------------------------------------------------------------------
//Begin MainForm.h
#ifndef MainFormH
#define MainFormH
//---------------------------------------------------------------------------
#include <Classes.hpp>
#include <Controls.hpp>
#include <StdCtrls.hpp>
#include <Forms.hpp>
#include <ScktComp.hpp>
#include <ExtCtrls.hpp>
#include <ComCtrls.hpp>
//---------------------------------------------------------------------------
// Main form of the HTML grabber: owns the client socket, the URL entry
// field and the rich-edit control that receives the downloaded text.
class TForm1 : public TForm
{
__published:    // IDE-managed Components
    TClientSocket *ClientSocket1;   // socket opened against the host named in the URL
    TPanel *Panel1;
    TEdit *Edit1;                   // URL typed by the user
    TLabel *Label1;                 // shows the "HTML Grab: ..." connection state
    TPanel *Panel2;
    TButton *Button1;
    TButton *Button2;
    TLabel *Label2;
    TLabel *lblURL;                 // host currently being contacted, or "None"
    TLabel *Label3;
    TLabel *lblStatus;              // "Success" / "Idle"
    TButton *Button3;
    TLabel *Label4;
    TLabel *Label5;
    TLabel *lblCount;               // number of reads performed on the stream
    TRichEdit *Memo1;               // receives the retrieved text
    TEdit *edtMax;                  // upper bound on the number of reads

    // Socket event handlers.
    void __fastcall ClientSocket1Connect(TObject *Sender,
                                         TCustomWinSocket *Socket);
    void __fastcall ClientSocket1Connecting(TObject *Sender,
                                            TCustomWinSocket *Socket);

    // Button handlers.
    void __fastcall Button1Click(TObject *Sender);
    void __fastcall Button2Click(TObject *Sender);

    void __fastcall ClientSocket1Disconnect(TObject *Sender,
                                            TCustomWinSocket *Socket);
    void __fastcall Button3Click(TObject *Sender);

private:        // User declarations
    AnsiString URL;                 // copy of Edit1's text taken in Button1Click

public:         // User declarations
    __fastcall TForm1(TComponent* Owner);
};
//---------------------------------------------------------------------------
extern PACKAGE TForm1 *Form1;
//---------------------------------------------------------------------------
#endif
//---------------------------------------------------------------------------
//End MainForm.h
//Begin MainForm.cpp
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include "URLUtil.h"
#include "MainForm.h"
// Size of the receive buffer and the byte count requested from the socket
// stream on each Read call in ClientSocket1Connect.
#define LENGTH 80
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
// Definition of the form pointer that MainForm.h declares extern.
TForm1 *Form1;
//---------------------------------------------------------------------------
// Constructor: performs no work of its own beyond forwarding the owner to
// the TForm base class.
__fastcall TForm1::TForm1(TComponent* Owner) : TForm(Owner) {}
//---------------------------------------------------------------------------
// Socket OnConnect handler: sends a GET request for the path part of the URL
// in Edit1 and copies the reply into Memo1.
//
// The reply is read in pieces of at most LENGTH bytes. Reading stops when
//   - a closing </html> tag has been received (any letter case, and also
//     when the tag is split across two reads),
//   - a read returns no data (timeout or connection closed), or
//   - the number of reads reaches the value typed into edtMax.
//
// Sender is unused. Socket is the connected socket the stream is built on.
// Exceptions from edtMax->Text.ToInt() (non-numeric text) or from the socket
// stream propagate to the caller; the stream is released in every case.
void __fastcall TForm1::ClientSocket1Connect(TObject *Sender,
                                             TCustomWinSocket *Socket)
{
    const int streamTimeoutMs = 10000;   // timeout handed to TWinSocketStream
    const AnsiString endTag = "</html>"; // compared against lower-cased input
    const AnsiString placeholder = "Type in a valid URL and hit connect...";

    Label1->Caption = "HTML Grab: Connected";
    lblCount->Caption = "0";

    // Drop the hint line, but only if the memo really has a first line.
    if (Memo1->Lines->Count > 0 && Memo1->Lines->Strings[0] == placeholder)
        Memo1->Lines->Delete(0);

    // Parsed before the stream exists so a conversion error cannot leak it.
    const int maxReads = edtMax->Text.ToInt();

    // The request line must be terminated by CR LF; the second CR LF is the
    // empty line that ends the request.
    const AnsiString request =
        "GET " + ExtractURLPath(Edit1->Text) + "\r\n\r\n";

    TWinSocketStream *stream = new TWinSocketStream(Socket, streamTimeoutMs);
    try {
        if (stream->Write(request.c_str(), request.Length()) <= 0) {
            ShowMessage("Connection timed out");
        }
        else {
            char chunk[LENGTH + 1];      // +1 leaves room for the terminator
            int received = stream->Read(chunk, LENGTH);

            if (received <= 0) {
                ShowMessage("No reply");
            }
            else {
                lblStatus->Caption = "Success";
                Memo1->Lines->Add("");
                Memo1->Lines->Add("Source code from " + Edit1->Text);
                Memo1->Lines->Add("");
                Memo1->Lines->Add("-------------------------------------");

                const int tagLen = endTag.Length();
                AnsiString carry;        // tail of the text already shown
                int reads = 0;
                bool endTagSeen = false;
                bool limitHit = false;

                while (received > 0) {
                    // Terminate at the number of bytes actually received so
                    // stale bytes from an earlier read are never displayed.
                    chunk[received] = '\0';
                    AnsiString piece = chunk;

                    ++reads;
                    lblCount->Caption = IntToStr(reads);

                    // Search the previous tail together with the new data so
                    // a tag split across two reads is still recognised.
                    const AnsiString window = carry + piece;
                    const int tagPos = window.LowerCase().AnsiPos(endTag);
                    if (tagPos > 0) {
                        // Keep everything up to and including the tag. The
                        // carry is shorter than the tag, so the tag always
                        // ends inside the new piece.
                        piece = piece.SubString(
                            1, tagPos + tagLen - 1 - carry.Length());
                        endTagSeen = true;
                    }

                    Memo1->Text = Memo1->Text + piece;

                    if (endTagSeen)
                        break;
                    if (reads >= maxReads) {
                        limitHit = true;
                        break;
                    }

                    // Remember the last tagLen-1 characters for the next pass.
                    if (window.Length() >= tagLen)
                        carry = window.SubString(
                            window.Length() - tagLen + 2, tagLen - 1);
                    else
                        carry = window;

                    received = stream->Read(chunk, LENGTH);
                }

                if (limitHit) {
                    Memo1->Lines->Add("");
                    Memo1->Lines->Add("");
                    Memo1->Lines->Add("*** Max Lines Reached! ***");
                    Memo1->Lines->Add("");
                }
                Memo1->Lines->Add("----------------------------");
            }
        }
    }
    catch (...) {
        // Release the stream and the busy cursor before passing the error on.
        delete stream;
        Cursor = crDefault;
        throw;
    }
    delete stream;

    // The OnConnecting handler switches to the hourglass; the transfer is
    // over, so put the normal cursor back.
    Cursor = crDefault;
}
//---------------------------------------------------------------------------
// Socket OnConnecting handler: reports that a connection attempt is under
// way, shows the host being contacted and switches the form's cursor to the
// hourglass. Neither parameter is used.
void __fastcall TForm1::ClientSocket1Connecting(TObject *Sender,
                                                TCustomWinSocket *Socket)
{
    // The caption is a single-line literal; a literal may not span lines.
    Label1->Caption = "HTML Grab: Connecting";
    lblURL->Caption = ClientSocket1->Host;
    Cursor = crHourGlass;
}
//---------------------------------------------------------------------------
// Click handler: closes the socket, points it at the host and port taken
// from the URL in Edit1, then opens it again. Sender is unused.
// ToInt() throws if the extracted port text is not a number.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    // Close first so Host and Port are changed on an inactive socket.
    ClientSocket1->Close();

    URL = Edit1->Text;
    ClientSocket1->Host = ExtractURLHost(URL);
    ClientSocket1->Port = ExtractURLPort(URL).ToInt();

    ClientSocket1->Open();
}
//---------------------------------------------------------------------------
// Click handler: announces the disconnect in Label1 and closes the socket.
// Sender is unused.
void __fastcall TForm1::Button2Click(TObject *Sender)
{
    // The caption is a single-line literal; a literal may not span lines.
    Label1->Caption = "HTML Grab: Disconnecting";
    ClientSocket1->Close();
}
//---------------------------------------------------------------------------
// Socket OnDisconnect handler: resets the status labels to their idle
// values and restores the normal cursor. Neither parameter is used.
void __fastcall TForm1::ClientSocket1Disconnect(TObject *Sender,
                                                TCustomWinSocket *Socket)
{
    lblURL->Caption = "None";
    lblStatus->Caption = "Idle";
    // The caption is a single-line literal; a literal may not span lines.
    Label1->Caption = "HTML Grab: Disconnected";

    // ClientSocket1Connecting switches the form to the hourglass cursor;
    // once the socket is closed the form is idle again.
    Cursor = crDefault;
}
//---------------------------------------------------------------------------
// Click handler: empties the output control and resets the read counter
// label to zero. Sender is unused.
void __fastcall TForm1::Button3Click(TObject *Sender)
{
    Memo1->Text = AnsiString();
    lblCount->Caption = "0";
}
//---------------------------------------------------------------------------
//End MainForm.cpp
//Begin URLUtil.h
//---------------------------------------------------------------------------
#ifndef URLUtil_H
#define URLUtil_H
//---------------------------------------------------------------------------
// Returns the text that precedes the first "://" in URL, or an empty string
// when URL contains no "://".
AnsiString ExtractURLProtocol(const AnsiString& URL);
// Returns the port named in URL as text, or "80" when URL contains no ":"
// after the protocol part.
AnsiString ExtractURLPort(const AnsiString& URL);
// Returns the host part of URL: the text after any "protocol://" prefix,
// cut before the port or path that follows it.
AnsiString ExtractURLHost(const AnsiString& URL);
// Returns the path part of URL starting at the first "/" after the host,
// or "/" when URL has no path.
AnsiString ExtractURLPath(const AnsiString& URL);
#endif
//---------------------------------------------------------------------------
//End URLUtil.h
//Begin URLUtil.cpp
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include "URLUtil.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
// Returns everything in front of the first "://" in URL.
// Yields an empty string when URL has no "://" separator.
AnsiString ExtractURLProtocol(const AnsiString& URL)
{
    const int separator = URL.AnsiPos("://");   // 1-based, 0 = not found
    if (separator != 0)
        return URL.SubString(1, separator - 1);
    return "";
}
//---------------------------------------------------------------------------
// Returns the port named in URL as text, or "80" when none is given.
//
// URL may be written with or without a leading "protocol://". Only a ":"
// that appears before the first "/" is treated as the port separator, so a
// colon inside the path does not count. The returned text never includes
// the colon itself. An empty port ("host:/x") also yields "80".
AnsiString ExtractURLPort(const AnsiString& URL)
{
    // Strip "protocol://" so that only "host[:port][/path]" remains.
    AnsiString rest = URL;
    const int schemeEnd = URL.AnsiPos("://");
    if (schemeEnd > 0)
        rest = URL.SubString(schemeEnd + 3, URL.Length());

    // The host[:port] part ends at the first "/", or at the end of the text.
    AnsiString authority = rest;
    const int slash = rest.AnsiPos("/");
    if (slash > 0)
        authority = rest.SubString(1, slash - 1);

    const int colon = authority.AnsiPos(":");
    if (colon == 0)
        return "80";

    const AnsiString port =
        authority.SubString(colon + 1, authority.Length());
    if (port.IsEmpty())
        return "80";
    return port;
}
//---------------------------------------------------------------------------
// Returns the host part of URL.
//
// URL may be written with or without a leading "protocol://". The host
// ends at whichever comes first: a ":" (start of the port), a "/" (start
// of the path), or the end of the text.
AnsiString ExtractURLHost(const AnsiString& URL)
{
    // Strip "protocol://" so that only "host[:port][/path]" remains.
    AnsiString rest = URL;
    const int schemeEnd = URL.AnsiPos("://");
    if (schemeEnd > 0)
        rest = URL.SubString(schemeEnd + 3, URL.Length());

    // Take the earliest delimiter; checking both keeps a ":" that occurs
    // inside the path from pulling the path into the host.
    int hostEnd = rest.Length() + 1;
    const int colon = rest.AnsiPos(":");
    if (colon > 0 && colon < hostEnd)
        hostEnd = colon;
    const int slash = rest.AnsiPos("/");
    if (slash > 0 && slash < hostEnd)
        hostEnd = slash;

    return rest.SubString(1, hostEnd - 1);
}
//---------------------------------------------------------------------------
// Returns the path part of URL, starting at the first "/" that follows the
// host, or "/" when URL has no path.
//
// URL may be written with or without a leading "protocol://".
AnsiString ExtractURLPath(const AnsiString& URL)
{
    // Strip "protocol://" so its slashes are not mistaken for the path.
    AnsiString rest = URL;
    const int schemeEnd = URL.AnsiPos("://");
    if (schemeEnd > 0)
        rest = URL.SubString(schemeEnd + 3, URL.Length());

    const int slash = rest.AnsiPos("/");
    if (slash == 0)
        return "/";

    // The count is measured on the same string the index refers to, so the
    // final character of the path is kept.
    return rest.SubString(slash, rest.Length() - slash + 1);
}
//---------------------------------------------------------------------------