Using a blocking TClientSocket Connection for HTML Transfer

By: Nate Lieby

Abstract: This TI explains how to use a blocking TClientSocket in conjunction with a TWinSocketStream to send HTTP commands to a server in order to receive files. It explains the actual commands as well.

Using a blocking TClientSocket Connection for HTML Transfer

    The hardest thing about creating your own socket connection for use with the web is simply getting started.  From the help files, one can quickly learn how to connect a TClientSocket (from the Internet group) to a given server.  But once you've achieved a connection, what do you do with it?

    After connecting to your server, you must create a TWinSocketStream.  The TWinSocketStream is the stream that you will use to actually read and write to the server with.  Once the TWinSocketStream is in place, you can begin making your commands to the server to retrieve the html that you want to process.

    Using the Read and Write methods, you can get all of the information you need.  The only problem with a blocking connection is that there is no direct way to tell when the server is done sending information.  This can be handled by only taking a certain number of chunks at a time, or by parsing the text for the closing </html> tag.

    To begin reading from a server, a command of this form must be sent:
        GET path/file.html \r\n\r\n
    This command, sent with your TWinSocketStream's Write method, tells the server to begin sending the file to you.  It is your job to know when the file is complete.

//---------------------------------------------------------------------------
//Begin MainForm.h
#ifndef MainFormH
#define MainFormH
//---------------------------------------------------------------------------
#include <Classes.hpp>
#include <Controls.hpp>
#include <StdCtrls.hpp>
#include <Forms.hpp>
#include <ScktComp.hpp>
#include <ExtCtrls.hpp>
#include <ComCtrls.hpp>
//---------------------------------------------------------------------------
class TForm1 : public TForm
{
__published: // IDE-managed Components
   TClientSocket *ClientSocket1;
   TPanel *Panel1;
   TEdit *Edit1;
   TLabel *Label1;
   TPanel *Panel2;
   TButton *Button1;
   TButton *Button2;
   TLabel *Label2;
   TLabel *lblURL;
   TLabel *Label3;
   TLabel *lblStatus;
   TButton *Button3;
   TLabel *Label4;
   TLabel *Label5;
   TLabel *lblCount;
   TRichEdit *Memo1;
   TEdit *edtMax;
   void __fastcall ClientSocket1Connect(TObject *Sender,
          TCustomWinSocket *Socket);
   void __fastcall ClientSocket1Connecting(TObject *Sender,
          TCustomWinSocket *Socket);
   void __fastcall Button1Click(TObject *Sender);
   void __fastcall Button2Click(TObject *Sender);
   void __fastcall ClientSocket1Disconnect(TObject *Sender,
          TCustomWinSocket *Socket);
   void __fastcall Button3Click(TObject *Sender);
private:
   AnsiString URL; // User declarations
public// User declarations
   __fastcall TForm1(TComponent* Owner);
};
//---------------------------------------------------------------------------
extern PACKAGE TForm1 *Form1;
//---------------------------------------------------------------------------
#endif
//---------------------------------------------------------------------------

//End MainForm.h

//Begin MainForm.cpp
 

//---------------------------------------------------------------------------

#include <vcl.h>
#pragma hdrstop

#include "URLUtil.h"
#include "MainForm.h"

#define LENGTH 80

//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
// Constructs the form; all set-up is delegated to the TForm base class.
//   Owner - component passed through to the TForm constructor.
__fastcall TForm1::TForm1(TComponent* Owner) : TForm(Owner)
{
   // Nothing to initialise here.
}
//---------------------------------------------------------------------------

void __fastcall TForm1::ClientSocket1Connect(TObject *Sender,
      TCustomWinSocket *Socket)
{
   Label1->Caption="HTML Grab: Connected";
   lblCount->Caption=(0);
   AnsiString Directory(ExtractURLPath(Edit1->Text));

   TWinSocketStream* wnscktstrmMain = new TWinSocketStream(Socket, 10000);

   AnsiString buf="Type in a valid URL and hit connect...";

   if(Memo1->Lines->Strings[0]==buf)
      Memo1->Lines->Delete(0);
                            //HTML commands must be made in the form of "GET foo/index.html nrnr"
   buf="GET "+Directory;    //GET command for html transfer
   buf+=(" nrnr");     //required as end transfer tag
    if(wnscktstrmMain->Write(buf.c_str(),buf.Length())){ //if write succeeded, read from stream
      int i=1;
      char point[LENGTH]={"o"};
      if(wnscktstrmMain->Read(point,LENGTH)){
         point[LENGTH]='0';
         lblStatus->Caption="Success";
         Memo1->Lines->Add("");
         Memo1->Lines->Add("Source code from "+Edit1->Text);
         Memo1->Lines->Add("");
         Memo1->Lines->Add("-------------------------------------");
         buf=point;
         Memo1->Text=Memo1->Text+buf;
         lblCount->Caption=(lblCount->Caption.ToInt()+1); //incrementing line counter
         int j=0;
         while(i>0&&i<(edtMax->Text.ToInt())){
            lblCount->Caption=(lblCount->Caption.ToInt()+1); //incrementing line counter
            wnscktstrmMain->Read(point,LENGTH);             //reading from stream
            buf=point;
           if(buf.AnsiPos("</html>")){//testing for end html tag
               j=buf.AnsiPos("</html>");
               Memo1->Text=Memo1->Text+buf.SubString(1,j+6);
               i=0;                                         //end read
            }
           if(buf.AnsiPos("</HTML>")){//testing for end html tag
               j=buf.AnsiPos("</HTML>");
               Memo1->Text=Memo1->Text+buf.SubString(1,j+6);
               i=0;                                         //end read
            }
            int p=0;
            while(i!=0&&p>=0&&p<10){    //p is the # of lines to be
                                        //checked from the bottom of the page for </html> tag
             if(Memo1->Lines->Strings[(Memo1->Lines->Count)-p].AnsiPos("</html>")){
                                     //checking for truncated /html tag
                  j=Memo1->Lines->Strings[(Memo1->Lines->Count)-p].AnsiPos("</html>");
                  for(int k=1;k<p;k++)
                  Memo1->Lines->Delete((Memo1->Lines->Count)-1);//cleaning up garbage values
                  i=0;                                       //end read
               }
            else if(Memo1->Lines->Strings[(Memo1->Lines->Count)-p].AnsiPos("</HTML>")){
                  j=Memo1->Lines->Strings[(Memo1->Lines->Count)-p].AnsiPos("</HTML>");
               for(int k=1;k<p;k++)
                  Memo1->Lines->Delete((Memo1->Lines->Count)-1);
                  i=0;                                       //end read
               }
               p++;
            }

           if(i!=0){
                 Memo1->Text=Memo1->Text+buf;  //add text as normal
                i++;
            }
         }
         if(i==((edtMax->Text.ToInt()))){
            Memo1->Lines->Add("");
            Memo1->Lines->Add("");
            Memo1->Lines->Add("*** Max Lines Reached! ***");
            Memo1->Lines->Add("");
         }
         Memo1->Lines->Add("----------------------------");
      }
      else
         ShowMessage("No reply");
   }
   else{
      ShowMessage("Connection timed out");

   }
   delete wnscktstrmMain;                                    //important

}
//---------------------------------------------------------------------------

// OnConnecting handler of ClientSocket1: shows the target host, reports the
// "connecting" state and switches the form cursor to the hourglass.
//   Sender, Socket - not used.
void __fastcall TForm1::ClientSocket1Connecting(TObject *Sender,
      TCustomWinSocket *Socket)
{
   Cursor = crHourGlass;
   lblURL->Caption = ClientSocket1->Host;
   Label1->Caption = "HTML Grab: Connecting";
}
//---------------------------------------------------------------------------

// Click handler of Button1: closes any open connection, then points the
// socket at the host and port taken from the URL in Edit1 and opens it.
//   Sender - not used.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
   ClientSocket1->Close();

   URL = Edit1->Text;
   const AnsiString portText = ExtractURLPort(URL);

   ClientSocket1->Host = ExtractURLHost(URL);
   ClientSocket1->Port = portText.ToInt();   // raises if the port text is not a number
   ClientSocket1->Open();
}
//---------------------------------------------------------------------------

// Click handler of Button2: reports the "disconnecting" state and closes the
// client socket.
//   Sender - not used.
void __fastcall TForm1::Button2Click(TObject *Sender)
{
   // Set the caption first; ClientSocket1Disconnect writes its own caption
   // to Label1 when it runs.
   Label1->Caption="HTML Grab: Disconnecting";
   ClientSocket1->Close();
}
//---------------------------------------------------------------------------

// OnDisconnect handler of ClientSocket1: puts the state, host and status
// labels back to their idle texts.
//   Sender, Socket - not used.
void __fastcall TForm1::ClientSocket1Disconnect(TObject *Sender,
      TCustomWinSocket *Socket)
{
   Label1->Caption = "HTML Grab: Disconnected";
   lblStatus->Caption = "Idle";
   lblURL->Caption = "None";
}
//---------------------------------------------------------------------------

// Click handler of Button3: resets the chunk counter to zero and empties the
// output memo.
//   Sender - not used.
void __fastcall TForm1::Button3Click(TObject *Sender)
{
   lblCount->Caption = "0";
   Memo1->Text = "";
}
//---------------------------------------------------------------------------

//End MainForm.cpp

//Begin URLUtil.h

//---------------------------------------------------------------------------

#ifndef URLUtil_H
#define URLUtil_H
//---------------------------------------------------------------------------

// Helpers that split a URL string into its parts.

// Returns the text in front of "://", or "" when URL has no "://".
AnsiString ExtractURLProtocol(const AnsiString& URL);
// Returns the port named in URL as text, or "80" when no ':' separator is found.
AnsiString ExtractURLPort(const AnsiString& URL);
// Returns the host name part of URL (protocol prefix removed).
AnsiString ExtractURLHost(const AnsiString& URL);
// Returns the part of URL from the first '/' after the host, or "/" when
// there is none.
AnsiString ExtractURLPath(const AnsiString& URL);

#endif
//---------------------------------------------------------------------------

//End URLUtil.h

//Begin URLUtil.cpp
 

//---------------------------------------------------------------------------

#include <vcl.h>
#pragma hdrstop

#include "URLUtil.h"

//---------------------------------------------------------------------------

#pragma package(smart_init)

// Returns the protocol of URL, i.e. everything in front of the first "://".
// Returns "" when URL contains no "://".
AnsiString ExtractURLProtocol(const AnsiString& URL)
{
   // AnsiPos is 1-based and yields 0 when the text is absent.
   const int separator = URL.AnsiPos("://");
   return (separator != 0) ? URL.SubString(1, separator-1) : AnsiString("");
}
//---------------------------------------------------------------------------
AnsiString ExtractURLPort(const AnsiString& URL)
{
   if(ExtractURLProtocol(URL) == "") {
      int pos1 = URL.AnsiPos(":");
      if(pos1 == 0)
         return "80";
      int pos2 = URL.AnsiPos("/");
      return URL.SubString(pos1, pos2-pos1);
   } else {
      int pos1 = URL.AnsiPos("://");
      pos1 += 3;
      AnsiString sub = URL.SubString(pos1, URL.Length());
      pos1 = sub.AnsiPos(":");
      if(pos1 == 0)
         return "80";
      else {
         int pos2 = sub.AnsiPos("/");
         if(pos2 == 0) {
            pos2 = sub.Length() + 1;
           return sub.SubString(pos1+1, pos2-pos1);
         } else
           return sub.SubString(pos1+1, pos2-pos1-1);
      }
   }
}
//---------------------------------------------------------------------------
// Returns the host name of URL: the text after any "protocol://" prefix up
// to, but not including, the first ':' or '/', whichever comes first.
// Returns everything after the prefix when neither character is present.
AnsiString ExtractURLHost(const AnsiString& URL)
{
   // Drop "protocol://" when present; positions are 1-based, 0 = not found.
   AnsiString rest = URL;
   const int scheme = URL.AnsiPos("://");
   if(scheme > 0)
      rest = URL.SubString(scheme+3, URL.Length());

   const int slash = rest.AnsiPos("/");
   const int colon = rest.AnsiPos(":");

   // The host ends at the earlier of the two separators, so a ':' that
   // appears only in the path does not pull path text into the host.
   int end = rest.Length()+1;
   if(slash > 0)
      end = slash;
   if(colon > 0 && colon < end)
      end = colon;
   return rest.SubString(1, end-1);
}
//---------------------------------------------------------------------------
// Returns the path of URL: everything from the first '/' that follows the
// host to the end of the text.  Returns "/" when there is no such '/'.
AnsiString ExtractURLPath(const AnsiString& URL)
{
   // Drop "protocol://" when present; positions are 1-based, 0 = not found.
   AnsiString rest = URL;
   const int scheme = URL.AnsiPos("://");
   if(scheme > 0)
      rest = URL.SubString(scheme+3, URL.Length());

   const int slash = rest.AnsiPos("/");
   if(slash == 0)
      return "/";

   // Count from the stripped text, not from URL, so the final character is
   // kept when URL carries no protocol prefix.
   return rest.SubString(slash, rest.Length()-slash+1);
}
//---------------------------------------------------------------------------
 
 
 


Server Response from: ETNASC03