Putting It All Together
Remember our class skeleton, which heretofore did nothing but get input (see Listing 69)?
Listing 69: Class Skeleton Revisited
using System;

// Skeleton of the vampire bot: it only collects the starting URL and the
// destination folder from the console and stores them in a new instance.
class vampirebot
{
    // base_url: the starting URL cut off just after its last "/".
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    // Derives base_url from the starting URL and remembers the folder.
    // When url contains no "/", LastIndexOf yields -1 and base_url is "".
    vampirebot(string url, string dir)
    {
        int cut = url.LastIndexOf("/") + 1;

        base_url = url.Substring(0, cut);
        folder = dir;
    }

    // Prompts for the two inputs and builds the bot; nothing else happens yet.
    public static void Main()
    {
        Console.Write("Enter starting URL: ");
        string url = Console.ReadLine();

        Console.Write("Destination folder? ");
        string dir = Console.ReadLine();

        vampirebot vbot = new vampirebot(url, dir);
    }
}
To complete the vampire bot, add all the namespaces and methods used (see Listing 70):
Listing 70: Class Skeleton with Methods Added
using System;
using System.Collections;
using System.IO;
using System.Net;
using System.Text;

// Vampire bot: downloads a web page, finds the quoted ".gif" references in it,
// and saves each referenced image into a destination folder.
class vampirebot
{
    // base_url: the starting URL cut off just after its last "/"; image names
    //           are appended to it to form download addresses.
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    // Derives base_url from the starting URL and remembers the folder.
    // A host-only URL such as "http://www.professorf.com" yields "http://",
    // because the last "/" belongs to the scheme.
    vampirebot(string url, string dir)
    {
        int slash_loc = url.LastIndexOf("/");

        base_url = url.Substring(0, slash_loc + 1);
        folder = dir;
    }

    /// <summary>Downloads the resource at <paramref name="URL"/> and returns it as text.</summary>
    /// <param name="URL">Address handed to <c>WebRequest.Create</c>.</param>
    /// <returns>
    /// One character per byte received: each byte is widened to the character
    /// with the same code value, so multi-byte encodings are not decoded.
    /// </returns>
    public string URLtoRawHTML(string URL)
    {
        // StringBuilder avoids re-copying the whole page for every byte read.
        StringBuilder RawHTML = new StringBuilder();
        WebRequest req = WebRequest.Create(URL);

        // using-blocks release the response and its stream even when a read fails.
        using (WebResponse res = req.GetResponse())
        using (Stream str = res.GetResponseStream())
        {
            int ch;
            while ((ch = str.ReadByte()) != -1)
            {
                RawHTML.Append(Convert.ToChar(ch));
            }
        }

        return RawHTML.ToString();
    }

    /// <summary>Collects every double-quoted string that contains ".gif".</summary>
    /// <param name="raw_html">Page text to scan; null or empty yields an empty list.</param>
    /// <returns>
    /// The text between the nearest double quote before each ".gif" and the
    /// next double quote after it, in document order.
    /// </returns>
    public ArrayList RawHTMLtoImageList(string raw_html)
    {
        const string patt = ".gif";
        const string quote = "\"";

        ArrayList list = new ArrayList();
        if (raw_html == null || raw_html.Length == 0)
        {
            return list;
        }

        int ploc = raw_html.IndexOf(patt, 0);
        while (ploc >= 0)
        {
            int open = raw_html.LastIndexOf(quote, ploc);
            int close = raw_html.IndexOf(quote, ploc);

            if (close < 0)
            {
                // No closing quote from here to the end of the page, so neither
                // this match nor any later one can be a complete quoted name.
                break;
            }

            if (open < 0)
            {
                // ".gif" appears before the first quote of the page: not a
                // quoted reference, so step past it instead of grabbing junk.
                ploc = raw_html.IndexOf(patt, ploc + patt.Length);
                continue;
            }

            list.Add(raw_html.Substring(open + 1, close - open - 1));
            ploc = raw_html.IndexOf(patt, close + 1);
        }

        return list;
    }

    /// <summary>Downloads every listed image and writes it into the destination folder.</summary>
    /// <param name="file_list">
    /// Image names relative to base_url. Each "/" in a name becomes "_" in the
    /// local file name, so all files land directly in the folder.
    /// </param>
    public void ImageListtoFiles(ArrayList file_list)
    {
        for (int i = 0; i < file_list.Count; i++)
        {
            string remote = Convert.ToString(file_list[i]);
            string filename = folder + "/" + remote.Replace("/", "_");

            WebRequest req = WebRequest.Create(base_url + remote);

            // The file is opened only after the response arrives, so a failed
            // request does not leave an empty file behind; the using-blocks
            // close all three handles even if the copy is interrupted.
            using (WebResponse res = req.GetResponse())
            using (Stream str = res.GetResponseStream())
            using (FileStream fs = new FileStream(filename, FileMode.Create))
            {
                int ch;
                while ((ch = str.ReadByte()) != -1)
                {
                    fs.WriteByte(Convert.ToByte(ch));
                }
            }
        }
    }

    // Prompts for the two inputs and builds the bot; the methods above are
    // not called at this stage.
    public static void Main()
    {
        string url, dir;
        vampirebot vbot;

        Console.Write("Enter starting URL: ");
        url = Console.ReadLine();
        Console.Write("Destination folder? ");
        dir = Console.ReadLine();

        vbot = new vampirebot(url, dir);
    }
}
With the methods added, simply call them in Main() (see Listing 71):
Listing 71: Class Skeleton with Methods Added and Calls to Those Methods
using System;
using System.Collections;
using System.IO;
using System.Net;
using System.Text;

// Vampire bot: downloads a web page, finds the quoted ".gif" references in it,
// and saves each referenced image into a destination folder.
class vampirebot
{
    // base_url: the starting URL cut off just after its last "/"; image names
    //           are appended to it to form download addresses.
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    // Derives base_url from the starting URL and remembers the folder.
    // A host-only URL such as "http://www.professorf.com" yields "http://",
    // because the last "/" belongs to the scheme.
    vampirebot(string url, string dir)
    {
        int slash_loc = url.LastIndexOf("/");

        base_url = url.Substring(0, slash_loc + 1);
        folder = dir;
    }

    /// <summary>Downloads the resource at <paramref name="URL"/> and returns it as text.</summary>
    /// <param name="URL">Address handed to <c>WebRequest.Create</c>.</param>
    /// <returns>
    /// One character per byte received: each byte is widened to the character
    /// with the same code value, so multi-byte encodings are not decoded.
    /// </returns>
    public string URLtoRawHTML(string URL)
    {
        // StringBuilder avoids re-copying the whole page for every byte read.
        StringBuilder RawHTML = new StringBuilder();
        WebRequest req = WebRequest.Create(URL);

        // using-blocks release the response and its stream even when a read fails.
        using (WebResponse res = req.GetResponse())
        using (Stream str = res.GetResponseStream())
        {
            int ch;
            while ((ch = str.ReadByte()) != -1)
            {
                RawHTML.Append(Convert.ToChar(ch));
            }
        }

        return RawHTML.ToString();
    }

    /// <summary>Collects every double-quoted string that contains ".gif".</summary>
    /// <param name="raw_html">Page text to scan; null or empty yields an empty list.</param>
    /// <returns>
    /// The text between the nearest double quote before each ".gif" and the
    /// next double quote after it, in document order.
    /// </returns>
    public ArrayList RawHTMLtoImageList(string raw_html)
    {
        const string patt = ".gif";
        const string quote = "\"";

        ArrayList list = new ArrayList();
        if (raw_html == null || raw_html.Length == 0)
        {
            return list;
        }

        int ploc = raw_html.IndexOf(patt, 0);
        while (ploc >= 0)
        {
            int open = raw_html.LastIndexOf(quote, ploc);
            int close = raw_html.IndexOf(quote, ploc);

            if (close < 0)
            {
                // No closing quote from here to the end of the page, so neither
                // this match nor any later one can be a complete quoted name.
                break;
            }

            if (open < 0)
            {
                // ".gif" appears before the first quote of the page: not a
                // quoted reference, so step past it instead of grabbing junk.
                ploc = raw_html.IndexOf(patt, ploc + patt.Length);
                continue;
            }

            list.Add(raw_html.Substring(open + 1, close - open - 1));
            ploc = raw_html.IndexOf(patt, close + 1);
        }

        return list;
    }

    /// <summary>Downloads every listed image and writes it into the destination folder.</summary>
    /// <param name="file_list">
    /// Image names relative to base_url. Each "/" in a name becomes "_" in the
    /// local file name, so all files land directly in the folder.
    /// </param>
    public void ImageListtoFiles(ArrayList file_list)
    {
        for (int i = 0; i < file_list.Count; i++)
        {
            string remote = Convert.ToString(file_list[i]);
            string filename = folder + "/" + remote.Replace("/", "_");

            WebRequest req = WebRequest.Create(base_url + remote);

            // The file is opened only after the response arrives, so a failed
            // request does not leave an empty file behind; the using-blocks
            // close all three handles even if the copy is interrupted.
            using (WebResponse res = req.GetResponse())
            using (Stream str = res.GetResponseStream())
            using (FileStream fs = new FileStream(filename, FileMode.Create))
            {
                int ch;
                while ((ch = str.ReadByte()) != -1)
                {
                    fs.WriteByte(Convert.ToByte(ch));
                }
            }
        }
    }

    // Prompts for the two inputs, then runs the three steps in order:
    // fetch the page, extract the image names, download the images.
    public static void Main()
    {
        string url, dir;
        vampirebot vbot;
        string rawHTML;
        ArrayList alist;

        Console.Write("Enter starting URL: ");
        url = Console.ReadLine();
        Console.Write("Destination folder? ");
        dir = Console.ReadLine();

        vbot = new vampirebot(url, dir);
        rawHTML = vbot.URLtoRawHTML(url);
        alist = vbot.RawHTMLtoImageList(rawHTML);
        vbot.ImageListtoFiles(alist);
    }
}
Note that we defined two variables to hold the methods' return values, rawHTML (a string), and alist (an ArrayList). Our completed vampire bot is shown in Listing 72.
Listing 72: Completed Vampire Bot
using System;
using System.Collections;
using System.IO;
using System.Net;
using System.Text;

// Vampire bot: downloads a web page, finds the quoted ".gif" references in it,
// and saves each referenced image into a destination folder.
class vampirebot
{
    // base_url: the starting URL cut off just after its last "/"; image names
    //           are appended to it to form download addresses.
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    // Derives base_url from the starting URL and remembers the folder.
    // A host-only URL such as "http://www.professorf.com" yields "http://",
    // because the last "/" belongs to the scheme.
    vampirebot(string url, string dir)
    {
        int slash_loc = url.LastIndexOf("/");

        base_url = url.Substring(0, slash_loc + 1);
        folder = dir;
    }

    /// <summary>Downloads the resource at <paramref name="URL"/> and returns it as text.</summary>
    /// <param name="URL">Address handed to <c>WebRequest.Create</c>.</param>
    /// <returns>
    /// One character per byte received: each byte is widened to the character
    /// with the same code value, so multi-byte encodings are not decoded.
    /// </returns>
    public string URLtoRawHTML(string URL)
    {
        // StringBuilder avoids re-copying the whole page for every byte read.
        StringBuilder RawHTML = new StringBuilder();
        WebRequest req = WebRequest.Create(URL);

        // using-blocks release the response and its stream even when a read fails.
        using (WebResponse res = req.GetResponse())
        using (Stream str = res.GetResponseStream())
        {
            int ch;
            while ((ch = str.ReadByte()) != -1)
            {
                RawHTML.Append(Convert.ToChar(ch));
            }
        }

        return RawHTML.ToString();
    }

    /// <summary>Collects every double-quoted string that contains ".gif".</summary>
    /// <param name="raw_html">Page text to scan; null or empty yields an empty list.</param>
    /// <returns>
    /// The text between the nearest double quote before each ".gif" and the
    /// next double quote after it, in document order.
    /// </returns>
    public ArrayList RawHTMLtoImageList(string raw_html)
    {
        const string patt = ".gif";
        const string quote = "\"";

        ArrayList list = new ArrayList();
        if (raw_html == null || raw_html.Length == 0)
        {
            return list;
        }

        int ploc = raw_html.IndexOf(patt, 0);
        while (ploc >= 0)
        {
            int open = raw_html.LastIndexOf(quote, ploc);
            int close = raw_html.IndexOf(quote, ploc);

            if (close < 0)
            {
                // No closing quote from here to the end of the page, so neither
                // this match nor any later one can be a complete quoted name.
                break;
            }

            if (open < 0)
            {
                // ".gif" appears before the first quote of the page: not a
                // quoted reference, so step past it instead of grabbing junk.
                ploc = raw_html.IndexOf(patt, ploc + patt.Length);
                continue;
            }

            list.Add(raw_html.Substring(open + 1, close - open - 1));
            ploc = raw_html.IndexOf(patt, close + 1);
        }

        return list;
    }

    /// <summary>Downloads every listed image and writes it into the destination folder.</summary>
    /// <param name="file_list">
    /// Image names relative to base_url. Each "/" in a name becomes "_" in the
    /// local file name, so all files land directly in the folder.
    /// </param>
    public void ImageListtoFiles(ArrayList file_list)
    {
        for (int i = 0; i < file_list.Count; i++)
        {
            string remote = Convert.ToString(file_list[i]);
            string filename = folder + "/" + remote.Replace("/", "_");

            WebRequest req = WebRequest.Create(base_url + remote);

            // The file is opened only after the response arrives, so a failed
            // request does not leave an empty file behind; the using-blocks
            // close all three handles even if the copy is interrupted.
            using (WebResponse res = req.GetResponse())
            using (Stream str = res.GetResponseStream())
            using (FileStream fs = new FileStream(filename, FileMode.Create))
            {
                int ch;
                while ((ch = str.ReadByte()) != -1)
                {
                    fs.WriteByte(Convert.ToByte(ch));
                }
            }
        }
    }

    // Prompts for the two inputs, then runs the three steps in order:
    // fetch the page, extract the image names, download the images.
    public static void Main()
    {
        string url, dir;
        vampirebot vbot;
        string rawHTML;
        ArrayList alist;

        Console.Write("Enter starting URL: ");
        url = Console.ReadLine();
        Console.Write("Destination folder? ");
        dir = Console.ReadLine();

        vbot = new vampirebot(url, dir);
        rawHTML = vbot.URLtoRawHTML(url);
        alist = vbot.RawHTMLtoImageList(rawHTML);
        vbot.ImageListtoFiles(alist);
    }
}
That's it! Clearly there's plenty of room for improving this basic vampire bot—the basic vampire bot only reads GIF files from one web page. One change you could make is to have it read JPG and other image file formats. Another change is to have the bot traverse other links on the page once it has finished downloading all the images on the current web page. Finally, there is plenty of error-handling code you could add. For example, if the user specifies a web site address without a web page, such as http://www.professorf.com versus http://www.professorf.com/planets.html, the calculation of the base_url variable changes. For an easy change, try making the bot print out the image that it's currently downloading.
If you would like these topics covered in a future article, let us know. Of course, we prefer that you figure them out yourselves, as this is the point of code improvisation and recreational programming in general. Until next time, keep on coding and having fun while you do so!