Homepage

How to easily parse HTML without RegEx

May 6th, 2008

I recently discovered an absolutely amazing HTML parsing library for .NET called HtmlAgilityPack. It completely takes away the pain of parsing complicated HTML with regular expressions.

Here’s a very simple example of what you could do with it - I’m just extracting the inner HTML from any element inside an HTML file which has a CSS class called “scrape” assigned to it:

using HtmlAgilityPack;

/// <summary>
/// Page that loads an HTML file with HtmlAgilityPack and writes to the response the inner
/// HTML of every element whose class attribute value is exactly "scrape".
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Loads the HTML document from disk and starts the recursive walk at its root node.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        // NOTE(review): filePath is not declared in this snippet - presumably a field or
        // constant holding the virtual path of the HTML file; confirm where it is defined.
        HtmlDocument doc = new HtmlDocument();
        doc.Load(Server.MapPath(filePath));
        Parse(doc.DocumentNode);
    }

    /// <summary>
    /// Depth-first walk over <paramref name="n"/> and its descendants. For each attribute
    /// named "class" whose value equals "scrape", the node's inner HTML is written to the response.
    /// </summary>
    /// <param name="n">Node to inspect; its children are visited recursively.</param>
    private void Parse(HtmlNode n)
    {
        foreach (HtmlAttribute atr in n.Attributes)
        {
            // Exact match only: an element with several classes ("scrape other") is not selected.
            if (atr.Name == "class" && atr.Value == "scrape")
            {
                Response.Write(n.InnerHtml);
            }
        }

        if (n.HasChildNodes)
        {
            foreach (HtmlNode cn in n.ChildNodes)
            {
                Parse(cn);
            }
        }
    }
}

That’s just a very small part of what it could do. I’ll expand upon this and post a few more examples in the future showing some interesting things you could do with this.

J | Programming | No comments Jump to the top of this page

How To Create A Random Fact Generator Using XML

April 23rd, 2008

This is a simple little random fact generator which will show a new fact every time the page loads. After the initial load it will store the XML in the cache until the file is changed again.

XML: (Facts.xml)

<?xml version="1.0" encoding="utf-8" ?>
<facts>
  <fact>
    The numbers ‘172′ can be found on the back of the U.S. $5 dollar
    bill in the bushes at the base of the Lincoln Memorial.
  </fact>
  <fact>
    President Kennedy was the fastest random speaker in the world
    with upwards of 350 words per minute.
  </fact>
  <fact>
    In the average lifetime, a person will walk the equivalent of 5
    times around the equator.
  </fact>
    .
    .
    .
</facts>

Code: (RandomFact.ascx.cs)

using System;

using System.Data;

using System.Configuration;

using System.Collections;

using System.Web;

using System.Web.Caching;

using System.Web.Security;

using System.Web.UI;

using System.Web.UI.WebControls;

using System.Web.UI.WebControls.WebParts;

using System.Web.UI.HtmlControls;

using System.Xml;

using System.IO;

using System.ComponentModel;

using System.Drawing.Design;

 

/// <summary>
/// User control that displays one randomly chosen &lt;fact&gt; element from an XML file.
/// The parsed document is kept in the ASP.NET cache with a dependency on the file,
/// so it is reloaded only after the file changes.
/// </summary>
public partial class _controls_RandomFact : System.Web.UI.UserControl
{
    // One shared generator: a new Random() per call is seeded from the clock, so calls made
    // within the same tick would all produce the same index.
    private static readonly Random _sharedRandom = new Random();

    // Random is not thread-safe and requests run concurrently, so access is serialized.
    private static readonly object _randomLock = new object();

    private string _xmlDataSource;

    /// <summary>Virtual path of the XML file that holds the facts.</summary>
    [UrlProperty()]
    public string XMLDataSource
    {
        get { return _xmlDataSource; }
        set { _xmlDataSource = value; }
    }

    /// <summary>Puts a random fact into the literal on every page load.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        litFact.Text = getRandomFact();
    }

    /// <summary>
    /// Picks a random fact from the (cached) XML document.
    /// </summary>
    /// <returns>
    /// The HTML-encoded text of a random &lt;fact&gt; element; an empty string when the file
    /// contains no facts; or an HTML error message when loading or parsing fails.
    /// </returns>
    private string getRandomFact()
    {
        string strFact = string.Empty;

        try
        {
            // Read the cache entry once: checking and then re-reading can return null
            // if the entry is evicted between the two look-ups.
            XmlDocument xmlDocFacts = Cache["xmlDocFacts"] as XmlDocument;

            if (xmlDocFacts == null)
            {
                string physicalPath = Server.MapPath(XMLDataSource);

                xmlDocFacts = new XmlDocument();
                xmlDocFacts.Load(physicalPath);

                // The file dependency drops the cached document when the XML file is modified.
                Cache.Insert("xmlDocFacts", xmlDocFacts, new CacheDependency(physicalPath));
            }

            XmlNodeList xmlNodesMessage = xmlDocFacts.SelectNodes("//fact");

            // With no <fact> elements there is nothing to index into; return the empty string.
            if (xmlNodesMessage.Count > 0)
            {
                int rnd;

                lock (_randomLock)
                {
                    rnd = _sharedRandom.Next(0, xmlNodesMessage.Count);
                }

                strFact = Server.HtmlEncode(xmlNodesMessage[rnd].InnerText);
            }
        }
        catch (Exception ex)
        {
            // The message may contain paths or markup characters, so encode it before output.
            strFact = string.Format("<b>Error:</b> {0}", Server.HtmlEncode(ex.Message));
        }

        return strFact;
    }
}

Usage: (Default.aspx)

<uc1:RandomFact ID="RandomFact1" runat="server" XMLDataSource="App_Data/Facts.xml" />

J | Programming | No comments Jump to the top of this page

How to manipulate video in .NET using ffmpeg (updated)

April 22nd, 2008

Based on the reader comments on my previous entry on this topic I was able to fix some of the issues that others were experiencing.

I changed how the output is read: instead of reading the entire stream at once, it’s now read line-by-line as the ErrorDataReceived and OutputDataReceived events are raised. I also added an extra option to the command line (-ar 44100) to explicitly set the audio frequency to the default, since it wasn’t being applied to some video formats, resulting in an error. And lastly, the console window is now hidden.

/// <summary>
/// Converts a video to FLV by running ffmpeg as a hidden child process and blocks until it exits.
/// ffmpeg's stdout and stderr are delivered line by line to process_OutputDataReceived and
/// process_ErrorDataReceived; process_Exited is raised when the process ends.
/// </summary>
/// <param name="srcURL">Path of the source video, passed to ffmpeg as the -i argument.</param>
/// <param name="destURL">Path of the FLV file to create.</param>
private void ConvertVideo(string srcURL, string destURL)
{
    string ffmpegURL = "~/project/tools/ffmpeg.exe";
    string ffmpegPath = Server.MapPath(ffmpegURL);
    DirectoryInfo directoryInfo = new DirectoryInfo(Path.GetDirectoryName(ffmpegPath));

    ProcessStartInfo startInfo = new ProcessStartInfo();
    startInfo.FileName = ffmpegPath;
    // -ar 44100 sets the audio sampling rate explicitly; -f flv forces the output container.
    startInfo.Arguments = string.Format("-i \"{0}\" -aspect 1.7777 -ar 44100 -f flv \"{1}\"", srcURL, destURL);
    startInfo.WorkingDirectory = directoryInfo.FullName;
    startInfo.UseShellExecute = false;
    startInfo.RedirectStandardOutput = true;
    startInfo.RedirectStandardInput = true;
    startInfo.RedirectStandardError = true;
    startInfo.CreateNoWindow = true;
    startInfo.WindowStyle = ProcessWindowStyle.Hidden;

    using (Process process = new Process())
    {
        process.StartInfo = startInfo;
        process.EnableRaisingEvents = true;
        process.ErrorDataReceived += new DataReceivedEventHandler(process_ErrorDataReceived);
        process.OutputDataReceived += new DataReceivedEventHandler(process_OutputDataReceived);
        process.Exited += new EventHandler(process_Exited);

        try
        {
            process.Start();

            // Nothing is ever written to ffmpeg's redirected stdin. Closing it makes any
            // interactive prompt (e.g. "overwrite existing file?") see end-of-input
            // instead of blocking WaitForExit forever.
            process.StandardInput.Close();

            process.BeginErrorReadLine();
            process.BeginOutputReadLine();
            process.WaitForExit();
        }
        catch (Exception ex)
        {
            lblError.Text = ex.ToString();
        }
        finally
        {
            process.ErrorDataReceived -= new DataReceivedEventHandler(process_ErrorDataReceived);
            process.OutputDataReceived -= new DataReceivedEventHandler(process_OutputDataReceived);
            process.Exited -= new EventHandler(process_Exited);
        }
    }
}
/// <summary>
/// Appends one line of the child process's standard output to lblStdout.
/// </summary>
/// <param name="sender">The process that produced the line.</param>
/// <param name="e">Carries the line in Data; Data is null once the stream has closed.</param>
void process_OutputDataReceived(object sender, DataReceivedEventArgs e)
{
    if (e.Data != null)
    {
        // The text comes from an external program, so encode it before it is rendered as HTML.
        lblStdout.Text += System.Web.HttpUtility.HtmlEncode(e.Data) + "<br />";
    }
}
/// <summary>
/// Appends one line of the child process's standard error to lblStderr.
/// </summary>
/// <param name="sender">The process that produced the line.</param>
/// <param name="e">Carries the line in Data; Data is null once the stream has closed.</param>
void process_ErrorDataReceived(object sender, DataReceivedEventArgs e)
{
    if (e.Data != null)
    {
        // The text comes from an external program, so encode it before it is rendered as HTML.
        lblStderr.Text += System.Web.HttpUtility.HtmlEncode(e.Data) + "<br />";
    }
}
/// <summary>
/// Handler for the child process's Exited event. Currently a placeholder with no behavior.
/// </summary>
/// <param name="sender">The process that exited.</param>
/// <param name="e">Unused event data.</param>
void process_Exited(object sender, EventArgs e)
{
    // Hook for work that must run after the process has finished (post-processing).
}

J | Programming | No comments Jump to the top of this page

Three quick ways to optimize AJAX driven websites in ASP.NET

April 18th, 2008

Recently I was involved in a project where I had to make heavy use of AJAX. I realized there are a few simple things you could do to improve performance.

1) Combine scripts

<ajaxToolkit:ToolkitScriptManager ID="TSM1" runat="Server"
EnablePartialRendering="true"
CombineScriptsHandlerUrl="~/CombineScriptsHandler.ashx" />

As the name of the property suggests, it will pretty much combine all the needed JS files into one which in turn will reduce the number of requests sent to the server. You can find a detailed discussion about this here.

It is pretty easy to implement; instead of using the regular ScriptManager, just switch to the ToolkitScriptManager which comes with the AjaxToolkit and then set its CombineScriptsHandlerUrl property as shown above and throw the CombineScriptsHandler.ashx (included in the “SampleWebSite” directory of AjaxControlToolkit’s release package) into the root.

2) Run in release mode

The debug versions of the AJAX library have their source formatting preserved, as well as some debug asserts. By running it in release mode you can shave off some bytes off your requests.

<ajaxToolkit:ToolkitScriptManager ID="TSM1" runat="Server"
EnablePartialRendering="true" ScriptMode="Release" />

Although, it’s important to note that some versions of Safari don’t seem to be compatible with this, which could cause many strange side effects, as this person and I have experienced in the past.

On a side note, the ASP.NET AJAX Control Toolkit officially does not support Macs with PowerPC processors. It’s good to know that piece of information if a client ever demands an explanation as to why AJAX powered functionality seems to be broken or not functioning as expected in that environment.

3) Enable script caching and compression in web.config


<system.web.extensions>
  <scripting>
    <scriptResourceHandler enableCompression="true"
     enableCaching="true"/>
  </scripting>
</system.web.extensions>

This will compress and cache all the script files which are embedded as resources in an assembly, localization objects, and scripts that are served by the script resource handler.

But like the previous tip, there is an exception to this one too. Some versions of IE6 have a bug where they can’t handle GZIP’d script files correctly. The RTM version of ASP.NET AJAX works around this by explicitly not compressing files for these versions of IE. However, if you are still having a problem, it might be a safe bet to explicitly set the enableCompression property to false in the web.config.

J | Programming | 2 comments Jump to the top of this page

Request.Browser.Crawler

April 8th, 2008

In my previous post about exception logging, I showed how to log several different parameters related to the exception in the database. Request.Browser.Crawler is one of them, and it’s used to track browser crawlers. It warrants its own separate entry since it requires some extra setup in the web.config to get it to work correctly.

You’ll have to add the following code in the <system.web> section of your web.config file:

<!-- This section is used by Request.Browser.Crawler property to detect search engine crawlers -->
<browserCaps>
  <filter>
    <!-- SEARCH ENGINES GROUP -->
    <!-- check Google (Yahoo uses this as well) -->
    <case match="^Googlebot(\-Image)?/(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=Google
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>
    <!-- check Alta Vista (Scooter) -->
    <case match="^Scooter(/|-)(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=AltaVista
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>
    <!-- check Alta Vista (Mercator) -->
    <case match="Mercator">
      browser=AltaVista
      crawler=true
    </case>
    <!-- check Slurp (Yahoo uses this as well) -->
    <case match="Slurp">
      browser=Slurp
      crawler=true
    </case>
    <!-- check MSN -->
    <case match="MSNBOT">
      browser=MSN
      crawler=true
    </case>
    <!-- check Northern Light -->
    <case match="^Gulliver/(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=NorthernLight
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>
    <!-- check Excite -->
    <case match="ArchitextSpider">
      browser=Excite
      crawler=true
    </case>
    <!-- Lycos -->
    <case match="Lycos_Spider">
      browser=Lycos
      crawler=true
    </case>
    <!-- Ask Jeeves -->
    <case match="Ask Jeeves">
      browser=AskJeaves
      crawler=true
    </case>
    <!-- check Fast -->
    <case match="^FAST-WebCrawler/(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=Fast
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>
    <!-- IBM Research Web Crawler -->
    <case match="http\:\/\/www\.almaden.ibm.com\/cs\/crawler">
      browser=IBMResearchWebCrawler
      crawler=true
    </case>
  </filter>
</browserCaps>

Now what does it all mean? Well, ASP.NET uses the information in the <browserCaps> section of your config file to detect whether the client browser is a crawler or not. If you look at it closely, it’s basically a regular expression filter. I presume you could add more filters in a similar format to detect other kinds of crawlers.

Update: For the most accurate and updated version of browserCaps and other useful browser testing/detection resources you can go to one of these sites:

http://slingfive.com/pages/code/browserCaps/

http://ocean.accesswa.net/browsercaps/

http://browsers.garykeith.com/downloads.asp

J | Programming | No comments Jump to the top of this page

Exception Logging Using The Database

April 8th, 2008

This is a simple technique I use to log exceptions in all my web applications.

First, let’s start by adding the following to the web.config:

<appSettings>
<add key="LogUnhandledExceptions" value="true"/>
</appSettings>

Second, we add the following bit inside the Application_Error event of the Global.asax file. This will capture all the unhandled exceptions and log them into the database:

// Evaluated once when the type is initialized. A missing key yields null, which
// Convert.ToBoolean turns into false, so logging is off unless it is explicitly enabled.
private static bool logUnhandledExceptions = Convert.ToBoolean(ConfigurationManager.AppSettings["LogUnhandledExceptions"]);
.
.
.
/// <summary>
/// Application-wide handler for unhandled exceptions. When logging is enabled and an
/// error is pending, passes the root cause of the exception chain to the exception logger.
/// </summary>
void Application_Error(object sender, EventArgs e)
{
    // Nothing to do when logging is switched off or there is no request context.
    if (!logUnhandledExceptions || Context == null)
    {
        return;
    }

    Exception pendingError = Server.GetLastError();

    if (pendingError == null)
    {
        return;
    }

    // Walk to the innermost exception, i.e. the source of the exception chain.
    Exception rootCause = pendingError.GetBaseException();

    YourCompany.Helpers.ExceptionHandler.Log(rootCause);
}

And now, finally, the main part. This is the class which will log all the relevant information related to the exception in the DB.

using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Diagnostics;
using System.Data.SqlClient;
using System.Web.Configuration;

namespace YourCompany.Helpers
{
    /// <summary>
    /// Logs exceptions, together with details of the current HTTP request and browser,
    /// to the database through the EventLog_Insert stored procedure. If that fails,
    /// the failure is written to the Windows event log instead.
    /// </summary>
    public static class ExceptionHandler
    {
        // Upper bound on the event log message length, in characters.
        // NOTE(review): chosen to stay well below the documented EventLog.WriteEntry
        // message size limit (roughly 32 KB) - confirm against the target OS.
        private const int MaxEventLogMessageLength = 15000;

        /// <summary>
        /// Writes <paramref name="ex"/> and the current request's details to the database.
        /// Never throws: any failure while logging is redirected to the Windows event log.
        /// </summary>
        /// <param name="ex">The exception to log. A null reference is ignored.</param>
        public static void Log(Exception ex)
        {
            if (ex == null)
            {
                return;
            }

            try
            {
                HttpContext context = HttpContext.Current;

                if (context == null)
                {
                    // Without a request there is nothing to fill the request parameters with.
                    WriteToEventLog(ex, "HttpContext.Current is null; the exception was not written to the database.");
                    return;
                }

                HttpRequest request = context.Request;
                HttpBrowserCapabilities browser = request.Browser;

                string referer = String.Empty;

                if (request.UrlReferrer != null)
                {
                    referer = request.UrlReferrer.ToString();
                }

                // TargetSite (like Source and StackTrace) can be null, e.g. for an
                // exception that was created but never thrown.
                string targetSite = ex.TargetSite == null ? String.Empty : ex.TargetSite.ToString();

                using (SqlConnection sqlConnection = new SqlConnection(WebConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString))
                {
                    using (SqlCommand sqlCommand = new SqlCommand())
                    {
                        sqlCommand.Connection = sqlConnection;
                        sqlCommand.CommandType = CommandType.StoredProcedure;
                        sqlCommand.CommandText = "EventLog_Insert";

                        // A parameter whose Value is a null reference is not sent to the server,
                        // so missing strings are replaced with String.Empty (same as referer above).
                        sqlCommand.Parameters.Add("@Source", SqlDbType.NVarChar).Value = OrEmpty(ex.Source);
                        sqlCommand.Parameters.Add("@Message", SqlDbType.NVarChar).Value = OrEmpty(ex.Message);
                        sqlCommand.Parameters.Add("@Form", SqlDbType.NVarChar).Value = request.Form.ToString();
                        sqlCommand.Parameters.Add("@Path", SqlDbType.NVarChar).Value = OrEmpty(request.Path);
                        sqlCommand.Parameters.Add("@QueryString", SqlDbType.NVarChar).Value = request.QueryString.ToString();
                        sqlCommand.Parameters.Add("@TargetSite", SqlDbType.NVarChar).Value = targetSite;
                        sqlCommand.Parameters.Add("@StackTrace", SqlDbType.NVarChar).Value = OrEmpty(ex.StackTrace);
                        sqlCommand.Parameters.Add("@Referer", SqlDbType.NVarChar).Value = referer;
                        sqlCommand.Parameters.Add("@MachineName", SqlDbType.NVarChar).Value = OrEmpty(context.Server.MachineName);
                        sqlCommand.Parameters.Add("@IPAddress", SqlDbType.NVarChar).Value = OrEmpty(request.UserHostAddress);
                        sqlCommand.Parameters.Add("@BrowserType", SqlDbType.NVarChar).Value = OrEmpty(browser.Type);
                        sqlCommand.Parameters.Add("@BrowserName", SqlDbType.NVarChar).Value = OrEmpty(browser.Browser);
                        sqlCommand.Parameters.Add("@BrowserVersion", SqlDbType.NVarChar).Value = OrEmpty(browser.Version);
                        sqlCommand.Parameters.Add("@BrowserPlatform", SqlDbType.NVarChar).Value = OrEmpty(browser.Platform);
                        sqlCommand.Parameters.Add("@SupportsCookies", SqlDbType.Bit).Value = browser.Cookies;
                        sqlCommand.Parameters.Add("@IsCrawler", SqlDbType.Bit).Value = browser.Crawler;

                        sqlConnection.Open();

                        sqlCommand.ExecuteNonQuery();
                    }
                }
            }
            catch (Exception loggingError)
            {
                // Database (or request) failure: record both the logging failure and the
                // original exception in the Windows event log so neither is lost.
                WriteToEventLog(ex, loggingError.ToString());
            }
        }

        /// <summary>Returns <paramref name="value"/>, or an empty string when it is null.</summary>
        private static string OrEmpty(string value)
        {
            return value ?? String.Empty;
        }

        /// <summary>
        /// Last-resort logging to the Windows event log. Swallows its own failures because
        /// throwing from an error handler would hide the original problem.
        /// </summary>
        /// <param name="ex">The exception that was originally being logged.</param>
        /// <param name="details">Why the database logging did not happen.</param>
        private static void WriteToEventLog(Exception ex, string details)
        {
            try
            {
                // WriteEntry rejects a null or empty source name.
                string source = String.IsNullOrEmpty(ex.Source) ? "Application" : ex.Source;

                string message = String.Format(
                    "Database Error From Exception Handler!{0}{1}{0}Original exception: {2}",
                    Environment.NewLine,
                    details,
                    ex);

                if (message.Length > MaxEventLogMessageLength)
                {
                    message = message.Substring(0, MaxEventLogMessageLength);
                }

                EventLog.WriteEntry(source, message, EventLogEntryType.Error);
            }
            catch
            {
                // Deliberately ignored: the event log may be unavailable (unregistered source,
                // missing permissions) and there is no further fallback.
            }
        }
    }
}

J | Programming | No comments Jump to the top of this page

The Mysterious “Invalid Parameter Used” Error

March 29th, 2008

Recently I was trying to write a little C# function for cropping images. I expected it to be a quick 5 minute task but I ended up spending a huge amount of time getting it to work correctly. I kept getting a very stubborn and mysterious error - “Invalid Parameter Used” - whenever I tried to save my newly cropped image by doing bitmap.Save(). I tried various suggestions I found on forums and blogs to no avail.

Finally I figured out what the problem was. I was encapsulating the Bitmap and Graphics objects inside a “using” statement. So the Bitmap object was being prematurely disposed before I returned it back to the caller.

So in the end my solution looked like this. I just got rid of the “usings”.

/// <summary>
/// Copies the <paramref name="cropRect"/> region of <paramref name="image"/> into a new
/// 24-bit RGB bitmap that has the same resolution as the source.
/// </summary>
/// <param name="image">Source image to crop from.</param>
/// <param name="cropRect">Region of the source, in pixels, to copy.</param>
/// <returns>A new bitmap of the cropped region. The caller owns it and must dispose it.</returns>
public Bitmap CropImage(Image image, Rectangle cropRect)
{
    // The bitmap is the return value, so it must NOT sit in a using block:
    // disposing it here would hand the caller a dead object.
    Bitmap bitmap = new Bitmap(cropRect.Width, cropRect.Height, PixelFormat.Format24bppRgb);

    try
    {
        bitmap.SetResolution(image.HorizontalResolution, image.VerticalResolution);

        // The Graphics object is only a drawing surface over the bitmap; disposing it
        // releases its handle and leaves the bitmap fully usable.
        using (Graphics graphics = Graphics.FromImage(bitmap))
        {
            graphics.DrawImage(image, 0, 0, cropRect, GraphicsUnit.Pixel);
        }

        return bitmap;
    }
    catch
    {
        // Nothing is returned on failure, so release the half-built bitmap before rethrowing.
        bitmap.Dispose();
        throw;
    }
}

Usage -

// CropImage takes an Image (not a path) and Bitmap.Save needs a destination, so load the
// source image first and dispose both images once the cropped copy has been written.
// The output file name below is only an example.
using (Image source = Image.FromFile(Server.MapPath("~/uploads/test.jpg")))
using (Bitmap croppedBmp = CropImage(source, new Rectangle(x, y, width, height)))
{
    croppedBmp.Save(Server.MapPath("~/uploads/test_cropped.jpg"), ImageFormat.Jpeg);
}

Moral of the story: If you are getting this error while calling Bitmap.Save() then make sure you are not disposing your Bitmap object prematurely. Hope this helps.

J | Programming | No comments Jump to the top of this page

How to unzip files in .NET using SharpZipLib

March 12th, 2008

SharpZipLib is a great open source library for handling all kinds of gzip/zip compression/decompression. More Info - http://www.icsharpcode.net/OpenSource/SharpZipLib/

In the following example I’m passing the HtmlInputFile object directly into the ZipInputStream to decompress the PostedFile and save its contents on the server.

.
.
using System.IO;
using ICSharpCode.SharpZipLib.Zip;
.
.
/// <summary>
/// Reads the uploaded file as a zip archive and saves every file entry into ~/uploads/.
/// Directory information inside the archive is discarded: only the file name of each
/// entry is used, so all files land directly in the upload folder.
/// </summary>
/// <param name="objFileUpload">File input control whose posted file is the zip archive.</param>
private void UnzipAndSave(HtmlInputFile objFileUpload)
{
    string virtualPath = "~/uploads/";
    string fileName = string.Empty;
    string fileExtension = string.Empty;
    string fileSize = string.Empty;

    // NOTE(review): entries are written under the names stored in the archive, so an upload
    // could place executable content (e.g. .aspx) in the folder - validate fileExtension
    // if the uploads folder is served by the web server.
    using (ZipInputStream s = new ZipInputStream(objFileUpload.PostedFile.InputStream))
    {
        ZipEntry theEntry;

        while ((theEntry = s.GetNextEntry()) != null)
        {
            // GetFileName strips any directory part, which also keeps entries such as
            // "..\..\web.config" from escaping the upload folder.
            fileName = Path.GetFileName(theEntry.Name);
            fileExtension = Path.GetExtension(fileName);

            // Pure directory entries have no file name; skip them.
            if (string.IsNullOrEmpty(fileName))
            {
                continue;
            }

            try
            {
                // using guarantees the file handle is released even if a read or write fails.
                using (FileStream streamWriter = File.Create(Server.MapPath(virtualPath + fileName)))
                {
                    byte[] data = new byte[2048];
                    int size;

                    // Test the count before writing, so a zero or negative
                    // end-of-entry result is never passed to Write.
                    while ((size = s.Read(data, 0, data.Length)) > 0)
                    {
                        streamWriter.Write(data, 0, size);
                    }

                    // Whole kilobytes (integer division), e.g. "12 KB".
                    fileSize = Convert.ToDecimal(streamWriter.Length / 1024).ToString() + " KB";
                }

                //Add custom code here to add each file to the DB, etc.
            }
            catch (Exception ex)
            {
                Response.Write(ex.ToString());
            }
        }
    }
}

J | Programming | No comments Jump to the top of this page

How to manipulate video in .NET using ffmpeg

March 12th, 2008

In this case I’m resizing the video and converting it to FLV format. For more ffmpeg commandline options - http://ffmpeg.mplayerhq.hu/ffmpeg-doc.html

/// <summary>
/// Resizes a video to 368x216 by running ffmpeg as a child process, waits for it to finish
/// and shows ffmpeg's stderr in lblError and its stdout in lblOutput.
/// </summary>
/// <param name="srcURL">Path of the source video, passed to ffmpeg as the -i argument.</param>
/// <param name="destURL">Path of the output file; ffmpeg infers the format from its extension.</param>
private void ConvertVideo(string srcURL, string destURL)
{
    string ffmpegURL = "~/project/tools/ffmpeg.exe";
    string ffmpegPath = Server.MapPath(ffmpegURL);
    DirectoryInfo directoryInfo = new DirectoryInfo(Path.GetDirectoryName(ffmpegPath));

    ProcessStartInfo startInfo = new ProcessStartInfo();
    startInfo.FileName = ffmpegPath;
    // The frame size must use an ASCII 'x' (368x216).
    startInfo.Arguments = string.Format("-i \"{0}\" -s 368x216 -aspect 1.7777 \"{1}\"", srcURL, destURL);
    startInfo.WorkingDirectory = directoryInfo.FullName;
    startInfo.UseShellExecute = false;
    startInfo.RedirectStandardOutput = true;
    startInfo.RedirectStandardInput = true;
    startInfo.RedirectStandardError = true;

    using (Process process = new Process())
    {
        process.StartInfo = startInfo;

        // stderr is collected asynchronously while stdout is read synchronously. Waiting for
        // exit before draining both pipes can deadlock: the child blocks on a full pipe
        // buffer while the parent blocks in WaitForExit.
        System.Text.StringBuilder errorText = new System.Text.StringBuilder();
        process.ErrorDataReceived += delegate(object sender, DataReceivedEventArgs e)
        {
            if (e.Data != null)
            {
                errorText.AppendLine(e.Data);
            }
        };

        try
        {
            process.Start();

            // Nothing is ever written to ffmpeg's stdin; closing it makes an interactive
            // prompt (e.g. "overwrite existing file?") see end-of-input instead of hanging.
            process.StandardInput.Close();

            process.BeginErrorReadLine();
            string outputText = process.StandardOutput.ReadToEnd();

            // The parameterless WaitForExit also waits for the asynchronous stderr reader to finish.
            process.WaitForExit();

            lblError.Text = errorText.ToString();
            lblOutput.Text = outputText;
        }
        catch (Exception ex)
        {
            Response.Write(ex.ToString());
        }
    }
}

J | Programming | 7 comments Jump to the top of this page

Event Driven Development using ASP.NET & C#

February 16th, 2008

Until recently I wasn’t entirely familiar with the concepts of Event Driven Programming, Event Bubbling, etc. Being a .NET developer I’ve been exposed to events and delegates, but I never really understood the concept in its entirety. Somehow I had a hard time finding good sources online focusing on event driven programming, especially using ASP.NET/C#. But I had to get more familiar with the concept since I need to use a lot of it in my current project. So after doing some digging I found a couple of good links which explain the concept quite well -

Hope that helps.

J | Programming | No comments Jump to the top of this page


Recently on Flickr

  • IMG_0319
  • IMG_0318
  • IMG_0317
  • IMG_0311
  • IMG_0310
  • IMG_0308
  • IMG_0307
  • IMG_0306

Switch Theme

Meta