Parsing HTML string with ease
You need a reference to Microsoft.mshtml.dll, along with the following piece of code, to load an HTML string into a DOM model.
// Loads an HTML string into an in-memory MSHTML document and walks its SPAN
// elements looking for category labels. Requires a reference to
// Microsoft.mshtml.dll. The result is written to Category_Body, which is
// declared outside this snippet.
string MsgBodyHtml = "<html>your html string data goes here</html>";
object[] oPageText = { MsgBodyHtml };
HTMLDocument doc = new HTMLDocumentClass();
IHTMLDocument2 doc2 = (IHTMLDocument2)doc;
// write() takes a params object[]; pass the prepared array instead of leaving it unused.
doc2.write(oPageText);
// Close the stream opened by write() so the document is finalized before it is queried.
doc2.close();

// doc2 can now be traversed DOM-style, much as one would from JavaScript.
// This sample assumes the page contains multiple tables, each following a SPAN
// whose text holds the category label.
foreach (IHTMLElement spanElement in (IHTMLElementCollection)doc2.all.tags("SPAN"))
{
    // Read outerText once: every property access is a separate COM call.
    string spanText = spanElement.outerText;
    if (string.IsNullOrEmpty(spanText) || !spanText.Contains("Category "))
    {
        continue;
    }

    try
    {
        // Walk span -> parent -> grandparent, checking each hop instead of
        // relying on a swallowed NullReferenceException.
        IHTMLElement parent = spanElement.parentElement;
        if (parent == null)
        {
            continue;
        }

        // NOTE(review): the original casts the grandparent to HTMLDTElement, so the
        // span is presumably expected to sit two levels below a <dt> — confirm.
        // 'as' yields null instead of throwing InvalidCastException when it is not.
        HTMLDTElement grandParent = parent.parentElement as HTMLDTElement;
        if (grandParent == null)
        {
            continue;
        }

        // The next sibling is only used when it is a table; anything else
        // (including no sibling at all) is skipped.
        HTMLTableClass htTable = grandParent.nextSibling as HTMLTableClass;
        if (htTable == null)
        {
            continue;
        }

        // Each match overwrites the previous one, so the last matching span wins.
        Category_Body = htTable.outerText;
    }
    catch (System.Runtime.InteropServices.COMException comEx)
    {
        // Stay best-effort (one bad span must not abort the scan), but record the
        // failure rather than swallowing it silently.
        System.Diagnostics.Trace.TraceWarning("Failed to read category table after SPAN: " + comEx.Message);
    }
}
No comments:
Post a Comment