<?php
/**
 * Collapse every run of whitespace to a single space, then re-insert the
 * carriage returns the cleaner uses as line separators: a blank line between
 * adjacent tags and a break after every <br>.
 *
 * @param string $text Markup to normalise.
 * @return string Normalised markup.
 */
function lego_normalize_whitespace($text)
{
    $text = preg_replace('/[[:space:]]+/', ' ', $text);
    $text = str_replace('> <', ">\r\r<", $text);

    return str_replace('<br>', "<br>\r", $text);
}

/**
 * Strip presentational clutter from an HTML document.
 *
 * Keeps the structural tags but drops paragraph/list-item attributes,
 * span/div/body wrappers, <!...> declarations, single-letter namespaced tags
 * such as <o:p>, inline style and on* handler attributes, empty paragraphs
 * and the closing </html>. Paragraphs whose opening tag mentions margin-left
 * become blockquotes.
 *
 * @param array|string $text Lines of the document (joined with "\r") or the
 *                           document as a single string.
 * @return string The cleaned markup.
 */
function lego_clean($text)
{
    // An array of lines is joined with "\r"; a ready-made string is used as is.
    if (is_array($text)) {
        $text = implode("\r", $text);
    }

    // normalize white space
    $text = lego_normalize_whitespace((string) $text);

    // remove everything before <body>; when there is no <body> at all
    // (an HTML fragment) keep the input instead of discarding it
    $bodyStart = stripos($text, '<body');
    if ($bodyStart !== false) {
        $text = substr($text, $bodyStart);
    }

    // keep tags, strip attributes
    // Paragraph content is limited to one line: anything except LF, CR and
    // (as in the original pattern) the pipe character.
    $paragraphBody = '([^\n|\r]*)';
    // BodyTextIndent is matched case-sensitively, margin-left is not.
    $text = preg_replace('#<p [^>]*BodyTextIndent[^>]*>' . $paragraphBody . '</p>#', '<p>$1</p>', $text);
    $text = preg_replace('#<p [^>]*margin-left[^>]*>' . $paragraphBody . '</p>#i', '<blockquote>$1</blockquote>', $text);

    // NOTE(review): the original literal here displays as a plain space.
    // Removing every space would stop the "<p ...>" / "<li ...>" rules below
    // from ever matching, so it is assumed to have been a non-breaking space
    // (entity or UTF-8 bytes) -- confirm against the original file.
    $text = str_replace(array('&nbsp;', "\xC2\xA0"), '', $text);

    // clean up whatever is left inside <p> and <li>
    $text = preg_replace('#<p [^>]*>#i', '<p>', $text);
    $text = preg_replace('#<li [^>]*>#i', '<li>', $text);

    // kill unwanted tags
    $text = preg_replace('#</?span[^>]*>#i', '', $text);
    $text = preg_replace('#</?body[^>]*>#i', '', $text);
    $text = preg_replace('#</?div[^>]*>#i', '', $text);
    $text = preg_replace('#<![^>]*>#', '', $text);
    $text = preg_replace('#</?[a-z]:[^>]*>#i', '', $text);

    // kill style and on mouse* attributes
    // The attribute must follow whitespace, a quote or a backslash; that
    // character is kept and everything up to the closing ">" is dropped.
    $attributeLead = '([ \f\r\t\n\'"\\\\])';
    $text = preg_replace('#' . $attributeLead . 'style=[^>]+#i', '$1', $text);
    $text = preg_replace('#' . $attributeLead . 'on[a-z]+=[^>]+#i', '$1', $text);

    // remove empty paragraphs
    $text = str_replace('<p></p>', '', $text);

    // remove closing </html>
    $text = str_replace('</html>', '', $text);

    // clean up white space again
    return lego_normalize_whitespace($text);
}
?>