jsFlags = $jsFlags; $this->htmlOptions = $htmlOptions; }

    // Parse a complete HTML document.
    //
    // Runs the one-off document-wide passes (title removal, base href, meta
    // refresh, forms, optional script stripping), then walks the document and
    // hands each script/style body to $this->JS() / $this->CSS() and everything
    // else to $this->HTML(). Finally splices in the optional extras.
    //
    //   $input   the raw document
    //   $insert  markup placed directly after the opening body tag (prepended
    //            when there is no body tag and no frameset); pass false to skip
    //   $inject  when truthy, the output of injectionJS() is placed directly
    //            after the opening head tag (prepended when there is none)
    //   $footer  markup placed directly before the closing body tag (appended
    //            when there is none)
    //
    // Returns the rewritten document.
    //
    // NOTE(review): the html_* callbacks and injectionJS() are defined outside
    // this block; their behaviour is assumed from their names - confirm.
    function HTMLDocument($input, $insert='', $inject=false, $footer='') {

        //
        // Apply parsing that only needs to be done once..
        //

        // Remove titles if option is enabled
        if ( $this->htmlOptions['stripTitle'] ) {
            $input = preg_replace('#<title.*?</title>#is', '', $input, 1);
        }

        // Remove and record a <base> href
        $input = preg_replace_callback('#<base href\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)[^>]*>#i', 'html_stripBase', $input, 1);

        // Proxify url= values in meta redirects
        $input = preg_replace_callback('#content\s*=\s*(["\\\'])?[0-9]+\s*;\s*url=([\\\'"]|&\#39;)?((?(?<=")[^"]+|(?(?<=\\\')[^\\\']+|[^\\\'" >]+)))(?(2)\\2|)(?(1)\\1|)#i', 'html_metaRefresh', $input, 1);

        // Process forms
        $input = preg_replace_callback('#<form([^>]*)>(.*?)</form>#is', 'html_form', $input);

        // Remove scripts blocks (avoids individual processing below)
        if ( $this->htmlOptions['stripJS'] ) {
            $input = preg_replace('#<script[^>]*>.*?</script>#is', '', $input);
        }

        //
        // Split up the document into its different types and parse them
        //

        // Build up new document into this var
        $new    = '';
        $offset = 0;
        $length = strlen($input);

        // Find instances of script or style blocks
        while ( preg_match('#<(s(?:cript|tyle))[^>]*>#i', $input, $match, PREG_OFFSET_CAPTURE, $offset) ) {

            // What type of block is this?
            $block = strtolower($match[1][0]);

            // Start position of the block's content (just past the opening tag)
            $innerStart = $match[0][1] + strlen($match[0][0]);

            // Build the matching end tag and find its position
            $endTag   = '</' . $block . '>';
            $innerEnd = stripos($input, $endTag, $innerStart);

            // Unterminated block: treat the rest of the document as its content.
            // Without this, $offset would become false (0) and the loop would
            // match the same opening tag forever.
            if ( $innerEnd === false ) {
                $innerEnd = $length;
            }

            // Parse everything up to and including the opening tag as HTML
            $new .= $this->HTML((string) substr($input, $offset, $innerStart - $offset));

            // Find parsing function
            $parseFunction = $block === 'style' ? 'CSS' : 'JS';

            // Add the parsed block
            $new .= $this->$parseFunction((string) substr($input, $innerStart, $innerEnd - $innerStart));

            // Move offset to the end tag; it is picked up by the next HTML chunk
            $offset = $innerEnd;

        }

        // And add the final chunk (between last script/style block and end of doc)
        $new .= $this->HTML((string) substr($input, $offset));

        // Replace input with the updated document
        $input = $new;

        //
        // Now add our own code bits
        //
        // The additions are spliced in by offset instead of being passed as a
        // preg_replace() replacement, so "$1" / "\1" sequences inside them are
        // kept literally and not expanded as backreferences.
        //

        // Insert our mini form after the <body>
        if ( $insert !== false ) {

            // Check for a frameset (boolean, so a match at offset 0 still counts)
            $useFrames = stripos($input, '<frameset') !== false;

            if ( $useFrames ) {
                // Pass every frame src through the flagging callback
                $input = preg_replace_callback('#<frame[^>]+src\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_flagFrames', $input);
            }

            // Attempt to add after body
            if ( preg_match('#<body[^>]*>#i', $input, $tag, PREG_OFFSET_CAPTURE) ) {
                $input = substr_replace($input, $insert, $tag[0][1] + strlen($tag[0][0]), 0);
            } elseif ( ! $useFrames ) {
                // No body tag and not a frameset, just prepend
                $input = $insert . $input;
            }

        }

        // Insert our javascript library
        if ( $inject ) {

            // Generate javascript to insert
            $inject = injectionJS();

            // Add our proxy javascript after <head>
            if ( preg_match('#<head[^>]*>#i', $input, $tag, PREG_OFFSET_CAPTURE) ) {
                $input = substr_replace($input, $inject, $tag[0][1] + strlen($tag[0][0]), 0);
            } else {
                // If no <head>, just prepend
                $input = $inject . $input;
            }

        }

        // Add anything to the footer?
        if ( $footer ) {

            // Place it directly before the closing body tag
            if ( preg_match('#</body[^>]*>#i', $input, $tag, PREG_OFFSET_CAPTURE) ) {
                $input = substr_replace($input, $footer, $tag[0][1], 0);
            } else {
                // If no </body>, just append the footer
                $input .= $footer;
            }

        }

        // Return new document
        return $input;

    }

// Parse HTML sections function HTML($input) { // Removing objects? Follow spec and display inner content of object tags instead. if ( $this->htmlOptions['stripObjects'] ) { // Remove all object tags (including those deprecated but still common) $input = preg_replace('#<(?>object|applet|param|embed)[^>]*>#i', '', $input, -1, $tmp); // Found any? 
Remove the corresponding end tags if ( $tmp ) { $input = preg_replace('#object|applet|param|embed)>#i', '', $input, $tmp); } } else { // Parse tags $input = preg_replace_callback('#]+value\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)[^>]*>#i', 'html_paramValue', $input); // To do: proxify object related URLs } // Show content within