getElementsByTagName("body")->item(0); // Iterate over words. $words = new DOMWordsIterator($body); foreach ($words as $word) { // If we have exceeded the limit, we delete the remainder of the content. if ($words->key() >= $limit) { // Grab current position. $currentWordPosition = $words->currentWordPosition(); $curNode = $currentWordPosition[0]; $offset = $currentWordPosition[1]; $words = $currentWordPosition[2]; $curNode->nodeValue = substr( $curNode->nodeValue, 0, $words[$offset][1] + strlen($words[$offset][0]) ); self::removeProceedingNodes($curNode, $body); if (!empty($ellipsis)) { self::insertEllipsis($curNode, $ellipsis); } break; } } return self::innerHTML($body); } /** * Safely truncates HTML by a given number of letters. * @param string $html Input HTML. * @param integer $limit Limit to how many letters we preserve. * @param string $ellipsis String to use as ellipsis (if any). * @return string Safe truncated HTML. */ public static function truncateLetters($html, $limit = 0, $ellipsis = "") { if ($limit <= 0) { return $html; } $dom = self::htmlToDomDocument($html); // Grab the body of our DOM. $body = $dom->getElementsByTagName("body")->item(0); // Iterate over letters. $letters = new DOMLettersIterator($body); foreach ($letters as $letter) { // If we have exceeded the limit, we want to delete the remainder of this document. if ($letters->key() >= $limit) { $currentText = $letters->currentTextPosition(); $currentText[0]->nodeValue = substr($currentText[0]->nodeValue, 0, $currentText[1] + 1); self::removeProceedingNodes($currentText[0], $body); if (!empty($ellipsis)) { self::insertEllipsis($currentText[0], $ellipsis); } break; } } return self::innerHTML($body); } /** * Builds a DOMDocument object from a string containing HTML. * @param string $html HTML to load * @returns DOMDocument Returns a DOMDocument object. */ public static function htmlToDomDocument($html) { if (!$html) { $html = '

'; } // Transform multibyte entities which otherwise display incorrectly. $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'); // Internal errors enabled as HTML5 not fully supported. libxml_use_internal_errors(true); // Instantiate new DOMDocument object, and then load in UTF-8 HTML. $dom = new DOMDocument(); $dom->encoding = 'UTF-8'; $dom->loadHTML($html); return $dom; } /** * Removes all nodes after the current node. * @param DOMNode|DOMElement $domNode * @param DOMNode|DOMElement $topNode * @return void */ private static function removeProceedingNodes($domNode, $topNode) { $nextNode = $domNode->nextSibling; if ($nextNode !== null) { self::removeProceedingNodes($nextNode, $topNode); $domNode->parentNode->removeChild($nextNode); } else { //scan upwards till we find a sibling $curNode = $domNode->parentNode; while ($curNode !== $topNode) { if ($curNode->nextSibling !== null) { $curNode = $curNode->nextSibling; self::removeProceedingNodes($curNode, $topNode); $curNode->parentNode->removeChild($curNode); break; } $curNode = $curNode->parentNode; } } } /** * Inserts an ellipsis * @param DOMNode|DOMElement $domNode Element to insert after. * @param string $ellipsis Text used to suffix our document. * @return void */ private static function insertEllipsis($domNode, $ellipsis) { $avoid = array('a', 'strong', 'em', 'h1', 'h2', 'h3', 'h4', 'h5'); //html tags to avoid appending the ellipsis to if (in_array($domNode->parentNode->nodeName, $avoid) && $domNode->parentNode->parentNode !== null) { // Append as text node to parent instead $textNode = new DOMText($ellipsis); if ($domNode->parentNode->parentNode->nextSibling) { $domNode->parentNode->parentNode->insertBefore($textNode, $domNode->parentNode->parentNode->nextSibling); } else { $domNode->parentNode->parentNode->appendChild($textNode); } } else { // Append to current node $domNode->nodeValue = rtrim($domNode->nodeValue) . $ellipsis; } } /** * Returns the innerHTML of a particular DOMElement * * @param $element * @return string */ private static function innerHTML($element) { $innerHTML = ""; $children = $element->childNodes; foreach ($children as $child) { $tmp_dom = new DOMDocument(); $tmp_dom->appendChild($tmp_dom->importNode($child, true)); $innerHTML.=trim($tmp_dom->saveHTML()); } return $innerHTML; } }