34 protected $_blocks =
'address article area aside blockquote caption col colgroup dd 35 details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header 36 hr hgroup legend map math menu nav noscript p pre section select style summary 37 table tbody td tfoot th thead tr ul ol option li';
42 protected $_inlines =
'a abbr audio b button canvas caption cite code command datalist 43 del dfn em embed i iframe img input ins kbd keygen label map mark meter object 44 output progress q rp rt ruby s samp script select small source span strong style 45 sub sup textarea time var video wbr';
52 protected $_descendList =
'article aside blockquote body details div footer form 60 protected $_alterList =
'article aside blockquote body details div footer header 72 $this->_blocks = preg_split(
'@\\s+@', $this->_blocks);
73 $this->_descendList = preg_split(
'@\\s+@', $this->_descendList);
74 $this->_alterList = preg_split(
'@\\s+@', $this->_alterList);
75 $this->_inlines = preg_split(
'@\\s+@', $this->_inlines);
76 $this->_unique = md5(__FILE__);
92 $html = str_replace([
"\r\n",
"\r"],
"\n",
$html);
95 $html = str_replace(
'&', $this->_unique .
'AMP',
$html);
97 $this->_doc =
new DOMDocument();
101 $use_internal_errors = libxml_use_internal_errors(
true);
104 $disable_load_entities = libxml_disable_entity_loader(
true);
106 if (!$this->_doc->loadHTML(
"<html><meta http-equiv='content-type' " 107 .
"content='text/html; charset={$this->encoding}'><body>{$html}</body>" 109 libxml_use_internal_errors($use_internal_errors);
110 libxml_disable_entity_loader($disable_load_entities);
114 libxml_use_internal_errors($use_internal_errors);
115 libxml_disable_entity_loader($disable_load_entities);
117 $this->_xpath =
new DOMXPath($this->_doc);
120 $nodeList = $this->_xpath->query(
'//body[1]');
121 if ($nodeList->item(0) instanceof DOMText) {
124 $this->_xpath =
new DOMXPath($this->_doc);
125 $nodeList = $this->_xpath->query(
'//body[1]');
127 if ($nodeList->item(0) instanceof DOMText) {
129 throw new \RuntimeException(
'DOMXPath::query for BODY element returned a text node');
135 $html = $this->_doc->saveHTML();
140 $html = preg_replace(
'/(' . $this->_unique .
'NL){2,}/',
'</autop><autop>', $html);
141 $html = str_replace([$this->_unique .
'BR', $this->_unique .
'NL',
'<br>'],
144 $html = str_replace(
'<br /></autop>',
'</autop>', $html);
150 $use_internal_errors = libxml_use_internal_errors(
true);
153 $disable_load_entities = libxml_disable_entity_loader(
true);
155 if (!$this->_doc->loadHTML($html)) {
156 libxml_use_internal_errors($use_internal_errors);
157 libxml_disable_entity_loader($disable_load_entities);
161 libxml_use_internal_errors($use_internal_errors);
162 libxml_disable_entity_loader($disable_load_entities);
165 $this->_xpath =
new DOMXPath($this->_doc);
168 foreach ($this->_xpath->query(
'//autop') as $autop) {
171 if (trim($autop->textContent) !==
'') {
174 foreach ($autop->childNodes as $node) {
175 if ($node->nodeType === XML_ELEMENT_NODE) {
183 $autop->setAttribute(
"r",
"1");
188 foreach ($this->_xpath->query(
'//div') as $el) {
190 $autops = $this->_xpath->query(
'./autop', $el);
191 if ($autops->length === 1) {
192 $firstAutop = $autops->item(0);
194 $firstAutop->setAttribute(
"r",
"1");
198 $html = $this->_doc->saveHTML();
201 $bodyStart = strpos($html,
'<body>');
202 $bodyEnd = strpos($html,
'</body>', $bodyStart + 6);
203 $html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6);
206 $html = preg_replace(
'@<autop r="1">(.*?)</autop>@',
'\\1', $html);
209 $html = str_replace(
'<autop>',
"\n<p>", $html);
210 $html = str_replace(
'</autop>',
"</p>\n", $html);
212 $html = str_replace(
'<br>',
'<br />', $html);
213 $html = str_replace($this->_unique .
'AMP',
'&', $html);
225 $elsToProcess = [$el];
226 $inlinesToProcess = [];
227 while ($el = array_shift($elsToProcess)) {
230 $alterInline = in_array($el->nodeName, $this->_alterList);
234 $ltrimFirstTextNode =
true;
241 $isFollowingBr =
false;
243 $node = $el->firstChild;
244 while (null !== $node) {
249 $autop = $el->insertBefore($this->_doc->createElement(
'autop'), $node);
253 $isElement = ($node->nodeType === XML_ELEMENT_NODE);
255 $isBlock = in_array($node->nodeName, $this->_blocks);
258 $ltrimFirstTextNode =
false;
265 $isText = ($node->nodeType === XML_TEXT_NODE);
266 $isLastInline = (! $node->nextSibling
267 || ($node->nextSibling->nodeType === XML_ELEMENT_NODE
268 && in_array($node->nextSibling->nodeName, $this->_blocks)));
270 $isFollowingBr = ($node->nodeName ===
'br');
274 $nodeText = $node->nodeValue;
276 if ($ltrimFirstTextNode) {
278 $nodeText = ltrim($nodeText);
279 $ltrimFirstTextNode =
false;
281 if ($isFollowingBr && preg_match(
'@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) {
283 $nodeText = substr($nodeText, strlen($m[0]));
287 $nodeText = rtrim($nodeText);
289 $nodeText = str_replace(
"\n", $this->_unique .
'NL', $nodeText);
291 $node = $node->nextSibling;
294 $tmpNode->nodeValue = $nodeText;
295 $autop->appendChild($tmpNode);
300 if ($isBlock || ! $node->nextSibling) {
302 if (in_array($node->nodeName, $this->_descendList)) {
303 $elsToProcess[] = $node;
308 $ltrimFirstTextNode =
true;
313 if ($isElement &&
false !== strpos($tmpNode->textContent,
"\n")) {
314 $inlinesToProcess[] = $tmpNode;
316 $node = $node->nextSibling;
317 $autop->appendChild($tmpNode);
322 $node = $node->nextSibling;
328 while ($el = array_shift($inlinesToProcess)) {
329 $ignoreLeadingNewline =
false;
330 foreach ($el->childNodes as $node) {
331 if ($node->nodeType === XML_ELEMENT_NODE) {
332 if ($node->nodeValue ===
'BR') {
333 $ignoreLeadingNewline =
true;
335 $ignoreLeadingNewline =
false;
336 if (
false !== strpos($node->textContent,
"\n")) {
337 $inlinesToProcess[] = $node;
341 }
elseif ($node->nodeType === XML_TEXT_NODE) {
342 $text = $node->nodeValue;
343 if (
$text[0] ===
"\n" && $ignoreLeadingNewline) {
345 $ignoreLeadingNewline =
false;
347 $node->nodeValue = str_replace(
"\n", $this->_unique .
'BR',
$text);
process($html)
Create wrapper P and BR elements in HTML depending on newlines.
if($item instanceof\ElggEntity) elseif($item instanceof\ElggRiverItem) elseif(is_callable([$item, 'getType']))
addParagraphs(DOMElement $el)
Add P and BR elements as necessary.
__construct()
Constructor.