// NOTE(review): the leading integers appear to be line numbers from a source
// listing; gaps in the numbering mean some original lines are not shown here.

// Space-separated names of elements treated as block-level. The string is
// split on whitespace into an array elsewhere in this listing, and node names
// are then tested against it with in_array().
28 protected $_blocks =
'address article area aside blockquote caption col colgroup dd
29 details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header
30 hr hgroup legend map math menu nav noscript p pre section select style summary
31 table tbody td tfoot th thead tr ul ol option li';
// Space-separated names of inline elements; also split into an array
// elsewhere in this listing. No lookup against this list is visible here.
36 protected $_inlines =
'a abbr audio b button canvas caption cite code command datalist
37 del dfn em embed i iframe img input ins kbd keygen label map mark meter object
38 output progress q rp rt ruby s samp script select small source span strong style
39 sub sup textarea time var video wbr';
// The two declarations below are truncated in this listing (their string
// literals are never closed). In the visible code, a child node whose name is
// in $_descendList is appended to the $elsToProcess queue, and the name of the
// element being processed is tested against $_alterList to set $alterInline.
46 protected $_descendList =
'article aside blockquote body details div footer form
54 protected $_alterList =
'article aside blockquote body details div footer header
// Presumably part of the constructor (the enclosing function header is not
// visible in this listing). Each whitespace-separated tag-name string is
// converted into an array of names. Inside single quotes '\\' is a single
// backslash, so the pattern handed to PCRE is @\s+@.
64 $this->_blocks = preg_split(
'@\\s+@', $this->_blocks);
65 $this->_descendList = preg_split(
'@\\s+@', $this->_descendList);
66 $this->_alterList = preg_split(
'@\\s+@', $this->_alterList);
67 $this->_inlines = preg_split(
'@\\s+@', $this->_inlines);
// MD5 of this file's path. It is used as the prefix of the 'AMP', 'NL' and
// 'BR' placeholders seen elsewhere in this listing, presumably so that the
// placeholders cannot be confused with real content.
68 $this->_unique = md5(__FILE__);
// Input normalization and first DOM load. Presumably part of process($html);
// the enclosing function header is not visible in this listing.

// Normalize line endings: CRLF and lone CR both become LF.
84 $html = str_replace(array(
"\r\n",
"\r"),
"\n",
$html);
// Replace every '&' with a placeholder (unique token + 'AMP'); the reverse
// replacement appears later in this listing.
87 $html = str_replace(
'&', $this->_unique .
'AMP',
$html);
89 $this->_doc =
new DOMDocument();
// Collect libxml errors internally instead of emitting them as PHP warnings.
93 libxml_use_internal_errors(
true);
// Disable external entity loading for the parse and keep the previous setting
// so it can be restored afterwards.
// NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0.
96 $disable_load_entities = libxml_disable_entity_loader(
true);
// The fragment is wrapped in a minimal document whose meta tag declares
// $this->encoding as the charset. The end of this call and the body of the
// `if` are not visible in this listing.
98 if (!$this->_doc->loadHTML(
"<html><meta http-equiv='content-type' "
99 .
"content='text/html; charset={$this->encoding}'><body>{$html}</body>"
// Restore the previous entity-loader setting. Two restore calls are visible;
// presumably one is on the failure path and one on the success path — confirm.
102 libxml_disable_entity_loader($disable_load_entities);
106 libxml_disable_entity_loader($disable_load_entities);
108 $this->_xpath =
new DOMXPath($this->_doc);
// Select the first <body> element of the parsed document.
110 $nodeList = $this->_xpath->query(
'//body[1]');
// Text-level pass over the serialized document followed by a second DOM load.
// Several lines of the original are not visible in this listing.

// Serialize the document back to an HTML string.
114 $html = $this->_doc->saveHTML();
// A run of two or more 'NL' placeholders is replaced with '</autop><autop>',
// i.e. it ends one <autop> element and starts the next.
119 $html = preg_replace(
'/(' . $this->_unique .
'NL){2,}/',
'</autop><autop>',
$html);
// Search list: 'BR' placeholders, remaining 'NL' placeholders and literal
// '<br>'. The replacement argument of this call is not visible here.
120 $html = str_replace(array($this->_unique .
'BR', $this->_unique .
'NL',
'<br>'),
// Drop a '<br />' that sits immediately before a closing </autop>.
123 $html = str_replace(
'<br /></autop>',
'</autop>',
$html);
// Parse the modified HTML again, with external entity loading disabled for
// the duration of the load and the previous setting restored afterwards.
128 $disable_load_entities = libxml_disable_entity_loader(
true);
130 if (!$this->_doc->loadHTML(
$html)) {
131 libxml_disable_entity_loader($disable_load_entities);
135 libxml_disable_entity_loader($disable_load_entities);
// Fresh XPath helper bound to the re-parsed document.
138 $this->_xpath =
new DOMXPath($this->_doc);
// Visit every <autop> element in the document.
141 foreach ($this->_xpath->query(
'//autop') as $autop) {
// Branch for <autop> elements whose text is not empty after trimming; their
// element children are inspected (the inspection itself is not visible here).
144 if (trim($autop->textContent) !==
'') {
147 foreach ($autop->childNodes as $node) {
148 if ($node->nodeType === XML_ELEMENT_NODE) {
// Mark the <autop> with r="1". Elements carrying that attribute are unwrapped
// by a regex replacement later in this listing. The conditions guarding this
// call are only partly visible here.
156 $autop->setAttribute(
"r",
"1");
// For each <div>, collect its direct <autop> children; in the lines visible
// here, a <div> with exactly one such child gets that child marked r="1".
161 foreach ($this->_xpath->query(
'//div') as $el) {
163 $autops = $this->_xpath->query(
'./autop', $el);
164 if ($autops->length === 1) {
165 $firstAutop = $autops->item(0);
167 $firstAutop->setAttribute(
"r",
"1");
174 $bodyStart = strpos(
$html,
'<body>');
175 $bodyEnd = strpos(
$html,
'</body>', $bodyStart + 6);
176 $html = substr(
$html, $bodyStart + 6, $bodyEnd - $bodyStart - 6);
179 $html = preg_replace(
'@<autop r="1">(.*?)</autop>@',
'\\1',
$html);
182 $html = str_replace(
'<autop>',
"\n<p>",
$html);
183 $html = str_replace(
'</autop>',
"</p>\n",
$html);
186 $html = str_replace($this->_unique .
'AMP',
'&',
$html);
// Element traversal, presumably from addParagraphs(DOMElement $el); the
// function header and many intermediate lines (including most of the
// surrounding if/else structure) are not visible in this listing.

// Work queue seeded with $el, plus a second queue for inline elements that is
// drained after this loop.
198 $elsToProcess = array($el);
199 $inlinesToProcess = array();
200 while ($el = array_shift($elsToProcess)) {
// True when the current element's name is in $_alterList.
203 $alterInline = in_array($el->nodeName, $this->_alterList);
207 $ltrimFirstTextNode =
true;
214 $isFollowingBr =
false;
// Walk the element's children through firstChild / nextSibling.
216 $node = $el->firstChild;
217 while (
null !== $node) {
// Insert a new <autop> element in front of the current node; nodes are
// appended to it further down.
222 $autop = $el->insertBefore($this->_doc->createElement(
'autop'), $node);
226 $isElement = ($node->nodeType === XML_ELEMENT_NODE);
// Block-level test: the node's name appears in $_blocks.
228 $isBlock = in_array($node->nodeName, $this->_blocks);
234 $isText = ($node->nodeType === XML_TEXT_NODE);
// The node ends an inline run when it has no next sibling, or when the next
// sibling is an element listed in $_blocks.
235 $isLastInline = (! $node->nextSibling
236 || ($node->nextSibling->nodeType === XML_ELEMENT_NODE
237 && in_array($node->nextSibling->nodeName, $this->_blocks)));
// Record whether this node is a <br>.
239 $isFollowingBr = ($node->nodeName ===
'br');
243 $nodeText = $node->nodeValue;
// Left-trim the text while the flag is set, then clear the flag.
244 if ($ltrimFirstTextNode) {
245 $nodeText = ltrim($nodeText);
246 $ltrimFirstTextNode =
false;
// After a <br>, strip one leading newline together with the spaces/tabs
// around it (the matched prefix is cut off with substr).
248 if ($isFollowingBr && preg_match(
'@^[ \\t]*\\n[ \\t]*@', $nodeText,
$m)) {
250 $nodeText = substr($nodeText, strlen(
$m[0]));
253 $nodeText = rtrim($nodeText);
// Encode the newlines left in the text as 'NL' placeholders.
255 $nodeText = str_replace(
"\n", $this->_unique .
'NL', $nodeText);
257 $node = $node->nextSibling;
// $tmpNode is assigned in lines that are not visible in this listing.
260 $tmpNode->nodeValue = $nodeText;
261 $autop->appendChild($tmpNode);
266 if ($isBlock || ! $node->nextSibling) {
// Nodes whose name is in $_descendList are queued for the same treatment.
268 if (in_array($node->nodeName, $this->_descendList)) {
269 $elsToProcess[] = $node;
274 $ltrimFirstTextNode =
true;
// Elements whose text contains a newline are queued for the inline pass.
279 if ($isElement &&
false !== strpos($tmpNode->textContent,
"\n")) {
280 $inlinesToProcess[] = $tmpNode;
282 $node = $node->nextSibling;
283 $autop->appendChild($tmpNode);
288 $node = $node->nextSibling;
// Drain the queue of inline elements and encode the newlines in their text
// nodes. Some lines of the original loop are not visible in this listing.
294 while ($el = array_shift($inlinesToProcess)) {
295 $ignoreLeadingNewline =
false;
296 foreach ($el->childNodes as $node) {
297 if ($node->nodeType === XML_ELEMENT_NODE) {
// NOTE(review): this compares nodeValue with 'BR', whereas the <br> test
// earlier in this listing compares nodeName with 'br'. As written the branch
// only fires for an element whose value is the string "BR" — looks like a
// slip for nodeName === 'br'; confirm.
298 if ($node->nodeValue ===
'BR') {
299 $ignoreLeadingNewline =
true;
301 $ignoreLeadingNewline =
false;
// Nested elements whose text contains a newline join the same queue.
302 if (
false !== strpos($node->textContent,
"\n")) {
303 $inlinesToProcess[] = $node;
307 } elseif ($node->nodeType === XML_TEXT_NODE) {
308 $text = $node->nodeValue;
// NOTE(review): $text[0] on an empty string raises an "uninitialized string
// offset" notice/warning — confirm text nodes here are never empty.
309 if (
$text[0] ===
"\n" && $ignoreLeadingNewline) {
311 $ignoreLeadingNewline =
false;
// Newlines inside inline elements are encoded as 'BR' placeholders.
313 $node->nodeValue = str_replace(
"\n", $this->_unique .
'BR',
$text);
process($html)
Create wrapper P and BR elements in HTML depending on newlines.
addParagraphs(DOMElement $el)
Add P and BR elements as necessary.
__construct()
Constructor.
foreach($emails as $email) $html