// Whitespace-separated tag names stored in $_blocks. The string is split into
// an array at listing line 64, and node names are tested against that array
// with in_array() at listing lines 236 and 249.
// The numbers at the start of each line appear to be line numbers carried over
// from the extracted listing rather than part of the PHP source - confirm.
// NOTE(review): caption, map, select and style are also present in $_inlines.
28 protected $_blocks =
'address article area aside blockquote caption col colgroup dd
29 details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header
30 hr hgroup legend map math menu nav noscript p pre section select style summary
31 table tbody td tfoot th thead tr ul ol option li';
// Whitespace-separated tag names stored in $_inlines; split into an array at
// listing line 67. No other use of $_inlines is visible in this listing.
// NOTE(review): caption, map, select and style are also present in $_blocks.
36 protected $_inlines =
'a abbr audio b button canvas caption cite code command datalist
37 del dfn em embed i iframe img input ins kbd keygen label map mark meter object
38 output progress q rp rt ruby s samp script select small source span strong style
39 sub sup textarea time var video wbr';
// Two more whitespace-separated tag lists, $_descendList and $_alterList.
// Both literals are cut off in this listing (no closing quote is visible), so
// the full contents cannot be read from here.
// $_descendList decides at listing line 283 whether a child node is queued for
// its own pass; $_alterList sets the $alterInline flag at listing line 211.
46 protected $_descendList =
'article aside blockquote body details div footer form
54 protected $_alterList =
'article aside blockquote body details div footer header
// Presumably the body of __construct() named in the member index - the
// function header is not visible in this listing.
// Each of the four tag lists is converted from a string into an array by
// splitting on runs of whitespace.
64 $this->_blocks = preg_split(
'@\\s+@', $this->_blocks);
65 $this->_descendList = preg_split(
'@\\s+@', $this->_descendList);
66 $this->_alterList = preg_split(
'@\\s+@', $this->_alterList);
67 $this->_inlines = preg_split(
'@\\s+@', $this->_inlines);
// $_unique is the md5 of this file path; the other fragments prepend it to the
// AMP, NL and BR placeholder tokens they insert into the markup.
68 $this->_unique = md5(__FILE__);
// Fragment of what the member index calls process($html); statements between
// the numbered lines are missing from this listing.
// Normalise line endings: CRLF and bare CR both become LF.
84 $html = str_replace(array(
"\r\n",
"\r"),
"\n",
$html);
// Replace every ampersand with the placeholder _unique + AMP. The reverse
// replacement is at listing line 194. Presumably this keeps the HTML parser
// from rewriting entities - confirm.
87 $html = str_replace(
'&', $this->_unique .
'AMP',
$html);
89 $this->_doc =
new DOMDocument();
// Switch libxml to internal error handling and disable the entity loader for
// the duration of the parse, remembering the previous settings so they can be
// put back afterwards.
// NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0 -
// confirm which PHP versions this file must support.
93 $use_internal_errors = libxml_use_internal_errors(
true);
96 $disable_load_entities = libxml_disable_entity_loader(
true);
// Parse the input wrapped in a minimal html/body shell whose meta tag declares
// the charset taken from $this->encoding.
98 if (!$this->_doc->loadHTML(
"<html><meta http-equiv='content-type' "
99 .
"content='text/html; charset={$this->encoding}'><body>{$html}</body>"
// The previous libxml settings are restored twice: listing lines 102-103 look
// like the failed-load branch and 107-108 the normal path; the statements in
// between are not visible here.
102 libxml_use_internal_errors($use_internal_errors);
103 libxml_disable_entity_loader($disable_load_entities);
107 libxml_use_internal_errors($use_internal_errors);
108 libxml_disable_entity_loader($disable_load_entities);
110 $this->_xpath =
new DOMXPath($this->_doc);
// Select the first body element; what is done with $nodeList is not visible
// in this listing.
112 $nodeList = $this->_xpath->query(
'//body[1]');
// Fragment of process($html): serialise the document, rewrite the newline
// placeholders as markup, then parse the result again.
116 $html = $this->_doc->saveHTML();
// A run of two or more NL placeholders closes the current autop element and
// opens a new one.
121 $html = preg_replace(
'/(' . $this->_unique .
'NL){2,}/',
'</autop><autop>',
$html);
// Remaining BR placeholders, single NL placeholders and literal br tags are
// all searched for here. The replacement argument is cut off in this listing;
// the next statement suggests it is a self-closed br tag - confirm.
122 $html = str_replace(array($this->_unique .
'BR', $this->_unique .
'NL',
'<br>'),
// Drop a line break that sits directly before the end of an autop element.
125 $html = str_replace(
'<br /></autop>',
'</autop>',
$html);
// Second parse, using the same save / override / restore handling of the
// libxml settings as the first one.
131 $use_internal_errors = libxml_use_internal_errors(
true);
134 $disable_load_entities = libxml_disable_entity_loader(
true);
136 if (!$this->_doc->loadHTML(
$html)) {
137 libxml_use_internal_errors($use_internal_errors);
138 libxml_disable_entity_loader($disable_load_entities);
142 libxml_use_internal_errors($use_internal_errors);
143 libxml_disable_entity_loader($disable_load_entities);
// Rebuild the XPath helper so that it queries the freshly loaded document.
146 $this->_xpath =
new DOMXPath($this->_doc);
// Fragment of process($html): flag autop elements with the attribute r=1.
// Flagged elements have their wrapper removed by the regex at listing
// line 187.
// For every autop the code checks for non-blank text content and for element
// children. The statements that combine those checks are missing from this
// listing, so the exact condition for flagging cannot be read from here.
149 foreach ($this->_xpath->query(
'//autop') as
$autop) {
152 if (trim(
$autop->textContent) !==
'') {
155 foreach (
$autop->childNodes as $node) {
156 if ($node->nodeType === XML_ELEMENT_NODE) {
164 $autop->setAttribute(
"r",
"1");
// For every div, look at the autop elements that are its direct children; when
// there is exactly one, flag that one as well.
169 foreach ($this->_xpath->query(
'//div') as $el) {
171 $autops = $this->_xpath->query(
'./autop', $el);
172 if ($autops->length === 1) {
173 $firstAutop = $autops->item(0);
175 $firstAutop->setAttribute(
"r",
"1");
179 $html = $this->_doc->saveHTML();
// Fragment of process($html): reduce the serialised document to the contents
// of its body element and turn the temporary markup into the final output.
// The constant 6 is the length of the opening body tag being searched for.
// NOTE(review): neither strpos() result is checked for false before use.
182 $bodyStart = strpos(
$html,
'<body>');
183 $bodyEnd = strpos(
$html,
'</body>', $bodyStart + 6);
184 $html = substr(
$html, $bodyStart + 6, $bodyEnd - $bodyStart - 6);
// Unwrap autop elements flagged with r=1, keeping only their contents.
// NOTE(review): the pattern has no s modifier, so the dot does not match a
// newline; a flagged autop whose contents span lines would not be unwrapped -
// confirm whether that can occur.
187 $html = preg_replace(
'@<autop r="1">(.*?)</autop>@',
'\\1',
$html);
// The remaining autop tags become p tags, with a newline before each opening
// tag and after each closing tag.
190 $html = str_replace(
'<autop>',
"\n<p>",
$html);
191 $html = str_replace(
'</autop>',
"</p>\n",
$html);
// Put back the ampersands that were replaced by placeholders at listing
// line 87.
194 $html = str_replace($this->_unique .
'AMP',
'&',
$html);
// Fragment of what the member index calls addParagraphs(DOMElement $el); many
// statements and all closing braces are missing from this listing.
// Elements are handled through a queue seeded with $el rather than by
// recursion; further elements are appended at listing line 284. Elements
// containing newlines are collected in $inlinesToProcess for a later pass.
206 $elsToProcess = array($el);
207 $inlinesToProcess = array();
208 while ($el = array_shift($elsToProcess)) {
// True when the current element is named in $_alterList; how the flag is used
// is not visible in this listing.
211 $alterInline = in_array($el->nodeName, $this->_alterList);
// State carried across the children of $el: whether the next text node should
// be left-trimmed, and whether the previous node was a br element.
215 $ltrimFirstTextNode =
true;
222 $isFollowingBr =
false;
// Walk the children by sibling pointer rather than by iterating childNodes.
224 $node = $el->firstChild;
225 while (
null !== $node) {
// Insert a new autop element in front of the current node. The condition that
// guards this is not visible here.
230 $autop = $el->insertBefore($this->_doc->createElement(
'autop'), $node);
234 $isElement = ($node->nodeType === XML_ELEMENT_NODE);
236 $isBlock = in_array($node->nodeName, $this->_blocks);
239 $ltrimFirstTextNode =
false;
246 $isText = ($node->nodeType === XML_TEXT_NODE);
// The node is the last inline one when it has no next sibling, or when its
// next sibling is an element listed in $_blocks.
247 $isLastInline = (! $node->nextSibling
248 || ($node->nextSibling->nodeType === XML_ELEMENT_NODE
249 && in_array($node->nextSibling->nodeName, $this->_blocks)));
251 $isFollowingBr = ($node->nodeName ===
'br');
// Text handling: trim leading whitespace on the first text node, and after a
// br drop one leading newline together with the spaces or tabs around it.
255 $nodeText = $node->nodeValue;
257 if ($ltrimFirstTextNode) {
259 $nodeText = ltrim($nodeText);
260 $ltrimFirstTextNode =
false;
262 if ($isFollowingBr && preg_match(
'@^[ \\t]*\\n[ \\t]*@', $nodeText,
$m)) {
264 $nodeText = substr($nodeText, strlen(
$m[0]));
// Trailing whitespace is removed; the guard is not visible here - presumably
// it applies only when $isLastInline is true, confirm.
268 $nodeText = rtrim($nodeText);
// Newlines left in the text become NL placeholders, which process() later
// turns into paragraph boundaries or line breaks.
270 $nodeText = str_replace(
"\n", $this->_unique .
'NL', $nodeText);
// Advance to the next sibling before the current node is appended to the
// autop, presumably because appendChild moves the node out of $el.
// NOTE(review): the assignment of $tmpNode is not visible in this listing.
272 $node = $node->nextSibling;
275 $tmpNode->nodeValue = $nodeText;
276 $autop->appendChild($tmpNode);
// A block node, or the last child, that is named in $_descendList is queued
// for its own pass, and left-trimming is switched back on.
281 if ($isBlock || ! $node->nextSibling) {
283 if (in_array($node->nodeName, $this->_descendList)) {
284 $elsToProcess[] = $node;
289 $ltrimFirstTextNode =
true;
// An element whose text content contains a newline is remembered for the
// inline pass that starts at listing line 309.
294 if ($isElement &&
false !== strpos($tmpNode->textContent,
"\n")) {
295 $inlinesToProcess[] = $tmpNode;
297 $node = $node->nextSibling;
298 $autop->appendChild($tmpNode);
303 $node = $node->nextSibling;
// Fragment of addParagraphs(): second pass over the elements queued in
// $inlinesToProcess. Nested elements that contain a newline are appended to
// the same queue instead of being handled recursively.
309 while ($el = array_shift($inlinesToProcess)) {
310 $ignoreLeadingNewline =
false;
311 foreach ($el->childNodes as $node) {
312 if ($node->nodeType === XML_ELEMENT_NODE) {
// NOTE(review): this compares nodeValue with the upper-case string BR, while
// listing line 251 detects a line break with nodeName and lower-case br. For
// an element node nodeValue is its text content, so this test looks like it
// was meant to be a nodeName check - confirm.
313 if ($node->nodeValue ===
'BR') {
314 $ignoreLeadingNewline =
true;
316 $ignoreLeadingNewline =
false;
317 if (
false !== strpos($node->textContent,
"\n")) {
318 $inlinesToProcess[] = $node;
322 } elseif ($node->nodeType === XML_TEXT_NODE) {
323 $text = $node->nodeValue;
// A leading newline is handled specially when $ignoreLeadingNewline is set;
// the statement that removes it is not visible in this listing.
// NOTE(review): $text[0] is read without checking that $text is non-empty.
324 if (
$text[0] ===
"\n" && $ignoreLeadingNewline) {
326 $ignoreLeadingNewline =
false;
// Every remaining newline in the text node becomes a BR placeholder.
328 $node->nodeValue = str_replace(
"\n", $this->_unique .
'BR',
$text);
process($html)
Create wrapper P and BR elements in HTML depending on newlines.
addParagraphs(DOMElement $el)
Add P and BR elements as necessary.
__construct()
Constructor.
foreach($emails as $email) $html