Elgg  Version 6.3
HtmlFormatter.php
Go to the documentation of this file.
1 <?php
2 
3 namespace Elgg\Views;
4 
8 use Elgg\Traits\Loggable;
10 use Pelago\Emogrifier\CssInliner;
11 use Pelago\Emogrifier\HtmlProcessor\CssToAttributeConverter;
12 use Pelago\Emogrifier\HtmlProcessor\CssVariableEvaluator;
13 
18 
19  use Loggable;
20 
/**
 * Mentions regex
 *
 * The pattern consists of three alternatives:
 * - <a[^>]*?>.*?<\/a> matches a complete anchor element, including its content
 * - <.*?> matches any other single tag
 * - (^|\s|\!|\.|\?|>|\G)+(@([^\s<&]+)) matches an @mention that follows the start of the
 *   text, whitespace, one of ! . ? > or the end of the previous match
 *
 * Capture group 1 holds the character preceding the mention, group 2 the mention including
 * the @ sign and group 3 the bare username. Only the last alternative fills group 3, which is
 * how parseMentions() recognises anchors and tags and leaves them untouched.
 *
 * Matching is case-insensitive and UTF-8 aware (modifiers i and u).
 */
39  public const MENTION_REGEX = '/<a[^>]*?>.*?<\/a>|<.*?>|(^|\s|\!|\.|\?|>|\G)+(@([^\s<&]+))/iu';
40 
/**
 * Set of block level HTML elements used for stripTags()
 *
 * The entries are joined with '|' and embedded in a regular expression, so an entry is a
 * regex fragment and not necessarily a literal tag name (e.g. 'h[1-6]').
 */
46  protected const BLOCK_LEVEL_ELEMENTS = [
47  'address',
48  'article',
49  'aside',
50  'blockquote',
51  'br', // not a block level element, but we still want a space
52  'canvas',
53  'dd',
54  'div',
55  'dl',
56  'dt',
57  'fieldset',
58  'figcaption',
59  'figure',
60  'footer',
61  'form',
62  'h[1-6]',
63  'header',
64  'hr',
65  'li',
66  'main',
67  'nav',
68  'noscript',
69  'ol',
70  'p',
71  'pre',
72  'section',
73  'table',
74  'tfoot',
75  'ul',
76  'video',
77  ];
78 
/**
 * Output constructor.
 *
 * All three services are stored as protected properties through constructor promotion.
 *
 * @param ViewsService  $views  Views service
 * @param EventsService $events Events service
 * @param AutoParagraph $autop  Paragraph wrapper
 */
public function __construct(
    protected ViewsService $views,
    protected EventsService $events,
    protected AutoParagraph $autop,
) {
    // nothing to do, the promoted parameters take care of the assignments
}
92 
/**
 * Prepare HTML output
 *
 * The 'prepare', 'html' event is triggered first, so listeners can change both the HTML and
 * the options. After that the enabled steps are applied in a fixed order: URL linking,
 * e-mail linking, mention linking, sanitizing and finally paragraph wrapping.
 *
 * @param string $html    HTML string
 * @param array  $options Formatting options, all enabled by default
 *
 * @option bool $parse_urls     Replace URLs with anchor tags
 * @option bool $parse_emails   Replace email addresses with anchor tags
 * @option bool $parse_mentions Replace @ mentions with links to the mentioned user
 * @option bool $sanitize       Pass the HTML through elgg_sanitize_input()
 * @option bool $autop          Add paragraphs
 *
 * @return string
 */
public function formatBlock(string $html, array $options = []): string {
    $options = array_merge([
        'parse_urls' => true,
        'parse_emails' => true,
        'parse_mentions' => true,
        'sanitize' => true,
        'autop' => true,
    ], $options);

    $params = [
        'options' => $options,
        'html' => $html,
    ];

    $params = $this->events->triggerResults('prepare', 'html', [], $params);

    // listeners may have replaced either value, so read both back from the event result
    $html = (string) elgg_extract('html', $params);
    $options = (array) elgg_extract('options', $params);

    if (elgg_extract('parse_urls', $options)) {
        $html = $this->parseUrls($html);
    }

    if (elgg_extract('parse_emails', $options)) {
        $html = $this->parseEmails($html);
    }

    if (elgg_extract('parse_mentions', $options)) {
        $html = $this->parseMentions($html);
    }

    if (elgg_extract('sanitize', $options)) {
        // this branch used to be empty, which made the 'sanitize' option a silent no-op
        $html = elgg_sanitize_input($html);
    }

    if (elgg_extract('autop', $options)) {
        $html = $this->addParagaraphs($html);
    }

    return $html;
}
147 
/**
 * Takes a string and turns any URLs into formatted links
 *
 * The work is delegated to the Linkify library; every generated link gets rel="nofollow".
 *
 * @param string $text The input string
 *
 * @return string
 */
public function parseUrls(string $text): string {
    $link_options = ['attr' => ['rel' => 'nofollow']];

    return (new \Misd\Linkify\Linkify())->processUrls($text, $link_options);
}
161 
/**
 * Takes a string and turns any email addresses into formatted links
 *
 * The work is delegated to the Linkify library; every generated link gets rel="nofollow".
 *
 * @param string $text The input string
 *
 * @return string
 */
public function parseEmails(string $text): string {
    $link_options = ['attr' => ['rel' => 'nofollow']];

    return (new \Misd\Linkify\Linkify())->processEmails($text, $link_options);
}
175 
/**
 * Takes a string and turns any @ mentions into a formatted link
 *
 * Every match of self::MENTION_REGEX is handed to a callback. Matches without a username
 * (anchors and other tags), usernames that fail validation and usernames that don't resolve
 * to a user are returned unchanged. If the regex replacement itself fails, the original text
 * is returned.
 *
 * @param string $text The input string
 *
 * @return string
 */
public function parseMentions(string $text): string {
    $callback = function (array $matches) {
        $source = elgg_extract(0, $matches);
        $preceding_char = elgg_extract(1, $matches);
        $username = elgg_extract(3, $matches);

        if (empty($username)) {
            // the match is an anchor or another tag, not a mention
            return $source;
        }

        try {
            _elgg_services()->accounts->assertValidUsername($username);
        } catch (RegistrationException $e) {
            return $source;
        }

        // this lookup was missing, leaving $user undefined so that no mention was ever linked
        $user = elgg_get_user_by_username($username);

        // Catch the trailing period when used as punctuation and not a username.
        $period = '';
        if (!$user && str_ends_with($username, '.')) {
            $user = elgg_get_user_by_username(substr($username, 0, -1));
            $period = '.';
        }

        if (!$user) {
            return $source;
        }

        // the link text is either "@username" or the display name, depending on the site config
        if (elgg_get_config('mentions_display_format') === 'username') {
            $replacement = elgg_view_url($user->getURL(), "@{$user->username}");
        } else {
            $replacement = elgg_view_url($user->getURL(), $user->getDisplayName());
        }

        return $preceding_char . $replacement . $period;
    };

    return preg_replace_callback(self::MENTION_REGEX, $callback, $text) ?? $text;
}
224 
/**
 * Create paragraphs from text with line spacing
 *
 * The string is handed to the AutoParagraph service. When that service returns false or
 * throws a \RuntimeException (which is logged as a warning) the input is returned unchanged.
 *
 * @param string $string The string
 *
 * @return string
 */
public function addParagaraphs(string $string): string {
    try {
        $processed = $this->autop->process($string);
    } catch (\RuntimeException $failure) {
        $this->getLogger()->warning('AutoParagraph failed to process the string: ' . $failure->getMessage());

        return $string;
    }

    return $processed === false ? $string : $processed;
}
244 
/**
 * Converts an associative array into a string of well-formed HTML/XML attributes
 *
 * Rules applied to each name/value pair:
 * - names containing an underscore are skipped, unless they start with 'data-'
 * - names are lowercased
 * - null and false values are skipped
 * - true is rendered as the attribute name (checked => true becomes checked="checked")
 * - array values are joined with a space; empty arrays and arrays holding non-scalar
 *   values are skipped
 * - values are escaped with htmlspecialchars() without double encoding
 *
 * @param array $attrs Attributes, an array of attribute => value pairs
 *
 * @return string The attributes separated by a single space, or an empty string
 */
public function formatAttributes(array $attrs = []): string {
    $rendered = [];

    foreach ($attrs as $attr => $val) {
        if (str_contains($attr, '_') && !str_starts_with($attr, 'data-')) {
            // this is probably a view $vars variable not meant for output
            continue;
        }

        if ($val === null || $val === false) {
            continue;
        }

        $name = strtolower($attr);

        if ($val === true) {
            // e.g. checked => true ==> checked="checked"
            $val = $name;
        }

        $parts = is_scalar($val) ? [$val] : $val;
        if (!is_array($parts) || $parts === []) {
            // objects, resources and e.g. ['class' => []] produce no output
            continue;
        }

        foreach ($parts as $part) {
            if (!is_scalar($part)) {
                // one non-scalar value disqualifies the whole attribute
                continue 2;
            }
        }

        $escaped = htmlspecialchars(implode(' ', $parts), ENT_QUOTES, 'UTF-8', false);
        $rendered[] = $name . '="' . $escaped . '"';
    }

    return implode(' ', $rendered);
}
322 
/**
 * Format an HTML element
 *
 * @param string $tag_name   The element tagName. e.g. "div". This will not be validated.
 * @param array  $attributes The element attributes, rendered with formatAttributes()
 * @param string $text       The contents of the element. Assumed to be HTML unless encode_text is true.
 * @param array  $options    Options array with keys:
 *                           - encode_text   => (bool, default false) If true, $text will be HTML-escaped.
 *                           - double_encode => (bool, default false) If true, existing entities in $text
 *                                              are encoded again when encode_text is used.
 *                           - is_void       => (bool) If given, this determines whether the element is
 *                                              rendered without content and closing tag. If omitted, this
 *                                              is determined from a built-in list of void tag names.
 *                           - is_xml        => (bool, default false) If true, void elements are rendered
 *                                              like "<tag />" instead of "<tag>".
 *
 * @return string
 * @throws InvalidArgumentException when $tag_name is an empty string
 */
public function formatElement(string $tag_name, array $attributes = [], string $text = '', array $options = []): string {
    if ($tag_name === '') {
        throw new InvalidArgumentException('$tag_name is required');
    }

    // from http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
    $void_elements = [
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem',
        'meta', 'param', 'source', 'track', 'wbr',
    ];
    $is_void = $options['is_void'] ?? in_array(strtolower($tag_name), $void_elements, true);

    $attrs = $attributes === [] ? '' : $this->formatAttributes($attributes);
    if ($attrs !== '') {
        $attrs = " $attrs";
    }

    if ($is_void) {
        // void elements never carry content, so $text is ignored here
        $tag_end = empty($options['is_xml']) ? '>' : ' />';

        return "<{$tag_name}{$attrs}{$tag_end}";
    }

    if (!empty($options['encode_text'])) {
        $text = htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', !empty($options['double_encode']));
    }

    return "<{$tag_name}{$attrs}>{$text}</{$tag_name}>";
}
380 
/**
 * Strip tags and offer plugins the chance to alter the result
 *
 * Before the tags are removed, a placeholder is put in front of each opening tag from
 * self::BLOCK_LEVEL_ELEMENTS that directly follows a non-whitespace character. Afterwards
 * every run of placeholders is turned into a single space, so text from adjacent blocks
 * doesn't get glued together. The result is passed through the 'format', 'strip_tags' event.
 *
 * @param string      $string         Formatted string
 * @param null|string $allowable_tags Optional parameter to specify tags which should not be stripped
 *
 * @return string String run through strip_tags() and any plugin events
 */
public function stripTags(string $string, ?string $allowable_tags = null): string {
    // event listeners get the untouched input, so capture it before any processing
    $event_params = [
        'original_string' => $string,
        'allowable_tags' => $allowable_tags,
    ];

    $marker = '{{elgg_space}}';
    $block_tags = implode('|', self::BLOCK_LEVEL_ELEMENTS);

    $marked = preg_replace(sprintf('/(\S)<(%s)([ >\/])/', $block_tags), '$1' . $marker . '<$2$3', $string);
    $stripped = strip_tags($marked, $allowable_tags);
    $spaced = preg_replace(sprintf('/(%s)+/', $marker), ' ', $stripped);

    return (string) $this->events->triggerResults('format', 'strip_tags', $event_params, $spaced);
}
404 
/**
 * Decode HTML markup into a raw text string
 *
 * html_entity_decode() is applied to the string, but the entities for > < & " and ' are
 * shielded first and restored afterwards, so they come out still encoded.
 *
 * @param string $string Encoded HTML
 *
 * @return string
 */
public function decode(string $string): string {
    $special_entities = ['&gt;', '&lt;', '&amp;', '&quot;', '&#039;'];
    $shielded_entities = ['&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'];

    // shield the special entities so the decode step below can't turn them into raw characters
    $string = str_replace($special_entities, $shielded_entities, $string);
    $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

    return str_replace($shielded_entities, $special_entities, $string);
}
445 
/**
 * Adds inline style to html content
 *
 * Uses the Emogrifier classes in three steps: the CSS is inlined (with parsing of style
 * blocks disabled), CSS variables are evaluated and CSS is converted to visual attributes.
 * When either argument is empty the HTML is returned as is.
 *
 * @param string $html      html content
 * @param string $css       style text
 * @param bool   $body_only toggle to return the body content instead of the full html
 *
 * @return string
 */
public function inlineCss(string $html, string $css, bool $body_only = false): string {
    if (empty($css) || empty($html)) {
        return $html;
    }

    $inlined_html = CssInliner::fromHtml($html)
        ->disableStyleBlocksParsing()
        ->inlineCss($css)
        ->render();

    $evaluated_html = CssVariableEvaluator::fromHtml($inlined_html)
        ->evaluateVariables()
        ->render();

    $converter = CssToAttributeConverter::fromHtml($evaluated_html)->convertCssToVisualAttributes();

    if ($body_only) {
        return $converter->renderBodyContent();
    }

    return $converter->render();
}
468 
/**
 * Replaces relative urls in href or src attributes in text
 *
 * Every distinct quoted href/src value found in the text is passed through
 * elgg_normalize_url() and each occurrence of the quoted value is replaced by the result.
 *
 * @param string $text source content
 *
 * @return string
 */
public function normalizeUrls(string $text): string {
    // the capture group holds the attribute value including its wrapping quotes
    $found = [];
    preg_match_all('/\s(?:href|src)=([\'"]\S+[\'"])/i', $text, $found);

    if (empty($found[1])) {
        return $text;
    }

    foreach (array_unique($found[1]) as $quoted_url) {
        // remove the wrapping quotes before normalizing
        $bare_url = substr($quoted_url, 1, -1);
        $normalized = str_replace($bare_url, elgg_normalize_url($bare_url), $quoted_url);

        // replace the url in the content
        $text = str_replace($quoted_url, $normalized, $text);
    }

    return $text;
}
507 }
$username
Definition: delete.php:23
$params
Saves global plugin settings.
Definition: save.php:13
$attributes
Elgg AJAX loader.
Definition: ajax_loader.php:10
$source
$user
Definition: ban.php:7
$text
Definition: button.php:33
Events service.
Could not register a new user for whatever reason.
Exception thrown if an argument is not of the expected type.
Exception thrown if an error which can only be found on runtime occurs.
Views service.
Create wrapper P and BR elements in HTML depending on newlines.
Various helper methods for formatting and sanitizing output.
formatAttributes(array $attrs=[])
Converts an associative array into a string of well-formed HTML/XML attributes Returns a concatenated...
parseUrls(string $text)
Takes a string and turns any URLs into formatted links.
normalizeUrls(string $text)
Replaces relative urls in href or src attributes in text.
__construct(protected ViewsService $views, protected EventsService $events, protected AutoParagraph $autop)
Output constructor.
parseEmails(string $text)
Takes a string and turns any email addresses into formatted links.
decode(string $string)
Decode HTML markup into a raw text string.
addParagaraphs(string $string)
Create paragraphs from text with line spacing.
const MENTION_REGEX
Mentions regex.
parseMentions(string $text)
Takes a string and turns any @ mentions into a formatted link.
const BLOCK_LEVEL_ELEMENTS
Set of block level HTML elements used for stripTags()
stripTags(string $string, ?string $allowable_tags=null)
Strip tags and offer plugins the chance to alter the result through the 'format', 'strip_tags' event.
inlineCss(string $html, string $css, bool $body_only=false)
Adds inline style to html content.
formatElement(string $tag_name, array $attributes=[], string $text='', array $options=[])
Format an HTML element.
formatBlock(string $html, array $options=[])
Prepare HTML output.
elgg_get_config(string $name, $default=null)
Get an Elgg configuration value.
if($who_can_change_language==='nobody') elseif($who_can_change_language==='admin_only' &&!elgg_is_admin_logged_in()) $options
Definition: language.php:20
foreach($plugin_guids as $guid) if(empty($deactivated_plugins)) $url
Definition: deactivate.php:39
$views
Definition: item.php:17
if($alt_image) $tag_name
Definition: image_block.php:45
_elgg_services()
Get the global service provider.
Definition: elgglib.php:337
elgg_extract($key, $array, $default=null, bool $strict=true)
Checks for $array[$key] and returns its value if it exists, else returns $default.
Definition: elgglib.php:240
elgg_sanitize_input($input)
Filter input from a given string based on registered events.
Definition: input.php:77
elgg_get_user_by_username(string $username, bool $try_email=false)
Get a user by username.
Definition: users.php:31
$css
Definition: install.css.php:5
elgg_view_url(string $href, ?string $text=null, array $options=[])
Helper function for outputting urls.
Definition: views.php:1430
try
Definition: login_as.php:33
if(empty($title) &&empty($body)) if(!empty($link)) $attrs
Definition: message.php:28
elgg_normalize_url(string $url)
Definition: output.php:163
$html
A wrapper to render a section of the page shell.
Definition: section.php:9
if(parse_url(elgg_get_site_url(), PHP_URL_PATH) !=='/') if(file_exists(elgg_get_root_path() . 'robots.txt'))
Set robots.txt.
Definition: robots.php:10