Elgg  Version master
HtmlFormatter.php
Go to the documentation of this file.
1 <?php
2 
3 namespace Elgg\Views;
4 
8 use Elgg\Traits\Loggable;
10 use Pelago\Emogrifier\CssInliner;
11 use Pelago\Emogrifier\HtmlProcessor\CssToAttributeConverter;
12 
17 
18  use Loggable;
19 
38  public const MENTION_REGEX = '/<a[^>]*?>.*?<\/a>|<.*?>|(^|\s|\!|\.|\?|>|\G)+(@([^\s<&]+))/iu';
39 
/**
 * Create the formatter with the services it relies on.
 *
 * All three arguments are promoted to protected properties.
 *
 * @param ViewsService  $views  Views service
 * @param EventsService $events Events service
 * @param AutoParagraph $autop  Paragraph wrapper used by addParagaraphs()
 */
public function __construct(
	protected ViewsService $views,
	protected EventsService $events,
	protected AutoParagraph $autop,
) {
}
53 
/**
 * Prepare HTML output.
 *
 * The 'prepare', 'html' event is triggered first, so handlers can change both
 * the HTML and the options. The enabled steps then run in a fixed order:
 * URL linking, email linking, mention linking, sanitizing and paragraph wrapping.
 *
 * @param string $html    HTML string
 * @param array  $options Formatting switches, all enabled unless overridden:
 *                        - parse_urls     turn URLs into links
 *                        - parse_emails   turn email addresses into links
 *                        - parse_mentions turn @ mentions into links
 *                        - sanitize       pass the HTML through elgg_sanitize_input()
 *                        - autop          wrap the text in paragraphs
 *
 * @return string
 */
public function formatBlock(string $html, array $options = []): string {
	$options = array_merge([
		'parse_urls' => true,
		'parse_emails' => true,
		'parse_mentions' => true,
		'sanitize' => true,
		'autop' => true,
	], $options);

	$params = [
		'options' => $options,
		'html' => $html,
	];

	$params = $this->events->triggerResults('prepare', 'html', [], $params);

	// use whatever the event handlers returned, not the caller's originals
	$html = (string) elgg_extract('html', $params);
	$options = (array) elgg_extract('options', $params);

	if (elgg_extract('parse_urls', $options)) {
		$html = $this->parseUrls($html);
	}

	if (elgg_extract('parse_emails', $options)) {
		$html = $this->parseEmails($html);
	}

	if (elgg_extract('parse_mentions', $options)) {
		$html = $this->parseMentions($html);
	}

	if (elgg_extract('sanitize', $options)) {
		// without this call the 'sanitize' option has no effect
		$html = elgg_sanitize_input($html);
	}

	if (elgg_extract('autop', $options)) {
		$html = $this->addParagaraphs($html);
	}

	return $html;
}
108 
/**
 * Takes a string and turns any URLs into formatted links.
 *
 * Every generated link gets rel="nofollow".
 *
 * @param string $text The input string
 *
 * @return string The string with URLs turned into links
 */
public function parseUrls(string $text): string {
	$link_options = ['attr' => ['rel' => 'nofollow']];

	return (new \Misd\Linkify\Linkify())->processUrls($text, $link_options);
}
122 
/**
 * Takes a string and turns any email addresses into formatted links.
 *
 * Every generated link gets rel="nofollow".
 *
 * @param string $text The input string
 *
 * @return string The string with email addresses turned into links
 */
public function parseEmails(string $text): string {
	$link_options = ['attr' => ['rel' => 'nofollow']];

	return (new \Misd\Linkify\Linkify())->processEmails($text, $link_options);
}
136 
/**
 * Takes a string and turns any @ mentions into a formatted link.
 *
 * The text is scanned with self::MENTION_REGEX. Complete anchors and other
 * tags are matched by that expression without a username and are returned
 * untouched, so existing markup is never rewritten.
 *
 * @param string $text The input string
 *
 * @return string The string with resolvable mentions replaced by profile links
 */
public function parseMentions(string $text): string {
	$callback = function (array $matches) {
		$source = elgg_extract(0, $matches);
		// group 1 is the character in front of the '@', group 3 the name without the '@'
		$preceding_char = elgg_extract(1, $matches);
		$username = elgg_extract(3, $matches);

		if (empty($username)) {
			return $source;
		}

		try {
			_elgg_services()->accounts->assertValidUsername($username);
		} catch (RegistrationException $e) {
			return $source;
		}

		// resolve the mention; without this lookup $user is undefined below
		$user = elgg_get_user_by_username($username);

		// Catch the trailing period when used as punctuation and not a username.
		$period = '';
		if (!$user && str_ends_with($username, '.')) {
			$user = elgg_get_user_by_username(substr($username, 0, -1));
			$period = '.';
		}

		if (!$user) {
			return $source;
		}

		if (elgg_get_config('mentions_display_format') === 'username') {
			$replacement = elgg_view_url($user->getURL(), "@{$user->username}");
		} else {
			$replacement = elgg_view_url($user->getURL(), $user->getDisplayName());
		}

		return $preceding_char . $replacement . $period;
	};

	// preg_replace_callback() yields null on a PCRE error; keep the input in that case
	return preg_replace_callback(self::MENTION_REGEX, $callback, $text) ?? $text;
}
185 
/**
 * Create paragraphs from text with line spacing.
 *
 * If the paragraph service throws a \RuntimeException a warning is logged;
 * in that case, or when it returns false, the input is handed back unchanged.
 *
 * @param string $string The string
 *
 * @return string
 */
public function addParagaraphs(string $string): string {
	try {
		$processed = $this->autop->process($string);
	} catch (\RuntimeException $e) {
		$this->getLogger()->warning('AutoParagraph failed to process the string: ' . $e->getMessage());

		return $string;
	}

	return $processed === false ? $string : $processed;
}
205 
/**
 * Converts an associative array into a string of well-formed HTML/XML attributes.
 *
 * Rules applied to each entry:
 * - names containing an underscore are skipped unless they start with "data-"
 * - names are lowercased
 * - null and false values are skipped, true becomes the attribute name
 * - scalar values are output as is, arrays of scalars are joined with a space
 * - empty arrays, arrays holding non-scalar items and other non-scalar values are skipped
 * - values are HTML-escaped without double encoding existing entities
 *
 * @param array $attrs Attributes as name => value pairs
 *
 * @return string Attributes joined by a single space, or an empty string
 */
public function formatAttributes(array $attrs = []): string {
	$pairs = [];

	foreach ($attrs as $name => $value) {
		// this is probably a view $vars variable not meant for output
		if (str_contains($name, '_') && !str_starts_with($name, 'data-')) {
			continue;
		}

		if ($value === null || $value === false) {
			continue;
		}

		$name = strtolower($name);

		if ($value === true) {
			// e.g. checked => true ==> checked="checked"
			$value = $name;
		}

		$parts = is_scalar($value) ? [$value] : $value;
		if (!is_array($parts) || $parts === []) {
			// objects/resources and e.g. ['class' => []]
			continue;
		}

		// bail on this attribute as soon as a non-scalar item shows up
		foreach ($parts as $part) {
			if (!is_scalar($part)) {
				continue 2;
			}
		}

		$encoded = htmlspecialchars(implode(' ', $parts), ENT_QUOTES, 'UTF-8', false);
		$pairs[] = "{$name}=\"{$encoded}\"";
	}

	return implode(' ', $pairs);
}
283 
/**
 * Format an HTML element.
 *
 * @param string $tag_name   Element name, must not be empty
 * @param array  $attributes Attributes, rendered through formatAttributes()
 * @param string $text       Inner content, ignored for void elements
 * @param array  $options    Supported keys:
 *                           - is_void       force or forbid void rendering; defaults to a lookup in the list of void tags
 *                           - encode_text   HTML-escape $text when truthy
 *                           - double_encode also re-encode existing entities when escaping
 *                           - is_xml        close void elements with " />"
 *
 * @return string
 * @throws InvalidArgumentException when $tag_name is an empty string
 */
public function formatElement(string $tag_name, array $attributes = [], string $text = '', array $options = []): string {
	if ($tag_name === '') {
		throw new InvalidArgumentException('$tag_name is required');
	}

	if (!empty($options['encode_text'])) {
		$text = htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', !empty($options['double_encode']));
	}

	// the attribute string may still be empty when every attribute got filtered out
	$attribute_string = empty($attributes) ? '' : $this->formatAttributes($attributes);
	$opening = $attribute_string === '' ? "<{$tag_name}" : "<{$tag_name} {$attribute_string}";

	// from http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
	$void_tags = [
		'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem',
		'meta', 'param', 'source', 'track', 'wbr'
	];
	$is_void = $options['is_void'] ?? in_array(strtolower($tag_name), $void_tags);

	if (!$is_void) {
		return "{$opening}>{$text}</{$tag_name}>";
	}

	return empty($options['is_xml']) ? "{$opening}>" : "{$opening} />";
}
341 
/**
 * Strip tags and offer plugins the chance to alter the result.
 *
 * The stripped string is passed as the value of the 'format', 'strip_tags'
 * event; the untouched input and the allowed tags travel along as parameters.
 *
 * @param string      $string         Formatted string
 * @param null|string $allowable_tags Tags that strip_tags() should keep
 *
 * @return string
 */
public function stripTags(string $string, ?string $allowable_tags = null): string {
	$stripped = strip_tags($string, $allowable_tags);

	$result = $this->events->triggerResults('format', 'strip_tags', [
		'original_string' => $string,
		'allowable_tags' => $allowable_tags,
	], $stripped);

	return (string) $result;
}
361 
/**
 * Decode HTML markup into a raw text string.
 *
 * The literal entities &gt; &lt; &amp; &quot; and &#039; come out unchanged:
 * they are masked before html_entity_decode() runs and unmasked afterwards.
 * Other named entities are decoded to their UTF-8 characters.
 *
 * @param string $string Encoded HTML
 *
 * @return string
 */
public function decode(string $string): string {
	$protected = ['&gt;', '&lt;', '&amp;', '&quot;', '&#039;'];
	$masked = ['&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;'];

	// str_replace() applies the pairs one after another, so the order of both lists matters
	$string = str_replace($protected, $masked, $string);
	$string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

	return str_replace($masked, $protected, $string);
}
402 
/**
 * Adds inline style to html content.
 *
 * Nothing is done when either argument is empty. Otherwise the CSS is inlined
 * with parsing of style blocks disabled, after which CSS is converted to
 * visual HTML attributes.
 *
 * @param string $html      HTML to receive the styles
 * @param string $css       CSS to inline
 * @param bool   $body_only Render only the body content instead of the full document
 *
 * @return string
 */
public function inlineCss(string $html, string $css, bool $body_only = false): string {
	if (empty($html) || empty($css)) {
		return $html;
	}

	$inlined_html = CssInliner::fromHtml($html)
		->disableStyleBlocksParsing()
		->inlineCss($css)
		->render();

	$converter = CssToAttributeConverter::fromHtml($inlined_html)->convertCssToVisualAttributes();

	if ($body_only) {
		return $converter->renderBodyContent();
	}

	return $converter->render();
}
424 
/**
 * Replaces relative urls in href or src attributes in text.
 *
 * Only the quoted values of the matched href/src attributes are rewritten.
 * The same quoted string appearing elsewhere in the text (for example as
 * visible copy) is left alone.
 *
 * @param string $text Text with (relative) urls in attributes
 *
 * @return string The text with normalized attribute urls
 */
public function normalizeUrls(string $text): string {
	// group 1: whitespace, attribute name and '='; group 2: the url including its quotes
	$pattern = '/(\s(?:href|src)=)([\'"]\S+[\'"])/i';

	$callback = function (array $matches) {
		$attribute = $matches[1];
		$quoted_url = $matches[2];

		// remove wrapping quotes from the url before normalizing it
		$new_url = elgg_normalize_url(substr($quoted_url, 1, -1));

		// put the original opening and closing quote back around the result
		return $attribute . $quoted_url[0] . $new_url . substr($quoted_url, -1);
	};

	// preg_replace_callback() yields null on a PCRE error; keep the input in that case
	return preg_replace_callback($pattern, $callback, $text) ?? $text;
}
463 }
$username
Definition: delete.php:23
$params
Saves global plugin settings.
Definition: save.php:13
$attributes
Elgg AJAX loader.
Definition: ajax_loader.php:10
$source
$user
Definition: ban.php:7
$text
Definition: button.php:33
Events service.
Could not register a new user for whatever reason.
Exception thrown if an argument is not of the expected type.
Exception thrown if an error which can only be found on runtime occurs.
Views service.
Create wrapper P and BR elements in HTML depending on newlines.
Various helper method for formatting and sanitizing output.
formatAttributes(array $attrs=[])
Converts an associative array into a string of well-formed HTML/XML attributes. Returns a concatenated...
parseUrls(string $text)
Takes a string and turns any URLs into formatted links.
normalizeUrls(string $text)
Replaces relative urls in href or src attributes in text.
__construct(protected ViewsService $views, protected EventsService $events, protected AutoParagraph $autop)
Output constructor.
parseEmails(string $text)
Takes a string and turns any email addresses into formatted links.
decode(string $string)
Decode HTML markup into a raw text string.
addParagaraphs(string $string)
Create paragraphs from text with line spacing.
const MENTION_REGEX
Mentions regex.
parseMentions(string $text)
Takes a string and turns any @ mentions into a formatted link.
stripTags(string $string, ?string $allowable_tags=null)
Strip tags and offer plugins the chance.
inlineCss(string $html, string $css, bool $body_only=false)
Adds inline style to html content.
formatElement(string $tag_name, array $attributes=[], string $text='', array $options=[])
Format an HTML element.
formatBlock(string $html, array $options=[])
Prepare HTML output.
elgg_get_config(string $name, $default=null)
Get an Elgg configuration value.
if($who_can_change_language==='nobody') elseif($who_can_change_language==='admin_only' &&!elgg_is_admin_logged_in()) $options
Definition: language.php:20
foreach($plugin_guids as $guid) if(empty($deactivated_plugins)) $url
Definition: deactivate.php:39
$views
Definition: item.php:17
if($alt_image) $tag_name
Definition: image_block.php:45
_elgg_services()
Get the global service provider.
Definition: elgglib.php:353
elgg_extract($key, $array, $default=null, bool $strict=true)
Checks for $array[$key] and returns its value if it exists, else returns $default.
Definition: elgglib.php:256
elgg_sanitize_input($input)
Filter input from a given string based on registered events.
Definition: input.php:77
elgg_get_user_by_username(string $username, bool $try_email=false)
Get a user by username.
Definition: users.php:31
$css
Definition: install.css.php:5
elgg_view_url(string $href, ?string $text=null, array $options=[])
Helper function for outputting urls.
Definition: views.php:1427
try
Definition: login_as.php:33
if(empty($title) &&empty($body)) if(!empty($link)) $attrs
Definition: message.php:28
elgg_normalize_url(string $url)
Definition: output.php:163
$html
A wrapper to render a section of the page shell.
Definition: section.php:9
if(parse_url(elgg_get_site_url(), PHP_URL_PATH) !=='/') if(file_exists(elgg_get_root_path() . 'robots.txt'))
Set robots.txt.
Definition: robots.php:10