Elgg  Version 4.3
Urls.php
Go to the documentation of this file.
1 <?php
2 
3 namespace Elgg\Http;
4 
/**
 * Create, sanitize and compare urls.
 */
class Urls {

	/**
	 * Sets elements in a URL's query string.
	 *
	 * Elements with a null value are removed from the query string, every other element is
	 * added or overwrites the existing element with the same name.
	 *
	 * @param string $url      The URL to modify
	 * @param array  $elements Query elements as name => value pairs (a null value removes the element)
	 *
	 * @return string The rebuilt URL (not HTML encoded)
	 */
	public function addQueryElementsToUrl(string $url, array $elements): string {
		// parse_url() returns false on seriously malformed URLs; fall back to an empty set of parts
		// so no array offset is ever written on a boolean
		$url_array = parse_url($url) ?: [];

		$query = isset($url_array['query']) ? elgg_parse_str($url_array['query']) : [];

		foreach ($elements as $k => $v) {
			if ($v === null) {
				unset($query[$k]);
			} else {
				$query[$k] = $v;
			}
		}

		// why check path? A: if no path, this may be a relative URL like "?foo=1". In this case,
		// the output "" would be interpreted as the current URL, so in this case we *must* set
		// a query to make sure elements are removed.
		if ($query || empty($url_array['path'])) {
			$url_array['query'] = http_build_query($query);
		} else {
			unset($url_array['query']);
		}

		$string = $this->buildUrl($url_array, false);

		// Restore relative protocol to url if missing and is provided as part of the initial url (see #9874)
		// The check has to be done on the parsed parts, not on the original string
		if (!isset($url_array['scheme']) && substr($url, 0, 2) === '//') {
			$string = "//{$string}";
		}

		return $string;
	}

	/**
	 * Adds action tokens to URL.
	 *
	 * The URL is normalized first. If the query string already holds both '__elgg_ts' and
	 * '__elgg_token' the normalized URL is returned as is (in that case no HTML encoding is applied).
	 *
	 * @param string $url         Full action URL
	 * @param bool   $html_encode HTML encode the rebuilt URL
	 *
	 * @return string URL with the action tokens
	 */
	public function addActionTokensToUrl(string $url, bool $html_encode = false): string {
		$url = $this->normalizeUrl($url);

		// parse_url() returns false on seriously malformed URLs
		$components = parse_url($url) ?: [];

		$query = isset($components['query']) ? elgg_parse_str($components['query']) : [];

		if (isset($query['__elgg_ts'], $query['__elgg_token'])) {
			return $url;
		}

		// append action tokens to the existing query
		// CSRF service is not DI injected because Urls is used by installer and CSRF requires DB installed
		$query['__elgg_ts'] = _elgg_services()->csrf->getCurrentTime()->getTimestamp();
		$query['__elgg_token'] = _elgg_services()->csrf->generateActionToken($query['__elgg_ts']);
		$components['query'] = http_build_query($query);

		// rebuild the full url
		return $this->buildUrl($components, $html_encode);
	}

	/**
	 * Builds a URL from a parts array like the one returned by parse_url().
	 *
	 * Only the 'scheme', 'host', 'port', 'path', 'query' and 'fragment' parts are used,
	 * any other part (for example 'user' or 'pass') is ignored.
	 *
	 * @param array $parts       Associative array of URL components
	 * @param bool  $html_encode HTML encode the result (existing entities are not double encoded)
	 *
	 * @return string The assembled URL
	 */
	public function buildUrl(array $parts, bool $html_encode = true): string {
		// build only what's given to us
		$scheme = isset($parts['scheme']) ? "{$parts['scheme']}://" : '';
		$host = isset($parts['host']) ? "{$parts['host']}" : '';
		$port = isset($parts['port']) ? ":{$parts['port']}" : '';
		$path = isset($parts['path']) ? "{$parts['path']}" : '';
		$query = isset($parts['query']) ? "?{$parts['query']}" : '';
		$fragment = isset($parts['fragment']) ? "#{$parts['fragment']}" : '';

		$string = $scheme . $host . $port . $path . $query . $fragment;

		return $html_encode ? htmlspecialchars($string, ENT_QUOTES, 'UTF-8', false) : $string;
	}

	/**
	 * Converts shorthand URLs to absolute URLs.
	 *
	 * Spaces are replaced by '%20'. After that:
	 * - valid URLs pointing to the site get the exact site URL as prefix (fixes a wrong scheme)
	 * - URLs with a scheme other than http/https are returned untouched
	 * - URLs starting with '//', '?' or '#' are returned untouched
	 * - root PHP scripts ('install.php?step=1') are prefixed with the site URL
	 * - URLs starting with something that looks like a domain are prefixed with 'http://'
	 * - everything else is treated as a site path and prefixed with the site URL
	 *
	 * @param string $url The URL to normalize
	 *
	 * @return string The normalized URL
	 */
	public function normalizeUrl(string $url): string {
		$url = str_replace(' ', '%20', $url);

		if ($this->isValidMultiByteUrl($url)) {
			$site_url = elgg_get_site_url();

			// fix invalid scheme in site url
			$protocol_less_site_url = preg_replace('/^https?:/i', ':', $site_url);
			$protocol_less_site_url = rtrim($protocol_less_site_url, '/');

			// escape every regex meta character (not only '/'), otherwise the dots in the
			// hostname match any character and foreign hosts could be rewritten
			$protocol_less_site_url = preg_quote($protocol_less_site_url, '/');

			// preg_replace() returns null on failure, keep the original URL in that case
			return preg_replace("/^https?{$protocol_less_site_url}\/?/i", $site_url, $url) ?? $url;
		}

		$matches = [];
		if (preg_match("#^([a-z]+)\\:#", $url, $matches)) {
			// we don't let http/https: URLs fail filter_var(), but anything else starting with a protocol
			// is OK
			if ($matches[1] !== 'http' && $matches[1] !== 'https') {
				return $url;
			}
		}

		if (preg_match("#^(\\#|\\?|//)#", $url)) {
			// starts with '//' (protocol-relative link), query, or fragment
			return $url;
		}

		if (preg_match("#^[^/]*\\.php(\\?.*)?$#", $url)) {
			// root PHP scripts: 'install.php', 'install.php?step=step'. We don't want to confuse these
			// for domain names.
			return elgg_get_site_url() . $url;
		}

		if (preg_match("#^[^/?]*\\.#", $url)) {
			// URLs starting with domain: 'example.com', 'example.com/subpage'
			return "http://{$url}";
		}

		// 'page/handler', 'mod/plugin/file.php'
		// trim off any leading / because the site URL is stored
		// with a trailing /
		return elgg_get_site_url() . ltrim($url, '/');
	}

	/**
	 * Test if two URLs are functionally identical.
	 *
	 * Both URLs are normalized, after which scheme, host and path (without leading/trailing
	 * slashes) must match. Query parameters are compared regardless of their order.
	 * Port and fragment are not part of the comparison.
	 *
	 * @param string $url1          First URL
	 * @param string $url2          Second URL
	 * @param array  $ignore_params Names of query parameters to leave out of the comparison
	 *
	 * @return bool
	 */
	public function isUrlIdentical(string $url1, string $url2, array $ignore_params): bool {
		$url1 = $this->normalizeUrl($url1);
		$url2 = $this->normalizeUrl($url2);

		if ($url1 === $url2) {
			return true;
		}

		// parse_url() returns false on seriously malformed URLs
		$url1_info = parse_url($url1) ?: [];
		$url2_info = parse_url($url2) ?: [];

		if (isset($url1_info['path'])) {
			$url1_info['path'] = trim($url1_info['path'], '/');
		}

		if (isset($url2_info['path'])) {
			$url2_info['path'] = trim($url2_info['path'], '/');
		}

		// compare basic bits, a part present in only one of the URLs is also a difference
		foreach (['scheme', 'host', 'path'] as $part) {
			if (($url1_info[$part] ?? null) !== ($url2_info[$part] ?? null)) {
				return false;
			}
		}

		// quick compare of get params
		if (isset($url1_info['query'], $url2_info['query']) && $url1_info['query'] === $url2_info['query']) {
			return true;
		}

		// compare get params that might be out of order
		$url1_params = $this->extractQueryParams($url1_info);
		$url2_params = $this->extractQueryParams($url2_info);

		// drop ignored params
		foreach ($ignore_params as $param) {
			unset($url1_params[$param], $url2_params[$param]);
		}

		// the recursive diff only returns the items in arr1 that aren't in arrN
		// but not the items that ARE in arrN but NOT in arr1,
		// so the comparison has to be done in both directions
		$diff_count = count($this->arrayDiffAssocRecursive($url1_params, $url2_params));
		$diff_count += count($this->arrayDiffAssocRecursive($url2_params, $url1_params));

		return $diff_count === 0;
	}

	/**
	 * Use a "fixed" filter_var() with FILTER_VALIDATE_URL that handles multi-byte chars.
	 *
	 * If the URL fails validation and contains multi-byte characters, each of those characters
	 * is replaced by an 'X' and the result is validated again.
	 *
	 * @param string $url URL to validate
	 *
	 * @return bool
	 */
	public static function isValidMultiByteUrl(string $url): bool {
		// based on http://php.net/manual/en/function.filter-var.php#104160
		if (filter_var($url, FILTER_VALIDATE_URL) !== false) {
			return true;
		}

		// Check if it has unicode chars.
		// Equal byte and character length means there is nothing to replace
		$l = elgg_strlen($url);
		if (strlen($url) === $l) {
			return false;
		}

		// Replace wide chars by X
		$s = '';
		for ($i = 0; $i < $l; ++$i) {
			$ch = elgg_substr($url, $i, 1);
			$s .= (strlen($ch) > 1) ? 'X' : $ch;
		}

		// Re-check now.
		return (bool) filter_var($s, FILTER_VALIDATE_URL);
	}

	/**
	 * Computes the difference of arrays with additional index check.
	 *
	 * Accepts a variable number of arrays: the first array is compared against all the others.
	 * Nested arrays are compared recursively.
	 *
	 * @return array The items of the first array that are not present in the other arrays
	 */
	protected function arrayDiffAssocRecursive(): array {
		$args = func_get_args();
		$diff = [];

		$base = array_shift($args);
		if (!is_array($base)) {
			// nothing to compare
			return $diff;
		}

		$count = count($args);

		foreach ($base as $key => $val) {
			// $j counts the arrays in which the value is missing or different
			// $tmp collects the nested arrays for the recursive comparison
			$j = 0;
			$tmp = [$val];

			for ($i = 0; $i < $count; $i++) {
				if (is_array($val)) {
					if (!isset($args[$i][$key]) || !is_array($args[$i][$key]) || empty($args[$i][$key])) {
						$j++;
					} else {
						$tmp[] = $args[$i][$key];
					}
				} elseif (!array_key_exists($key, $args[$i]) || $args[$i][$key] !== $val) {
					$j++;
				}
			}

			if (is_array($val)) {
				$tmp = $this->arrayDiffAssocRecursive(...$tmp);
				if (!empty($tmp)) {
					$diff[$key] = $tmp;
				} elseif ($j == $count) {
					$diff[$key] = $val;
				}
			} elseif ($j == $count && $count) {
				$diff[$key] = $val;
			}
		}

		return $diff;
	}

	/**
	 * Returns the query parameters of a parsed URL.
	 *
	 * @param array $url_info URL parts like the ones returned by parse_url()
	 *
	 * @return array The parsed query parameters, empty if there is no query string
	 */
	private function extractQueryParams(array $url_info): array {
		if (!isset($url_info['query'])) {
			return [];
		}

		// explicit empty check, a truthiness check would discard a query string like "0"
		$query = html_entity_decode($url_info['query']);
		if ($query === '') {
			return [];
		}

		return elgg_parse_str($query);
	}
}
elgg_parse_str($str)
Elgg UTF-8 string functions.
Definition: mb_wrapper.php:16
buildUrl(array $parts, bool $html_encode=true)
Builds a URL from a parts array like the one returned by parse_url().
Definition: Urls.php:106
elgg parse_url
Parse a URL into its parts.
Definition: elgglib.js:135
$args
Some servers don't allow PHP to check the rewrite, so try via AJAX.
elgg_strlen()
Wrapper function for mb_strlen().
Definition: mb_wrapper.php:52
isUrlIdentical(string $url1, string $url2, array $ignore_params)
Test if two URLs are functionally identical.
Definition: Urls.php:190
$path
Definition: details.php:68
static isValidMultiByteUrl(string $url)
Use a "fixed" filter_var() with FILTER_VALIDATE_URL that handles multi-byte chars.
Definition: Urls.php:277
addActionTokensToUrl(string $url, bool $html_encode=false)
Adds action tokens to URL.
Definition: Urls.php:69
$count
Definition: ban.php:24
elgg_get_site_url()
Get the URL for the current (or specified) site, ending with "/".
if($container instanceof ElggGroup &&$container->guid!=elgg_get_page_owner_guid()) $key
Definition: summary.php:44
elgg_substr()
Wrapper function for mb_substr().
Definition: mb_wrapper.php:219
if($item instanceof\ElggEntity) elseif($item instanceof\ElggRiverItem) elseif($item instanceof ElggRelationship) elseif(is_callable([$item, 'getType']))
Definition: item.php:48
$query
normalizeUrl(string $url)
Definition: Urls.php:134
foreach($plugin_guids as $guid) if(empty($deactivated_plugins)) $url
Definition: deactivate.php:39
_elgg_services()
Get the global service provider.
Definition: elgglib.php:638
arrayDiffAssocRecursive()
Computes the difference of arrays with additional index check.
Definition: Urls.php:308
addQueryElementsToUrl(string $url, array $elements)
Sets elements in a URL's query string.
Definition: Urls.php:22
Create, sanitize and compare urls.
Definition: Urls.php:11