Elgg  Version master
Urls.php
Go to the documentation of this file.
1 <?php
2 
3 namespace Elgg\Http;
4 
/**
 * Create, sanitize and compare urls.
 */
class Urls {

	/**
	 * Sets elements in a URL's query string.
	 *
	 * @param string $url      The URL to modify
	 * @param array  $elements Key/value pairs to set in the query string; a null value removes the key
	 *
	 * @return string The rebuilt URL (never HTML encoded)
	 */
	public function addQueryElementsToUrl(string $url, array $elements): string {
		// parse_url() returns false on seriously malformed URLs; fall back to "no parts" so we never
		// write into a boolean (deprecated auto-vivification since PHP 8.1)
		$url_array = parse_url($url) ?: [];

		$query = isset($url_array['query']) ? elgg_parse_str($url_array['query']) : [];

		foreach ($elements as $k => $v) {
			if ($v === null) {
				unset($query[$k]);
			} else {
				$query[$k] = $v;
			}
		}

		// why check path? A: if no path, this may be a relative URL like "?foo=1". In this case,
		// the output "" would be interpreted as the current URL, so in this case we *must* set
		// a query to make sure elements are removed.
		if ($query || empty($url_array['path'])) {
			$url_array['query'] = http_build_query($query);
		} else {
			unset($url_array['query']);
		}

		$string = $this->buildUrl($url_array, false);

		return $this->restoreRelativeProtocol($url, $url_array, $string);
	}

	/**
	 * Adds action tokens to URL.
	 *
	 * The URL is normalized first. If both '__elgg_ts' and '__elgg_token' are already part of the
	 * query string no new tokens are generated.
	 *
	 * @param string $url         Full action URL
	 * @param bool   $html_encode HTML encode the url? (default: false)
	 *
	 * @return string URL with action tokens
	 */
	public function addActionTokensToUrl(string $url, bool $html_encode = false): string {
		$url = $this->normalizeUrl($url);

		// parse_url() returns false on seriously malformed URLs
		$components = parse_url($url) ?: [];

		$query = isset($components['query']) ? elgg_parse_str($components['query']) : [];

		if (isset($query['__elgg_ts'], $query['__elgg_token'])) {
			// tokens are already present: keep the URL as is, but still honour the requested encoding
			return $html_encode ? htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false) : $url;
		}

		// append action tokens to the existing query
		// CSRF service is not DI injected because Urls is used by installer and CSRF requires DB installed
		$query['__elgg_ts'] = _elgg_services()->csrf->getCurrentTime()->getTimestamp();
		$query['__elgg_token'] = _elgg_services()->csrf->generateActionToken($query['__elgg_ts']);
		$components['query'] = http_build_query($query);

		// rebuild the full url, keeping a protocol-relative prefix ('//') if the input had one
		return $this->restoreRelativeProtocol($url, $components, $this->buildUrl($components, $html_encode));
	}

	/**
	 * Builds a URL from a parts array like the one returned by parse_url().
	 *
	 * Only the 'scheme', 'host', 'port', 'path', 'query' and 'fragment' parts are used. If only
	 * partial information is passed, a partial URL will be returned.
	 *
	 * @param array $parts       Associative array of URL components
	 * @param bool  $html_encode HTML encode the url? (default: true)
	 *
	 * @return string Full or partial URL
	 */
	public function buildUrl(array $parts, bool $html_encode = true): string {
		// build only what's given to us
		$scheme = isset($parts['scheme']) ? "{$parts['scheme']}://" : '';
		$host = isset($parts['host']) ? "{$parts['host']}" : '';
		$port = isset($parts['port']) ? ":{$parts['port']}" : '';
		$path = isset($parts['path']) ? "{$parts['path']}" : '';
		$query = isset($parts['query']) ? "?{$parts['query']}" : '';
		$fragment = isset($parts['fragment']) ? "#{$parts['fragment']}" : '';

		$string = $scheme . $host . $port . $path . $query . $fragment;

		return $html_encode ? htmlspecialchars($string, ENT_QUOTES, 'UTF-8', false) : $string;
	}

	/**
	 * Converts shorthand URLs to absolute URLs, unless the given URL is absolute, protocol-relative,
	 * or starts with a protocol/fragment/query.
	 *
	 * Spaces are always replaced by '%20'. A valid http(s) URL that points at the site gets the
	 * scheme of the configured site URL.
	 *
	 * @param string $url The URL to normalize
	 *
	 * @return string The normalized URL
	 */
	public function normalizeUrl(string $url): string {
		$url = str_replace(' ', '%20', $url);

		if ($this->isValidMultiByteUrl($url)) {
			// fix invalid scheme in site url
			$site_url = elgg_get_site_url();

			// site URL without scheme and trailing slash (e.g. '://example.com/elgg'), fully escaped
			// so characters like '.' only match themselves
			$protocol_less_site_url = rtrim(preg_replace('/^https?:/i', ':', $site_url), '/');
			$protocol_less_site_url = preg_quote($protocol_less_site_url, '/');

			// the site prefix must be followed by '/', '?', '#' or the end of the URL, otherwise
			// 'http://example.com.evil.org/' or 'http://example.com:8080/' would be mistaken for the site
			return preg_replace("/^https?{$protocol_less_site_url}(?:\/|(?=[?#])|$)/i", $site_url, $url);
		}

		$matches = [];
		if (preg_match('#^([a-z]+)\\:#', $url, $matches)) {
			// we don't let http/https: URLs fail filter_var(), but anything else starting with a protocol
			// is OK
			if ($matches[1] !== 'http' && $matches[1] !== 'https') {
				return $url;
			}
		}

		if (preg_match('#^(\\#|\\?|//)#', $url)) {
			// starts with '//' (protocol-relative link), query, or fragment
			return $url;
		}

		if (preg_match('#^[^/]*\\.php(\\?.*)?$#', $url)) {
			// root PHP scripts: 'install.php', 'install.php?step=step'. We don't want to confuse these
			// for domain names.
			return elgg_get_site_url() . $url;
		}

		if (preg_match('#^[^/?]*\\.#', $url)) {
			// URLs starting with domain: 'example.com', 'example.com/subpage'
			return "http://{$url}";
		}

		// 'page/handler', 'mod/plugin/file.php'
		// trim off any leading / because the site URL is stored
		// with a trailing /
		return elgg_get_site_url() . ltrim($url, '/');
	}

	/**
	 * Test if two URLs are functionally identical.
	 *
	 * Both URLs are normalized first. Scheme, host and path (ignoring leading/trailing slashes) must
	 * match; query parameters are compared regardless of their order. Port and fragment are not compared.
	 *
	 * @param string $url1          First URL
	 * @param string $url2          Second URL
	 * @param array  $ignore_params Names of GET parameters to ignore in the comparison
	 *
	 * @return bool
	 */
	public function isUrlIdentical(string $url1, string $url2, array $ignore_params): bool {
		$url1 = $this->normalizeUrl($url1);
		$url2 = $this->normalizeUrl($url2);

		if ($url1 === $url2) {
			return true;
		}

		// parse_url() returns false on seriously malformed URLs
		$url1_info = parse_url($url1) ?: [];
		$url2_info = parse_url($url2) ?: [];

		if (isset($url1_info['path'])) {
			$url1_info['path'] = trim($url1_info['path'], '/');
		}

		if (isset($url2_info['path'])) {
			$url2_info['path'] = trim($url2_info['path'], '/');
		}

		// compare basic bits: a part has to be missing in both URLs or be equal in both
		foreach (['scheme', 'host', 'path'] as $part) {
			if (($url1_info[$part] ?? null) !== ($url2_info[$part] ?? null)) {
				return false;
			}
		}

		// quick compare of get params
		if (isset($url1_info['query'], $url2_info['query']) && $url1_info['query'] === $url2_info['query']) {
			return true;
		}

		// compare get params that might be out of order
		$url1_params = [];
		$url2_params = [];

		if (isset($url1_info['query'])) {
			$url1_info['query'] = html_entity_decode($url1_info['query']);
			if (!elgg_is_empty($url1_info['query'])) {
				$url1_params = elgg_parse_str($url1_info['query']);
			}
		}

		if (isset($url2_info['query'])) {
			$url2_info['query'] = html_entity_decode($url2_info['query']);
			if (!elgg_is_empty($url2_info['query'])) {
				$url2_params = elgg_parse_str($url2_info['query']);
			}
		}

		// drop ignored params
		foreach ($ignore_params as $param) {
			unset($url1_params[$param]);
			unset($url2_params[$param]);
		}

		// the recursive diff only returns the items in arr1 that aren't in arrN
		// but not the items that ARE in arrN but NOT in arr1
		// since we only care if they're different and not how different,
		// add the results of both directions together to get a non-zero (ie, different) result
		$diff_count = count($this->arrayDiffAssocRecursive($url1_params, $url2_params));
		$diff_count += count($this->arrayDiffAssocRecursive($url2_params, $url1_params));

		return $diff_count === 0;
	}

	/**
	 * Use a "fixed" filter_var() with FILTER_VALIDATE_URL that handles multi-byte chars.
	 *
	 * @param string $url URL to validate
	 *
	 * @return bool
	 */
	public static function isValidMultiByteUrl(string $url): bool {
		// based on http://php.net/manual/en/function.filter-var.php#104160
		if (filter_var($url, FILTER_VALIDATE_URL) !== false) {
			return true;
		}

		// Check if it has unicode chars.
		$l = elgg_strlen($url);
		if (strlen($url) === $l) {
			// byte length equals character length: no multi-byte chars, so the URL is simply invalid
			return false;
		}

		// Replace wide chars by X
		$s = '';
		for ($i = 0; $i < $l; ++$i) {
			$ch = elgg_substr($url, $i, 1);
			$s .= (strlen($ch) > 1) ? 'X' : $ch;
		}

		// Re-check now.
		return (bool) filter_var($s, FILTER_VALIDATE_URL);
	}

	/**
	 * Computes the difference of arrays with additional index check.
	 *
	 * Accepts a variable number of arrays: the first one is compared against all the others.
	 * Nested arrays are compared recursively.
	 *
	 * @return array The entries of the first array that are not present in the other arrays
	 */
	protected function arrayDiffAssocRecursive(): array {
		$args = func_get_args();
		$diff = [];

		foreach (array_shift($args) as $key => $val) {
			// $j counts the arrays in which $key is missing or holds a different value,
			// $tmp collects the nested arrays that need a recursive comparison
			for ($i = 0, $j = 0, $tmp = [$val], $count = count($args); $i < $count; $i++) {
				if (is_array($val)) {
					if (empty($args[$i][$key]) || !is_array($args[$i][$key])) {
						$j++;
					} else {
						$tmp[] = $args[$i][$key];
					}
				} elseif (!array_key_exists($key, $args[$i]) || $args[$i][$key] !== $val) {
					$j++;
				}
			}

			if (is_array($val)) {
				$tmp = $this->arrayDiffAssocRecursive(...$tmp);
				if (!empty($tmp)) {
					$diff[$key] = $tmp;
				} elseif ($j == $count) {
					$diff[$key] = $val;
				}
			} elseif ($j == $count && $count) {
				$diff[$key] = $val;
			}
		}

		return $diff;
	}

	/**
	 * Restore the relative protocol ('//') of a rebuilt URL if the initial URL had one (see #9874).
	 *
	 * parse_url() drops the leading '//' of protocol-relative URLs, so buildUrl() can't bring it back.
	 *
	 * @param string $original_url The URL as it was before parsing
	 * @param array  $parts        The parsed URL components
	 * @param string $built_url    The URL rebuilt from the components
	 *
	 * @return string
	 */
	private function restoreRelativeProtocol(string $original_url, array $parts, string $built_url): string {
		if (!isset($parts['scheme']) && str_starts_with($original_url, '//')) {
			return "//{$built_url}";
		}

		return $built_url;
	}
}
elgg_parse_str($str)
Elgg UTF-8 string functions.
Definition: mb_wrapper.php:16
buildUrl(array $parts, bool $html_encode=true)
Builds a URL from a parts array like the one returned by parse_url().
Definition: Urls.php:106
$args
Some servers don't allow PHP to check the rewrite, so try via AJAX.
if($item instanceof\ElggEntity) elseif($item instanceof\ElggRiverItem) elseif($item instanceof\ElggRelationship) elseif(is_callable([$item, 'getType']))
Definition: item.php:48
elgg_strlen()
Wrapper function for mb_strlen().
Definition: mb_wrapper.php:53
elgg_is_empty($value)
Check if a value isn't empty, but allow 0 and '0'.
Definition: input.php:176
isUrlIdentical(string $url1, string $url2, array $ignore_params)
Test if two URLs are functionally identical.
Definition: Urls.php:190
$path
Definition: details.php:70
static isValidMultiByteUrl(string $url)
Use a "fixed" filter_var() with FILTER_VALIDATE_URL that handles multi-byte chars.
Definition: Urls.php:276
addActionTokensToUrl(string $url, bool $html_encode=false)
Adds action tokens to URL.
Definition: Urls.php:69
$count
Definition: ban.php:24
elgg_get_site_url()
Get the URL for the current (or specified) site, ending with "/".
if($container instanceof ElggGroup &&$container->guid!=elgg_get_page_owner_guid()) $key
Definition: summary.php:44
elgg_substr()
Wrapper function for mb_substr().
Definition: mb_wrapper.php:230
$query
normalizeUrl(string $url)
Definition: Urls.php:134
foreach($plugin_guids as $guid) if(empty($deactivated_plugins)) $url
Definition: deactivate.php:39
_elgg_services()
Get the global service provider.
Definition: elgglib.php:346
arrayDiffAssocRecursive()
Computes the difference of arrays with additional index check.
Definition: Urls.php:307
addQueryElementsToUrl(string $url, array $elements)
Sets elements in a URL's query string.
Definition: Urls.php:22
Create, sanitize and compare urls.
Definition: Urls.php:11