Elgg  Version master
/root/Elgg/engine/classes/Elgg/Http/Urls.php

Converts shorthand URLs to absolute URLs, unless the given URL is absolute, protocol-relative, or starts with a protocol/fragment/query.

Examples:
elgg_normalize_url(''); // 'http://my.site.com/'
elgg_normalize_url('dashboard'); // 'http://my.site.com/dashboard'
elgg_normalize_url('http://google.com/'); // no change
elgg_normalize_url('//google.com/'); // no change

Parameters
string $url The URL to normalize
Returns
string The absolute URL
<?php
namespace Elgg\Http;
/**
 * Builds, normalizes and compares URLs.
 */
class Urls {

	/**
	 * Sets, replaces or removes elements in the query string of a URL.
	 *
	 * @param string $url      The URL to modify
	 * @param array  $elements Key/value pairs to set in the query string. An element whose
	 *                         value is null is removed from the query string.
	 *
	 * @return string The URL with the updated query string
	 */
	public function addQueryElementsToUrl(string $url, array $elements): string {
		$url_array = $this->parseUrl($url);
		$query = isset($url_array['query']) ? elgg_parse_str($url_array['query']) : [];

		foreach ($elements as $k => $v) {
			if ($v === null) {
				unset($query[$k]);
			} else {
				$query[$k] = $v;
			}
		}

		// why check path? A: if no path, this may be a relative URL like "?foo=1". In this case,
		// the output "" would be interpreted the current URL, so in this case we *must* set
		// a query to make sure elements are removed.
		if ($query || empty($url_array['path'])) {
			$url_array['query'] = http_build_query($query);
		} else {
			unset($url_array['query']);
		}

		$string = $this->buildUrl($url_array, false);

		// Restore relative protocol to url if missing and is provided as part of the initial url (see #9874)
		return $this->restoreRelativeProtocol($string, $url_array, $url);
	}

	/**
	 * Appends the '__elgg_ts' and '__elgg_token' query elements to a URL.
	 *
	 * The URL is normalized first. If both elements are already present in the query
	 * string, the normalized URL is returned as is.
	 *
	 * @param string $url         The URL to add the tokens to
	 * @param bool   $html_encode Whether to HTML-encode the resulting URL
	 *
	 * @return string The URL including the action tokens
	 */
	public function addActionTokensToUrl(string $url, bool $html_encode = false): string {
		$url = $this->normalizeUrl($url);
		$components = $this->parseUrl($url);
		$query = isset($components['query']) ? elgg_parse_str($components['query']) : [];

		if (isset($query['__elgg_ts'], $query['__elgg_token'])) {
			return $url;
		}

		// append action tokens to the existing query
		// CSRF service is not DI injected because Urls is used by installer and CSRF requires DB installed
		$query['__elgg_ts'] = _elgg_services()->csrf->getCurrentTime()->getTimestamp();
		$query['__elgg_token'] = _elgg_services()->csrf->generateActionToken($query['__elgg_ts']);
		$components['query'] = http_build_query($query);

		// rebuild the full url; normalizeUrl() leaves protocol-relative URLs untouched and
		// buildUrl() cannot emit the leading '//' on its own, so put it back afterwards
		$string = $this->buildUrl($components, $html_encode);

		return $this->restoreRelativeProtocol($string, $components, $url);
	}

	/**
	 * Builds a URL from the parts array as returned by parse_url().
	 *
	 * Only the parts that are present are used. A host without a scheme is emitted
	 * without a leading '//'; callers handling protocol-relative URLs add it themselves.
	 *
	 * @param array $parts       Associative array of URL components (scheme, user, pass,
	 *                           host, port, path, query, fragment)
	 * @param bool  $html_encode HTML-encode the result (existing entities are not double encoded)
	 *
	 * @return string The assembled URL
	 */
	public function buildUrl(array $parts, bool $html_encode = true): string {
		// build only what's given to us
		$scheme = isset($parts['scheme']) ? "{$parts['scheme']}://" : '';

		// keep credentials so that parse_url() -> buildUrl() does not alter the authority
		$userinfo = '';
		if (isset($parts['user'])) {
			$userinfo = (string) $parts['user'];
			if (isset($parts['pass'])) {
				$userinfo .= ":{$parts['pass']}";
			}

			$userinfo .= '@';
		}

		$host = isset($parts['host']) ? "{$parts['host']}" : '';
		$port = isset($parts['port']) ? ":{$parts['port']}" : '';
		$path = isset($parts['path']) ? "{$parts['path']}" : '';
		$query = isset($parts['query']) ? "?{$parts['query']}" : '';
		$fragment = isset($parts['fragment']) ? "#{$parts['fragment']}" : '';

		$string = $scheme . $userinfo . $host . $port . $path . $query . $fragment;

		return $html_encode ? htmlspecialchars($string, ENT_QUOTES, 'UTF-8', false) : $string;
	}

	/**
	 * Converts shorthand URLs to absolute URLs, unless the given URL is absolute,
	 * protocol-relative, or starts with a protocol/fragment/query.
	 *
	 * @param string $url The URL to normalize
	 *
	 * @return string The absolute URL
	 */
	public function normalizeUrl(string $url): string {
		$url = str_replace(' ', '%20', $url);

		if (static::isValidMultiByteUrl($url)) {
			// fix invalid scheme in site url
			$site_url = elgg_get_site_url();
			$protocol_less_site_url = rtrim((string) preg_replace('/^https?:/i', ':', $site_url), '/');

			// The site URL is quoted so that characters like '.' only match themselves, and the
			// prefix has to end at a '/', '?', '#' or the end of the URL. Otherwise URLs on a
			// different host or directory which merely start with the site URL would be rewritten.
			$pattern = '/^https?' . preg_quote($protocol_less_site_url, '/') . '(?:\/|(?=[?#])|$)/i';

			$site_match = [];
			if (preg_match($pattern, $url, $site_match) === 1) {
				return $site_url . substr($url, strlen($site_match[0]));
			}

			return $url;
		}

		$matches = [];
		if (preg_match('#^([a-z]+)\\:#', $url, $matches)) {
			// we don't let http/https: URLs fail filter_var(), but anything else starting with a protocol
			// is OK
			if ($matches[1] !== 'http' && $matches[1] !== 'https') {
				return $url;
			}
		}

		if (preg_match('#^(\\#|\\?|//)#', $url)) {
			// starts with '//' (protocol-relative link), query, or fragment
			return $url;
		}

		if (preg_match('#^[^/]*\\.php(\\?.*)?$#', $url)) {
			// root PHP scripts: 'install.php', 'install.php?step=step'. We don't want to confuse these
			// for domain names.
			return elgg_get_site_url() . $url;
		}

		if (preg_match('#^[^/?]*\\.#', $url)) {
			// URLs starting with domain: 'example.com', 'example.com/subpage'
			return "http://{$url}";
		}

		// 'page/handler', 'mod/plugin/file.php'
		// trim off any leading / because the site URL is stored
		// with a trailing /
		return elgg_get_site_url() . ltrim($url, '/');
	}

	/**
	 * Tests whether two URLs are functionally identical.
	 *
	 * Both URLs are normalized first. Scheme, host and path (ignoring leading/trailing
	 * slashes) must match and the query parameters must be equal regardless of their
	 * order. Port, credentials and fragment are not part of the comparison.
	 *
	 * @param string $url1          First URL
	 * @param string $url2          Second URL
	 * @param array  $ignore_params Names of query parameters to leave out of the comparison
	 *
	 * @return bool True if the URLs are considered identical
	 */
	public function isUrlIdentical(string $url1, string $url2, array $ignore_params): bool {
		$url1 = $this->normalizeUrl($url1);
		$url2 = $this->normalizeUrl($url2);

		if ($url1 === $url2) {
			return true;
		}

		$url1_info = $this->parseUrl($url1);
		$url2_info = $this->parseUrl($url2);

		if (isset($url1_info['path'])) {
			$url1_info['path'] = trim($url1_info['path'], '/');
		}

		if (isset($url2_info['path'])) {
			$url2_info['path'] = trim($url2_info['path'], '/');
		}

		// compare basic bits: a part has to be present in both or absent in both, and equal
		foreach (['scheme', 'host', 'path'] as $part) {
			if (($url1_info[$part] ?? null) !== ($url2_info[$part] ?? null)) {
				return false;
			}
		}

		// quick compare of get params
		if (isset($url1_info['query'], $url2_info['query']) && $url1_info['query'] === $url2_info['query']) {
			return true;
		}

		// compare get params that might be out of order
		$url1_params = $this->extractQueryParams($url1_info);
		$url2_params = $this->extractQueryParams($url2_info);

		// drop ignored params
		foreach ($ignore_params as $param) {
			unset($url1_params[$param], $url2_params[$param]);
		}

		// the recursive diff only reports items of the first array that are missing from or
		// different in the second one, so it has to be checked in both directions
		if (!empty($this->arrayDiffAssocRecursive($url1_params, $url2_params))) {
			return false;
		}

		return empty($this->arrayDiffAssocRecursive($url2_params, $url1_params));
	}

	/**
	 * Validates a URL that may contain multibyte characters.
	 *
	 * @param string $url The URL to validate
	 *
	 * @return bool True if the URL passes FILTER_VALIDATE_URL, either as given or after
	 *              every multibyte character has been replaced by 'X'
	 */
	public static function isValidMultiByteUrl(string $url): bool {
		// based on http://php.net/manual/en/function.filter-var.php#104160
		if (filter_var($url, FILTER_VALIDATE_URL) !== false) {
			return true;
		}

		// Check if it has unicode chars.
		$length = elgg_strlen($url);
		if (strlen($url) === $length) {
			return false;
		}

		// Replace wide chars by X
		$replaced = '';
		for ($i = 0; $i < $length; ++$i) {
			$char = elgg_substr($url, $i, 1);
			$replaced .= (strlen($char) > 1) ? 'X' : $char;
		}

		// Re-check now.
		return (bool) filter_var($replaced, FILTER_VALIDATE_URL);
	}

	/**
	 * Recursive variant of array_diff_assoc().
	 *
	 * Takes two or more arrays as arguments and returns the entries of the first array
	 * that are missing from, or different in, all of the other arrays. Nested arrays are
	 * compared recursively. Scalars are compared strictly.
	 *
	 * @return array The entries of the first array that are not matched by the other arrays
	 */
	protected function arrayDiffAssocRecursive(): array {
		$others = func_get_args();
		$base = array_shift($others);
		if (!is_array($base)) {
			return [];
		}

		$count = count($others);
		$diff = [];

		foreach ($base as $key => $val) {
			// number of other arrays that do not hold a comparable value for this key
			$unmatched = 0;

			if (is_array($val)) {
				$nested = [$val];
				foreach ($others as $other) {
					// an empty array on the other side is still an array and must be compared
					// as such, otherwise two empty arrays would be reported as different
					if (is_array($other[$key] ?? null)) {
						$nested[] = $other[$key];
					} else {
						$unmatched++;
					}
				}

				$nested_diff = $this->arrayDiffAssocRecursive(...$nested);
				if (!empty($nested_diff)) {
					$diff[$key] = $nested_diff;
				} elseif ($unmatched === $count) {
					$diff[$key] = $val;
				}

				continue;
			}

			foreach ($others as $other) {
				if (!array_key_exists($key, $other) || $other[$key] !== $val) {
					$unmatched++;
				}
			}

			if ($count > 0 && $unmatched === $count) {
				$diff[$key] = $val;
			}
		}

		return $diff;
	}

	/**
	 * Wrapper around parse_url() that always returns an array.
	 *
	 * parse_url() returns false for seriously malformed URLs; treating that as "no parts"
	 * avoids writing array offsets onto a boolean.
	 *
	 * @param string $url The URL to parse
	 *
	 * @return array The URL components, or an empty array if the URL could not be parsed
	 */
	private function parseUrl(string $url): array {
		$parts = parse_url($url);

		return is_array($parts) ? $parts : [];
	}

	/**
	 * Puts back the leading '//' of a protocol-relative URL after it went through buildUrl().
	 *
	 * @param string $built    The URL as assembled by buildUrl()
	 * @param array  $parts    The URL components the URL was built from
	 * @param string $original The URL the components were parsed from
	 *
	 * @return string The built URL, prefixed with '//' if the original was protocol-relative
	 */
	private function restoreRelativeProtocol(string $built, array $parts, string $original): string {
		if (!isset($parts['scheme']) && str_starts_with($original, '//')) {
			return "//{$built}";
		}

		return $built;
	}

	/**
	 * Returns the query parameters of parsed URL components.
	 *
	 * HTML entities in the query string are decoded before it is parsed.
	 *
	 * @param array $url_info URL components as returned by parse_url()
	 *
	 * @return array The parsed query parameters, empty if there is no (non-empty) query string
	 */
	private function extractQueryParams(array $url_info): array {
		if (!isset($url_info['query'])) {
			return [];
		}

		$query = html_entity_decode($url_info['query']);
		if (elgg_is_empty($query)) {
			return [];
		}

		return elgg_parse_str($query);
	}
}