%PDF-1.4
Directory : /var/www/vhosts/lautnusantara.com/httpdocs/web/system/ThirdParty/Escaper/ |
<?php

declare(strict_types=1);

namespace Laminas\Escaper;

use function bin2hex;
use function ctype_digit;
use function hexdec;
use function htmlspecialchars;
use function in_array;
use function mb_convert_encoding;
use function ord;
use function preg_match;
use function preg_replace_callback;
use function rawurlencode;
use function sprintf;
use function strlen;
use function strtolower;
use function strtoupper;
use function substr;

use const ENT_QUOTES;
use const ENT_SUBSTITUTE;

/**
 * Context specific methods for use in secure output escaping.
 *
 * Each public escape*() method targets exactly one output context (HTML body,
 * HTML attribute, Javascript, URL component, CSS). The attribute, Javascript
 * and CSS escapers work on UTF-8 internally: input is converted from the
 * configured encoding before escaping and converted back afterwards.
 */
class Escaper
{
    /**
     * Entity Map mapping Unicode codepoints to any available named HTML entities.
     *
     * While HTML supports far more named entities, the lowest common denominator
     * has become HTML5's XML Serialisation which is restricted to those named
     * entities that XML supports. Using HTML entities would result in this error:
     *     XML Parsing Error: undefined entity
     *
     * @var array<int, string>
     */
    protected static $htmlNamedEntityMap = [
        34 => 'quot', // quotation mark
        38 => 'amp',  // ampersand
        60 => 'lt',   // less-than sign
        62 => 'gt',   // greater-than sign
    ];

    /**
     * Current encoding for escaping. If not UTF-8, we convert strings from this encoding
     * pre-escaping and back to this encoding post-escaping.
     *
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Holds the value of the special flags passed as second parameter to
     * htmlspecialchars().
     *
     * @var int
     */
    protected $htmlSpecialCharsFlags;

    /**
     * Static Matcher which escapes characters for HTML Attribute contexts
     *
     * @var callable
     * @psalm-var callable(array<array-key, string>):string
     */
    protected $htmlAttrMatcher;

    /**
     * Static Matcher which escapes characters for Javascript contexts
     *
     * @var callable
     * @psalm-var callable(array<array-key, string>):string
     */
    protected $jsMatcher;

    /**
     * Static Matcher which escapes characters for CSS Attribute contexts
     *
     * @var callable
     * @psalm-var callable(array<array-key, string>):string
     */
    protected $cssMatcher;

    /**
     * List of all encodings supported by this class (lower-case spellings).
     *
     * @var string[]
     */
    protected $supportedEncodings = [
        'iso-8859-1',
        'iso8859-1',
        'iso-8859-5',
        'iso8859-5',
        'iso-8859-15',
        'iso8859-15',
        'utf-8',
        'cp866',
        'ibm866',
        '866',
        'cp1251',
        'windows-1251',
        'win-1251',
        '1251',
        'cp1252',
        'windows-1252',
        '1252',
        'koi8-r',
        'koi8-ru',
        'koi8r',
        'big5',
        '950',
        'gb2312',
        '936',
        'big5-hkscs',
        'shift_jis',
        'sjis',
        'sjis-win',
        'cp932',
        '932',
        'euc-jp',
        'eucjp',
        'eucjp-win',
        'macroman',
    ];

    /**
     * Constructor: Single parameter allows setting of global encoding for use by
     * the current object.
     *
     * The encoding name is lower-cased and must match one of the entries in
     * $supportedEncodings exactly; when omitted, 'utf-8' is used.
     *
     * @throws Exception\InvalidArgumentException When $encoding is blank or unsupported.
     */
    public function __construct(?string $encoding = null)
    {
        if ($encoding !== null) {
            if ($encoding === '') {
                throw new Exception\InvalidArgumentException(
                    static::class . ' constructor parameter does not allow a blank value'
                );
            }

            $encoding = strtolower($encoding);

            // Strict comparison is required: several supported names are purely
            // numeric ('866', '950', ...) and a loose in_array() would also accept
            // numeric look-alikes such as '0950' or '9.5e2'.
            if (! in_array($encoding, $this->supportedEncodings, true)) {
                throw new Exception\InvalidArgumentException(
                    'Value of \'' . $encoding . '\' passed to ' . static::class
                    . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                );
            }

            $this->encoding = $encoding;
        }

        // We take advantage of ENT_SUBSTITUTE flag to correctly deal with invalid UTF-8 sequences.
        $this->htmlSpecialCharsFlags = ENT_QUOTES | ENT_SUBSTITUTE;

        // set matcher callbacks
        $this->htmlAttrMatcher = [$this, 'htmlAttrMatcher'];
        $this->jsMatcher       = [$this, 'jsMatcher'];
        $this->cssMatcher      = [$this, 'cssMatcher'];
    }

    /**
     * Return the encoding that all output/input is expected to be encoded in.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Escape a string for the HTML Body context where there are very few characters
     * of special meaning. Internally this will use htmlspecialchars().
     *
     * @return string
     */
    public function escapeHtml(string $string)
    {
        return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
    }

    /**
     * Escape a string for the HTML Attribute context. We use an extended set of characters
     * to escape that are not covered by htmlspecialchars() to cover cases where an attribute
     * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE).
     *
     * Empty strings and all-digit strings are returned unchanged (after the UTF-8 check).
     *
     * @throws Exception\RuntimeException When the input is not valid in the configured encoding.
     * @return string
     */
    public function escapeHtmlAttr(string $string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the Javascript context. This does not use json_encode(). An extended
     * set of characters are escaped beyond ECMAScript's rules for Javascript literal string
     * escaping in order to prevent misinterpretation of Javascript as HTML leading to the
     * injection of special characters and entities. The escaping used should be tolerant
     * of cases where HTML escaping was not applied on top of Javascript escaping correctly.
     * Backslash escaping is not used as it still leaves the escaped character as-is and so
     * is not useful in a HTML context.
     *
     * @throws Exception\RuntimeException When the input is not valid in the configured encoding.
     * @return string
     */
    public function escapeJs(string $string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the URI or Parameter contexts. This should not be used to escape
     * an entire URI - only a subcomponent being inserted. The function is a simple proxy
     * to rawurlencode() which now implements RFC 3986 since PHP 5.3 completely.
     *
     * @return string
     */
    public function escapeUrl(string $string)
    {
        return rawurlencode($string);
    }

    /**
     * Escape a string for the CSS context. CSS escaping can be applied to any string being
     * inserted into CSS and escapes everything except alphanumerics.
     *
     * @throws Exception\RuntimeException When the input is not valid in the configured encoding.
     * @return string
     */
    public function escapeCss(string $string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Callback function for preg_replace_callback that applies HTML Attribute
     * escaping to all matches.
     *
     * @param array<array-key, string> $matches Element 0 is the single matched UTF-8 character.
     * @return string
     */
    protected function htmlAttrMatcher($matches)
    {
        $chr = $matches[0];
        $ord = ord($chr);

        /**
         * The following replaces characters undefined in HTML with the
         * hex entity for the Unicode replacement character.
         *
         * The entity is emitted in its ASCII form (not as a raw U+FFFD character)
         * so that it survives conversion back to non-UTF-8 output encodings.
         *
         * NOTE: ord() inspects only the first byte of $chr, so this check is
         * effectively a test on single-byte (ASCII range) matches.
         */
        if (
            ($ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r")
            || ($ord >= 0x7f && $ord <= 0x9f)
        ) {
            return '&#xFFFD;';
        }

        /**
         * Check if the current character to escape has a name entity we should
         * replace it with while grabbing the integer value of the character.
         */
        if (strlen($chr) > 1) {
            // Multi-byte character: UTF-32BE bytes read as hex give the codepoint.
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
        }

        $hex = bin2hex($chr);
        $ord = hexdec($hex);
        if (isset(static::$htmlNamedEntityMap[$ord])) {
            return '&' . static::$htmlNamedEntityMap[$ord] . ';';
        }

        /**
         * Per OWASP recommendations, we'll use upper hex entities
         * for any other characters where a named entity does not exist.
         */
        if ($ord > 255) {
            return sprintf('&#x%04X;', $ord);
        }
        return sprintf('&#x%02X;', $ord);
    }

    /**
     * Callback function for preg_replace_callback that applies Javascript
     * escaping to all matches.
     *
     * Single-byte characters become \xHH; everything else becomes one \uHHHH
     * unit, or two (a surrogate pair) when the UTF-16BE form is longer than
     * four hex digits.
     *
     * @param array<array-key, string> $matches Element 0 is the single matched UTF-8 character.
     * @return string
     */
    protected function jsMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            return sprintf('\\x%02X', ord($chr));
        }

        $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
        $hex = strtoupper(bin2hex($chr));
        if (strlen($hex) <= 4) {
            return sprintf('\\u%04s', $hex);
        }

        $highSurrogate = substr($hex, 0, 4);
        $lowSurrogate  = substr($hex, 4, 4);
        return sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate);
    }

    /**
     * Callback function for preg_replace_callback that applies CSS
     * escaping to all matches.
     *
     * The result is a backslash, the upper-case hex codepoint and a trailing
     * space that terminates the escape sequence.
     *
     * @param array<array-key, string> $matches Element 0 is the single matched UTF-8 character.
     * @return string
     */
    protected function cssMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            $ord = ord($chr);
        } else {
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            $ord = hexdec(bin2hex($chr));
        }
        return sprintf('\\%X ', $ord);
    }

    /**
     * Converts a string to UTF-8 from the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @throws Exception\RuntimeException When the (converted) string is not valid UTF-8.
     * @return string
     */
    protected function toUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            $result = $string;
        } else {
            $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
        }

        if (! $this->isUtf8($result)) {
            throw new Exception\RuntimeException(
                sprintf('String to be escaped was not valid UTF-8 or could not be converted: %s', $result)
            );
        }

        return $result;
    }

    /**
     * Converts a string from UTF-8 to the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @return string
     */
    protected function fromUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            return $string;
        }

        return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
    }

    /**
     * Checks if a given string appears to be valid UTF-8 or not.
     *
     * An empty string counts as valid; otherwise the check relies on the PCRE
     * "u" modifier failing to match when the subject is not valid UTF-8.
     *
     * @param string $string
     * @return bool
     */
    protected function isUtf8($string)
    {
        return $string === '' || preg_match('/^./su', $string);
    }

    /**
     * Encoding conversion helper which wraps mb_convert_encoding
     *
     * @param string       $string
     * @param string       $to
     * @param array|string $from
     * @return string Converted string, or '' when mb_convert_encoding() reports failure.
     */
    protected function convertEncoding($string, $to, $from)
    {
        $result = mb_convert_encoding($string, $to, $from);

        if ($result === false) {
            return ''; // return non-fatal blank string on encoding errors from users
        }

        return $result;
    }
}