eplacement character. * * > This practice is more secure because it does not result in the * > conversion consuming parts of valid sequences as though they were * > invalid. It also guarantees at least one replacement character will * > occur for each instance of an invalid sequence in the original text. * > Furthermore, this practice can be defined consistently for better * > interoperability between different implementations of conversion. * * @see https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-5/#G40630 */ invalid_utf8: $at = $i; $invalid_length = 1; // Single-byte and two-byte characters. if ( ( 0x00 === ( $b1 & 0x80 ) ) || ( 0xC0 === ( $b1 & 0xE0 ) ) ) { return $count; } $b2 = ord( $bytes[ $i + 1 ] ?? "\xC0" ); $b3 = ord( $bytes[ $i + 2 ] ?? "\xC0" ); // Find the maximal subpart and skip past it. if ( 0xE0 === ( $b1 & 0xF0 ) ) { // Three-byte characters. $b2_valid = ( ( 0xE0 === $b1 && $b2 >= 0xA0 && $b2 <= 0xBF ) || ( $b1 >= 0xE1 && $b1 <= 0xEC && $b2 >= 0x80 && $b2 <= 0xBF ) || ( 0xED === $b1 && $b2 >= 0x80 && $b2 <= 0x9F ) || ( $b1 >= 0xEE && $b1 <= 0xEF && $b2 >= 0x80 && $b2 <= 0xBF ) ); $invalid_length = min( $end - $i, $b2_valid ? 2 : 1 ); return $count; } elseif ( 0xF0 === ( $b1 & 0xF8 ) ) { // Four-byte characters. $b2_valid = ( ( 0xF0 === $b1 && $b2 >= 0x90 && $b2 <= 0xBF ) || ( $b1 >= 0xF1 && $b1 <= 0xF3 && $b2 >= 0x80 && $b2 <= 0xBF ) || ( 0xF4 === $b1 && $b2 >= 0x80 && $b2 <= 0x8F ) ); $b3_valid = $b3 >= 0x80 && $b3 <= 0xBF; $invalid_length = min( $end - $i, $b2_valid ? ( $b3_valid ? 3 : 2 ) : 1 ); return $count; } return $count; } $at = $i; return $count; } /** * Fallback mechanism for safely validating UTF-8 bytes. * * @since 6.9.0 * @access private * * @see wp_is_valid_utf8() * * @param string $bytes String which might contain text encoded as UTF-8. * @return bool Whether the provided bytes can decode as valid UTF-8. 
*/
function _wp_is_valid_utf8_fallback( string $bytes ): bool {
	$total_bytes = strlen( $bytes );

	// An empty string has no bytes that could be invalid.
	if ( 0 === $total_bytes ) {
		return true;
	}

	$scanned_to   = 0;
	$invalid_span = 0;
	_wp_scan_utf8( $bytes, $scanned_to, $invalid_span );

	// Any reported invalid span means the input is not valid UTF-8.
	if ( 0 !== $invalid_span ) {
		return false;
	}

	// Otherwise the input is valid only if the scan position reached the end.
	return $total_bytes === $scanned_to;
}

/**
 * Replaces each invalid span of bytes in a string with U+FFFD, the
 * Unicode replacement character, without relying on PHP extensions.
 *
 * Strings in which no invalid span is found are returned as-is.
 *
 * Example:
 *
 *     'Pi�a' === _wp_scrub_utf8_fallback( "Pi\xF1a" ); // “ñ” is 0xF1 in Windows-1252.
 *
 * @since 6.9.0
 * @access private
 *
 * @see wp_scrub_utf8()
 *
 * @param string $bytes UTF-8 encoded string which might contain spans of invalid bytes.
 * @return string Input string with spans of invalid bytes swapped with the replacement character.
 */
function _wp_scrub_utf8_fallback( string $bytes ): string {
	$total_bytes  = strlen( $bytes );
	$cursor       = 0;
	$copied_up_to = 0;
	$invalid_span = 0;
	$output       = '';

	while ( $cursor <= $total_bytes ) {
		_wp_scan_utf8( $bytes, $cursor, $invalid_span );

		// The scan stopped before the end: copy what precedes the stop, then substitute.
		if ( $cursor < $total_bytes ) {
			$output .= substr( $bytes, $copied_up_to, $cursor - $copied_up_to );
			$output .= "\u{FFFD}";

			$cursor      += $invalid_span;
			$copied_up_to = $cursor;
			continue;
		}

		// Nothing was ever replaced, so the original string can be handed back untouched.
		if ( 0 === $copied_up_to ) {
			return $bytes;
		}

		return $output . substr( $bytes, $copied_up_to, $cursor - $copied_up_to - $invalid_span );
	}

	return $output;
}

/**
 * Returns how many code points are found in the given UTF-8 string.
 *
 * Invalid spans of bytes count as a single code point according
 * to the maximal subpart rule. This function is a fallback method
 * for calling `mb_strlen( $text, 'UTF-8' )`.
 *
 * When negative values are provided for the byte offsets or length,
 * this will always report zero code points.
 *
 * Example:
 *
 *     4  === _wp_utf8_codepoint_count( 'text' );
 *
 *     // Groups are 'test', "\x90" as '�', 'wp', "\xE2\x80" as '�', "\xC0" as '�', and 'test'.
*     13 === _wp_utf8_codepoint_count( "test\x90wp\xE2\x80\xC0test" );
 *
 * @since 6.9.0
 * @access private
 *
 * @param string $text            Count code points in this string.
 * @param ?int   $byte_offset     Optional. Start counting after this many bytes in `$text`.
 *                                Must not be negative. Passing `null` is the same as
 *                                passing the default of 0.
 * @param ?int   $max_byte_length Optional. Stop counting after having scanned past this many bytes.
 *                                Must not be negative. Default, and the meaning of `null`,
 *                                is to scan until the end of the string.
 * @return int How many code points were found.
 */
function _wp_utf8_codepoint_count( string $text, ?int $byte_offset = 0, ?int $max_byte_length = PHP_INT_MAX ): int {
	/*
	 * The signature accepts `null` for both limits. Without normalizing,
	 * `min( $remaining, null )` evaluates to `null`, the loop condition is
	 * never satisfied, and the function silently reports zero code points.
	 * A `null` offset would also be handed to the scanner as its position.
	 * Treat `null` as "use the declared default" instead.
	 */
	$byte_offset     = $byte_offset ?? 0;
	$max_byte_length = $max_byte_length ?? PHP_INT_MAX;

	if ( $byte_offset < 0 ) {
		return 0;
	}

	$count          = 0;
	$at             = $byte_offset;
	$end            = strlen( $text );
	$invalid_length = 0;

	// Never scan past the end of the string; a negative length skips the loop entirely.
	$max_byte_length = min( $end - $at, $max_byte_length );

	while ( $at < $end && ( $at - $byte_offset ) < $max_byte_length ) {
		// Hand the scanner only the bytes that remain inside the requested window.
		$count += _wp_scan_utf8( $text, $at, $invalid_length, $max_byte_length - ( $at - $byte_offset ) );

		// An invalid span counts as exactly one code point and is then skipped.
		$count += $invalid_length > 0 ? 1 : 0;
		$at    += $invalid_length;
	}

	return $count;
}

/**
 * Given a starting offset within a string and a maximum number of code points,
 * return how many bytes are occupied by the span of characters.
 *
 * Invalid spans of bytes count as a single code point according to the maximal
 * subpart rule. This function is a fallback method for calling
 * `strlen( mb_substr( substr( $text, $at ), 0, $max_code_points ) )`.
 *
 * @since 6.9.0
 * @access private
 *
 * @param string $text              Count bytes of span in this text.
 * @param int    $byte_offset       Start counting at this byte offset.
 * @param int    $max_code_points   Stop counting after this many code points have been seen,
 *                                  or at the end of the string.
 * @param ?int   $found_code_points Optional. Will be set to number of found code points in
 *                                  span, as this might be smaller than the maximum count if
 *                                  the string is not long enough.
 * @return int Number of bytes spanned by the code points.
*/
function _wp_utf8_codepoint_span( string $text, int $byte_offset, int $max_code_points, ?int &$found_code_points = 0 ): int {
	// Remember the starting offset; `$byte_offset` is used as the moving scan position below.
	$was_at            = $byte_offset;
	$invalid_length    = 0;
	$end               = strlen( $text );
	$found_code_points = 0;

	while ( $byte_offset < $end && $found_code_points < $max_code_points ) {
		// Only ask for as many code points as are still missing from the requested total.
		$needed      = $max_code_points - $found_code_points;
		$chunk_count = _wp_scan_utf8( $text, $byte_offset, $invalid_length, null, $needed );

		$found_code_points += $chunk_count;

		// Invalid spans only convey one code point count regardless of how long they are.
		if ( 0 !== $invalid_length && $found_code_points < $max_code_points ) {
			++$found_code_points;
			$byte_offset += $invalid_length;
		}
	}

	// The span length is the distance travelled from the starting offset.
	return $byte_offset - $was_at;
}

/**
 * Fallback support for determining if a string contains Unicode noncharacters.
 *
 * Scans the text piece by piece, stepping over invalid byte spans, and stops
 * as soon as the scanner flags a noncharacter or the end of the text is reached.
 *
 * @since 6.9.0
 * @access private
 *
 * @see \wp_has_noncharacters()
 *
 * @param string $text Are there noncharacters in this string?
 * @return bool Whether noncharacters were found in the string.
 */
function _wp_has_noncharacters_fallback( string $text ): bool {
	$at                = 0;
	$invalid_length    = 0;
	$has_noncharacters = false;
	$end               = strlen( $text );

	while ( $at < $end && ! $has_noncharacters ) {
		// The flag is passed as the sixth argument and re-checked by the loop condition.
		_wp_scan_utf8( $text, $at, $invalid_length, null, null, $has_noncharacters );

		// Step over any invalid span so the next scan resumes after it.
		$at += $invalid_length;
	}

	return $has_noncharacters;
}

/**
 * Converts a string from ISO-8859-1 to UTF-8, maintaining backwards compatibility
 * with the deprecated function from the PHP standard library.
 *
 * Runs of US-ASCII bytes are copied through unchanged; every other byte is
 * expanded into a two-byte UTF-8 sequence. Input containing only US-ASCII
 * bytes is returned as-is.
 *
 * @since 6.9.0
 * @access private
 *
 * @see \utf8_encode()
 *
 * @param string $iso_8859_1_text Text treated as ISO-8859-1 (latin1) bytes.
 * @return string Text converted into UTF-8.
 */
function _wp_utf8_encode_fallback( $iso_8859_1_text ) {
	$iso_8859_1_text = (string) $iso_8859_1_text;
	$at              = 0;
	$was_at          = 0;
	$end             = strlen( $iso_8859_1_text );
	$utf8            = '';

	while ( $at < $end ) {
		// US-ASCII bytes are identical in ISO-8859-1 and UTF-8. These are 0x00–0x7F.
		$ascii_byte_count = strspn(
			$iso_8859_1_text,
			"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" .
			"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" .
			" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f",
			$at
		);

		// Skip the ASCII run without copying yet; it is flushed lazily with the next non-ASCII byte.
		if ( $ascii_byte_count > 0 ) {
			$at += $ascii_byte_count;
			continue;
		}

		// All other bytes transform into two-byte UTF-8 sequences.
		$code_point = ord( $iso_8859_1_text[ $at ] );
		$byte1      = chr( 0xC0 | ( $code_point >> 6 ) );
		$byte2      = chr( 0x80 | ( $code_point & 0x3F ) );

		// Flush the pending unchanged bytes, then append the two-byte sequence.
		$utf8 .= substr( $iso_8859_1_text, $was_at, $at - $was_at );
		$utf8 .= "{$byte1}{$byte2}";

		++$at;
		$was_at = $at;
	}

	// `$was_at` only moves when a byte was converted, so zero means the input needed no changes.
	if ( 0 === $was_at ) {
		return $iso_8859_1_text;
	}

	// Append whatever unchanged bytes follow the last converted byte.
	$utf8 .= substr( $iso_8859_1_text, $was_at );
	return $utf8;
}

/**
 * Converts a string from UTF-8 to ISO-8859-1, maintaining backwards compatibility
 * with the deprecated function from the PHP standard library.
 *
 * US-ASCII bytes are copied through unchanged. Two-byte sequences encoding
 * code points up to U+FF become a single byte. Other code points, and each
 * invalid span of bytes, are replaced with `?`.
 *
 * @since 6.9.0
 * @access private
 *
 * @see \utf8_decode()
 *
 * @param string $utf8_text Text treated as UTF-8 bytes.
 * @return string Text converted into ISO-8859-1.
 */
function _wp_utf8_decode_fallback( $utf8_text ) {
	$utf8_text       = (string) $utf8_text;
	$at              = 0;
	$was_at          = 0;
	$end             = strlen( $utf8_text );
	$iso_8859_1_text = '';

	while ( $at < $end ) {
		// US-ASCII bytes are identical in ISO-8859-1 and UTF-8. These are 0x00–0x7F.
		$ascii_byte_count = strspn(
			$utf8_text,
			"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" .
			"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" .
			" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f",
			$at
		);

		// Skip the ASCII run without copying yet; it is flushed lazily at `flush_sub_part`.
		if ( $ascii_byte_count > 0 ) {
			$at += $ascii_byte_count;
			continue;
		}

		// Scan from a copy of the position so `$at` still points at the start of the scanned span.
		// NOTE(review): the trailing `1` is presumably a one-code-point limit — confirm against _wp_scan_utf8().
		$next_at        = $at;
		$invalid_length = 0;
		$found          = _wp_scan_utf8( $utf8_text, $next_at, $invalid_length, null, 1 );
		$span_length    = $next_at - $at;

		// Default substitution for anything that cannot be represented as a single byte.
		$next_byte = '?';

		if ( 1 !== $found ) {
			// An invalid span: emit nothing here, the `?` is appended by the invalid-span handling below.
			if ( $invalid_length > 0 ) {
				$next_byte = '';
				goto flush_sub_part;
			}

			// Nothing found and nothing invalid: stop converting.
			break;
		}

		// All convertible code points are two-bytes long.
		$byte1 = ord( $utf8_text[ $at ] );
		if ( 0xC0 !== ( $byte1 & 0xE0 ) ) {
			goto flush_sub_part;
		}

		// All convertible code points are not greater than U+FF.
		$byte2      = ord( $utf8_text[ $at + 1 ] );
		$code_point = ( ( $byte1 & 0x1F ) << 6 ) | ( ( $byte2 & 0x3F ) );
		if ( $code_point > 0xFF ) {
			goto flush_sub_part;
		}

		$next_byte = chr( $code_point );

		// Shared exit: flush pending unchanged bytes, append the replacement, advance past the span.
		flush_sub_part:
		$iso_8859_1_text .= substr( $utf8_text, $was_at, $at - $was_at );
		$iso_8859_1_text .= $next_byte;
		$at              += $span_length;
		$was_at           = $at;

		// An invalid span is replaced by a single `?` no matter how many bytes it covers.
		if ( $invalid_length > 0 ) {
			$iso_8859_1_text .= '?';
			$at              += $invalid_length;
			$was_at           = $at;
		}
	}

	// `$was_at` only moves when something was converted, so zero means the input needed no changes.
	if ( 0 === $was_at ) {
		return $utf8_text;
	}

	// Append whatever unchanged bytes follow the last converted span.
	$iso_8859_1_text .= substr( $utf8_text, $was_at );
	return $iso_8859_1_text;
}
utf8_decode' ) ) :
	if ( extension_loaded( 'mbstring' ) ) :
		/**
		 * Converts a string from UTF-8 to ISO-8859-1.
		 *
		 * @deprecated Use {@see \mb_convert_encoding()} instead.
		 *
		 * @since 6.9.0
		 *
		 * @param string $utf8_text Text treated as UTF-8.
		 * @return string Text converted into ISO-8859-1.
*/
		function utf8_decode( $utf8_text ): string {
			_deprecated_function( __FUNCTION__, '6.9.0', 'mb_convert_encoding' );

			$latin1_text = mb_convert_encoding( $utf8_text, 'ISO-8859-1', 'UTF-8' );

			return $latin1_text;
		}

	else :
		/**
		 * @ignore
		 * @private
		 *
		 * @since 6.9.0
		 */
		function utf8_decode( $utf8_text ): string {
			_deprecated_function( __FUNCTION__, '6.9.0', 'mb_convert_encoding' );

			$latin1_text = _wp_utf8_decode_fallback( $utf8_text );

			return $latin1_text;
		}

	endif;
endif;

// Load the bundled sodium_compat library when sodium_crypto_box() is not available.
if ( ! function_exists( 'sodium_crypto_box' ) ) {
	require ABSPATH . WPINC . '/sodium_compat/autoload.php';
}

if ( ! function_exists( 'array_is_list' ) ) {
	/**
	 * Polyfill for `array_is_list()` function added in PHP 8.1.
	 *
	 * Reports whether an array is a list, meaning that its keys are
	 * exactly the integers 0, 1, 2, … in that order. An empty array is a list.
	 *
	 * @see https://github.com/symfony/polyfill-php81/tree/main
	 *
	 * @since 6.5.0
	 *
	 * @param array $arr The array being evaluated.
	 * @return bool True if array is a list, false otherwise.
	 */
	function array_is_list( $arr ) {
		// Shortcut: the empty array is always a list.
		if ( array() === $arr ) {
			return true;
		}

		// Shortcut: re-indexing changes nothing when the array is already a list.
		if ( array_values( $arr ) === $arr ) {
			return true;
		}

		// Otherwise verify that the keys count upward from zero without gaps.
		$expected_key = 0;

		foreach ( $arr as $key => $unused_value ) {
			if ( $key !== $expected_key ) {
				return false;
			}

			++$expected_key;
		}

		return true;
	}
}

if ( ! function_exists( 'str_contains' ) ) {
	/**
	 * Polyfill for `str_contains()` function added in PHP 8.0.
	 *
	 * Case-sensitively checks whether the needle occurs anywhere
	 * inside the haystack. An empty needle is always contained.
	 *
	 * @since 5.9.0
	 *
	 * @param string $haystack The string to search in.
	 * @param string $needle   The substring to search for in the `$haystack`.
	 * @return bool True if `$needle` is in `$haystack`, otherwise false.
	 */
	function str_contains( $haystack, $needle ) {
		// The empty-needle check short-circuits so strpos() is never called with an empty needle.
		return '' === $needle || false !== strpos( $haystack, $needle );
	}
}

if ( ! function_exists( 'str_starts_with' ) ) {
	/**
	 * Polyfill for `str_starts_with()` function added in PHP 8.0.
*
	 * Case-sensitively checks whether the haystack
	 * begins with the needle. An empty needle always matches.
	 *
	 * @since 5.9.0
	 *
	 * @param string $haystack The string to search in.
	 * @param string $needle   The substring to search for in the `$haystack`.
	 * @return bool True if `$haystack` starts with `$needle`, otherwise false.
	 */
	function str_starts_with( $haystack, $needle ) {
		// The empty-needle check short-circuits so strpos() is never called with an empty needle.
		return '' === $needle || 0 === strpos( $haystack, $needle );
	}
}

if ( ! function_exists( 'str_ends_with' ) ) {
	/**
	 * Polyfill for `str_ends_with()` function added in PHP 8.0.
	 *
	 * Case-sensitively checks whether the haystack
	 * finishes with the needle.
	 *
	 * @since 5.9.0
	 *
	 * @param string $haystack The string to search in.
	 * @param string $needle   The substring to search for in the `$haystack`.
	 * @return bool True if `$haystack` ends with `$needle`, otherwise false.
	 */
	function str_ends_with( $haystack, $needle ) {
		// An empty haystack can only end with an empty needle.
		if ( '' === $haystack ) {
			return '' === $needle;
		}

		// Compare the needle against the same number of bytes taken from the end of the haystack.
		$needle_length = strlen( $needle );
		$haystack_tail = substr( $haystack, -$needle_length, $needle_length );

		return $needle === $haystack_tail;
	}
}

if ( ! function_exists( 'array_find' ) ) {
	/**
	 * Polyfill for `array_find()` function added in PHP 8.4.
	 *
	 * Walks the array in order and hands back the first element
	 * for which the callback returns a truthy value.
	 *
	 * @since 6.8.0
	 *
	 * @param array    $array    The array to search.
	 * @param callable $callback The callback to run for each element.
	 * @return mixed|null The first element in the array that passes the `$callback`, otherwise null.
	 */
	function array_find( array $array, callable $callback ) { // phpcs:ignore Universal.NamingConventions.NoReservedKeywordParameterNames.arrayFound
		foreach ( $array as $index => $item ) {
			// The callback receives the value first and the key second.
			if ( ! $callback( $item, $index ) ) {
				continue;
			}

			return $item;
		}

		return null;
	}
}

if ( ! function_exists( 'array_find_key' ) ) {
	/**
	 * Polyfill for `array_find_key()` function added in PHP 8.4.
	 *
	 * Searches an array for the first key that passes a given callback.
	 *
	 * @since 6.8.0
	 *
	 * @param array    $array    The array to search.
*    @param callable $callback The callback to run for each element.
	 * @return int|string|null The first key in the array that passes the `$callback`, otherwise null.
	 */
	function array_find_key( array $array, callable $callback ) { // phpcs:ignore Universal.NamingConventions.NoReservedKeywordParameterNames.arrayFound
		foreach ( $array as $index => $item ) {
			// The callback receives the value first and the key second.
			if ( ! $callback( $item, $index ) ) {
				continue;
			}

			return $index;
		}

		return null;
	}
}

if ( ! function_exists( 'array_any' ) ) {
	/**
	 * Polyfill for `array_any()` function added in PHP 8.4.
	 *
	 * Reports whether at least one element of the array passes the callback.
	 * Iteration stops at the first passing element.
	 *
	 * @since 6.8.0
	 *
	 * @param array    $array    The array to check.
	 * @param callable $callback The callback to run for each element.
	 * @return bool True if any element in the array passes the `$callback`, otherwise false.
	 */
	function array_any( array $array, callable $callback ): bool { // phpcs:ignore Universal.NamingConventions.NoReservedKeywordParameterNames.arrayFound
		$found_match = false;

		foreach ( $array as $index => $item ) {
			if ( $callback( $item, $index ) ) {
				$found_match = true;
				break;
			}
		}

		return $found_match;
	}
}

if ( ! function_exists( 'array_all' ) ) {
	/**
	 * Polyfill for `array_all()` function added in PHP 8.4.
	 *
	 * Reports whether every element of the array passes the callback.
	 * Iteration stops at the first failing element; an empty array passes.
	 *
	 * @since 6.8.0
	 *
	 * @param array    $array    The array to check.
	 * @param callable $callback The callback to run for each element.
	 * @return bool True if all elements in the array pass the `$callback`, otherwise false.
	 */
	function array_all( array $array, callable $callback ): bool { // phpcs:ignore Universal.NamingConventions.NoReservedKeywordParameterNames.arrayFound
		$every_item_passed = true;

		foreach ( $array as $index => $item ) {
			if ( ! $callback( $item, $index ) ) {
				$every_item_passed = false;
				break;
			}
		}

		return $every_item_passed;
	}
}

if ( ! function_exists( 'array_first' ) ) {
	/**
	 * Polyfill for `array_first()` function added in PHP 8.5.
	 *
	 * Returns the first element of an array.
	 *
	 * @since 6.9.0
	 *
	 * @param array $array The array to get the first element from.
*    @return mixed|null The first element of the array, or null if the array is empty.
	 */
	function array_first( array $array ) { // phpcs:ignore Universal.NamingConventions.NoReservedKeywordParameterNames.arrayFound
		// The loop body runs at most once: it returns on the very first element.
		foreach ( $array as $first_value ) {
			return $first_value;
		}

		// Only reached when the array has no elements.
		return null;
	}
}

if ( ! function_exists( 'array_last' ) ) {
	/**
	 * Polyfill for `array_last()` function added in PHP 8.5.
	 *
	 * Hands back the final element of an array, looked up through its last key.
	 *
	 * @since 6.9.0
	 *
	 * @param array $array The array to get the last element from.
	 * @return mixed|null The last element of the array, or null if the array is empty.
	 */
	function array_last( array $array ) { // phpcs:ignore Universal.NamingConventions.NoReservedKeywordParameterNames.arrayFound
		// There is no last key to look up in an empty array.
		if ( array() === $array ) {
			return null;
		}

		$last_key = array_key_last( $array );

		return $array[ $last_key ];
	}
}

// Provide IMAGETYPE_AVIF where PHP does not define it (PHP 8.x or later defines it natively).
if ( ! defined( 'IMAGETYPE_AVIF' ) ) {
	define( 'IMAGETYPE_AVIF', 19 );
}

/*
 * Provide IMG_AVIF where PHP does not define it (PHP 8.x or later defines it natively).
 *
 * NOTE(review): this aliases IMG_AVIF to IMAGETYPE_AVIF (19). PHP's native IMG_* constants
 * are presumably bit flags for imagetypes(), so the native value may differ — confirm
 * before relying on this constant in a bitmask test.
 */
if ( ! defined( 'IMG_AVIF' ) ) {
	define( 'IMG_AVIF', IMAGETYPE_AVIF );
}

// Provide IMAGETYPE_HEIF where PHP does not define it (PHP 8.5 or later defines it natively).
if ( ! defined( 'IMAGETYPE_HEIF' ) ) {
	define( 'IMAGETYPE_HEIF', 20 );
}
'ISO-8859-1' );
		}

	else :
		/**
		 * @ignore
		 * @private
		 *
		 * @since 6.9.0
		 */
		function utf8_encode( $iso_8859_1_text ): string {
			_deprecated_function( __FUNCTION__, '6.9.0', 'mb_convert_encoding' );

			return _wp_utf8_encode_fallback( $iso_8859_1_text );
		}

	endif;
endif;

if ( ! function_exists( '