unichr extension 0.0.2

Surrogate (Cs) に加え、Noncharacter のコードポイントが与えられたときも unichr() が False を返すように修正したバージョンです。
インストール方法はこちら

<?xml version="1.0" ?>
<extension name="unichr" version="0.0.2">
<summary>Unicode character PHP extension</summary>
<description><![CDATA[
	Converts between UTF-8 string and Unicode code point.
	This extension is in accordance with RFC 3629 - UTF-8, a transformation format of ISO 10646.
]]></description>
<function name="unichr">
	<proto>string unichr(int code)</proto>
	<description><![CDATA[
		Get the UTF-8 encoded string which corresponds to the given code point.
		Returns FALSE when the code point is outside U+0000..U+10FFFF, is a
		surrogate (U+D800..U+DFFF), or is a noncharacter.
	]]></description>
	<code><![CDATA[
/*
 * Encode `code` as a 1-4 byte UTF-8 sequence (RFC 3629).
 *
 * The bytes are assembled in an unsigned char buffer: every lead and
 * continuation byte of a multi-byte sequence is >= 0x80, which does not fit
 * in a signed char, so doing this arithmetic in plain char would rely on
 * implementation-defined conversions.
 */
unsigned char buf[5] = { 0 };
int len = 0;

if (/* Out-of-Unicode */
	code < 0 || code > 0x10FFFF ||
	/* Surrogate */
	(code >= 0xD800 && code <= 0xDFFF) ||
	/* Noncharacter: U+FDD0..U+FDEF, plus the last two code points of each plane */
	(code >= 0xFDD0 && code <= 0xFDEF) || (code & 0xFFFE) == 0xFFFE
) {
	RETURN_FALSE;
}

if (code < 0x80) {
	/* UTF8-1, US-ASCII: 0xxxxxxx */
	buf[len++] = (unsigned char)code;
} else if (code < 0x800) {
	/* UTF8-2: 110xxxxx 10xxxxxx */
	buf[len++] = (unsigned char)(0xC0 | (code >> 6));
	buf[len++] = (unsigned char)(0x80 | (code & 0x3F));
} else if (code < 0x10000) {
	/* UTF8-3: 1110xxxx 10xxxxxx 10xxxxxx */
	buf[len++] = (unsigned char)(0xE0 | (code >> 12));
	buf[len++] = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
	buf[len++] = (unsigned char)(0x80 | (code & 0x3F));
} else {
	/* UTF8-4 (surrogate pairs in UTF-16): 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	buf[len++] = (unsigned char)(0xF0 | (code >> 18));
	buf[len++] = (unsigned char)(0x80 | ((code >> 12) & 0x3F));
	buf[len++] = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
	buf[len++] = (unsigned char)(0x80 | (code & 0x3F));
}

/* The string API takes char *, so cast only at the boundary. */
RETURN_STRINGL((char *)buf, len, 1);
	]]></code>
</function>
<function name="uniord">
	<proto>int uniord(string str)</proto>
	<description><![CDATA[
		Get the code point which corresponds to the given UTF-8 encoded string.
		Only the first encoded character of the string is decoded. Returns FALSE
		when the string is empty or does not start with a well-formed UTF-8
		sequence.
	]]></description>
	<code><![CDATA[
/*
 * Decode the leading UTF-8 sequence of `str`.
 *
 * Step 1 classifies the lead byte: it fixes the sequence length, the payload
 * bits carried by the lead byte, and the range allowed for the second byte.
 * Step 2 validates the trailing bytes and folds their low six bits into the
 * result.
 */
const unsigned char *bytes = (const unsigned char *)str;
/* Allowed range for the second byte; narrowed below for some lead bytes. */
unsigned char second_min = 0x80;
unsigned char second_max = 0xBF;
long result = 0;
int width = 0;
int i = 0;

if (str_len == 0) {
	RETURN_FALSE;
}

if (bytes[0] <= 0x7F) {
	/* UTF8-1, US-ASCII */
	width = 1;
	result = (long)(bytes[0] & 0x7F);
} else if (bytes[0] >= 0xC2 && bytes[0] <= 0xDF) {
	/* UTF8-2 (0xC0 and 0xC1 are never valid lead bytes) */
	width = 2;
	result = (long)(bytes[0] & 0x1F);
} else if (bytes[0] >= 0xE0 && bytes[0] <= 0xEF) {
	/* UTF8-3 */
	width = 3;
	result = (long)(bytes[0] & 0x0F);
	if (bytes[0] == 0xE0) {
		/* second byte below 0xA0 would encode a value under 0x800 */
		second_min = 0xA0;
	} else if (bytes[0] == 0xED) {
		/* second byte above 0x9F would encode a surrogate */
		second_max = 0x9F;
	}
} else if (bytes[0] >= 0xF0 && bytes[0] <= 0xF4) {
	/* UTF8-4, surrogate pairs */
	width = 4;
	result = (long)(bytes[0] & 0x07);
	if (bytes[0] == 0xF0) {
		/* second byte below 0x90 would encode a value under 0x10000 */
		second_min = 0x90;
	} else if (bytes[0] == 0xF4) {
		/* second byte above 0x8F would encode a value over 0x10FFFF */
		second_max = 0x8F;
	}
} else {
	/* stray continuation byte or a lead byte that is never valid */
	RETURN_FALSE;
}

/* The whole sequence must be present before any trailing byte is read. */
if (str_len < width) {
	RETURN_FALSE;
}

for (i = 1; i < width; i++) {
	unsigned char lowest = (i == 1) ? second_min : 0x80;
	unsigned char highest = (i == 1) ? second_max : 0xBF;

	if (bytes[i] < lowest || bytes[i] > highest) {
		RETURN_FALSE;
	}
	/* each trailing byte contributes its low six bits */
	result = (result << 6) | (long)(bytes[i] & 0x3F);
}

RETURN_LONG(result);
	]]></code>
</function>
</extension>