diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index 75055b8..67d2866 100644 --- a/include/parrot/encoding.h +++ b/include/parrot/encoding.h @@ -30,8 +30,6 @@ typedef size_t (*encoding_hash_t)(PARROT_INTERP, const STRING *s, size_t hashval struct string_iterator_t; /* s. parrot/string.h */ -typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src, - struct string_iterator_t *); typedef UINTVAL (*encoding_iter_get_t)( PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); typedef void (*encoding_iter_skip_t)( @@ -54,7 +52,6 @@ struct _encoding { encoding_get_bytes_t get_bytes; encoding_codepoints_t codepoints; encoding_bytes_t bytes; - encoding_iter_init_t iter_init; encoding_find_cclass_t find_cclass; encoding_hash_t hash; encoding_iter_get_t iter_get; @@ -223,8 +220,6 @@ void Parrot_str_internal_register_encoding_names(PARROT_INTERP) ((src)->encoding)->codepoints((i), (src)) #define ENCODING_BYTES(i, src) \ ((src)->encoding)->bytes((i), (src)) -#define ENCODING_ITER_INIT(i, src, iter) \ - ((src)->encoding)->iter_init((i), (src), (iter)) #define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \ ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end)) #define ENCODING_HASH(i, src, seed) \ diff --git a/include/parrot/string.h b/include/parrot/string.h index 7d87f8e..ddd9254 100644 --- a/include/parrot/string.h +++ b/include/parrot/string.h @@ -29,12 +29,8 @@ typedef enum Forward_flag { /* String iterator */ typedef struct string_iterator_t { - const STRING *str; UINTVAL bytepos; UINTVAL charpos; - UINTVAL (*get_and_advance)(PARROT_INTERP, struct string_iterator_t *i); - void (*set_and_advance)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL c); - void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos); } String_iter; #define STRING_ITER_INIT(i, iter) \ diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index 13448f4..7733d87 100644 --- a/src/string/encoding/fixed_8.c +++ b/src/string/encoding/fixed_8.c @@ -41,11 +41,6 @@ static UINTVAL find_cclass(SHIM_INTERP, __attribute__nonnull__(2) __attribute__nonnull__(3); -static UINTVAL fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*iter); - static UINTVAL fixed8_iter_get(PARROT_INTERP, ARGIN(const STRING *str), ARGIN(const String_iter *iter), @@ -88,19 +83,6 @@ static void fixed8_iter_skip(SHIM_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*iter); -static void fixed8_set_next(PARROT_INTERP, - ARGMOD(String_iter *iter), - UINTVAL c) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*iter); - -static void fixed8_set_position(SHIM_INTERP, - ARGMOD(String_iter *iter), - UINTVAL pos) - __attribute__nonnull__(2) - FUNC_MODIFIES(*iter); - static size_t fixed_8_hash(SHIM_INTERP, ARGIN(const STRING *s), size_t hashval) @@ -137,13 +119,6 @@ static STRING * get_codepoints(PARROT_INTERP, __attribute__nonnull__(1) __attribute__nonnull__(2); -static void iter_init(SHIM_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, ARGIN(const STRING *source_string), UINTVAL offset, @@ -164,9 +139,6 @@ static STRING * to_encoding(PARROT_INTERP, SHIM(const STRING *src)) #define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(s) \ , PARROT_ASSERT_ARG(typetable)) -#define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ @@ -185,11 +157,6 @@ static STRING * to_encoding(PARROT_INTERP, SHIM(const STRING *src)) #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(iter)) -#define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(iter)) -#define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(s)) #define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -203,9 +170,6 @@ static STRING * to_encoding(PARROT_INTERP, SHIM(const STRING *src)) #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(source_string)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(source_string)) @@ -540,88 +504,6 @@ fixed8_iter_set_position(SHIM_INTERP, /* -=item C - -Moves the string iterator C to the next codepoint. - -=cut - -*/ - -static UINTVAL -fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter)) -{ - ASSERT_ARGS(fixed8_get_next) - const UINTVAL c = get_byte(interp, iter->str, iter->charpos++); - iter->bytepos++; - return c; -} - -/* - -=item C - -With the string iterator C, appends the codepoint C and advances to the -next position in the string. - -=cut - -*/ - -static void -fixed8_set_next(PARROT_INTERP, ARGMOD(String_iter *iter), UINTVAL c) -{ - ASSERT_ARGS(fixed8_set_next) - set_byte(interp, iter->str, iter->charpos++, c); - iter->bytepos++; -} - -/* - -=item C - -Moves the string iterator C to the position C in the string. - -=cut - -*/ - -static void -fixed8_set_position(SHIM_INTERP, ARGMOD(String_iter *iter), UINTVAL pos) -{ - ASSERT_ARGS(fixed8_set_position) - iter->bytepos = iter->charpos = pos; - PARROT_ASSERT(pos <= Buffer_buflen(iter->str)); -} - - -/* - -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) - iter->str = src; - iter->bytepos = iter->charpos = 0; - iter->get_and_advance = fixed8_get_next; - iter->set_and_advance = fixed8_set_next; - iter->set_position = fixed8_set_position; -} - - -/* - =item C @@ -674,7 +556,6 @@ Parrot_encoding_fixed_8_init(PARROT_INTERP) get_bytes, codepoints, bytes, - iter_init, find_cclass, fixed_8_hash, fixed8_iter_get, diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 5136d16..3b60fde 100644 --- a/src/string/encoding/ucs2.c +++ b/src/string/encoding/ucs2.c @@ -84,14 +84,6 @@ static STRING * get_codepoints(PARROT_INTERP, __attribute__nonnull__(1) __attribute__nonnull__(2); -static void iter_init(PARROT_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset), @@ -104,16 +96,6 @@ static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src)) __attribute__nonnull__(1) __attribute__nonnull__(2); -static UINTVAL ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i)) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - -static void ucs2_encode_and_advance(SHIM_INTERP, - ARGMOD(String_iter *i), - UINTVAL c) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - static size_t ucs2_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval) @@ -164,12 +146,6 @@ static void ucs2_iter_skip(PARROT_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*i); -static void ucs2_set_position(SHIM_INTERP, - ARGMOD(String_iter *i), - UINTVAL n) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(src)) #define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -189,19 +165,11 @@ static void ucs2_set_position(SHIM_INTERP, #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp)) #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) -#define ASSERT_ARGS_ucs2_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(s)) @@ -225,8 +193,6 @@ static void ucs2_set_position(SHIM_INTERP, PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ /* HEADERIZER END: static */ @@ -609,75 +575,6 @@ ucs2_iter_set_position(PARROT_INTERP, /* -=item C - -Moves the string iterator C to the next UCS-2 codepoint. - -=cut - -*/ - -static UINTVAL -ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i)) -{ - ASSERT_ARGS(ucs2_decode_and_advance) - -#if PARROT_HAS_ICU - const UChar * const s = (const UChar*) i->str->strstart; - size_t pos = i->bytepos / sizeof (UChar); - - /* TODO either make sure that we don't go past end or use SAFE - * iter versions - */ - const UChar c = s[pos++]; - i->charpos++; - i->bytepos = pos * sizeof (UChar); - return c; -#else - /* This function must never be called if compiled without ICU. - * See TT #557 - */ - PARROT_ASSERT(0); - UNUSED(i); - return (UINTVAL)0; /* Stop the static analyzers from panicing */ -#endif -} - -/* - -=item C - -With the string iterator C, appends the codepoint C and advances to the -next position in the string. - -=cut - -*/ - -static void -ucs2_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c) -{ - ASSERT_ARGS(ucs2_encode_and_advance) - -#if PARROT_HAS_ICU - UChar *s = (UChar*) i->str->strstart; - UINTVAL pos = i->bytepos / sizeof (UChar); - s[pos++] = (UChar)c; - i->charpos++; - i->bytepos = pos * sizeof (UChar); -#else - /* This function must never be called if compiled without ICU. - * See TT #557 - */ - UNUSED(i); - UNUSED(c); - PARROT_ASSERT(0); -#endif -} - -/* - =item C Returns the hashed value of the string, given a seed in hashval. @@ -710,66 +607,6 @@ ucs2_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval) #endif } - -/* - -=item C - -Moves the string iterator C to the position C in the string. - -=cut - -*/ - -static void -ucs2_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n) -{ - ASSERT_ARGS(ucs2_set_position) - -#if PARROT_HAS_ICU - i->charpos = n; - i->bytepos = n * sizeof (UChar); -#else - /* This function must never be called if compiled without ICU. - * See TT #557 - */ - UNUSED(i); - UNUSED(n); - PARROT_ASSERT(0); -#endif -} - - -/* - -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) -#if PARROT_HAS_ICU - UNUSED(interp); - iter->str = src; - iter->bytepos = 0; - iter->charpos = 0; - iter->get_and_advance = ucs2_decode_and_advance; - iter->set_and_advance = ucs2_encode_and_advance; - iter->set_position = ucs2_set_position; -#else - UNUSED(src); - UNUSED(iter); - no_ICU_lib(interp); -#endif -} - /* =item C @@ -797,7 +634,6 @@ Parrot_encoding_ucs2_init(PARROT_INTERP) get_bytes, codepoints, bytes, - iter_init, find_cclass, ucs2_hash, ucs2_iter_get, diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index 6df6e96..73fa10f 100644 --- a/src/string/encoding/utf16.c +++ b/src/string/encoding/utf16.c @@ -75,14 +75,6 @@ static STRING * get_codepoints(PARROT_INTERP, __attribute__nonnull__(1) __attribute__nonnull__(2); -static void iter_init(PARROT_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, @@ -96,17 +88,6 @@ static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src)) __attribute__nonnull__(1) __attribute__nonnull__(2); -PARROT_WARN_UNUSED_RESULT -static UINTVAL utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i)) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - -static void utf16_encode_and_advance(SHIM_INTERP, - ARGMOD(String_iter *i), - UINTVAL c) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - static UINTVAL utf16_iter_get(PARROT_INTERP, ARGIN(const STRING *str), ARGIN(const String_iter *i), @@ -152,12 +133,6 @@ static void utf16_iter_skip(PARROT_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*i); -static void utf16_set_position(SHIM_INTERP, - ARGMOD(String_iter *i), - UINTVAL n) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(src)) #define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -178,20 +153,12 @@ static void utf16_set_position(SHIM_INTERP, #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) -#define ASSERT_ARGS_utf16_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ @@ -212,8 +179,6 @@ static void utf16_set_position(SHIM_INTERP, PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ /* HEADERIZER END: static */ @@ -717,111 +682,6 @@ utf16_iter_set_position(PARROT_INTERP, #endif } -#if PARROT_HAS_ICU -/* - -=item C - -Moves the string iterator C to the next UTF-16 codepoint. - -=cut - -*/ - -PARROT_WARN_UNUSED_RESULT -static UINTVAL -utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i)) -{ - ASSERT_ARGS(utf16_decode_and_advance) - const UChar * const s = (const UChar*) i->str->strstart; - UINTVAL pos = i->bytepos / sizeof (UChar); - UINTVAL c; - - /* TODO either make sure that we don't go past end or use SAFE - * iter versions - */ - U16_NEXT_UNSAFE(s, pos, c); - i->charpos++; - i->bytepos = pos * sizeof (UChar); - return c; -} - -/* - -=item C - -With the string iterator C, appends the codepoint C and advances to the -next position in the string. - -=cut - -*/ - -static void -utf16_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c) -{ - ASSERT_ARGS(utf16_encode_and_advance) - UChar * const s = (UChar*) i->str->strstart; - UINTVAL pos = i->bytepos / sizeof (UChar); - U16_APPEND_UNSAFE(s, pos, c); - i->charpos++; - i->bytepos = pos * sizeof (UChar); -} - -/* - -=item C - -Moves the string iterator C to the position C in the string. - -=cut - -*/ - -static void -utf16_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n) -{ - ASSERT_ARGS(utf16_set_position) - UChar * const s = (UChar*) i->str->strstart; - UINTVAL pos; - pos = 0; - U16_FWD_N_UNSAFE(s, pos, n); - i->charpos = n; - i->bytepos = pos * sizeof (UChar); -} - -#endif - -/* - -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) - iter->str = src; - iter->bytepos = iter->charpos = 0; -#if PARROT_HAS_ICU - UNUSED(interp); - iter->get_and_advance = utf16_decode_and_advance; - iter->set_and_advance = utf16_encode_and_advance; - iter->set_position = utf16_set_position; -#else - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, - "no ICU lib loaded"); -#endif -} - /* =item C @@ -849,7 +709,6 @@ Parrot_encoding_utf16_init(PARROT_INTERP) get_bytes, codepoints, bytes, - iter_init, find_cclass, NULL, utf16_iter_get, diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index 55e1753..e929397 100644 --- a/src/string/encoding/utf8.c +++ b/src/string/encoding/utf8.c @@ -72,13 +72,6 @@ static STRING * get_codepoints(PARROT_INTERP, __attribute__nonnull__(1) __attribute__nonnull__(2); -static void iter_init(SHIM_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, @@ -101,24 +94,11 @@ static UINTVAL utf8_decode(PARROT_INTERP, ARGIN(const utf8_t *ptr)) __attribute__nonnull__(1) __attribute__nonnull__(2); -static UINTVAL utf8_decode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - PARROT_CANNOT_RETURN_NULL static void * utf8_encode(PARROT_INTERP, ARGIN(void *ptr), UINTVAL c) __attribute__nonnull__(1) __attribute__nonnull__(2); -static void utf8_encode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i), - UINTVAL c) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - static UINTVAL utf8_iter_get(PARROT_INTERP, ARGIN(const STRING *str), ARGIN(const String_iter *i), @@ -161,12 +141,6 @@ static void utf8_iter_skip(SHIM_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*i); -static void utf8_set_position(SHIM_INTERP, - ARGMOD(String_iter *i), - UINTVAL pos) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL static const void * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n) @@ -194,9 +168,6 @@ static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n) #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) @@ -209,15 +180,9 @@ static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n) #define ASSERT_ARGS_utf8_decode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(ptr)) -#define ASSERT_ARGS_utf8_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf8_encode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(ptr)) -#define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ @@ -236,8 +201,6 @@ static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n) #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(ptr)) #define ASSERT_ARGS_utf8_skip_forward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -634,115 +597,6 @@ utf8_iter_set_position(SHIM_INTERP, i->bytepos = (const char *)u8ptr - (const char *)str->strstart; } -/* - -=item C - -The UTF-8 implementation of the string iterator's C -function. - -=cut - -*/ - -static UINTVAL -utf8_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i)) -{ - ASSERT_ARGS(utf8_decode_and_advance) - const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos); - UINTVAL c = *u8ptr; - - if (UTF8_IS_START(c)) { - UINTVAL len = UTF8SKIP(u8ptr); - - c &= UTF8_START_MASK(len); - i->bytepos += len; - for (len--; len; len--) { - u8ptr++; - - if (!UTF8_IS_CONTINUATION(*u8ptr)) - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, - "Malformed UTF-8 string\n"); - - c = UTF8_ACCUMULATE(c, *u8ptr); - } - - if (UNICODE_IS_SURROGATE(c)) - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, - "Surrogate in UTF-8 string\n"); - } - else if (!UNICODE_IS_INVARIANT(c)) { - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, - "Malformed UTF-8 string\n"); - } - else { - i->bytepos++; - } - - i->charpos++; - return c; -} - -/* - -=item C - -The UTF-8 implementation of the string iterator's C -function. - -=cut - -*/ - -static void -utf8_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c) -{ - ASSERT_ARGS(utf8_encode_and_advance) - const STRING * const s = i->str; - unsigned char * const pos = (unsigned char *)s->strstart + i->bytepos; - unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c); - - i->bytepos += (new_pos - pos); - /* XXX possible buffer overrun exception? */ - PARROT_ASSERT(i->bytepos <= Buffer_buflen(s)); - i->charpos++; -} - -/* - -=item C - -The UTF-8 implementation of the string iterator's C -function. - -=cut - -*/ - -static void -utf8_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL pos) -{ - ASSERT_ARGS(utf8_set_position) - const utf8_t *u8ptr = (const utf8_t *)i->str->strstart; - - /* start from last known charpos, if we can */ - if (i->charpos <= pos) { - const UINTVAL old_pos = pos; - pos -= i->charpos; - u8ptr += i->bytepos; - i->charpos = old_pos; - } - else - i->charpos = pos; - - while (pos-- > 0) - u8ptr += UTF8SKIP(u8ptr); - - i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart; -} - /* @@ -1026,29 +880,6 @@ bytes(SHIM_INTERP, ARGIN(const STRING *src)) /* -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) - iter->str = src; - iter->bytepos = 0; - iter->charpos = 0; - iter->get_and_advance = utf8_decode_and_advance; - iter->set_and_advance = utf8_encode_and_advance; - iter->set_position = utf8_set_position; -} - -/* - =item C Initializes the UTF-8 encoding. @@ -1074,7 +905,6 @@ Parrot_encoding_utf8_init(PARROT_INTERP) get_bytes, codepoints, bytes, - iter_init, find_cclass, NULL, utf8_iter_get,