diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index 23023bb..ac22ad9 100644 --- a/include/parrot/encoding.h +++ b/include/parrot/encoding.h @@ -35,8 +35,6 @@ typedef UINTVAL (*encoding_find_cclass_t)(PARROT_INTERP, STRING *s, const INTVAL struct string_iterator_t; /* s. parrot/string.h */ -typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src, - struct string_iterator_t *); typedef UINTVAL (*encoding_iter_get_t)( PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); typedef void (*encoding_iter_skip_t)( @@ -65,7 +63,6 @@ struct _encoding { encoding_become_encoding_t become_encoding; encoding_codepoints_t codepoints; encoding_bytes_t bytes; - encoding_iter_init_t iter_init; encoding_find_cclass_t find_cclass; encoding_iter_get_t iter_get; encoding_iter_skip_t iter_skip; @@ -238,8 +235,6 @@ void Parrot_str_internal_register_encoding_names(PARROT_INTERP) ((src)->encoding)->codepoints((i), (src)) #define ENCODING_BYTES(i, src) \ ((src)->encoding)->bytes((i), (src)) -#define ENCODING_ITER_INIT(i, src, iter) \ - ((src)->encoding)->iter_init((i), (src), (iter)) #define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \ ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end)) diff --git a/include/parrot/string.h b/include/parrot/string.h index 7d87f8e..ddd9254 100644 --- a/include/parrot/string.h +++ b/include/parrot/string.h @@ -29,12 +29,8 @@ typedef enum Forward_flag { /* String iterator */ typedef struct string_iterator_t { - const STRING *str; UINTVAL bytepos; UINTVAL charpos; - UINTVAL (*get_and_advance)(PARROT_INTERP, struct string_iterator_t *i); - void (*set_and_advance)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL c); - void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos); } String_iter; #define STRING_ITER_INIT(i, iter) \ diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index 712479d..0906c4c 100644 --- a/src/string/encoding/fixed_8.c +++ b/src/string/encoding/fixed_8.c @@ -45,11 +45,6 @@ static UINTVAL find_cclass(PARROT_INTERP, __attribute__nonnull__(2) __attribute__nonnull__(3); -static UINTVAL fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*iter); - static UINTVAL fixed8_iter_get(PARROT_INTERP, ARGIN(const STRING *str), ARGIN(const String_iter *iter), @@ -92,19 +87,6 @@ static void fixed8_iter_skip(SHIM_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*iter); -static void fixed8_set_next(PARROT_INTERP, - ARGMOD(String_iter *iter), - UINTVAL c) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*iter); - -static void fixed8_set_position(SHIM_INTERP, - ARGMOD(String_iter *iter), - UINTVAL pos) - __attribute__nonnull__(2) - FUNC_MODIFIES(*iter); - PARROT_WARN_UNUSED_RESULT static UINTVAL get_byte(PARROT_INTERP, ARGIN(const STRING *source_string), @@ -161,13 +143,6 @@ static STRING * get_codepoints_inplace(PARROT_INTERP, __attribute__nonnull__(5) FUNC_MODIFIES(*dest_string); -static void iter_init(SHIM_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, ARGIN(const STRING *source_string), UINTVAL offset, @@ -220,9 +195,6 @@ static STRING * to_encoding(PARROT_INTERP, PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(s) \ , PARROT_ASSERT_ARG(typetable)) -#define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ @@ -241,11 +213,6 @@ static STRING * to_encoding(PARROT_INTERP, #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(iter)) -#define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(iter)) -#define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(source_string)) @@ -266,9 +233,6 @@ static STRING * to_encoding(PARROT_INTERP, PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(source_string) \ , PARROT_ASSERT_ARG(dest_string)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(source_string)) @@ -743,87 +707,6 @@ fixed8_iter_set_position(SHIM_INTERP, /* -=item C - -Moves the string iterator C to the next codepoint. - -=cut - -*/ - -static UINTVAL -fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter)) -{ - ASSERT_ARGS(fixed8_get_next) - const UINTVAL c = get_byte(interp, iter->str, iter->charpos++); - iter->bytepos++; - return c; -} - -/* - -=item C - -With the string iterator C, appends the codepoint C and advances to the -next position in the string. - -=cut - -*/ - -static void -fixed8_set_next(PARROT_INTERP, ARGMOD(String_iter *iter), UINTVAL c) -{ - ASSERT_ARGS(fixed8_set_next) - set_byte(interp, iter->str, iter->charpos++, c); - iter->bytepos++; -} - -/* - -=item C - -Moves the string iterator C to the position C in the string. - -=cut - -*/ - -static void -fixed8_set_position(SHIM_INTERP, ARGMOD(String_iter *iter), UINTVAL pos) -{ - ASSERT_ARGS(fixed8_set_position) - iter->bytepos = iter->charpos = pos; - PARROT_ASSERT(pos <= Buffer_buflen(iter->str)); -} - - -/* - -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) - iter->str = src; - iter->bytepos = iter->charpos = 0; - iter->get_and_advance = fixed8_get_next; - iter->set_and_advance = fixed8_set_next; - iter->set_position = fixed8_set_position; -} - -/* - =item C Initializes the fixed-8 encoding. @@ -856,7 +739,6 @@ Parrot_encoding_fixed_8_init(PARROT_INTERP) become_encoding, codepoints, bytes, - iter_init, find_cclass, fixed8_iter_get, fixed8_iter_skip, diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 6a7459c..163dbac 100644 --- a/src/string/encoding/ucs2.c +++ b/src/string/encoding/ucs2.c @@ -106,14 +106,6 @@ static STRING * get_codepoints_inplace(PARROT_INTERP, SHIM(STRING *dest_string)) __attribute__nonnull__(1); -static void iter_init(PARROT_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset), @@ -151,19 +143,6 @@ static STRING * to_encoding(PARROT_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*dest); -static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - -static void ucs2_encode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i), - UINTVAL c) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - static UINTVAL ucs2_iter_get(PARROT_INTERP, ARGIN(const STRING *str), ARGIN(const String_iter *i), @@ -206,12 +185,6 @@ static void ucs2_iter_skip(SHIM_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*i); -static void ucs2_set_position(SHIM_INTERP, - ARGMOD(String_iter *i), - UINTVAL n) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - #define ASSERT_ARGS_become_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp)) #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -238,10 +211,6 @@ static void ucs2_set_position(SHIM_INTERP, , PARROT_ASSERT_ARG(src)) #define ASSERT_ARGS_get_codepoints_inplace __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp)) #define ASSERT_ARGS_set_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -255,12 +224,6 @@ static void ucs2_set_position(SHIM_INTERP, PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src) \ , PARROT_ASSERT_ARG(dest)) -#define ASSERT_ARGS_ucs2_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ @@ -279,8 +242,6 @@ static void ucs2_set_position(SHIM_INTERP, #define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ /* HEADERIZER END: static */ @@ -780,126 +741,6 @@ ucs2_iter_set_position(SHIM_INTERP, /* -=item C - -Moves the string iterator C to the next UCS-2 codepoint. - -=cut - -*/ - -static UINTVAL -ucs2_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i)) -{ - ASSERT_ARGS(ucs2_decode_and_advance) - -#if PARROT_HAS_ICU - UChar * const s = (UChar*) i->str->strstart; - size_t pos = i->bytepos / sizeof (UChar); - - /* TODO either make sure that we don't go past end or use SAFE - * iter versions - */ - const UChar c = s[pos++]; - i->charpos++; - i->bytepos = pos * sizeof (UChar); - return c; -#else - /* This function must never be called if compiled without ICU. - * See TT #557 - */ - PARROT_ASSERT(0); - return (UINTVAL)0; /* Stop the static analyzers from panicing */ -#endif -} - -/* - -=item C - -With the string iterator C, appends the codepoint C and advances to the -next position in the string. - -=cut - -*/ - -static void -ucs2_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c) -{ - ASSERT_ARGS(ucs2_encode_and_advance) - -#if PARROT_HAS_ICU - UChar * const s = (UChar*) i->str->strstart; - UINTVAL pos = i->bytepos / sizeof (UChar); - s[pos++] = (UChar)c; - i->charpos++; - i->bytepos = pos * sizeof (UChar); -#else - /* This function must never be called if compiled without ICU. - * See TT #557 - */ - PARROT_ASSERT(0); -#endif -} - -/* - -=item C - -Moves the string iterator C to the position C in the string. - -=cut - -*/ - -static void -ucs2_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n) -{ - ASSERT_ARGS(ucs2_set_position) - -#if PARROT_HAS_ICU - i->charpos = n; - i->bytepos = n * sizeof (UChar); -#else - /* This function must never be called if compiled without ICU. - * See TT #557 - */ - PARROT_ASSERT(0); -#endif -} - - -/* - -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) -#if PARROT_HAS_ICU - iter->str = src; - iter->bytepos = 0; - iter->charpos = 0; - iter->get_and_advance = ucs2_decode_and_advance; - iter->set_and_advance = ucs2_encode_and_advance; - iter->set_position = ucs2_set_position; -#else - no_ICU_lib(interp); -#endif -} - -/* - =item C Initializes the UCS-2 encoding. @@ -932,7 +773,6 @@ Parrot_encoding_ucs2_init(PARROT_INTERP) become_encoding, codepoints, bytes, - iter_init, find_cclass, ucs2_iter_get, ucs2_iter_skip, diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index f0749d9..615d176 100644 --- a/src/string/encoding/utf16.c +++ b/src/string/encoding/utf16.c @@ -101,14 +101,6 @@ static STRING * get_codepoints_inplace(PARROT_INTERP, __attribute__nonnull__(5) FUNC_MODIFIES(*return_string); -static void iter_init(PARROT_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, @@ -147,20 +139,6 @@ static STRING * to_encoding(PARROT_INTERP, __attribute__nonnull__(1) __attribute__nonnull__(2); -PARROT_WARN_UNUSED_RESULT -static UINTVAL utf16_decode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - -static void utf16_encode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i), - UINTVAL c) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - static UINTVAL utf16_iter_get(PARROT_INTERP, ARGIN(const STRING *str), ARGIN(const String_iter *i), @@ -206,13 +184,6 @@ static void utf16_iter_skip(PARROT_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*i); -static void utf16_set_position(PARROT_INTERP, - ARGMOD(String_iter *i), - UINTVAL n) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - #define ASSERT_ARGS_become_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp)) #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -243,10 +214,6 @@ static void utf16_set_position(PARROT_INTERP, PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src) \ , PARROT_ASSERT_ARG(return_string)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) @@ -262,12 +229,6 @@ static void utf16_set_position(PARROT_INTERP, #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) -#define ASSERT_ARGS_utf16_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ @@ -288,9 +249,6 @@ static void utf16_set_position(PARROT_INTERP, PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ /* HEADERIZER END: static */ @@ -960,110 +918,6 @@ utf16_iter_set_position(PARROT_INTERP, #endif } -#if PARROT_HAS_ICU -/* - -=item C - -Moves the string iterator C to the next UTF-16 codepoint. - -=cut - -*/ - -PARROT_WARN_UNUSED_RESULT -static UINTVAL -utf16_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i)) -{ - ASSERT_ARGS(utf16_decode_and_advance) - UChar *s = (UChar*) i->str->strstart; - UINTVAL c, pos; - pos = i->bytepos / sizeof (UChar); - /* TODO either make sure that we don't go past end or use SAFE - * iter versions - */ - U16_NEXT_UNSAFE(s, pos, c); - i->charpos++; - i->bytepos = pos * sizeof (UChar); - return c; -} - -/* - -=item C - -With the string iterator C, appends the codepoint C and advances to the -next position in the string. - -=cut - -*/ - -static void -utf16_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c) -{ - ASSERT_ARGS(utf16_encode_and_advance) - UChar *s = (UChar*) i->str->strstart; - UINTVAL pos; - pos = i->bytepos / sizeof (UChar); - U16_APPEND_UNSAFE(s, pos, c); - i->charpos++; - i->bytepos = pos * sizeof (UChar); -} - -/* - -=item C - -Moves the string iterator C to the position C in the string. - -=cut - -*/ - -static void -utf16_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n) -{ - ASSERT_ARGS(utf16_set_position) - UChar * const s = (UChar*) i->str->strstart; - UINTVAL pos; - pos = 0; - U16_FWD_N_UNSAFE(s, pos, n); - i->charpos = n; - i->bytepos = pos * sizeof (UChar); -} - -#endif - -/* - -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) - iter->str = src; - iter->bytepos = iter->charpos = 0; -#if PARROT_HAS_ICU - iter->get_and_advance = utf16_decode_and_advance; - iter->set_and_advance = utf16_encode_and_advance; - iter->set_position = utf16_set_position; -#else - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, - "no ICU lib loaded"); -#endif -} - /* =item C @@ -1098,7 +952,6 @@ Parrot_encoding_utf16_init(PARROT_INTERP) become_encoding, codepoints, bytes, - iter_init, find_cclass, utf16_iter_get, utf16_iter_skip, diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index fc8c262..9cd77e1 100644 --- a/src/string/encoding/utf8.c +++ b/src/string/encoding/utf8.c @@ -98,13 +98,6 @@ static STRING * get_codepoints_inplace(PARROT_INTERP, FUNC_MODIFIES(*src) FUNC_MODIFIES(*return_string); -static void iter_init(SHIM_INTERP, - ARGIN(const STRING *src), - ARGOUT(String_iter *iter)) - __attribute__nonnull__(2) - __attribute__nonnull__(3) - FUNC_MODIFIES(*iter); - static void set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, @@ -152,24 +145,11 @@ static UINTVAL utf8_decode(PARROT_INTERP, ARGIN(const utf8_t *ptr)) __attribute__nonnull__(1) __attribute__nonnull__(2); -static UINTVAL utf8_decode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i)) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - PARROT_CANNOT_RETURN_NULL static void * utf8_encode(PARROT_INTERP, ARGIN(void *ptr), UINTVAL c) __attribute__nonnull__(1) __attribute__nonnull__(2); -static void utf8_encode_and_advance(PARROT_INTERP, - ARGMOD(String_iter *i), - UINTVAL c) - __attribute__nonnull__(1) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - static UINTVAL utf8_iter_get(PARROT_INTERP, ARGIN(const STRING *str), ARGIN(const String_iter *i), @@ -212,12 +192,6 @@ static void utf8_iter_skip(SHIM_INTERP, __attribute__nonnull__(3) FUNC_MODIFIES(*i); -static void utf8_set_position(SHIM_INTERP, - ARGMOD(String_iter *i), - UINTVAL pos) - __attribute__nonnull__(2) - FUNC_MODIFIES(*i); - PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL static const void * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n) @@ -255,9 +229,6 @@ static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n) PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src) \ , PARROT_ASSERT_ARG(return_string)) -#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(src) \ - , PARROT_ASSERT_ARG(iter)) #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(src)) @@ -277,15 +248,9 @@ static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n) #define ASSERT_ARGS_utf8_decode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(ptr)) -#define ASSERT_ARGS_utf8_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf8_encode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(ptr)) -#define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(interp) \ - , PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(interp) \ , PARROT_ASSERT_ARG(str) \ @@ -304,8 +269,6 @@ static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n) #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(str) \ , PARROT_ASSERT_ARG(i)) -#define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ - PARROT_ASSERT_ARG(i)) #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(ptr)) #define ASSERT_ARGS_utf8_skip_forward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ @@ -702,115 +665,6 @@ utf8_iter_set_position(SHIM_INTERP, i->bytepos = (const char *)u8ptr - (const char *)str->strstart; } -/* - -=item C - -The UTF-8 implementation of the string iterator's C -function. - -=cut - -*/ - -static UINTVAL -utf8_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i)) -{ - ASSERT_ARGS(utf8_decode_and_advance) - const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos); - UINTVAL c = *u8ptr; - - if (UTF8_IS_START(c)) { - UINTVAL len = UTF8SKIP(u8ptr); - - c &= UTF8_START_MASK(len); - i->bytepos += len; - for (len--; len; len--) { - u8ptr++; - - if (!UTF8_IS_CONTINUATION(*u8ptr)) - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, - "Malformed UTF-8 string\n"); - - c = UTF8_ACCUMULATE(c, *u8ptr); - } - - if (UNICODE_IS_SURROGATE(c)) - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, - "Surrogate in UTF-8 string\n"); - } - else if (!UNICODE_IS_INVARIANT(c)) { - Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, - "Malformed UTF-8 string\n"); - } - else { - i->bytepos++; - } - - i->charpos++; - return c; -} - -/* - -=item C - -The UTF-8 implementation of the string iterator's C -function. - -=cut - -*/ - -static void -utf8_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c) -{ - ASSERT_ARGS(utf8_encode_and_advance) - const STRING * const s = i->str; - unsigned char * const pos = (unsigned char *)s->strstart + i->bytepos; - unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c); - - i->bytepos += (new_pos - pos); - /* XXX possible buffer overrun exception? */ - PARROT_ASSERT(i->bytepos <= Buffer_buflen(s)); - i->charpos++; -} - -/* - -=item C - -The UTF-8 implementation of the string iterator's C -function. - -=cut - -*/ - -static void -utf8_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL pos) -{ - ASSERT_ARGS(utf8_set_position) - const utf8_t *u8ptr = (const utf8_t *)i->str->strstart; - - /* start from last known charpos, if we can */ - if (i->charpos <= pos) { - const UINTVAL old_pos = pos; - pos -= i->charpos; - u8ptr += i->bytepos; - i->charpos = old_pos; - } - else - i->charpos = pos; - - while (pos-- > 0) - u8ptr += UTF8SKIP(u8ptr); - - i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart; -} - /* @@ -1249,29 +1103,6 @@ bytes(SHIM_INTERP, ARGIN(STRING *src)) /* -=item C - -Initializes for string C the string iterator C. - -=cut - -*/ - -static void -iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter)) -{ - ASSERT_ARGS(iter_init) - iter->str = src; - iter->bytepos = 0; - iter->charpos = 0; - iter->get_and_advance = utf8_decode_and_advance; - iter->set_and_advance = utf8_encode_and_advance; - iter->set_position = utf8_set_position; -} - -/* - =item C Initializes the UTF-8 encoding. @@ -1304,7 +1135,6 @@ Parrot_encoding_utf8_init(PARROT_INTERP) become_encoding, codepoints, bytes, - iter_init, find_cclass, utf8_iter_get, utf8_iter_skip,