Ticket #1456: string-iter-remove-old-api-v4.diff
File string-iter-remove-old-api-v4.diff, 27.9 KB (added by nwellnhof, 12 years ago)
-
include/parrot/encoding.h
diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index 391f454..09b7105 100644
a b 35 35 36 36 struct string_iterator_t; /* s. parrot/string.h */ 37 37 38 typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src,39 struct string_iterator_t *);40 38 typedef UINTVAL (*encoding_iter_get_t)( 41 39 PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); 42 40 typedef void (*encoding_iter_skip_t)( … … 65 63 encoding_become_encoding_t become_encoding; 66 64 encoding_codepoints_t codepoints; 67 65 encoding_bytes_t bytes; 68 encoding_iter_init_t iter_init;69 66 encoding_find_cclass_t find_cclass; 70 67 encoding_iter_get_t iter_get; 71 68 encoding_iter_skip_t iter_skip; … … 233 230 ((src)->encoding)->codepoints((i), (src)) 234 231 #define ENCODING_BYTES(i, src) \ 235 232 ((src)->encoding)->bytes((i), (src)) 236 #define ENCODING_ITER_INIT(i, src, iter) \237 ((src)->encoding)->iter_init((i), (src), (iter))238 233 #define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \ 239 234 ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end)) 240 235 -
include/parrot/string.h
diff --git a/include/parrot/string.h b/include/parrot/string.h index 7d87f8e..ddd9254 100644
a b 29 29 30 30 /* String iterator */ 31 31 typedef struct string_iterator_t { 32 const STRING *str;33 32 UINTVAL bytepos; 34 33 UINTVAL charpos; 35 UINTVAL (*get_and_advance)(PARROT_INTERP, struct string_iterator_t *i);36 void (*set_and_advance)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL c);37 void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos);38 34 } String_iter; 39 35 40 36 #define STRING_ITER_INIT(i, iter) \ -
src/string/encoding/fixed_8.c
diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index 712479d..0906c4c 100644
a b 45 45 __attribute__nonnull__(2) 46 46 __attribute__nonnull__(3); 47 47 48 static UINTVAL fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))49 __attribute__nonnull__(1)50 __attribute__nonnull__(2)51 FUNC_MODIFIES(*iter);52 53 48 static UINTVAL fixed8_iter_get(PARROT_INTERP, 54 49 ARGIN(const STRING *str), 55 50 ARGIN(const String_iter *iter), … … 92 87 __attribute__nonnull__(3) 93 88 FUNC_MODIFIES(*iter); 94 89 95 static void fixed8_set_next(PARROT_INTERP,96 ARGMOD(String_iter *iter),97 UINTVAL c)98 __attribute__nonnull__(1)99 __attribute__nonnull__(2)100 FUNC_MODIFIES(*iter);101 102 static void fixed8_set_position(SHIM_INTERP,103 ARGMOD(String_iter *iter),104 UINTVAL pos)105 __attribute__nonnull__(2)106 FUNC_MODIFIES(*iter);107 108 90 PARROT_WARN_UNUSED_RESULT 109 91 static UINTVAL get_byte(PARROT_INTERP, 110 92 ARGIN(const STRING *source_string), … … 161 143 __attribute__nonnull__(5) 162 144 FUNC_MODIFIES(*dest_string); 163 145 164 static void iter_init(SHIM_INTERP,165 ARGIN(const STRING *src),166 ARGOUT(String_iter *iter))167 __attribute__nonnull__(2)168 __attribute__nonnull__(3)169 FUNC_MODIFIES(*iter);170 171 146 static void set_byte(PARROT_INTERP, 172 147 ARGIN(const STRING *source_string), 173 148 UINTVAL offset, … … 220 195 PARROT_ASSERT_ARG(interp) \ 221 196 , PARROT_ASSERT_ARG(s) \ 222 197 , PARROT_ASSERT_ARG(typetable)) 223 #define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\224 PARROT_ASSERT_ARG(interp) \225 , PARROT_ASSERT_ARG(iter))226 198 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 227 199 PARROT_ASSERT_ARG(interp) \ 228 200 , PARROT_ASSERT_ARG(str) \ … … 241 213 #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 242 214 PARROT_ASSERT_ARG(str) \ 243 215 , PARROT_ASSERT_ARG(iter)) 244 #define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\245 PARROT_ASSERT_ARG(interp) \246 , PARROT_ASSERT_ARG(iter))247 
#define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\248 PARROT_ASSERT_ARG(iter))249 216 #define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 250 217 PARROT_ASSERT_ARG(interp) \ 251 218 , PARROT_ASSERT_ARG(source_string)) … … 266 233 PARROT_ASSERT_ARG(interp) \ 267 234 , PARROT_ASSERT_ARG(source_string) \ 268 235 , PARROT_ASSERT_ARG(dest_string)) 269 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\270 PARROT_ASSERT_ARG(src) \271 , PARROT_ASSERT_ARG(iter))272 236 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 273 237 PARROT_ASSERT_ARG(interp) \ 274 238 , PARROT_ASSERT_ARG(source_string)) … … 743 707 744 708 /* 745 709 746 =item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)>747 748 Moves the string iterator C<i> to the next codepoint.749 750 =cut751 752 */753 754 static UINTVAL755 fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))756 {757 ASSERT_ARGS(fixed8_get_next)758 const UINTVAL c = get_byte(interp, iter->str, iter->charpos++);759 iter->bytepos++;760 return c;761 }762 763 /*764 765 =item C<static void fixed8_set_next(PARROT_INTERP, String_iter *iter, UINTVAL766 c)>767 768 With the string iterator C<i>, appends the codepoint C<c> and advances to the769 next position in the string.770 771 =cut772 773 */774 775 static void776 fixed8_set_next(PARROT_INTERP, ARGMOD(String_iter *iter), UINTVAL c)777 {778 ASSERT_ARGS(fixed8_set_next)779 set_byte(interp, iter->str, iter->charpos++, c);780 iter->bytepos++;781 }782 783 /*784 785 =item C<static void fixed8_set_position(PARROT_INTERP, String_iter *iter,786 UINTVAL pos)>787 788 Moves the string iterator C<i> to the position C<n> in the string.789 790 =cut791 792 */793 794 static void795 fixed8_set_position(SHIM_INTERP, ARGMOD(String_iter *iter), UINTVAL pos)796 {797 ASSERT_ARGS(fixed8_set_position)798 iter->bytepos = iter->charpos = pos;799 PARROT_ASSERT(pos <= 
Buffer_buflen(iter->str));800 }801 802 803 /*804 805 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter806 *iter)>807 808 Initializes for string C<src> the string iterator C<iter>.809 810 =cut811 812 */813 814 static void815 iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))816 {817 ASSERT_ARGS(iter_init)818 iter->str = src;819 iter->bytepos = iter->charpos = 0;820 iter->get_and_advance = fixed8_get_next;821 iter->set_and_advance = fixed8_set_next;822 iter->set_position = fixed8_set_position;823 }824 825 /*826 827 710 =item C<ENCODING * Parrot_encoding_fixed_8_init(PARROT_INTERP)> 828 711 829 712 Initializes the fixed-8 encoding. … … 856 739 become_encoding, 857 740 codepoints, 858 741 bytes, 859 iter_init,860 742 find_cclass, 861 743 fixed8_iter_get, 862 744 fixed8_iter_skip, -
src/string/encoding/ucs2.c
diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 6a7459c..163dbac 100644
a b 106 106 SHIM(STRING *dest_string)) 107 107 __attribute__nonnull__(1); 108 108 109 static void iter_init(PARROT_INTERP,110 ARGIN(const STRING *src),111 ARGOUT(String_iter *iter))112 __attribute__nonnull__(1)113 __attribute__nonnull__(2)114 __attribute__nonnull__(3)115 FUNC_MODIFIES(*iter);116 117 109 static void set_byte(PARROT_INTERP, 118 110 SHIM(const STRING *src), 119 111 SHIM(UINTVAL offset), … … 151 143 __attribute__nonnull__(3) 152 144 FUNC_MODIFIES(*dest); 153 145 154 static UINTVAL ucs2_decode_and_advance(PARROT_INTERP,155 ARGMOD(String_iter *i))156 __attribute__nonnull__(1)157 __attribute__nonnull__(2)158 FUNC_MODIFIES(*i);159 160 static void ucs2_encode_and_advance(PARROT_INTERP,161 ARGMOD(String_iter *i),162 UINTVAL c)163 __attribute__nonnull__(1)164 __attribute__nonnull__(2)165 FUNC_MODIFIES(*i);166 167 146 static UINTVAL ucs2_iter_get(PARROT_INTERP, 168 147 ARGIN(const STRING *str), 169 148 ARGIN(const String_iter *i), … … 206 185 __attribute__nonnull__(3) 207 186 FUNC_MODIFIES(*i); 208 187 209 static void ucs2_set_position(SHIM_INTERP,210 ARGMOD(String_iter *i),211 UINTVAL n)212 __attribute__nonnull__(2)213 FUNC_MODIFIES(*i);214 215 188 #define ASSERT_ARGS_become_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 216 189 PARROT_ASSERT_ARG(interp)) 217 190 #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 238 211 , PARROT_ASSERT_ARG(src)) 239 212 #define ASSERT_ARGS_get_codepoints_inplace __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 240 213 PARROT_ASSERT_ARG(interp)) 241 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\242 PARROT_ASSERT_ARG(interp) \243 , PARROT_ASSERT_ARG(src) \244 , PARROT_ASSERT_ARG(iter))245 214 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 246 215 PARROT_ASSERT_ARG(interp)) 247 216 #define ASSERT_ARGS_set_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 255 224 PARROT_ASSERT_ARG(interp) \ 256 225 , 
PARROT_ASSERT_ARG(src) \ 257 226 , PARROT_ASSERT_ARG(dest)) 258 #define ASSERT_ARGS_ucs2_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\259 PARROT_ASSERT_ARG(interp) \260 , PARROT_ASSERT_ARG(i))261 #define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\262 PARROT_ASSERT_ARG(interp) \263 , PARROT_ASSERT_ARG(i))264 227 #define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 265 228 PARROT_ASSERT_ARG(interp) \ 266 229 , PARROT_ASSERT_ARG(str) \ … … 279 242 #define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 280 243 PARROT_ASSERT_ARG(str) \ 281 244 , PARROT_ASSERT_ARG(i)) 282 #define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\283 PARROT_ASSERT_ARG(i))284 245 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 285 246 /* HEADERIZER END: static */ 286 247 … … 780 741 781 742 /* 782 743 783 =item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)>784 785 Moves the string iterator C<i> to the next UCS-2 codepoint.786 787 =cut788 789 */790 791 static UINTVAL792 ucs2_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))793 {794 ASSERT_ARGS(ucs2_decode_and_advance)795 796 #if PARROT_HAS_ICU797 UChar * const s = (UChar*) i->str->strstart;798 size_t pos = i->bytepos / sizeof (UChar);799 800 /* TODO either make sure that we don't go past end or use SAFE801 * iter versions802 */803 const UChar c = s[pos++];804 i->charpos++;805 i->bytepos = pos * sizeof (UChar);806 return c;807 #else808 /* This function must never be called if compiled without ICU.809 * See TT #557810 */811 PARROT_ASSERT(0);812 return (UINTVAL)0; /* Stop the static analyzers from panicing */813 #endif814 }815 816 /*817 818 =item C<static void ucs2_encode_and_advance(PARROT_INTERP, String_iter *i,819 UINTVAL c)>820 821 With the string iterator C<i>, appends the codepoint C<c> and advances to the822 
next position in the string.823 824 =cut825 826 */827 828 static void829 ucs2_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)830 {831 ASSERT_ARGS(ucs2_encode_and_advance)832 833 #if PARROT_HAS_ICU834 UChar * const s = (UChar*) i->str->strstart;835 UINTVAL pos = i->bytepos / sizeof (UChar);836 s[pos++] = (UChar)c;837 i->charpos++;838 i->bytepos = pos * sizeof (UChar);839 #else840 /* This function must never be called if compiled without ICU.841 * See TT #557842 */843 PARROT_ASSERT(0);844 #endif845 }846 847 /*848 849 =item C<static void ucs2_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>850 851 Moves the string iterator C<i> to the position C<n> in the string.852 853 =cut854 855 */856 857 static void858 ucs2_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n)859 {860 ASSERT_ARGS(ucs2_set_position)861 862 #if PARROT_HAS_ICU863 i->charpos = n;864 i->bytepos = n * sizeof (UChar);865 #else866 /* This function must never be called if compiled without ICU.867 * See TT #557868 */869 PARROT_ASSERT(0);870 #endif871 }872 873 874 /*875 876 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter877 *iter)>878 879 Initializes for string C<src> the string iterator C<iter>.880 881 =cut882 883 */884 885 static void886 iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))887 {888 ASSERT_ARGS(iter_init)889 #if PARROT_HAS_ICU890 iter->str = src;891 iter->bytepos = 0;892 iter->charpos = 0;893 iter->get_and_advance = ucs2_decode_and_advance;894 iter->set_and_advance = ucs2_encode_and_advance;895 iter->set_position = ucs2_set_position;896 #else897 no_ICU_lib(interp);898 #endif899 }900 901 /*902 903 744 =item C<ENCODING * Parrot_encoding_ucs2_init(PARROT_INTERP)> 904 745 905 746 Initializes the UCS-2 encoding. … … 932 773 become_encoding, 933 774 codepoints, 934 775 bytes, 935 iter_init,936 776 find_cclass, 937 777 ucs2_iter_get, 938 778 ucs2_iter_skip, -
src/string/encoding/utf16.c
diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index 4d810a7..59bc6c9 100644
a b 101 101 __attribute__nonnull__(5) 102 102 FUNC_MODIFIES(*return_string); 103 103 104 static void iter_init(PARROT_INTERP,105 ARGIN(const STRING *src),106 ARGOUT(String_iter *iter))107 __attribute__nonnull__(1)108 __attribute__nonnull__(2)109 __attribute__nonnull__(3)110 FUNC_MODIFIES(*iter);111 112 104 static void set_byte(PARROT_INTERP, 113 105 ARGIN(const STRING *src), 114 106 UINTVAL offset, … … 147 139 __attribute__nonnull__(1) 148 140 __attribute__nonnull__(2); 149 141 150 PARROT_WARN_UNUSED_RESULT151 static UINTVAL utf16_decode_and_advance(PARROT_INTERP,152 ARGMOD(String_iter *i))153 __attribute__nonnull__(1)154 __attribute__nonnull__(2)155 FUNC_MODIFIES(*i);156 157 static void utf16_encode_and_advance(PARROT_INTERP,158 ARGMOD(String_iter *i),159 UINTVAL c)160 __attribute__nonnull__(1)161 __attribute__nonnull__(2)162 FUNC_MODIFIES(*i);163 164 142 static UINTVAL utf16_iter_get(PARROT_INTERP, 165 143 ARGIN(const STRING *str), 166 144 ARGIN(const String_iter *i), … … 206 184 __attribute__nonnull__(3) 207 185 FUNC_MODIFIES(*i); 208 186 209 static void utf16_set_position(PARROT_INTERP,210 ARGMOD(String_iter *i),211 UINTVAL n)212 __attribute__nonnull__(1)213 __attribute__nonnull__(2)214 FUNC_MODIFIES(*i);215 216 187 #define ASSERT_ARGS_become_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 217 188 PARROT_ASSERT_ARG(interp)) 218 189 #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 243 214 PARROT_ASSERT_ARG(interp) \ 244 215 , PARROT_ASSERT_ARG(src) \ 245 216 , PARROT_ASSERT_ARG(return_string)) 246 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\247 PARROT_ASSERT_ARG(interp) \248 , PARROT_ASSERT_ARG(src) \249 , PARROT_ASSERT_ARG(iter))250 217 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 251 218 PARROT_ASSERT_ARG(interp) \ 252 219 , PARROT_ASSERT_ARG(src)) … … 262 229 #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 263 230 
PARROT_ASSERT_ARG(interp) \ 264 231 , PARROT_ASSERT_ARG(src)) 265 #define ASSERT_ARGS_utf16_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\266 PARROT_ASSERT_ARG(interp) \267 , PARROT_ASSERT_ARG(i))268 #define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\269 PARROT_ASSERT_ARG(interp) \270 , PARROT_ASSERT_ARG(i))271 232 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 272 233 PARROT_ASSERT_ARG(interp) \ 273 234 , PARROT_ASSERT_ARG(str) \ … … 288 249 PARROT_ASSERT_ARG(interp) \ 289 250 , PARROT_ASSERT_ARG(str) \ 290 251 , PARROT_ASSERT_ARG(i)) 291 #define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\292 PARROT_ASSERT_ARG(interp) \293 , PARROT_ASSERT_ARG(i))294 252 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 295 253 /* HEADERIZER END: static */ 296 254 … … 960 918 #endif 961 919 } 962 920 963 #if PARROT_HAS_ICU964 /*965 966 =item C<static UINTVAL utf16_decode_and_advance(PARROT_INTERP, String_iter *i)>967 968 Moves the string iterator C<i> to the next UTF-16 codepoint.969 970 =cut971 972 */973 974 PARROT_WARN_UNUSED_RESULT975 static UINTVAL976 utf16_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))977 {978 ASSERT_ARGS(utf16_decode_and_advance)979 UChar *s = (UChar*) i->str->strstart;980 UINTVAL c, pos;981 pos = i->bytepos / sizeof (UChar);982 /* TODO either make sure that we don't go past end or use SAFE983 * iter versions984 */985 U16_NEXT_UNSAFE(s, pos, c);986 i->charpos++;987 i->bytepos = pos * sizeof (UChar);988 return c;989 }990 991 /*992 993 =item C<static void utf16_encode_and_advance(PARROT_INTERP, String_iter *i,994 UINTVAL c)>995 996 With the string iterator C<i>, appends the codepoint C<c> and advances to the997 next position in the string.998 999 =cut1000 1001 */1002 1003 static void1004 utf16_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)1005 {1006 
ASSERT_ARGS(utf16_encode_and_advance)1007 UChar *s = (UChar*) i->str->strstart;1008 UINTVAL pos;1009 pos = i->bytepos / sizeof (UChar);1010 U16_APPEND_UNSAFE(s, pos, c);1011 i->charpos++;1012 i->bytepos = pos * sizeof (UChar);1013 }1014 1015 /*1016 1017 =item C<static void utf16_set_position(PARROT_INTERP, String_iter *i, UINTVAL1018 n)>1019 1020 Moves the string iterator C<i> to the position C<n> in the string.1021 1022 =cut1023 1024 */1025 1026 static void1027 utf16_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n)1028 {1029 ASSERT_ARGS(utf16_set_position)1030 UChar * const s = (UChar*) i->str->strstart;1031 UINTVAL pos;1032 pos = 0;1033 U16_FWD_N_UNSAFE(s, pos, n);1034 i->charpos = n;1035 i->bytepos = pos * sizeof (UChar);1036 }1037 1038 #endif1039 1040 /*1041 1042 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter1043 *iter)>1044 1045 Initializes for string C<src> the string iterator C<iter>.1046 1047 =cut1048 1049 */1050 1051 static void1052 iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))1053 {1054 ASSERT_ARGS(iter_init)1055 iter->str = src;1056 iter->bytepos = iter->charpos = 0;1057 #if PARROT_HAS_ICU1058 iter->get_and_advance = utf16_decode_and_advance;1059 iter->set_and_advance = utf16_encode_and_advance;1060 iter->set_position = utf16_set_position;1061 #else1062 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,1063 "no ICU lib loaded");1064 #endif1065 }1066 1067 921 /* 1068 922 1069 923 =item C<ENCODING * Parrot_encoding_utf16_init(PARROT_INTERP)> … … 1098 952 become_encoding, 1099 953 codepoints, 1100 954 bytes, 1101 iter_init,1102 955 find_cclass, 1103 956 utf16_iter_get, 1104 957 utf16_iter_skip, -
src/string/encoding/utf8.c
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index 4706596..b50fbd5 100644
a b 98 98 FUNC_MODIFIES(*src) 99 99 FUNC_MODIFIES(*return_string); 100 100 101 static void iter_init(SHIM_INTERP,102 ARGIN(const STRING *src),103 ARGOUT(String_iter *iter))104 __attribute__nonnull__(2)105 __attribute__nonnull__(3)106 FUNC_MODIFIES(*iter);107 108 101 static void set_byte(PARROT_INTERP, 109 102 ARGIN(const STRING *src), 110 103 UINTVAL offset, … … 152 145 __attribute__nonnull__(1) 153 146 __attribute__nonnull__(2); 154 147 155 static UINTVAL utf8_decode_and_advance(PARROT_INTERP,156 ARGMOD(String_iter *i))157 __attribute__nonnull__(1)158 __attribute__nonnull__(2)159 FUNC_MODIFIES(*i);160 161 148 PARROT_CANNOT_RETURN_NULL 162 149 static void * utf8_encode(PARROT_INTERP, ARGIN(void *ptr), UINTVAL c) 163 150 __attribute__nonnull__(1) 164 151 __attribute__nonnull__(2); 165 152 166 static void utf8_encode_and_advance(PARROT_INTERP,167 ARGMOD(String_iter *i),168 UINTVAL c)169 __attribute__nonnull__(1)170 __attribute__nonnull__(2)171 FUNC_MODIFIES(*i);172 173 153 static UINTVAL utf8_iter_get(PARROT_INTERP, 174 154 ARGIN(const STRING *str), 175 155 ARGIN(const String_iter *i), … … 212 192 __attribute__nonnull__(3) 213 193 FUNC_MODIFIES(*i); 214 194 215 static void utf8_set_position(SHIM_INTERP,216 ARGMOD(String_iter *i),217 UINTVAL pos)218 __attribute__nonnull__(2)219 FUNC_MODIFIES(*i);220 221 195 PARROT_WARN_UNUSED_RESULT 222 196 PARROT_CANNOT_RETURN_NULL 223 197 static const void * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n) … … 255 229 PARROT_ASSERT_ARG(interp) \ 256 230 , PARROT_ASSERT_ARG(src) \ 257 231 , PARROT_ASSERT_ARG(return_string)) 258 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\259 PARROT_ASSERT_ARG(src) \260 , PARROT_ASSERT_ARG(iter))261 232 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 262 233 PARROT_ASSERT_ARG(interp) \ 263 234 , PARROT_ASSERT_ARG(src)) … … 277 248 #define ASSERT_ARGS_utf8_decode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 278 249 
PARROT_ASSERT_ARG(interp) \ 279 250 , PARROT_ASSERT_ARG(ptr)) 280 #define ASSERT_ARGS_utf8_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\281 PARROT_ASSERT_ARG(interp) \282 , PARROT_ASSERT_ARG(i))283 251 #define ASSERT_ARGS_utf8_encode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 284 252 PARROT_ASSERT_ARG(interp) \ 285 253 , PARROT_ASSERT_ARG(ptr)) 286 #define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\287 PARROT_ASSERT_ARG(interp) \288 , PARROT_ASSERT_ARG(i))289 254 #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 290 255 PARROT_ASSERT_ARG(interp) \ 291 256 , PARROT_ASSERT_ARG(str) \ … … 304 269 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 305 270 PARROT_ASSERT_ARG(str) \ 306 271 , PARROT_ASSERT_ARG(i)) 307 #define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\308 PARROT_ASSERT_ARG(i))309 272 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 310 273 PARROT_ASSERT_ARG(ptr)) 311 274 #define ASSERT_ARGS_utf8_skip_forward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 702 665 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 703 666 } 704 667 705 /*706 707 =item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)>708 709 The UTF-8 implementation of the string iterator's C<get_and_advance>710 function.711 712 =cut713 714 */715 716 static UINTVAL717 utf8_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))718 {719 ASSERT_ARGS(utf8_decode_and_advance)720 const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);721 UINTVAL c = *u8ptr;722 723 if (UTF8_IS_START(c)) {724 UINTVAL len = UTF8SKIP(u8ptr);725 726 c &= UTF8_START_MASK(len);727 i->bytepos += len;728 for (len--; len; len--) {729 u8ptr++;730 731 if (!UTF8_IS_CONTINUATION(*u8ptr))732 Parrot_ex_throw_from_c_args(interp, NULL, 
EXCEPTION_MALFORMED_UTF8,733 "Malformed UTF-8 string\n");734 735 c = UTF8_ACCUMULATE(c, *u8ptr);736 }737 738 if (UNICODE_IS_SURROGATE(c))739 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,740 "Surrogate in UTF-8 string\n");741 }742 else if (!UNICODE_IS_INVARIANT(c)) {743 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,744 "Malformed UTF-8 string\n");745 }746 else {747 i->bytepos++;748 }749 750 i->charpos++;751 return c;752 }753 754 /*755 756 =item C<static void utf8_encode_and_advance(PARROT_INTERP, String_iter *i,757 UINTVAL c)>758 759 The UTF-8 implementation of the string iterator's C<set_and_advance>760 function.761 762 =cut763 764 */765 766 static void767 utf8_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)768 {769 ASSERT_ARGS(utf8_encode_and_advance)770 const STRING * const s = i->str;771 unsigned char * const pos = (unsigned char *)s->strstart + i->bytepos;772 unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c);773 774 i->bytepos += (new_pos - pos);775 /* XXX possible buffer overrun exception? 
*/776 PARROT_ASSERT(i->bytepos <= Buffer_buflen(s));777 i->charpos++;778 }779 780 /*781 782 =item C<static void utf8_set_position(PARROT_INTERP, String_iter *i, UINTVAL783 pos)>784 785 The UTF-8 implementation of the string iterator's C<set_position>786 function.787 788 =cut789 790 */791 792 static void793 utf8_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL pos)794 {795 ASSERT_ARGS(utf8_set_position)796 const utf8_t *u8ptr = (const utf8_t *)i->str->strstart;797 798 /* start from last known charpos, if we can */799 if (i->charpos <= pos) {800 const UINTVAL old_pos = pos;801 pos -= i->charpos;802 u8ptr += i->bytepos;803 i->charpos = old_pos;804 }805 else806 i->charpos = pos;807 808 while (pos-- > 0)809 u8ptr += UTF8SKIP(u8ptr);810 811 i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart;812 }813 814 668 815 669 /* 816 670 … … 1249 1103 1250 1104 /* 1251 1105 1252 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter1253 *iter)>1254 1255 Initializes for string C<src> the string iterator C<iter>.1256 1257 =cut1258 1259 */1260 1261 static void1262 iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))1263 {1264 ASSERT_ARGS(iter_init)1265 iter->str = src;1266 iter->bytepos = 0;1267 iter->charpos = 0;1268 iter->get_and_advance = utf8_decode_and_advance;1269 iter->set_and_advance = utf8_encode_and_advance;1270 iter->set_position = utf8_set_position;1271 }1272 1273 /*1274 1275 1106 =item C<ENCODING * Parrot_encoding_utf8_init(PARROT_INTERP)> 1276 1107 1277 1108 Initializes the UTF-8 encoding. … … 1304 1135 become_encoding, 1305 1136 codepoints, 1306 1137 bytes, 1307 iter_init,1308 1138 find_cclass, 1309 1139 utf8_iter_get, 1310 1140 utf8_iter_skip,