Ticket #1456: string-iter-v6.diff
File string-iter-v6.diff, 69.9 KB (added by nwellnhof, 12 years ago) |
---|
-
include/parrot/encoding.h
diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index d2f5833..75055b8 100644
a b 32 32 33 33 typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src, 34 34 struct string_iterator_t *); 35 typedef UINTVAL (*encoding_iter_get_t)( 36 PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); 37 typedef void (*encoding_iter_skip_t)( 38 PARROT_INTERP, const STRING *str, String_iter *i, INTVAL skip); 39 typedef UINTVAL (*encoding_iter_get_and_advance_t)( 40 PARROT_INTERP, const STRING *str, String_iter *i); 41 typedef void (*encoding_iter_set_and_advance_t)( 42 PARROT_INTERP, STRING *str, String_iter *i, UINTVAL c); 43 typedef void (*encoding_iter_set_position_t)( 44 PARROT_INTERP, const STRING *str, String_iter *i, UINTVAL pos); 35 45 36 46 struct _encoding { 37 47 ARGIN(const char *name); … … 47 57 encoding_iter_init_t iter_init; 48 58 encoding_find_cclass_t find_cclass; 49 59 encoding_hash_t hash; 60 encoding_iter_get_t iter_get; 61 encoding_iter_skip_t iter_skip; 62 encoding_iter_get_and_advance_t iter_get_and_advance; 63 encoding_iter_set_and_advance_t iter_set_and_advance; 64 encoding_iter_set_position_t iter_set_position; 50 65 }; 51 66 52 67 typedef struct _encoding ENCODING; -
include/parrot/string.h
diff --git a/include/parrot/string.h b/include/parrot/string.h index fb6a3be..7d87f8e 100644
a b 37 37 void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos); 38 38 } String_iter; 39 39 40 #define STRING_ITER_INIT(i, iter) \ 41 (iter)->charpos = (iter)->bytepos = 0 42 #define STRING_ITER_GET(i, str, iter, offset) \ 43 ((str)->encoding)->iter_get((i), (str), (iter), (offset)) 44 #define STRING_ITER_SKIP(i, str, iter, skip) \ 45 ((str)->encoding)->iter_skip((i), (str), (iter), (skip)) 46 #define STRING_ITER_GET_AND_ADVANCE(i, str, iter) \ 47 ((str)->encoding)->iter_get_and_advance((i), (str), (iter)) 48 #define STRING_ITER_SET_AND_ADVANCE(i, str, iter, c) \ 49 ((str)->encoding)->iter_set_and_advance((i), (str), (iter), (c)) 50 #define STRING_ITER_SET_POSITION(i, str, iter, pos) \ 51 ((str)->encoding)->iter_set_position((i), (str), (iter), (pos)) 52 40 53 #define STREQ(x, y) (strcmp((x), (y))==0) 41 54 #define STRNEQ(x, y) (strcmp((x), (y))!=0) 42 55 -
include/parrot/string_funcs.h
diff --git a/include/parrot/string_funcs.h b/include/parrot/string_funcs.h index a328cb5..657cfb4 100644
a b 221 221 INTVAL Parrot_str_is_null(SHIM_INTERP, ARGIN_NULLOK(const STRING *s)); 222 222 223 223 PARROT_EXPORT 224 INTVAL Parrot_str_iter_index(PARROT_INTERP, 225 ARGIN(const STRING *src), 226 ARGMOD(String_iter *start), 227 ARGMOD(String_iter *end), 228 ARGIN(const STRING *search)) 229 __attribute__nonnull__(1) 230 __attribute__nonnull__(2) 231 __attribute__nonnull__(3) 232 __attribute__nonnull__(4) 233 __attribute__nonnull__(5) 234 FUNC_MODIFIES(*start) 235 FUNC_MODIFIES(*end); 236 237 PARROT_EXPORT 238 PARROT_CANNOT_RETURN_NULL 239 PARROT_WARN_UNUSED_RESULT 240 STRING * Parrot_str_iter_substr(PARROT_INTERP, 241 ARGIN(const STRING *str), 242 ARGIN(const String_iter *l), 243 ARGIN_NULLOK(const String_iter *r)) 244 __attribute__nonnull__(1) 245 __attribute__nonnull__(2) 246 __attribute__nonnull__(3); 247 248 PARROT_EXPORT 224 249 PARROT_WARN_UNUSED_RESULT 225 250 PARROT_CANNOT_RETURN_NULL 226 251 STRING* Parrot_str_join(PARROT_INTERP, … … 537 562 PARROT_ASSERT_ARG(interp) \ 538 563 , PARROT_ASSERT_ARG(s)) 539 564 #define ASSERT_ARGS_Parrot_str_is_null __attribute__unused__ int _ASSERT_ARGS_CHECK = (0) 565 #define ASSERT_ARGS_Parrot_str_iter_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 566 PARROT_ASSERT_ARG(interp) \ 567 , PARROT_ASSERT_ARG(src) \ 568 , PARROT_ASSERT_ARG(start) \ 569 , PARROT_ASSERT_ARG(end) \ 570 , PARROT_ASSERT_ARG(search)) 571 #define ASSERT_ARGS_Parrot_str_iter_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 572 PARROT_ASSERT_ARG(interp) \ 573 , PARROT_ASSERT_ARG(str) \ 574 , PARROT_ASSERT_ARG(l)) 540 575 #define ASSERT_ARGS_Parrot_str_join __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 541 576 PARROT_ASSERT_ARG(interp) \ 542 577 , PARROT_ASSERT_ARG(ar)) -
src/io/utf8.c
diff --git a/src/io/utf8.c b/src/io/utf8.c index c875861..56ad0fe 100644
a b 57 57 s->encoding = Parrot_utf8_encoding_ptr; 58 58 59 59 /* count chars, verify utf8 */ 60 Parrot_utf8_encoding_ptr->iter_init(interp, s, &iter);60 STRING_ITER_INIT(interp, &iter); 61 61 62 62 while (iter.bytepos < s->bufused) { 63 63 if (iter.bytepos + 4 > s->bufused) { … … 93 93 } 94 94 } 95 95 ok: 96 iter.get_and_advance(interp, &iter);96 Parrot_utf8_encoding_ptr->iter_get_and_advance(interp, *buf, &iter); 97 97 } 98 98 s->strlen = iter.charpos; 99 99 return len; -
src/pmc/stringiterator.pmc
diff --git a/src/pmc/stringiterator.pmc b/src/pmc/stringiterator.pmc index e003998..e2e86e8 100644
a b 27 27 /* HEADERIZER END: static */ 28 28 29 29 pmclass StringIterator auto_attrs extends Iterator { 30 ATTR PMC *string; /* String to iterate over */ 31 ATTR INTVAL pos; /* Current position of iterator for forward iterator */ 32 /* Previous position of iterator for reverse iterator */ 33 ATTR INTVAL length; /* Length of C<string> */ 34 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 30 ATTR PMC *string; /* String PMC to iterate over */ 31 ATTR STRING *str_val; /* The actual string */ 32 ATTR String_iter iter; /* String iterator */ 33 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 35 34 36 35 /* 37 36 … … 43 42 44 43 */ 45 44 VTABLE void init_pmc(PMC *string) { 45 Parrot_StringIterator_attributes * const attrs = 46 PARROT_STRINGITERATOR(SELF); 47 STRING * const str_val = VTABLE_get_string(INTERP, string); 48 46 49 SET_ATTR_string(INTERP, SELF, string); 50 SET_ATTR_str_val(INTERP, SELF, str_val); 51 STRING_ITER_INIT(INTERP, &attrs->iter); 47 52 48 53 /* by default, iterate from start */ 49 54 SELF.set_integer_native(ITERATE_FROM_START); … … 62 67 63 68 VTABLE void mark() { 64 69 PMC *string; 70 STRING *str_val; 71 65 72 GET_ATTR_string(INTERP, SELF, string); 66 73 Parrot_gc_mark_PMC_alive(INTERP, string); 74 GET_ATTR_str_val(INTERP, SELF, str_val); 75 Parrot_gc_mark_STRING_alive(INTERP, str_val); 67 76 } 68 77 69 78 /* … … 81 90 Parrot_StringIterator_attributes * const clone_attrs = 82 91 PARROT_STRINGITERATOR(clone); 83 92 84 clone_attrs->pos = attrs->pos; 93 /* TODO: this isn't safe if the string PMC has changed */ 94 clone_attrs->iter = attrs->iter; 85 95 clone_attrs->reverse = attrs->reverse; 86 96 return clone; 87 97 } … … 114 124 Parrot_StringIterator_attributes * const attrs = 115 125 PARROT_STRINGITERATOR(SELF); 116 126 if (attrs->reverse) 117 return attrs-> pos;127 return attrs->iter.charpos; 118 128 else 119 return attrs-> length - attrs->pos;129 return attrs->str_val->strlen - 
attrs->iter.charpos; 120 130 } 121 131 122 132 VTABLE INTVAL get_integer() { … … 141 151 PARROT_STRINGITERATOR(SELF); 142 152 if (value == ITERATE_FROM_START) { 143 153 attrs->reverse = 0; 144 attrs->pos = 0; 145 attrs->length = VTABLE_elements(INTERP, attrs->string); 154 STRING_ITER_SET_POSITION(INTERP, attrs->str_val, &attrs->iter, 0); 146 155 } 147 156 else if (value == ITERATE_FROM_END) { 148 157 attrs->reverse = 1; 149 attrs->pos = attrs->length 150 = VTABLE_elements(INTERP, attrs->string); 158 STRING_ITER_SET_POSITION(INTERP, attrs->str_val, &attrs->iter, attrs->str_val->strlen); 151 159 } 152 160 else 153 161 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION, … … 183 191 Parrot_StringIterator_attributes * const attrs = 184 192 PARROT_STRINGITERATOR(SELF); 185 193 PMC *ret; 194 STRING *str; 195 const String_iter old_iter = attrs->iter; 186 196 187 if (attrs-> pos >= attrs->length)197 if (attrs->iter.charpos >= attrs->str_val->strlen) 188 198 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 189 199 "StopIteration"); 190 200 191 201 ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 192 VTABLE_set_string_native(INTERP, ret, 193 VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++)); 202 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, 1); 203 str = Parrot_str_iter_substr(INTERP, attrs->str_val, &old_iter, &attrs->iter); 204 VTABLE_set_string_native(INTERP, ret, str); 194 205 return ret; 195 206 } 196 207 … … 206 217 VTABLE STRING *shift_string() { 207 218 Parrot_StringIterator_attributes * const attrs = 208 219 PARROT_STRINGITERATOR(SELF); 220 const String_iter old_iter = attrs->iter; 209 221 210 if (attrs-> pos >= attrs->length)222 if (attrs->iter.charpos >= attrs->str_val->strlen) 211 223 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 212 224 "StopIteration"); 213 225 214 return VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++); 226 
STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, 1); 227 return Parrot_str_iter_substr(INTERP, attrs->str_val, &old_iter, &attrs->iter); 215 228 } 216 229 217 230 /* … … 227 240 Parrot_StringIterator_attributes * const attrs = 228 241 PARROT_STRINGITERATOR(SELF); 229 242 230 if (attrs-> pos >= attrs->length)243 if (attrs->iter.charpos >= attrs->str_val->strlen) 231 244 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 232 245 "StopIteration"); 233 246 234 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, attrs->pos++);247 return STRING_ITER_GET_AND_ADVANCE(INTERP, attrs->str_val, &attrs->iter); 235 248 } 236 249 237 250 /* … … 247 260 Parrot_StringIterator_attributes * const attrs = 248 261 PARROT_STRINGITERATOR(SELF); 249 262 PMC *ret; 263 STRING * str; 264 const String_iter old_iter = attrs->iter; 250 265 251 if ( !STATICSELF.get_bool())266 if (attrs->iter.charpos <= 0) 252 267 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 253 268 "StopIteration"); 254 269 255 270 ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 256 VTABLE_set_string_native(INTERP, ret, 257 VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos)); 271 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 272 str = Parrot_str_iter_substr(INTERP, attrs->str_val, &attrs->iter, &old_iter); 273 VTABLE_set_string_native(INTERP, ret, str); 258 274 return ret; 259 275 } 260 276 … … 270 286 VTABLE STRING *pop_string() { 271 287 Parrot_StringIterator_attributes * const attrs = 272 288 PARROT_STRINGITERATOR(SELF); 289 const String_iter old_iter = attrs->iter; 273 290 274 if ( !STATICSELF.get_bool())291 if (attrs->iter.charpos <= 0) 275 292 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 276 293 "StopIteration"); 277 294 278 return VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos); 295 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 296 return 
Parrot_str_iter_substr(INTERP, attrs->str_val, &attrs->iter, &old_iter); 279 297 } 280 298 281 299 /* … … 291 309 Parrot_StringIterator_attributes * const attrs = 292 310 PARROT_STRINGITERATOR(SELF); 293 311 294 if ( !STATICSELF.get_bool())312 if (attrs->iter.charpos <= 0) 295 313 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 296 314 "StopIteration"); 297 315 298 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, --attrs->pos); 316 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 317 return STRING_ITER_GET(INTERP, attrs->str_val, &attrs->iter, 0); 299 318 } 300 319 301 320 /* … … 309 328 */ 310 329 311 330 VTABLE INTVAL get_integer_keyed_int(INTVAL idx) { 312 return VTABLE_get_integer_keyed_int(INTERP, STATICSELF.get_pmc(), 313 PARROT_STRINGITERATOR(SELF)->pos + idx); 331 Parrot_StringIterator_attributes * const attrs = 332 PARROT_STRINGITERATOR(SELF); 333 const UINTVAL offset = attrs->iter.charpos + idx; 334 335 if (offset >= attrs->str_val->strlen) 336 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 337 "StopIteration"); 338 339 return STRING_ITER_GET(INTERP, attrs->str_val, &attrs->iter, idx); 314 340 } 315 341 316 342 /* … … 324 350 */ 325 351 326 352 VTABLE STRING *get_string_keyed_int(INTVAL idx) { 327 return VTABLE_get_string_keyed_int(INTERP, STATICSELF.get_pmc(), 328 PARROT_STRINGITERATOR(SELF)->pos + idx); 353 Parrot_StringIterator_attributes * const attrs = 354 PARROT_STRINGITERATOR(SELF); 355 const UINTVAL offset = attrs->iter.charpos + idx; 356 String_iter iter, next_iter; 357 358 if (offset >= attrs->str_val->strlen) 359 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 360 "StopIteration"); 361 362 iter = attrs->iter; 363 if (idx != 0) 364 STRING_ITER_SKIP(INTERP, attrs->str_val, &iter, idx); 365 next_iter = iter; 366 STRING_ITER_SKIP(INTERP, attrs->str_val, &next_iter, 1); 367 368 return Parrot_str_iter_substr(INTERP, attrs->str_val, &iter, &next_iter); 329 369 } 330 370 } 
331 371 -
src/string/api.c
diff --git a/src/string/api.c b/src/string/api.c index 8c1dc2e..c9564ee 100644
a b 1092 1092 return CHARSET_GET_CODEPOINTS(interp, src, true_offset, true_length); 1093 1093 } 1094 1094 1095 /* 1096 1097 =item C<STRING * Parrot_str_iter_substr(PARROT_INTERP, const STRING *str, const 1098 String_iter *l, const String_iter *r)> 1099 1100 Returns the substring between iterators C<l> and C<r>. 1101 1102 =cut 1103 1104 */ 1105 1106 PARROT_EXPORT 1107 PARROT_CANNOT_RETURN_NULL 1108 PARROT_WARN_UNUSED_RESULT 1109 STRING * 1110 Parrot_str_iter_substr(PARROT_INTERP, 1111 ARGIN(const STRING *str), 1112 ARGIN(const String_iter *l), ARGIN_NULLOK(const String_iter *r)) 1113 { 1114 ASSERT_ARGS(Parrot_str_iter_substr) 1115 STRING *dest = Parrot_str_copy(interp, str); 1116 1117 dest->strstart = (char *)dest->strstart + l->bytepos; 1118 1119 if (r == NULL) { 1120 dest->bufused = str->bufused - l->bytepos; 1121 dest->strlen = str->strlen - l->charpos; 1122 } 1123 else { 1124 dest->bufused = r->bytepos - l->bytepos; 1125 dest->strlen = r->charpos - l->charpos; 1126 } 1127 1128 dest->hashval = 0; 1129 1130 return dest; 1131 } 1132 1133 /* 1134 1135 =item C<INTVAL Parrot_str_iter_index(PARROT_INTERP, const STRING *src, 1136 String_iter *start, String_iter *end, const STRING *search)> 1137 1138 Find the next occurrence of STRING C<search> in STRING C<src> starting at 1139 String_iter C<start>. If C<search> is found C<start> is modified to mark the 1140 beginning of C<search> and String_iter C<end> is set to the character after 1141 C<search> in C<src>. Returns the character position where C<search> was found 1142 or -1 if it wasn't found. 
1143 1144 =cut 1145 1146 */ 1147 1148 PARROT_EXPORT 1149 INTVAL 1150 Parrot_str_iter_index(PARROT_INTERP, 1151 ARGIN(const STRING *src), 1152 ARGMOD(String_iter *start), ARGMOD(String_iter *end), 1153 ARGIN(const STRING *search)) 1154 { 1155 ASSERT_ARGS(Parrot_str_iter_index) 1156 String_iter search_iter; 1157 const UINTVAL len = search->strlen; 1158 1159 *end = *start; 1160 1161 if (len == 0) { 1162 return start->charpos; 1163 } 1164 1165 STRING_ITER_INIT(interp, &search_iter); 1166 1167 if (len == 1) { 1168 const UINTVAL c0 = STRING_ITER_GET(interp, search, &search_iter, 0); 1169 1170 while (start->charpos < src->strlen) { 1171 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, end); 1172 if (c == c0) 1173 return start->charpos; 1174 *start = *end; 1175 } 1176 } 1177 else { 1178 const UINTVAL c0 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter); 1179 String_iter search_start = search_iter; 1180 1181 while (1) { 1182 String_iter src_start_iter; 1183 UINTVAL c1, c2; 1184 1185 do { 1186 *start = *end; 1187 if (start->charpos + len > src->strlen) 1188 return -1; 1189 c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, end); 1190 } while (c1 != c0); 1191 1192 do { 1193 if (search_iter.charpos >= len) 1194 return start->charpos; 1195 c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, end); 1196 c2 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter); 1197 } while (c1 == c2); 1198 1199 STRING_ITER_SKIP(interp, src, start, 1); 1200 *end = *start; 1201 search_iter = search_start; 1202 } 1203 } 1204 1205 return -1; 1206 } 1207 1095 1208 1096 1209 /* 1097 1210 … … 1169 1282 } 1170 1283 1171 1284 /* get byte position of the part that will be replaced */ 1172 ENCODING_ITER_INIT(interp, src, &iter);1285 STRING_ITER_INIT(interp, &iter); 1173 1286 1174 iter.set_position(interp, &iter, true_offset);1287 STRING_ITER_SET_POSITION(interp, src, &iter, true_offset); 1175 1288 start_byte = iter.bytepos; 1176 1289 1177 iter.set_position(interp, &iter, true_offset + 
true_length);1290 STRING_ITER_SET_POSITION(interp, src, &iter, true_offset + true_length); 1178 1291 end_byte = iter.bytepos; 1179 1292 1180 1293 /* not possible.... */ … … 1240 1353 ASSERT_ARGS(Parrot_str_chopn) 1241 1354 1242 1355 STRING * const chopped = Parrot_str_copy(interp, s); 1243 UINTVAL new_length , uchar_size;1356 UINTVAL new_length; 1244 1357 1245 1358 if (n < 0) { 1246 1359 new_length = -n; … … 1261 1374 return chopped; 1262 1375 } 1263 1376 1264 uchar_size = chopped->bufused / chopped->strlen;1265 chopped->strlen = new_length;1266 1267 1377 if (chopped->encoding == Parrot_fixed_8_encoding_ptr) { 1268 1378 chopped->bufused = new_length; 1269 1379 } 1270 1380 else if (chopped->encoding == Parrot_ucs2_encoding_ptr) { 1381 const UINTVAL uchar_size = chopped->bufused / chopped->strlen; 1271 1382 chopped->bufused = new_length * uchar_size; 1272 1383 } 1273 1384 else { 1274 1385 String_iter iter; 1275 1386 1276 ENCODING_ITER_INIT(interp, s, &iter);1277 iter.set_position(interp, &iter, new_length);1387 STRING_ITER_INIT(interp, &iter); 1388 STRING_ITER_SET_POSITION(interp, s, &iter, new_length); 1278 1389 chopped->bufused = iter.bytepos; 1279 1390 } 1280 1391 1392 chopped->strlen = new_length; 1393 1281 1394 return chopped; 1282 1395 } 1283 1396 … … 1848 1961 int sign = 1; 1849 1962 UINTVAL i = 0; 1850 1963 String_iter iter; 1851 UINTVAL offs;1852 1964 number_parse_state state = parse_start; 1853 1965 1854 ENCODING_ITER_INIT(interp, s, &iter);1966 STRING_ITER_INIT(interp, &iter); 1855 1967 1856 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {1857 const UINTVAL c = iter.get_and_advance(interp, &iter);1968 while (state != parse_end && iter.charpos < s->strlen) { 1969 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 1858 1970 /* Check for overflow */ 1859 1971 if (c > 255) 1860 1972 break; … … 1944 2056 int d_length = 0; 1945 2057 int check_nan = 0; /* Check for NaN and Inf after main loop */ 1946 2058 String_iter iter; 1947 
UINTVAL offs;1948 2059 number_parse_state state = parse_start; 1949 2060 1950 2061 if (STRING_IS_NULL(s)) 1951 2062 return 0.0; 1952 2063 1953 ENCODING_ITER_INIT(interp, s, &iter);2064 STRING_ITER_INIT(interp, &iter); 1954 2065 1955 2066 /* Handcrafter FSM to read float value */ 1956 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {1957 const UINTVAL c = iter.get_and_advance(interp, &iter);2067 while (state != parse_end && iter.charpos < s->strlen) { 2068 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 1958 2069 /* Check for overflow */ 1959 2070 if (c > 255) 1960 2071 break; … … 2320 2431 { 2321 2432 ASSERT_ARGS(Parrot_str_to_hashval) 2322 2433 String_iter iter; 2323 UINTVAL offs;2324 2434 size_t hashval = interp->hash_seed; 2325 2435 2326 2436 if (STRING_IS_NULL(s) || !s->strlen) … … 2332 2442 /* ZZZZZ workaround for something not setting up encodings right */ 2333 2443 ASSERT_STRING_SANITY(s); 2334 2444 2335 ENCODING_ITER_INIT(interp, s, &iter);2445 STRING_ITER_INIT(interp, &iter); 2336 2446 2337 for (offs = 0; offs < s->strlen; ++offs) {2338 const UINTVAL c = iter.get_and_advance(interp, &iter);2447 while (iter.charpos < s->strlen) { 2448 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 2339 2449 hashval += hashval << 5; 2340 2450 hashval += c; 2341 2451 } … … 2414 2524 Parrot_fixed_8_encoding_ptr, Parrot_ascii_charset_ptr, 0); 2415 2525 2416 2526 /* more work TODO */ 2417 ENCODING_ITER_INIT(interp, src, &iter);2527 STRING_ITER_INIT(interp, &iter); 2418 2528 dp = (unsigned char *)result->strstart; 2419 2529 2420 2530 for (i = 0; len > 0; --len) { 2421 UINTVAL c = iter.get_and_advance(interp, &iter);2531 UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 2422 2532 if (c < 0x7f) { 2423 2533 /* process ASCII chars */ 2424 2534 if (i >= charlen - 2) { … … 2572 2682 encoding = result->encoding; 2573 2683 } 2574 2684 2575 encoding->iter_init(interp, result, &iter);2685 STRING_ITER_INIT(interp, &iter); 2576 
2686 2577 2687 for (offs = d = 0; offs < clength; ++offs) { 2578 2688 r = (Parrot_UInt4)((unsigned char *)result->strstart)[offs]; … … 2595 2705 } 2596 2706 2597 2707 PARROT_ASSERT(d < offs); 2598 iter.set_and_advance(interp, &iter, r);2708 encoding->iter_set_and_advance(interp, result, &iter, r); 2599 2709 ++d; 2600 2710 } 2601 2711 … … 3092 3202 ARGIN_NULLOK(STRING *delim), ARGIN_NULLOK(STRING *str)) 3093 3203 { 3094 3204 ASSERT_ARGS(Parrot_str_split) 3095 PMC *res; 3096 INTVAL slen, dlen, ps, pe; 3205 PMC *res; 3206 STRING *tstr; 3207 UINTVAL slen, dlen; 3208 String_iter iter; 3097 3209 3098 3210 if (STRING_IS_NULL(delim) || STRING_IS_NULL(str)) 3099 3211 return PMCNULL; … … 3105 3217 if (!slen) 3106 3218 return res; 3107 3219 3220 STRING_ITER_INIT(interp, &iter); 3108 3221 dlen = Parrot_str_byte_length(interp, delim); 3109 3222 3110 3223 if (dlen == 0) { 3111 int i;3112 3224 VTABLE_set_integer_native(interp, res, slen); 3113 3225 3114 for (i = 0; i < slen; ++i) { 3115 STRING * const p = Parrot_str_substr(interp, str, i, 1); 3116 VTABLE_set_string_keyed_int(interp, res, i, p); 3117 } 3118 3119 return res; 3120 } 3226 do { 3227 const String_iter old_iter = iter; 3121 3228 3122 pe = Parrot_str_find_index(interp, str, delim, 0); 3229 STRING_ITER_SKIP(interp, str, &iter, 1); 3230 tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter); 3231 VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr); 3232 } while (iter.charpos < slen); 3123 3233 3124 if (pe < 0) {3125 VTABLE_push_string(interp, res, str);3126 3234 return res; 3127 3235 } 3128 3236 3129 ps = 0; 3130 3131 while (ps <= slen) { 3132 const int pl = pe - ps; 3133 STRING * const tstr = Parrot_str_substr(interp, str, ps, pl); 3134 3135 VTABLE_push_string(interp, res, tstr); 3136 ps = pe + Parrot_str_byte_length(interp, delim); 3237 do { 3238 String_iter start, end; 3239 INTVAL pos; 3137 3240 3138 if (ps > slen) 3241 start = iter; 3242 if (Parrot_str_iter_index(interp, str, &start, &end, delim) < 
0) 3139 3243 break; 3140 3244 3141 pe = Parrot_str_find_index(interp, str, delim, ps); 3245 tstr = Parrot_str_iter_substr(interp, str, &iter, &start); 3246 VTABLE_push_string(interp, res, tstr); 3247 iter = end; 3248 } while (iter.charpos < slen); 3142 3249 3143 if (pe < 0) 3144 pe = slen; 3145 } 3250 tstr = Parrot_str_iter_substr(interp, str, &iter, NULL); 3251 VTABLE_push_string(interp, res, tstr); 3146 3252 3147 3253 return res; 3148 3254 } -
src/string/charset/ascii.c
diff --git a/src/string/charset/ascii.c b/src/string/charset/ascii.c index c5aae87..95bdee6 100644
a b 206 206 { 207 207 ASSERT_ARGS(to_ascii) 208 208 String_iter iter; 209 UINTVAL offs;210 209 unsigned char *p; 211 210 const UINTVAL len = src->strlen; 212 211 … … 214 213 STRING * dest = Parrot_str_clone(interp, src); 215 214 216 215 p = (unsigned char *)dest->strstart; 217 ENCODING_ITER_INIT(interp, src, &iter);218 for (offs = 0; offs < len; ++offs) {219 const UINTVAL c = iter.get_and_advance(interp, &iter);216 STRING_ITER_INIT(interp, &iter); 217 while (iter.charpos < len) { 218 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 220 219 if (c >= 128) 221 220 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 222 221 "can't convert unicode string to ascii"); … … 496 495 return ret_val < 0 ? -1 : 1; 497 496 } 498 497 else { 499 UINTVAL offs; 500 ENCODING_ITER_INIT(interp, rhs, &iter); 501 for (offs = 0; offs < min_len; ++offs) { 502 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, offs); 503 const UINTVAL cr = iter.get_and_advance(interp, &iter); 498 STRING_ITER_INIT(interp, &iter); 499 while (iter.charpos < min_len) { 500 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos); 501 const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter); 504 502 if (cl != cr) 505 503 return cl < cr ? 
-1 : 1; 506 504 } … … 534 532 UINTVAL offs) 535 533 { 536 534 ASSERT_ARGS(mixed_cs_index) 537 String_iter src_iter, search_iter; 538 UINTVAL len, next_pos; 539 INTVAL found_at; 540 541 ENCODING_ITER_INIT(interp, src, &src_iter); 542 src_iter.set_position(interp, &src_iter, offs); 543 ENCODING_ITER_INIT(interp, search, &search_iter); 544 len = search->strlen; 545 546 found_at = -1; 547 next_pos = offs; 548 549 for (; len && offs < src->strlen ;) { 550 const UINTVAL c1 = src_iter.get_and_advance(interp, &src_iter); 551 const UINTVAL c2 = search_iter.get_and_advance(interp, &search_iter); 552 553 if (c1 == c2) { 554 --len; 555 if (found_at == -1) 556 found_at = offs; 557 ++offs; 558 } 559 else { 560 len = search->strlen; 561 ++offs; 562 ++next_pos; 563 if (offs != next_pos) { 564 src_iter.set_position(interp, &src_iter, next_pos); 565 offs = next_pos; 566 } 567 568 found_at = -1; 569 search_iter.set_position(interp, &search_iter, 0); 570 } 571 } 572 if (len == 0) 573 return found_at; 574 return -1; 535 String_iter start, end; 536 537 STRING_ITER_INIT(interp, &start); 538 STRING_ITER_SET_POSITION(interp, src, &start, offs); 539 540 return Parrot_str_iter_index(interp, src, &start, &end, search); 575 541 } 576 542 577 543 /* … … 650 616 validate(PARROT_INTERP, ARGIN(STRING *src)) 651 617 { 652 618 ASSERT_ARGS(validate) 653 UINTVAL offset;619 const UINTVAL len = Parrot_str_byte_length(interp, src); 654 620 String_iter iter; 655 621 656 ENCODING_ITER_INIT(interp, src, &iter);657 for (offset = 0; offset < Parrot_str_byte_length(interp, src); ++offset) {658 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);622 STRING_ITER_INIT(interp, &iter); 623 while (iter.charpos < len) { 624 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 659 625 if (codepoint >= 0x80) 660 626 return 0; 661 627 } -
src/string/charset/iso-8859-1.c
diff --git a/src/string/charset/iso-8859-1.c b/src/string/charset/iso-8859-1.c index c1d1d8e..f3d0564 100644
a b 181 181 to_iso_8859_1(PARROT_INTERP, ARGIN(STRING *src)) 182 182 { 183 183 ASSERT_ARGS(to_iso_8859_1) 184 UINTVAL offs,src_len;184 UINTVAL src_len; 185 185 String_iter iter; 186 186 /* iso-8859-1 is never bigger then source */ 187 187 STRING * dest = Parrot_str_clone(interp, src); 188 188 189 ENCODING_ITER_INIT(interp, src, &iter);189 STRING_ITER_INIT(interp, &iter); 190 190 src_len = src->strlen; 191 191 dest->bufused = src_len; 192 dest->charset = Parrot_iso_8859_1_charset_ptr; 193 dest->encoding = Parrot_fixed_8_encoding_ptr; 194 for (offs = 0; offs < src_len; ++offs) { 195 const UINTVAL c = iter.get_and_advance(interp, &iter); 192 while (iter.charpos < src_len) { 193 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 196 194 if (c >= 0x100) 197 195 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 198 196 "lossy conversion to iso-8559-1"); 199 197 200 ENCODING_SET_BYTE(interp, dest, offs, c);198 Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c); 201 199 } 200 dest->charset = Parrot_iso_8859_1_charset_ptr; 201 dest->encoding = Parrot_fixed_8_encoding_ptr; 202 202 return dest; 203 203 } 204 204 … … 224 224 dest->charset = Parrot_unicode_charset_ptr; 225 225 dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest); 226 226 Parrot_gc_reallocate_string_storage(interp, dest, src->strlen); 227 ENCODING_ITER_INIT(interp, dest, &iter);228 for (offs = 0; offs < src->strlen; ++offs) {229 const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);227 STRING_ITER_INIT(interp, &iter); 228 while (iter.charpos < src->strlen) { 229 const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos); 230 230 231 231 if (iter.bytepos >= Buffer_buflen(dest) - 4) { 232 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);232 UINTVAL need = (UINTVAL)((src->strlen - iter.charpos) * 1.5); 233 233 if (need < 16) 234 234 need = 16; 235 235 Parrot_gc_reallocate_string_storage(interp, dest, 236 236 Buffer_buflen(dest) + need); 237 
237 } 238 iter.set_and_advance(interp, &iter, c);238 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c); 239 239 } 240 240 dest->bufused = iter.bytepos; 241 241 dest->strlen = iter.charpos; -
src/string/charset/unicode.c
diff --git a/src/string/charset/unicode.c b/src/string/charset/unicode.c index 0ce8e9c..310576c 100644
a b 657 657 { 658 658 ASSERT_ARGS(compare) 659 659 String_iter l_iter, r_iter; 660 UINTVAL offs, cl, cr,min_len, l_len, r_len;660 UINTVAL min_len, l_len, r_len; 661 661 662 662 /* TODO make optimized equal - strings are equal length then already */ 663 ENCODING_ITER_INIT(interp, lhs, &l_iter);664 ENCODING_ITER_INIT(interp, rhs, &r_iter);663 STRING_ITER_INIT(interp, &l_iter); 664 STRING_ITER_INIT(interp, &r_iter); 665 665 666 666 l_len = lhs->strlen; 667 667 r_len = rhs->strlen; 668 668 669 669 min_len = l_len > r_len ? r_len : l_len; 670 670 671 for (offs = 0; offs < min_len; ++offs) {672 cl = l_iter.get_and_advance(interp, &l_iter);673 cr = r_iter.get_and_advance(interp, &r_iter);671 while (l_iter.charpos < min_len) { 672 UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter); 673 UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter); 674 674 675 675 if (cl != cr) 676 676 return cl < cr ? -1 : 1; … … 722 722 validate(PARROT_INTERP, ARGIN(STRING *src)) 723 723 { 724 724 ASSERT_ARGS(validate) 725 UINTVAL offset;725 UINTVAL len = Parrot_str_byte_length(interp, src); 726 726 String_iter iter; 727 727 728 ENCODING_ITER_INIT(interp, src, &iter);729 for (offset = 0; offset < Parrot_str_byte_length(interp, src); ++offset) {730 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);728 STRING_ITER_INIT(interp, &iter); 729 while (iter.charpos < len) { 730 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 731 731 /* Check for Unicode non-characters */ 732 732 if (codepoint >= 0xfdd0 733 733 && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe) … … 878 878 ASSERT_ARGS(find_cclass) 879 879 String_iter iter; 880 880 UINTVAL codepoint; 881 UINTVAL pos = offset;882 881 UINTVAL end = offset + count; 883 882 884 ENCODING_ITER_INIT(interp, source_string, &iter); 885 886 iter.set_position(interp, &iter, pos); 883 STRING_ITER_INIT(interp, &iter); 884 STRING_ITER_SET_POSITION(interp, source_string, &iter, offset); 887 885 888 886 
end = source_string->strlen < end ? source_string->strlen : end; 889 887 890 for (; pos < end; ++pos) {891 codepoint = iter.get_and_advance(interp, &iter);888 while (iter.charpos < end) { 889 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, source_string, &iter); 892 890 if (codepoint >= 256) { 893 891 if (u_iscclass(interp, codepoint, flags)) 894 return pos;892 return iter.charpos - 1; 895 893 } 896 894 else { 897 895 if (Parrot_iso_8859_1_typetable[codepoint] & flags) 898 return pos;896 return iter.charpos - 1; 899 897 } 900 898 } 901 899 … … 919 917 ASSERT_ARGS(find_not_cclass) 920 918 String_iter iter; 921 919 UINTVAL codepoint; 922 UINTVAL pos = offset;923 920 UINTVAL end = offset + count; 924 921 int bit; 925 922 926 if ( pos> source_string->strlen) {923 if (offset > source_string->strlen) { 927 924 /* XXX: Throw in this case? */ 928 925 return offset + count; 929 926 } 930 927 931 ENCODING_ITER_INIT(interp, source_string, &iter);928 STRING_ITER_INIT(interp, &iter); 932 929 933 if ( pos)934 iter.set_position(interp, &iter, pos);930 if (offset) 931 STRING_ITER_SET_POSITION(interp, source_string, &iter, offset); 935 932 936 933 end = source_string->strlen < end ? 
source_string->strlen : end; 937 934 938 935 if (flags == enum_cclass_any) 939 936 return end; 940 937 941 for (; pos < end; ++pos) {942 codepoint = iter.get_and_advance(interp, &iter);938 while (iter.charpos < end) { 939 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, source_string, &iter); 943 940 if (codepoint >= 256) { 944 941 for (bit = enum_cclass_uppercase; 945 942 bit <= enum_cclass_word ; bit <<= 1) { 946 943 if ((bit & flags) && !u_iscclass(interp, codepoint, bit)) 947 return pos;944 return iter.charpos - 1; 948 945 } 949 946 } 950 947 else { 951 948 if (!(Parrot_iso_8859_1_typetable[codepoint] & flags)) 952 return pos;949 return iter.charpos - 1; 953 950 } 954 951 } 955 952 … … 977 974 978 975 dest->strlen = 1; 979 976 980 ENCODING_ITER_INIT(interp, dest, &iter);981 iter.set_and_advance(interp, &iter, codepoint);977 STRING_ITER_INIT(interp, &iter); 978 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint); 982 979 dest->bufused = iter.bytepos; 983 980 984 981 return dest; … … 1001 998 { 1002 999 ASSERT_ARGS(compute_hash) 1003 1000 String_iter iter; 1004 UINTVAL offs;1005 1001 size_t hashval = seed; 1006 1002 1007 ENCODING_ITER_INIT(interp, src, &iter);1003 STRING_ITER_INIT(interp, &iter); 1008 1004 1009 for (offs = 0; offs < src->strlen; ++offs) {1010 const UINTVAL c = iter.get_and_advance(interp, &iter);1005 while (iter.charpos < src->strlen) { 1006 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 1011 1007 hashval += hashval << 5; 1012 1008 hashval += c; 1013 1009 } -
src/string/encoding/fixed_8.c
diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index 60b6128..13448f4 100644
a b 46 46 __attribute__nonnull__(2) 47 47 FUNC_MODIFIES(*iter); 48 48 49 static UINTVAL fixed8_iter_get(PARROT_INTERP, 50 ARGIN(const STRING *str), 51 ARGIN(const String_iter *iter), 52 INTVAL offset) 53 __attribute__nonnull__(1) 54 __attribute__nonnull__(2) 55 __attribute__nonnull__(3); 56 57 static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, 58 ARGIN(const STRING *str), 59 ARGMOD(String_iter *iter)) 60 __attribute__nonnull__(1) 61 __attribute__nonnull__(2) 62 __attribute__nonnull__(3) 63 FUNC_MODIFIES(*iter); 64 65 static void fixed8_iter_set_and_advance(PARROT_INTERP, 66 ARGMOD(STRING *str), 67 ARGMOD(String_iter *iter), 68 UINTVAL c) 69 __attribute__nonnull__(1) 70 __attribute__nonnull__(2) 71 __attribute__nonnull__(3) 72 FUNC_MODIFIES(*str) 73 FUNC_MODIFIES(*iter); 74 75 static void fixed8_iter_set_position(SHIM_INTERP, 76 ARGIN(const STRING *str), 77 ARGMOD(String_iter *iter), 78 UINTVAL pos) 79 __attribute__nonnull__(2) 80 __attribute__nonnull__(3) 81 FUNC_MODIFIES(*iter); 82 83 static void fixed8_iter_skip(SHIM_INTERP, 84 ARGIN(const STRING *str), 85 ARGMOD(String_iter *iter), 86 INTVAL skip) 87 __attribute__nonnull__(2) 88 __attribute__nonnull__(3) 89 FUNC_MODIFIES(*iter); 90 49 91 static void fixed8_set_next(PARROT_INTERP, 50 92 ARGMOD(String_iter *iter), 51 93 UINTVAL c) … … 125 167 #define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 126 168 PARROT_ASSERT_ARG(interp) \ 127 169 , PARROT_ASSERT_ARG(iter)) 170 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 171 PARROT_ASSERT_ARG(interp) \ 172 , PARROT_ASSERT_ARG(str) \ 173 , PARROT_ASSERT_ARG(iter)) 174 #define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 175 PARROT_ASSERT_ARG(interp) \ 176 , PARROT_ASSERT_ARG(str) \ 177 , PARROT_ASSERT_ARG(iter)) 178 #define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 179 PARROT_ASSERT_ARG(interp) \ 180 , 
PARROT_ASSERT_ARG(str) \ 181 , PARROT_ASSERT_ARG(iter)) 182 #define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 183 PARROT_ASSERT_ARG(str) \ 184 , PARROT_ASSERT_ARG(iter)) 185 #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 186 PARROT_ASSERT_ARG(str) \ 187 , PARROT_ASSERT_ARG(iter)) 128 188 #define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 129 189 PARROT_ASSERT_ARG(interp) \ 130 190 , PARROT_ASSERT_ARG(iter)) … … 378 438 379 439 /* 380 440 441 =item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const 442 String_iter *iter, INTVAL offset)> 443 444 Get the character at C<iter> plus C<offset>. 445 446 =cut 447 448 */ 449 450 static UINTVAL 451 fixed8_iter_get(PARROT_INTERP, 452 ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset) 453 { 454 ASSERT_ARGS(fixed8_iter_get) 455 return get_byte(interp, str, iter->charpos + offset); 456 } 457 458 /* 459 460 =item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str, 461 String_iter *iter, INTVAL skip)> 462 463 Moves the string iterator C<i> by C<skip> characters. 464 465 =cut 466 467 */ 468 469 static void 470 fixed8_iter_skip(SHIM_INTERP, 471 ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip) 472 { 473 ASSERT_ARGS(fixed8_iter_skip) 474 iter->bytepos += skip; 475 iter->charpos += skip; 476 PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str)); 477 } 478 479 /* 480 481 =item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING 482 *str, String_iter *iter)> 483 484 Moves the string iterator C<i> to the next codepoint. 
485 486 =cut 487 488 */ 489 490 static UINTVAL 491 fixed8_iter_get_and_advance(PARROT_INTERP, 492 ARGIN(const STRING *str), ARGMOD(String_iter *iter)) 493 { 494 ASSERT_ARGS(fixed8_iter_get_and_advance) 495 const UINTVAL c = get_byte(interp, str, iter->charpos++); 496 iter->bytepos++; 497 return c; 498 } 499 500 /* 501 502 =item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str, 503 String_iter *iter, UINTVAL c)> 504 505 With the string iterator C<i>, appends the codepoint C<c> and advances to the 506 next position in the string. 507 508 =cut 509 510 */ 511 512 static void 513 fixed8_iter_set_and_advance(PARROT_INTERP, 514 ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c) 515 { 516 ASSERT_ARGS(fixed8_iter_set_and_advance) 517 set_byte(interp, str, iter->charpos++, c); 518 iter->bytepos++; 519 } 520 521 /* 522 523 =item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str, 524 String_iter *iter, UINTVAL pos)> 525 526 Moves the string iterator C<i> to the position C<n> in the string. 527 528 =cut 529 530 */ 531 532 static void 533 fixed8_iter_set_position(SHIM_INTERP, 534 ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos) 535 { 536 ASSERT_ARGS(fixed8_iter_set_position) 537 iter->bytepos = iter->charpos = pos; 538 PARROT_ASSERT(pos <= Buffer_buflen(str)); 539 } 540 541 /* 542 381 543 =item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)> 382 544 383 545 Moves the string iterator C<i> to the next codepoint. … … 514 676 bytes, 515 677 iter_init, 516 678 find_cclass, 517 fixed_8_hash 679 fixed_8_hash, 680 fixed8_iter_get, 681 fixed8_iter_skip, 682 fixed8_iter_get_and_advance, 683 fixed8_iter_set_and_advance, 684 fixed8_iter_set_position 518 685 }; 519 686 520 687 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); -
src/string/encoding/ucs2.c
diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 7c2f5b0..5136d16 100644
a b 120 120 __attribute__nonnull__(1) 121 121 __attribute__nonnull__(2); 122 122 123 static UINTVAL ucs2_iter_get(PARROT_INTERP, 124 ARGIN(const STRING *str), 125 ARGIN(const String_iter *i), 126 INTVAL offset) 127 __attribute__nonnull__(1) 128 __attribute__nonnull__(2) 129 __attribute__nonnull__(3); 130 131 static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, 132 ARGIN(const STRING *str), 133 ARGMOD(String_iter *i)) 134 __attribute__nonnull__(1) 135 __attribute__nonnull__(2) 136 __attribute__nonnull__(3) 137 FUNC_MODIFIES(*i); 138 139 static void ucs2_iter_set_and_advance(PARROT_INTERP, 140 ARGMOD(STRING *str), 141 ARGMOD(String_iter *i), 142 UINTVAL c) 143 __attribute__nonnull__(1) 144 __attribute__nonnull__(2) 145 __attribute__nonnull__(3) 146 FUNC_MODIFIES(*str) 147 FUNC_MODIFIES(*i); 148 149 static void ucs2_iter_set_position(PARROT_INTERP, 150 ARGIN(const STRING *str), 151 ARGMOD(String_iter *i), 152 UINTVAL n) 153 __attribute__nonnull__(1) 154 __attribute__nonnull__(2) 155 __attribute__nonnull__(3) 156 FUNC_MODIFIES(*i); 157 158 static void ucs2_iter_skip(PARROT_INTERP, 159 ARGIN(const STRING *str), 160 ARGMOD(String_iter *i), 161 INTVAL skip) 162 __attribute__nonnull__(1) 163 __attribute__nonnull__(2) 164 __attribute__nonnull__(3) 165 FUNC_MODIFIES(*i); 166 123 167 static void ucs2_set_position(SHIM_INTERP, 124 168 ARGMOD(String_iter *i), 125 169 UINTVAL n) … … 161 205 #define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 162 206 PARROT_ASSERT_ARG(interp) \ 163 207 , PARROT_ASSERT_ARG(s)) 208 #define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 209 PARROT_ASSERT_ARG(interp) \ 210 , PARROT_ASSERT_ARG(str) \ 211 , PARROT_ASSERT_ARG(i)) 212 #define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 213 PARROT_ASSERT_ARG(interp) \ 214 , PARROT_ASSERT_ARG(str) \ 215 , PARROT_ASSERT_ARG(i)) 216 #define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int 
_ASSERT_ARGS_CHECK = (\ 217 PARROT_ASSERT_ARG(interp) \ 218 , PARROT_ASSERT_ARG(str) \ 219 , PARROT_ASSERT_ARG(i)) 220 #define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 221 PARROT_ASSERT_ARG(interp) \ 222 , PARROT_ASSERT_ARG(str) \ 223 , PARROT_ASSERT_ARG(i)) 224 #define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 225 PARROT_ASSERT_ARG(interp) \ 226 , PARROT_ASSERT_ARG(str) \ 227 , PARROT_ASSERT_ARG(i)) 164 228 #define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 165 229 PARROT_ASSERT_ARG(i)) 166 230 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ … … 323 387 String_iter iter; 324 388 UINTVAL start; 325 389 326 iter_init(interp, src, &iter);327 iter.set_position(interp, &iter, offset);390 STRING_ITER_INIT(interp, &iter); 391 ucs2_iter_set_position(interp, src, &iter, offset); 328 392 start = iter.bytepos; 329 393 return_string->strstart = (char *)return_string->strstart + start; 330 iter.set_position(interp, &iter, offset + count);394 ucs2_iter_set_position(interp, src, &iter, offset + count); 331 395 return_string->bufused = iter.bytepos - start; 332 396 } 333 397 #endif … … 402 466 403 467 /* 404 468 469 =item C<static UINTVAL ucs2_iter_get(PARROT_INTERP, const STRING *str, const 470 String_iter *i, INTVAL offset)> 471 472 Get the character at C<i> + C<offset>. 473 474 =cut 475 476 */ 477 478 static UINTVAL 479 ucs2_iter_get(PARROT_INTERP, 480 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 481 { 482 ASSERT_ARGS(ucs2_iter_get) 483 return get_codepoint(interp, str, i->charpos + offset); 484 } 485 486 /* 487 488 =item C<static void ucs2_iter_skip(PARROT_INTERP, const STRING *str, String_iter 489 *i, INTVAL skip)> 490 491 Moves the string iterator C<i> by C<skip> characters. 
492 493 =cut 494 495 */ 496 497 static void 498 ucs2_iter_skip(PARROT_INTERP, 499 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 500 { 501 ASSERT_ARGS(ucs2_iter_skip) 502 UNUSED(str); 503 504 #if PARROT_HAS_ICU 505 i->charpos += skip; 506 i->bytepos += skip * sizeof (UChar); 507 #else 508 UNUSED(i); 509 UNUSED(skip); 510 no_ICU_lib(interp); 511 #endif 512 } 513 514 /* 515 516 =item C<static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, const STRING 517 *str, String_iter *i)> 518 519 Moves the string iterator C<i> to the next UCS-2 codepoint. 520 521 =cut 522 523 */ 524 525 static UINTVAL 526 ucs2_iter_get_and_advance(PARROT_INTERP, 527 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 528 { 529 ASSERT_ARGS(ucs2_iter_get_and_advance) 530 531 #if PARROT_HAS_ICU 532 UChar * const s = (UChar*) str->strstart; 533 size_t pos = i->bytepos / sizeof (UChar); 534 535 /* TODO either make sure that we don't go past end or use SAFE 536 * iter versions 537 */ 538 const UChar c = s[pos++]; 539 i->charpos++; 540 i->bytepos = pos * sizeof (UChar); 541 return c; 542 #else 543 UNUSED(str); 544 UNUSED(i); 545 no_ICU_lib(interp); 546 return (UINTVAL)0; /* Stop the static analyzers from panicing */ 547 #endif 548 } 549 550 /* 551 552 =item C<static void ucs2_iter_set_and_advance(PARROT_INTERP, STRING *str, 553 String_iter *i, UINTVAL c)> 554 555 With the string iterator C<i>, appends the codepoint C<c> and advances to the 556 next position in the string. 
557 558 =cut 559 560 */ 561 562 static void 563 ucs2_iter_set_and_advance(PARROT_INTERP, 564 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 565 { 566 ASSERT_ARGS(ucs2_iter_set_and_advance) 567 568 #if PARROT_HAS_ICU 569 UChar * const s = (UChar*) str->strstart; 570 UINTVAL pos = i->bytepos / sizeof (UChar); 571 s[pos++] = (UChar)c; 572 i->charpos++; 573 i->bytepos = pos * sizeof (UChar); 574 #else 575 UNUSED(str); 576 UNUSED(i); 577 UNUSED(c); 578 no_ICU_lib(interp); 579 #endif 580 } 581 582 /* 583 584 =item C<static void ucs2_iter_set_position(PARROT_INTERP, const STRING *str, 585 String_iter *i, UINTVAL n)> 586 587 Moves the string iterator C<i> to the position C<n> in the string. 588 589 =cut 590 591 */ 592 593 static void 594 ucs2_iter_set_position(PARROT_INTERP, 595 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 596 { 597 ASSERT_ARGS(ucs2_iter_set_position) 598 UNUSED(str); 599 600 #if PARROT_HAS_ICU 601 i->charpos = n; 602 i->bytepos = n * sizeof (UChar); 603 #else 604 UNUSED(i); 605 UNUSED(n); 606 no_ICU_lib(interp); 607 #endif 608 } 609 610 /* 611 405 612 =item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)> 406 613 407 614 Moves the string iterator C<i> to the next UCS-2 codepoint. … … 592 799 bytes, 593 800 iter_init, 594 801 find_cclass, 595 ucs2_hash 802 ucs2_hash, 803 ucs2_iter_get, 804 ucs2_iter_skip, 805 ucs2_iter_get_and_advance, 806 ucs2_iter_set_and_advance, 807 ucs2_iter_set_position 596 808 }; 597 809 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 598 810 Parrot_register_encoding(interp, "ucs2", return_encoding); -
src/string/encoding/utf16.c
diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index fdc006f..6df6e96 100644
a b 107 107 __attribute__nonnull__(2) 108 108 FUNC_MODIFIES(*i); 109 109 110 static UINTVAL utf16_iter_get(PARROT_INTERP, 111 ARGIN(const STRING *str), 112 ARGIN(const String_iter *i), 113 INTVAL offset) 114 __attribute__nonnull__(1) 115 __attribute__nonnull__(2) 116 __attribute__nonnull__(3); 117 118 PARROT_WARN_UNUSED_RESULT 119 static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, 120 ARGIN(const STRING *str), 121 ARGMOD(String_iter *i)) 122 __attribute__nonnull__(1) 123 __attribute__nonnull__(2) 124 __attribute__nonnull__(3) 125 FUNC_MODIFIES(*i); 126 127 static void utf16_iter_set_and_advance(PARROT_INTERP, 128 ARGMOD(STRING *str), 129 ARGMOD(String_iter *i), 130 UINTVAL c) 131 __attribute__nonnull__(1) 132 __attribute__nonnull__(2) 133 __attribute__nonnull__(3) 134 FUNC_MODIFIES(*str) 135 FUNC_MODIFIES(*i); 136 137 static void utf16_iter_set_position(PARROT_INTERP, 138 ARGIN(const STRING *str), 139 ARGMOD(String_iter *i), 140 UINTVAL n) 141 __attribute__nonnull__(1) 142 __attribute__nonnull__(2) 143 __attribute__nonnull__(3) 144 FUNC_MODIFIES(*i); 145 146 static void utf16_iter_skip(PARROT_INTERP, 147 ARGIN(const STRING *str), 148 ARGMOD(String_iter *i), 149 INTVAL skip) 150 __attribute__nonnull__(1) 151 __attribute__nonnull__(2) 152 __attribute__nonnull__(3) 153 FUNC_MODIFIES(*i); 154 110 155 static void utf16_set_position(SHIM_INTERP, 111 156 ARGMOD(String_iter *i), 112 157 UINTVAL n) … … 147 192 PARROT_ASSERT_ARG(i)) 148 193 #define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 149 194 PARROT_ASSERT_ARG(i)) 195 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 196 PARROT_ASSERT_ARG(interp) \ 197 , PARROT_ASSERT_ARG(str) \ 198 , PARROT_ASSERT_ARG(i)) 199 #define ASSERT_ARGS_utf16_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 200 PARROT_ASSERT_ARG(interp) \ 201 , PARROT_ASSERT_ARG(str) \ 202 , PARROT_ASSERT_ARG(i)) 203 #define 
ASSERT_ARGS_utf16_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 204 PARROT_ASSERT_ARG(interp) \ 205 , PARROT_ASSERT_ARG(str) \ 206 , PARROT_ASSERT_ARG(i)) 207 #define ASSERT_ARGS_utf16_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 208 PARROT_ASSERT_ARG(interp) \ 209 , PARROT_ASSERT_ARG(str) \ 210 , PARROT_ASSERT_ARG(i)) 211 #define ASSERT_ARGS_utf16_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 212 PARROT_ASSERT_ARG(interp) \ 213 , PARROT_ASSERT_ARG(str) \ 214 , PARROT_ASSERT_ARG(i)) 150 215 #define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 151 216 PARROT_ASSERT_ARG(i)) 152 217 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ … … 381 446 UINTVAL start; 382 447 STRING * const return_string = Parrot_str_copy(interp, src); 383 448 384 iter_init(interp, src, &iter);385 iter.set_position(interp, &iter, offset);449 STRING_ITER_INIT(interp, &iter); 450 utf16_iter_set_position(interp, src, &iter, offset); 386 451 start = iter.bytepos; 387 452 return_string->strstart = (char *)return_string->strstart + start ; 388 iter.set_position(interp, &iter, offset +count);453 utf16_iter_skip(interp, src, &iter, count); 389 454 return_string->bufused = iter.bytepos - start; 390 455 return_string->strlen = count; 391 456 return_string->hashval = 0; … … 432 497 codepoints(PARROT_INTERP, ARGIN(const STRING *src)) 433 498 { 434 499 ASSERT_ARGS(codepoints) 435 String_iter iter; 500 #if PARROT_HAS_ICU 501 UChar *s = (UChar*) src->strstart; 502 UINTVAL pos = 0; 436 503 /* 437 504 * this is used to initially calculate src->strlen, 438 505 * therefore we must scan the whole string 439 506 */ 440 iter_init(interp, src, &iter); 441 while (iter.bytepos < src->bufused) 442 iter.get_and_advance(interp, &iter); 443 return iter.charpos; 507 while (pos * sizeof(UChar) < src->bufused) { 508 U16_FWD_1_UNSAFE(s, pos); 509 } 510 return pos * sizeof(UChar); 511 #else 
512 UNUSED(src); 513 514 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 515 "no ICU lib loaded"); 516 #endif 444 517 } 445 518 446 519 /* … … 461 534 return src->bufused; 462 535 } 463 536 537 /* 538 539 =item C<static UINTVAL utf16_iter_get(PARROT_INTERP, const STRING *str, const 540 String_iter *i, INTVAL offset)> 541 542 Get the character at C<i> plus C<offset>. 543 544 =cut 545 546 */ 547 548 static UINTVAL 549 utf16_iter_get(PARROT_INTERP, 550 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 551 { 552 ASSERT_ARGS(utf16_iter_get) 553 #if PARROT_HAS_ICU 554 UChar *s = (UChar*) str->strstart; 555 UINTVAL c, pos; 556 557 pos = i->bytepos / sizeof (UChar); 558 if (offset > 0) { 559 U16_FWD_N_UNSAFE(s, pos, offset); 560 } 561 else if (offset < 0) { 562 U16_BACK_N_UNSAFE(s, pos, -offset); 563 } 564 U16_GET_UNSAFE(s, pos, c); 565 566 return c; 567 #else 568 UNUSED(str); 569 UNUSED(i); 570 UNUSED(offset); 571 572 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 573 "no ICU lib loaded"); 574 #endif 575 } 576 577 /* 578 579 =item C<static void utf16_iter_skip(PARROT_INTERP, const STRING *str, 580 String_iter *i, INTVAL skip)> 581 582 Moves the string iterator C<i> by C<skip> characters. 
583 584 =cut 585 586 */ 587 588 static void 589 utf16_iter_skip(PARROT_INTERP, 590 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 591 { 592 ASSERT_ARGS(utf16_iter_skip) 593 #if PARROT_HAS_ICU 594 UChar * const s = (UChar*) str->strstart; 595 UINTVAL pos = i->bytepos / sizeof (UChar); 596 597 if (skip > 0) { 598 U16_FWD_N_UNSAFE(s, pos, skip); 599 } 600 else if (skip < 0) { 601 U16_BACK_N_UNSAFE(s, pos, -skip); 602 } 603 604 i->charpos += skip; 605 i->bytepos = pos * sizeof (UChar); 606 #else 607 UNUSED(str); 608 UNUSED(i); 609 UNUSED(skip); 610 611 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 612 "no ICU lib loaded"); 613 #endif 614 } 615 616 /* 617 618 =item C<static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, const STRING 619 *str, String_iter *i)> 620 621 Moves the string iterator C<i> to the next UTF-16 codepoint. 622 623 =cut 624 625 */ 626 627 PARROT_WARN_UNUSED_RESULT 628 static UINTVAL 629 utf16_iter_get_and_advance(PARROT_INTERP, 630 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 631 { 632 ASSERT_ARGS(utf16_iter_get_and_advance) 633 #if PARROT_HAS_ICU 634 UChar *s = (UChar*) str->strstart; 635 UINTVAL c, pos; 636 pos = i->bytepos / sizeof (UChar); 637 /* TODO either make sure that we don't go past end or use SAFE 638 * iter versions 639 */ 640 U16_NEXT_UNSAFE(s, pos, c); 641 i->charpos++; 642 i->bytepos = pos * sizeof (UChar); 643 return c; 644 #else 645 UNUSED(str); 646 UNUSED(i); 647 648 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 649 "no ICU lib loaded"); 650 #endif 651 } 652 653 /* 654 655 =item C<static void utf16_iter_set_and_advance(PARROT_INTERP, STRING *str, 656 String_iter *i, UINTVAL c)> 657 658 With the string iterator C<i>, appends the codepoint C<c> and advances to the 659 next position in the string. 
660 661 =cut 662 663 */ 664 665 static void 666 utf16_iter_set_and_advance(PARROT_INTERP, 667 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 668 { 669 ASSERT_ARGS(utf16_iter_set_and_advance) 670 #if PARROT_HAS_ICU 671 UChar *s = (UChar*) str->strstart; 672 UINTVAL pos; 673 pos = i->bytepos / sizeof (UChar); 674 U16_APPEND_UNSAFE(s, pos, c); 675 i->charpos++; 676 i->bytepos = pos * sizeof (UChar); 677 #else 678 UNUSED(str); 679 UNUSED(i); 680 UNUSED(c); 681 682 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 683 "no ICU lib loaded"); 684 #endif 685 } 686 687 /* 688 689 =item C<static void utf16_iter_set_position(PARROT_INTERP, const STRING *str, 690 String_iter *i, UINTVAL n)> 691 692 Moves the string iterator C<i> to the position C<n> in the string. 693 694 =cut 695 696 */ 697 698 static void 699 utf16_iter_set_position(PARROT_INTERP, 700 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 701 { 702 ASSERT_ARGS(utf16_iter_set_position) 703 #if PARROT_HAS_ICU 704 UChar * const s = (UChar*) str->strstart; 705 UINTVAL pos; 706 pos = 0; 707 U16_FWD_N_UNSAFE(s, pos, n); 708 i->charpos = n; 709 i->bytepos = pos * sizeof (UChar); 710 #else 711 UNUSED(str); 712 UNUSED(i); 713 UNUSED(n); 714 715 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 716 "no ICU lib loaded"); 717 #endif 718 } 719 464 720 #if PARROT_HAS_ICU 465 721 /* 466 722 … … 595 851 bytes, 596 852 iter_init, 597 853 find_cclass, 598 NULL 854 NULL, 855 utf16_iter_get, 856 utf16_iter_skip, 857 utf16_iter_get_and_advance, 858 utf16_iter_set_and_advance, 859 utf16_iter_set_position 599 860 }; 600 861 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 601 862 Parrot_register_encoding(interp, "utf16", return_encoding); -
src/string/encoding/utf8.c
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index e853f63..55e1753 100644
a b 119 119 __attribute__nonnull__(2) 120 120 FUNC_MODIFIES(*i); 121 121 122 static UINTVAL utf8_iter_get(PARROT_INTERP, 123 ARGIN(const STRING *str), 124 ARGIN(const String_iter *i), 125 INTVAL offset) 126 __attribute__nonnull__(1) 127 __attribute__nonnull__(2) 128 __attribute__nonnull__(3); 129 130 static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, 131 ARGIN(const STRING *str), 132 ARGMOD(String_iter *i)) 133 __attribute__nonnull__(1) 134 __attribute__nonnull__(2) 135 __attribute__nonnull__(3) 136 FUNC_MODIFIES(*i); 137 138 static void utf8_iter_set_and_advance(PARROT_INTERP, 139 ARGMOD(STRING *str), 140 ARGMOD(String_iter *i), 141 UINTVAL c) 142 __attribute__nonnull__(1) 143 __attribute__nonnull__(2) 144 __attribute__nonnull__(3) 145 FUNC_MODIFIES(*str) 146 FUNC_MODIFIES(*i); 147 148 static void utf8_iter_set_position(SHIM_INTERP, 149 ARGIN(const STRING *str), 150 ARGMOD(String_iter *i), 151 UINTVAL pos) 152 __attribute__nonnull__(2) 153 __attribute__nonnull__(3) 154 FUNC_MODIFIES(*i); 155 156 static void utf8_iter_skip(SHIM_INTERP, 157 ARGIN(const STRING *str), 158 ARGMOD(String_iter *i), 159 INTVAL skip) 160 __attribute__nonnull__(2) 161 __attribute__nonnull__(3) 162 FUNC_MODIFIES(*i); 163 122 164 static void utf8_set_position(SHIM_INTERP, 123 165 ARGMOD(String_iter *i), 124 166 UINTVAL pos) … … 176 218 #define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 177 219 PARROT_ASSERT_ARG(interp) \ 178 220 , PARROT_ASSERT_ARG(i)) 221 #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 222 PARROT_ASSERT_ARG(interp) \ 223 , PARROT_ASSERT_ARG(str) \ 224 , PARROT_ASSERT_ARG(i)) 225 #define ASSERT_ARGS_utf8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 226 PARROT_ASSERT_ARG(interp) \ 227 , PARROT_ASSERT_ARG(str) \ 228 , PARROT_ASSERT_ARG(i)) 229 #define ASSERT_ARGS_utf8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 230 PARROT_ASSERT_ARG(interp) \ 
231 , PARROT_ASSERT_ARG(str) \ 232 , PARROT_ASSERT_ARG(i)) 233 #define ASSERT_ARGS_utf8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 234 PARROT_ASSERT_ARG(str) \ 235 , PARROT_ASSERT_ARG(i)) 236 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 237 PARROT_ASSERT_ARG(str) \ 238 , PARROT_ASSERT_ARG(i)) 179 239 #define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 180 240 PARROT_ASSERT_ARG(i)) 181 241 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 388 448 389 449 /* 390 450 451 =item C<static UINTVAL utf8_iter_get(PARROT_INTERP, const STRING *str, const 452 String_iter *i, INTVAL offset)> 453 454 Get the character at C<i> plus C<offset>. 455 456 =cut 457 458 */ 459 460 static UINTVAL 461 utf8_iter_get(PARROT_INTERP, 462 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 463 { 464 ASSERT_ARGS(utf8_iter_get) 465 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 466 467 if (offset > 0) { 468 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, offset); 469 } 470 else if (offset < 0) { 471 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -offset); 472 } 473 474 return utf8_decode(interp, u8ptr); 475 } 476 477 /* 478 479 =item C<static void utf8_iter_skip(PARROT_INTERP, const STRING *str, String_iter 480 *i, INTVAL skip)> 481 482 Moves the string iterator C<i> by C<skip> characters. 
483 484 =cut 485 486 */ 487 488 static void 489 utf8_iter_skip(SHIM_INTERP, 490 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 491 { 492 ASSERT_ARGS(utf8_iter_skip) 493 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 494 495 if (skip > 0) { 496 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, skip); 497 } 498 else if (skip < 0) { 499 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -skip); 500 } 501 502 i->charpos += skip; 503 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 504 } 505 506 /* 507 508 =item C<static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, const STRING 509 *str, String_iter *i)> 510 511 The UTF-8 implementation of the string iterator's C<get_and_advance> 512 function. 513 514 =cut 515 516 */ 517 518 static UINTVAL 519 utf8_iter_get_and_advance(PARROT_INTERP, 520 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 521 { 522 ASSERT_ARGS(utf8_iter_get_and_advance) 523 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 524 UINTVAL c = *u8ptr; 525 526 if (UTF8_IS_START(c)) { 527 UINTVAL len = UTF8SKIP(u8ptr); 528 529 c &= UTF8_START_MASK(len); 530 i->bytepos += len; 531 for (len--; len; len--) { 532 u8ptr++; 533 534 if (!UTF8_IS_CONTINUATION(*u8ptr)) 535 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 536 "Malformed UTF-8 string\n"); 537 c = UTF8_ACCUMULATE(c, *u8ptr); 538 } 539 540 if (UNICODE_IS_SURROGATE(c)) 541 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 542 "Surrogate in UTF-8 string\n"); 543 } 544 else if (!UNICODE_IS_INVARIANT(c)) { 545 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 546 "Malformed UTF-8 string\n"); 547 } 548 else { 549 i->bytepos++; 550 } 551 552 i->charpos++; 553 return c; 554 } 555 556 /* 557 558 =item C<static void utf8_iter_set_and_advance(PARROT_INTERP, STRING *str, 559 String_iter *i, UINTVAL c)> 560 561 The UTF-8 implementation of the string iterator's C<set_and_advance> 562 
function. 563 564 =cut 565 566 */ 567 568 static void 569 utf8_iter_set_and_advance(PARROT_INTERP, 570 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 571 { 572 ASSERT_ARGS(utf8_iter_set_and_advance) 573 unsigned char * const pos = (unsigned char *)str->strstart + i->bytepos; 574 unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c); 575 576 i->bytepos += (new_pos - pos); 577 /* XXX possible buffer overrun exception? */ 578 PARROT_ASSERT(i->bytepos <= Buffer_buflen(str)); 579 i->charpos++; 580 } 581 582 /* 583 584 =item C<static void utf8_iter_set_position(PARROT_INTERP, const STRING *str, 585 String_iter *i, UINTVAL pos)> 586 587 The UTF-8 implementation of the string iterator's C<set_position> 588 function. 589 590 =cut 591 592 */ 593 594 static void 595 utf8_iter_set_position(SHIM_INTERP, 596 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL pos) 597 { 598 ASSERT_ARGS(utf8_iter_set_position) 599 const utf8_t *u8ptr = (const utf8_t *)str->strstart; 600 601 if (pos == 0) { 602 i->charpos = 0; 603 i->bytepos = 0; 604 return; 605 } 606 607 /* 608 * we know the byte offsets of three positions: start, current and end 609 * now find the shortest way to reach pos 610 */ 611 if (pos < i->charpos) { 612 if (pos <= (i->charpos >> 1)) { 613 /* go forward from start */ 614 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, pos); 615 } 616 else { 617 /* go backward from current */ 618 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + i->bytepos, i->charpos - pos); 619 } 620 } 621 else { 622 const UINTVAL len = str->strlen; 623 if (pos <= i->charpos + ((len - i->charpos) >> 1)) { 624 /* go forward from current */ 625 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr + i->bytepos, pos - i->charpos); 626 } 627 else { 628 /* go backward from end */ 629 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + str->bufused, len - pos); 630 } 631 } 632 633 i->charpos = pos; 634 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 635 } 636 
637 /* 638 391 639 =item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)> 392 640 393 641 The UTF-8 implementation of the string iterator's C<get_and_advance> … … 514 762 { 515 763 ASSERT_ARGS(to_encoding) 516 764 STRING *result; 517 String_iter src_iter;518 UINTVAL offs,dest_len, dest_pos, src_len;765 const ENCODING *src_encoding; 766 UINTVAL dest_len, dest_pos, src_len; 519 767 unsigned char *p; 520 768 521 769 if (src->encoding == Parrot_utf8_encoding_ptr) … … 524 772 result = Parrot_gc_new_string_header(interp, 0); 525 773 src_len = src->strlen; 526 774 527 /* init iter before possilby changing encoding*/528 ENCODING_ITER_INIT(interp, src, &src_iter);775 /* save source encoding before possibly changing it */ 776 src_encoding = src->encoding; 529 777 result->charset = Parrot_unicode_charset_ptr; 530 778 result->encoding = Parrot_utf8_encoding_ptr; 531 779 result->strlen = src_len; … … 543 791 result->bufused = dest_len; 544 792 } 545 793 else { 794 String_iter src_iter; 795 STRING_ITER_INIT(interp, &src_iter); 546 796 dest_len = src_len; 547 797 dest_pos = 0; 548 for (offs = 0; offs < src_len; ++offs) {549 const UINTVAL c = src_ iter.get_and_advance(interp, &src_iter);798 while (src_iter.charpos < src_len) { 799 const UINTVAL c = src_encoding->iter_get_and_advance(interp, src, &src_iter); 550 800 unsigned char *new_pos; 551 801 unsigned char *pos; 552 802 553 803 if (dest_len - dest_pos < 6) { 554 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);804 UINTVAL need = (UINTVAL)((src->strlen - src_iter.charpos + 1) * 1.5); 555 805 if (need < 16) 556 806 need = 16; 557 807 dest_len += need; … … 684 934 String_iter iter; 685 935 UINTVAL start; 686 936 687 iter_init(interp, src, &iter);937 STRING_ITER_INIT(interp, &iter); 688 938 689 939 if (offset) 690 iter.set_position(interp, &iter, offset);940 utf8_iter_set_position(interp, src, &iter, offset); 691 941 692 942 start = iter.bytepos; 693 943 return_string->strstart = (char 
*)return_string->strstart + start; 694 944 695 945 if (count) 696 iter.set_position(interp, &iter, offset + count);946 utf8_iter_set_position(interp, src, &iter, offset + count); 697 947 698 948 return_string->bufused = iter.bytepos - start; 699 949 return_string->strlen = count; … … 750 1000 * this is used to initially calculate src->strlen, 751 1001 * therefore we must scan the whole string 752 1002 */ 753 iter_init(interp, src, &iter);1003 STRING_ITER_INIT(interp, &iter); 754 1004 while (iter.bytepos < src->bufused) 755 iter.get_and_advance(interp, &iter);1005 utf8_iter_get_and_advance(interp, src, &iter); 756 1006 return iter.charpos; 757 1007 } 758 1008 … … 826 1076 bytes, 827 1077 iter_init, 828 1078 find_cclass, 829 NULL 1079 NULL, 1080 utf8_iter_get, 1081 utf8_iter_skip, 1082 utf8_iter_get_and_advance, 1083 utf8_iter_set_and_advance, 1084 utf8_iter_set_position 830 1085 }; 831 1086 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 832 1087 Parrot_register_encoding(interp, "utf8", return_encoding);