Ticket #1456: string-iter-v4.diff
File string-iter-v4.diff, 71.6 KB (added by nwellnhof, 12 years ago) |
---|
-
include/parrot/encoding.h
diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index b9a5853..391f454 100644
a b 37 37 38 38 typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src, 39 39 struct string_iterator_t *); 40 typedef UINTVAL (*encoding_iter_get_t)( 41 PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); 42 typedef void (*encoding_iter_skip_t)( 43 PARROT_INTERP, const STRING *str, String_iter *i, INTVAL skip); 44 typedef UINTVAL (*encoding_iter_get_and_advance_t)( 45 PARROT_INTERP, const STRING *str, String_iter *i); 46 typedef void (*encoding_iter_set_and_advance_t)( 47 PARROT_INTERP, STRING *str, String_iter *i, UINTVAL c); 48 typedef void (*encoding_iter_set_position_t)( 49 PARROT_INTERP, const STRING *str, String_iter *i, UINTVAL pos); 40 50 41 51 struct _encoding { 42 52 ARGIN(const char *name); … … 57 67 encoding_bytes_t bytes; 58 68 encoding_iter_init_t iter_init; 59 69 encoding_find_cclass_t find_cclass; 70 encoding_iter_get_t iter_get; 71 encoding_iter_skip_t iter_skip; 72 encoding_iter_get_and_advance_t iter_get_and_advance; 73 encoding_iter_set_and_advance_t iter_set_and_advance; 74 encoding_iter_set_position_t iter_set_position; 60 75 }; 61 76 62 77 typedef struct _encoding ENCODING; -
include/parrot/string.h
diff --git a/include/parrot/string.h b/include/parrot/string.h index fb6a3be..7d87f8e 100644
a b 37 37 void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos); 38 38 } String_iter; 39 39 40 #define STRING_ITER_INIT(i, iter) \ 41 (iter)->charpos = (iter)->bytepos = 0 42 #define STRING_ITER_GET(i, str, iter, offset) \ 43 ((str)->encoding)->iter_get((i), (str), (iter), (offset)) 44 #define STRING_ITER_SKIP(i, str, iter, skip) \ 45 ((str)->encoding)->iter_skip((i), (str), (iter), (skip)) 46 #define STRING_ITER_GET_AND_ADVANCE(i, str, iter) \ 47 ((str)->encoding)->iter_get_and_advance((i), (str), (iter)) 48 #define STRING_ITER_SET_AND_ADVANCE(i, str, iter, c) \ 49 ((str)->encoding)->iter_set_and_advance((i), (str), (iter), (c)) 50 #define STRING_ITER_SET_POSITION(i, str, iter, pos) \ 51 ((str)->encoding)->iter_set_position((i), (str), (iter), (pos)) 52 40 53 #define STREQ(x, y) (strcmp((x), (y))==0) 41 54 #define STRNEQ(x, y) (strcmp((x), (y))!=0) 42 55 -
include/parrot/string_funcs.h
diff --git a/include/parrot/string_funcs.h b/include/parrot/string_funcs.h index b4bcd34..8087b8f 100644
a b 249 249 __attribute__nonnull__(3); 250 250 251 251 PARROT_EXPORT 252 PARROT_CANNOT_RETURN_NULL 253 PARROT_WARN_UNUSED_RESULT 254 STRING * Parrot_str_iter_substr(PARROT_INTERP, 255 ARGMOD(STRING *str), 256 ARGIN(const String_iter *l), 257 ARGIN(const String_iter *r)) 258 __attribute__nonnull__(1) 259 __attribute__nonnull__(2) 260 __attribute__nonnull__(3) 261 __attribute__nonnull__(4) 262 FUNC_MODIFIES(*str); 263 264 PARROT_EXPORT 252 265 PARROT_WARN_UNUSED_RESULT 253 266 PARROT_CANNOT_RETURN_NULL 254 267 STRING* Parrot_str_join(PARROT_INTERP, … … 626 639 #define ASSERT_ARGS_Parrot_str_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 627 640 PARROT_ASSERT_ARG(interp) \ 628 641 , PARROT_ASSERT_ARG(s)) 642 #define ASSERT_ARGS_Parrot_str_iter_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 643 PARROT_ASSERT_ARG(interp) \ 644 , PARROT_ASSERT_ARG(str) \ 645 , PARROT_ASSERT_ARG(l) \ 646 , PARROT_ASSERT_ARG(r)) 629 647 #define ASSERT_ARGS_Parrot_str_join __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 630 648 PARROT_ASSERT_ARG(interp) \ 631 649 , PARROT_ASSERT_ARG(ar)) -
src/io/utf8.c
diff --git a/src/io/utf8.c b/src/io/utf8.c index 0df3d22..f2b3b5d 100644
a b 57 57 s->encoding = Parrot_utf8_encoding_ptr; 58 58 59 59 /* count chars, verify utf8 */ 60 Parrot_utf8_encoding_ptr->iter_init(interp, s, &iter);60 STRING_ITER_INIT(interp, &iter); 61 61 62 62 while (iter.bytepos < s->bufused) { 63 63 if (iter.bytepos + 4 > s->bufused) { … … 92 92 } 93 93 } 94 94 ok: 95 iter.get_and_advance(interp, &iter);95 Parrot_utf8_encoding_ptr->iter_get_and_advance(interp, *buf, &iter); 96 96 } 97 97 s->strlen = iter.charpos; 98 98 return len; -
src/pmc/stringiterator.pmc
diff --git a/src/pmc/stringiterator.pmc b/src/pmc/stringiterator.pmc index 3e0cefc..aa3086b 100644
a b 23 23 24 24 25 25 pmclass StringIterator auto_attrs extends Iterator { 26 ATTR PMC *string; /* String to iterate over */ 27 ATTR INTVAL pos; /* Current position of iterator for forward iterator */ 28 /* Previous position of iterator for reverse iterator */ 29 ATTR INTVAL length; /* Length of C<string> */ 30 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 26 ATTR PMC *string; /* String PMC to iterate over */ 27 ATTR STRING *str_val; /* The actual string */ 28 ATTR String_iter iter; /* String iterator */ 29 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 31 30 32 31 /* 33 32 … … 39 38 40 39 */ 41 40 VTABLE void init_pmc(PMC *string) { 41 Parrot_StringIterator_attributes * const attrs = 42 PARROT_STRINGITERATOR(SELF); 43 STRING * const str_val = VTABLE_get_string(INTERP, string); 44 42 45 SET_ATTR_string(INTERP, SELF, string); 46 SET_ATTR_str_val(INTERP, SELF, str_val); 47 STRING_ITER_INIT(INTERP, &attrs->iter); 43 48 44 49 /* by default, iterate from start */ 45 50 SELF.set_integer_native(ITERATE_FROM_START); … … 58 63 59 64 VTABLE void mark() { 60 65 PMC *string; 66 STRING *str_val; 67 61 68 GET_ATTR_string(INTERP, SELF, string); 62 69 Parrot_gc_mark_PMC_alive(INTERP, string); 70 GET_ATTR_str_val(INTERP, SELF, str_val); 71 Parrot_gc_mark_STRING_alive(INTERP, str_val); 63 72 } 64 73 65 74 /* … … 77 86 Parrot_StringIterator_attributes * const clone_attrs = 78 87 PARROT_STRINGITERATOR(clone); 79 88 80 clone_attrs->pos = attrs->pos; 89 /* TODO: this isn't safe if the string PMC has changed */ 90 clone_attrs->iter = attrs->iter; 81 91 clone_attrs->reverse = attrs->reverse; 82 92 return clone; 83 93 } … … 110 120 Parrot_StringIterator_attributes * const attrs = 111 121 PARROT_STRINGITERATOR(SELF); 112 122 if (attrs->reverse) 113 return attrs-> pos;123 return attrs->iter.charpos; 114 124 else 115 return attrs-> length - attrs->pos;125 return attrs->str_val->strlen - attrs->iter.charpos; 116 126 } 117 127 118 128 
VTABLE INTVAL get_integer() { … … 137 147 PARROT_STRINGITERATOR(SELF); 138 148 if (value == ITERATE_FROM_START) { 139 149 attrs->reverse = 0; 140 attrs->pos = 0; 141 attrs->length = VTABLE_elements(INTERP, attrs->string); 150 STRING_ITER_SET_POSITION(INTERP, attrs->str_val, &attrs->iter, 0); 142 151 } 143 152 else if (value == ITERATE_FROM_END) { 144 153 attrs->reverse = 1; 145 attrs->pos = attrs->length 146 = VTABLE_elements(INTERP, attrs->string); 154 STRING_ITER_SET_POSITION(INTERP, attrs->str_val, &attrs->iter, attrs->str_val->strlen); 147 155 } 148 156 else 149 157 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION, … … 179 187 Parrot_StringIterator_attributes * const attrs = 180 188 PARROT_STRINGITERATOR(SELF); 181 189 PMC *ret; 190 STRING *str; 191 const String_iter old_iter = attrs->iter; 182 192 183 if (attrs-> pos >= attrs->length)193 if (attrs->iter.charpos >= attrs->str_val->strlen) 184 194 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 185 195 "StopIteration"); 186 196 187 ret = pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 188 VTABLE_set_string_native(INTERP, ret, 189 VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++)); 197 ret = pmc_new(INTERP, Parrot_get_ctx_HLL_type(INTERP, enum_class_String)); 198 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, 1); 199 str = Parrot_str_iter_substr(INTERP, attrs->str_val, &old_iter, &attrs->iter); 200 VTABLE_set_string_native(INTERP, ret, str); 190 201 return ret; 191 202 } 192 203 … … 202 213 VTABLE STRING *shift_string() { 203 214 Parrot_StringIterator_attributes * const attrs = 204 215 PARROT_STRINGITERATOR(SELF); 216 const String_iter old_iter = attrs->iter; 205 217 206 if (attrs-> pos >= attrs->length)218 if (attrs->iter.charpos >= attrs->str_val->strlen) 207 219 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 208 220 "StopIteration"); 209 221 210 return VTABLE_get_string_keyed_int(INTERP, attrs->string, 
attrs->pos++); 222 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, 1); 223 return Parrot_str_iter_substr(INTERP, attrs->str_val, &old_iter, &attrs->iter); 211 224 } 212 225 213 226 /* … … 223 236 Parrot_StringIterator_attributes * const attrs = 224 237 PARROT_STRINGITERATOR(SELF); 225 238 226 if (attrs-> pos >= attrs->length)239 if (attrs->iter.charpos >= attrs->str_val->strlen) 227 240 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 228 241 "StopIteration"); 229 242 230 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, attrs->pos++);243 return STRING_ITER_GET_AND_ADVANCE(INTERP, attrs->str_val, &attrs->iter); 231 244 } 232 245 233 246 /* … … 243 256 Parrot_StringIterator_attributes * const attrs = 244 257 PARROT_STRINGITERATOR(SELF); 245 258 PMC *ret; 259 STRING * str; 260 const String_iter old_iter = attrs->iter; 246 261 247 if ( !STATICSELF.get_bool())262 if (attrs->iter.charpos <= 0) 248 263 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 249 264 "StopIteration"); 250 265 251 ret = pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 252 VTABLE_set_string_native(INTERP, ret, 253 VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos)); 266 ret = pmc_new(INTERP, Parrot_get_ctx_HLL_type(INTERP, enum_class_String)); 267 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 268 str = Parrot_str_iter_substr(INTERP, attrs->str_val, &attrs->iter, &old_iter); 269 VTABLE_set_string_native(INTERP, ret, str); 254 270 return ret; 255 271 } 256 272 … … 266 282 VTABLE STRING *pop_string() { 267 283 Parrot_StringIterator_attributes * const attrs = 268 284 PARROT_STRINGITERATOR(SELF); 285 const String_iter old_iter = attrs->iter; 269 286 270 if ( !STATICSELF.get_bool())287 if (attrs->iter.charpos <= 0) 271 288 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 272 289 "StopIteration"); 273 290 274 return VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos); 291 
STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 292 return Parrot_str_iter_substr(INTERP, attrs->str_val, &attrs->iter, &old_iter); 275 293 } 276 294 277 295 /* … … 287 305 Parrot_StringIterator_attributes * const attrs = 288 306 PARROT_STRINGITERATOR(SELF); 289 307 290 if ( !STATICSELF.get_bool())308 if (attrs->iter.charpos <= 0) 291 309 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 292 310 "StopIteration"); 293 311 294 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, --attrs->pos); 312 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 313 return STRING_ITER_GET(INTERP, attrs->str_val, &attrs->iter, 0); 295 314 } 296 315 297 316 /* … … 305 324 */ 306 325 307 326 VTABLE INTVAL get_integer_keyed_int(INTVAL idx) { 308 return VTABLE_get_integer_keyed_int(INTERP, STATICSELF.get_pmc(), 309 PARROT_STRINGITERATOR(SELF)->pos + idx); 327 Parrot_StringIterator_attributes * const attrs = 328 PARROT_STRINGITERATOR(SELF); 329 const UINTVAL offset = attrs->iter.charpos + idx; 330 331 if (offset >= attrs->str_val->strlen) 332 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 333 "StopIteration"); 334 335 return STRING_ITER_GET(INTERP, attrs->str_val, &attrs->iter, idx); 310 336 } 311 337 312 338 /* … … 320 346 */ 321 347 322 348 VTABLE STRING *get_string_keyed_int(INTVAL idx) { 323 return VTABLE_get_string_keyed_int(INTERP, STATICSELF.get_pmc(), 324 PARROT_STRINGITERATOR(SELF)->pos + idx); 349 Parrot_StringIterator_attributes * const attrs = 350 PARROT_STRINGITERATOR(SELF); 351 const UINTVAL offset = attrs->iter.charpos + idx; 352 String_iter iter, next_iter; 353 354 if (offset >= attrs->str_val->strlen) 355 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 356 "StopIteration"); 357 358 iter = attrs->iter; 359 if (idx != 0) 360 STRING_ITER_SKIP(INTERP, attrs->str_val, &iter, idx); 361 next_iter = iter; 362 STRING_ITER_SKIP(INTERP, attrs->str_val, &next_iter, 1); 363 364 return 
Parrot_str_iter_substr(INTERP, attrs->str_val, &iter, &next_iter); 325 365 } 326 366 } 327 367 -
src/string/api.c
diff --git a/src/string/api.c b/src/string/api.c index 3c4e618..042beda 100644
a b 1270 1270 } 1271 1271 } 1272 1272 1273 /* 1274 1275 =item C<STRING * Parrot_str_iter_substr(PARROT_INTERP, STRING *str, const 1276 String_iter *l, const String_iter *r)> 1277 1278 Returns the substring between iterators C<l> and C<r>. 1279 1280 =cut 1281 1282 */ 1283 1284 PARROT_EXPORT 1285 PARROT_CANNOT_RETURN_NULL 1286 PARROT_WARN_UNUSED_RESULT 1287 STRING * 1288 Parrot_str_iter_substr(PARROT_INTERP, 1289 ARGMOD(STRING *str), 1290 ARGIN(const String_iter *l), ARGIN(const String_iter *r)) 1291 { 1292 ASSERT_ARGS(Parrot_str_iter_substr) 1293 STRING *dest = Parrot_str_new_COW(interp, str); 1294 1295 dest->strstart = (char *)dest->strstart + l->bytepos; 1296 dest->bufused = r->bytepos - l->bytepos; 1297 dest->strlen = r->charpos - l->charpos; 1298 dest->hashval = 0; 1299 1300 return dest; 1301 } 1302 1273 1303 1274 1304 /* 1275 1305 … … 1364 1394 } 1365 1395 1366 1396 /* get byte position of the part that will be replaced */ 1367 ENCODING_ITER_INIT(interp, src, &iter);1397 STRING_ITER_INIT(interp, &iter); 1368 1398 1369 iter.set_position(interp, &iter, true_offset);1399 STRING_ITER_SET_POSITION(interp, src, &iter, true_offset); 1370 1400 start_byte = iter.bytepos; 1371 1401 1372 iter.set_position(interp, &iter, true_offset + true_length);1402 STRING_ITER_SET_POSITION(interp, src, &iter, true_offset + true_length); 1373 1403 end_byte = iter.bytepos; 1374 1404 1375 1405 /* not possible.... 
*/ … … 1467 1497 Parrot_str_chopn_inplace(PARROT_INTERP, ARGMOD(STRING *s), INTVAL n) 1468 1498 { 1469 1499 ASSERT_ARGS(Parrot_str_chopn_inplace) 1470 UINTVAL new_length , uchar_size;1500 UINTVAL new_length; 1471 1501 1472 1502 if (n < 0) { 1473 1503 new_length = -n; … … 1488 1518 return; 1489 1519 } 1490 1520 1491 uchar_size = s->bufused / s->strlen;1492 s->strlen = new_length;1493 1494 1521 if (s->encoding == Parrot_fixed_8_encoding_ptr) { 1495 1522 s->bufused = new_length; 1496 1523 } 1497 1524 else if (s->encoding == Parrot_ucs2_encoding_ptr) { 1525 const UINTVAL uchar_size = s->bufused / s->strlen; 1498 1526 s->bufused = new_length * uchar_size; 1499 1527 } 1500 1528 else { 1501 1529 String_iter iter; 1502 1530 1503 ENCODING_ITER_INIT(interp, s, &iter);1504 iter.set_position(interp, &iter, new_length);1531 STRING_ITER_INIT(interp, &iter); 1532 STRING_ITER_SET_POSITION(interp, s, &iter, new_length); 1505 1533 s->bufused = iter.bytepos; 1506 1534 } 1507 1535 1536 s->strlen = new_length; 1537 1508 1538 return; 1509 1539 } 1510 1540 … … 2140 2170 int sign = 1; 2141 2171 INTVAL i = 0; 2142 2172 String_iter iter; 2143 UINTVAL offs;2144 2173 number_parse_state state = parse_start; 2145 2174 2146 ENCODING_ITER_INIT(interp, s, &iter);2175 STRING_ITER_INIT(interp, &iter); 2147 2176 2148 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {2149 const UINTVAL c = iter.get_and_advance(interp, &iter);2177 while (state != parse_end && iter.charpos < s->strlen) { 2178 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 2150 2179 /* Check for overflow */ 2151 2180 if (c > 255) 2152 2181 break; … … 2231 2260 int d_length = 0; 2232 2261 int check_nan = 0; /* Check for NaN and Inf after main loop */ 2233 2262 String_iter iter; 2234 UINTVAL offs;2235 2263 number_parse_state state = parse_start; 2236 2264 2237 2265 if (!s) 2238 2266 return 0.0; 2239 2267 2240 ENCODING_ITER_INIT(interp, s, &iter);2268 STRING_ITER_INIT(interp, &iter); 2241 2269 2242 2270 
/* Handcrafter FSM to read float value */ 2243 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {2244 const UINTVAL c = iter.get_and_advance(interp, &iter);2271 while (state != parse_end && iter.charpos < s->strlen) { 2272 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 2245 2273 /* Check for overflow */ 2246 2274 if (c > 255) 2247 2275 break; … … 2616 2644 { 2617 2645 ASSERT_ARGS(Parrot_str_to_hashval) 2618 2646 String_iter iter; 2619 UINTVAL offs;2620 2647 size_t hashval = interp->hash_seed; 2621 2648 2622 2649 if (!s) … … 2625 2652 /* ZZZZZ workaround for something not setting up encodings right */ 2626 2653 saneify_string(s); 2627 2654 2628 ENCODING_ITER_INIT(interp, s, &iter);2655 STRING_ITER_INIT(interp, &iter); 2629 2656 2630 for (offs = 0; offs < s->strlen; ++offs) {2631 const UINTVAL c = iter.get_and_advance(interp, &iter);2657 while (iter.charpos < s->strlen) { 2658 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 2632 2659 hashval += hashval << 5; 2633 2660 hashval += c; 2634 2661 } … … 2706 2733 Parrot_fixed_8_encoding_ptr, Parrot_ascii_charset_ptr, 0); 2707 2734 2708 2735 /* more work TODO */ 2709 ENCODING_ITER_INIT(interp, src, &iter);2736 STRING_ITER_INIT(interp, &iter); 2710 2737 dp = (unsigned char *)result->strstart; 2711 2738 2712 2739 for (i = 0; len > 0; --len) { 2713 UINTVAL c = iter.get_and_advance(interp, &iter);2740 UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 2714 2741 if (c < 0x7f) { 2715 2742 /* process ASCII chars */ 2716 2743 if (i >= charlen - 2) { … … 2867 2894 encoding = result->encoding; 2868 2895 } 2869 2896 2870 encoding->iter_init(interp, result, &iter);2897 STRING_ITER_INIT(interp, &iter); 2871 2898 2872 2899 for (offs = d = 0; offs < clength; ++offs) { 2873 2900 r = (Parrot_UInt4)((unsigned char *)result->strstart)[offs]; … … 2890 2917 } 2891 2918 2892 2919 PARROT_ASSERT(d < offs); 2893 iter.set_and_advance(interp, &iter, r);2920 
encoding->iter_set_and_advance(interp, result, &iter, r); 2894 2921 ++d; 2895 2922 } 2896 2923 … … 3425 3452 ARGIN_NULLOK(STRING *delim), ARGIN_NULLOK(STRING *str)) 3426 3453 { 3427 3454 ASSERT_ARGS(Parrot_str_split) 3428 PMC *res; 3429 INTVAL slen, dlen, ps, pe; 3455 PMC *res; 3456 STRING *tstr; 3457 UINTVAL slen, dlen, start, len; 3458 String_iter iter; 3430 3459 3431 3460 if (STRING_IS_NULL(delim) || STRING_IS_NULL(str)) 3432 3461 return PMCNULL; … … 3437 3466 if (!slen) 3438 3467 return res; 3439 3468 3469 STRING_ITER_INIT(interp, &iter); 3440 3470 dlen = Parrot_str_byte_length(interp, delim); 3441 3471 3442 3472 if (dlen == 0) { 3443 int i;3444 3473 VTABLE_set_integer_native(interp, res, slen); 3445 3474 3446 for (i = 0; i < slen; ++i) { 3447 STRING * const p = Parrot_str_substr(interp, str, i, 1, NULL, 0); 3448 VTABLE_set_string_keyed_int(interp, res, i, p); 3449 } 3475 do { 3476 const String_iter old_iter = iter; 3450 3477 3451 return res; 3452 } 3478 STRING_ITER_SKIP(interp, str, &iter, 1); 3479 tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter); 3480 VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr); 3481 } while (iter.charpos < slen); 3453 3482 3454 pe = Parrot_str_find_index(interp, str, delim, 0);3455 3456 if (pe < 0) {3457 VTABLE_push_string(interp, res, str);3458 3483 return res; 3459 3484 } 3460 3485 3461 ps = 0; 3462 3463 while (ps <= slen) { 3464 const int pl = pe - ps; 3465 STRING * const tstr = Parrot_str_substr(interp, str, ps, pl, NULL, 0); 3486 start = iter.bytepos; 3487 len = 0; 3466 3488 3467 VTABLE_push_string(interp, res, tstr); 3468 ps = pe + Parrot_str_byte_length(interp, delim); 3469 3470 if (ps > slen) 3471 break; 3489 do { 3490 UINTVAL end = start; 3491 String_iter delim_iter; 3492 3493 STRING_ITER_INIT(interp, &delim_iter); 3494 3495 while (delim_iter.charpos < dlen && iter.charpos < slen) { 3496 const UINTVAL c1 = STRING_ITER_GET_AND_ADVANCE(interp, str, &iter); 3497 const UINTVAL c2 = 
STRING_ITER_GET_AND_ADVANCE(interp, delim, &delim_iter); 3498 if (c1 != c2) { 3499 len += delim_iter.charpos; 3500 end = iter.bytepos; 3501 STRING_ITER_INIT(interp, &delim_iter); 3502 } 3503 } 3472 3504 3473 pe = Parrot_str_find_index(interp, str, delim, ps); 3505 if (delim_iter.charpos >= dlen) { 3506 tstr = Parrot_str_new_COW(interp, str); 3507 tstr->strstart = (char *)tstr->strstart + start; 3508 tstr->bufused = end - start; 3509 tstr->strlen = len; 3510 tstr->hashval = 0; 3511 VTABLE_push_string(interp, res, tstr); 3474 3512 3475 if (pe < 0) 3476 pe = slen; 3477 } 3513 start = iter.bytepos; 3514 len = 0; 3515 } 3516 else { 3517 len += delim_iter.charpos; 3518 } 3519 } while (iter.charpos < slen); 3520 3521 tstr = Parrot_str_new_COW(interp, str); 3522 tstr->strstart = (char *)tstr->strstart + start; 3523 tstr->bufused = iter.bytepos - start; 3524 tstr->strlen = len; 3525 tstr->hashval = 0; 3526 VTABLE_push_string(interp, res, tstr); 3478 3527 3479 3528 return res; 3480 3529 } -
src/string/charset/ascii.c
diff --git a/src/string/charset/ascii.c b/src/string/charset/ascii.c index 6618096..90d1eab 100644
a b 263 263 { 264 264 ASSERT_ARGS(to_ascii) 265 265 String_iter iter; 266 UINTVAL offs;267 266 unsigned char *p; 268 267 const UINTVAL len = src->strlen; 269 268 … … 275 274 dest = src; 276 275 } 277 276 p = (unsigned char *)dest->strstart; 278 ENCODING_ITER_INIT(interp, src, &iter);279 for (offs = 0; offs < len; ++offs) {280 const UINTVAL c = iter.get_and_advance(interp, &iter);277 STRING_ITER_INIT(interp, &iter); 278 while (iter.charpos < len) { 279 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 281 280 if (c >= 128) 282 281 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 283 282 "can't convert unicode string to ascii"); … … 557 556 return ret_val < 0 ? -1 : 1; 558 557 } 559 558 else { 560 UINTVAL offs; 561 ENCODING_ITER_INIT(interp, rhs, &iter); 562 for (offs = 0; offs < min_len; ++offs) { 563 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, offs); 564 const UINTVAL cr = iter.get_and_advance(interp, &iter); 559 STRING_ITER_INIT(interp, &iter); 560 while (iter.charpos < min_len) { 561 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos); 562 const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter); 565 563 if (cl != cr) 566 564 return cl < cr ? 
-1 : 1; 567 565 } … … 596 594 { 597 595 ASSERT_ARGS(mixed_cs_index) 598 596 String_iter src_iter, search_iter; 599 UINTVAL len; 600 INTVAL start; 601 602 ENCODING_ITER_INIT(interp, src, &src_iter); 603 src_iter.set_position(interp, &src_iter, offs); 604 ENCODING_ITER_INIT(interp, search, &search_iter); 605 len = search->strlen; 606 607 start = -1; 608 for (; len && offs < src->strlen; ++offs) { 609 const UINTVAL c1 = src_iter.get_and_advance(interp, &src_iter); 610 const UINTVAL c2 = search_iter.get_and_advance(interp, &search_iter); 611 if (c1 == c2) { 612 --len; 613 if (start == -1) 614 start = offs; 615 } 616 else { 617 len = search->strlen; 618 start = -1; 619 search_iter.set_position(interp, &search_iter, 0); 597 const UINTVAL len = search->strlen; 598 UINTVAL start; 599 600 STRING_ITER_INIT(interp, &src_iter); 601 STRING_ITER_SET_POSITION(interp, src, &src_iter, offs); 602 STRING_ITER_INIT(interp, &search_iter); 603 604 start = src_iter.charpos; 605 while (search_iter.charpos < len && src_iter.charpos < src->strlen) { 606 const UINTVAL c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, &src_iter); 607 const UINTVAL c2 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter); 608 if (c1 != c2) { 609 start = src_iter.charpos; 610 STRING_ITER_INIT(interp, &search_iter); 620 611 } 621 612 } 622 if ( len == 0)613 if (search_iter.charpos >= len) 623 614 return start; 624 615 return -1; 625 616 } … … 700 691 validate(PARROT_INTERP, ARGIN(STRING *src)) 701 692 { 702 693 ASSERT_ARGS(validate) 703 UINTVAL offset;694 const UINTVAL len = Parrot_str_byte_length(interp, src); 704 695 String_iter iter; 705 696 706 ENCODING_ITER_INIT(interp, src, &iter);707 for (offset = 0; offset < Parrot_str_byte_length(interp, src); ++offset) {708 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);697 STRING_ITER_INIT(interp, &iter); 698 while (iter.charpos < len) { 699 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 709 700 if (codepoint >= 0x80) 710 
701 return 0; 711 702 } -
src/string/charset/iso-8859-1.c
diff --git a/src/string/charset/iso-8859-1.c b/src/string/charset/iso-8859-1.c index b88c11d..65c663a 100644
a b 215 215 to_iso_8859_1(PARROT_INTERP, ARGIN(STRING *src), ARGMOD_NULLOK(STRING *dest)) 216 216 { 217 217 ASSERT_ARGS(to_iso_8859_1) 218 UINTVAL offs,src_len;218 UINTVAL src_len; 219 219 String_iter iter; 220 220 221 ENCODING_ITER_INIT(interp, src, &iter);221 STRING_ITER_INIT(interp, &iter); 222 222 src_len = src->strlen; 223 223 if (dest) { 224 224 Parrot_gc_reallocate_string_storage(interp, dest, src_len); … … 229 229 dest = src; 230 230 } 231 231 dest->bufused = src_len; 232 dest->charset = Parrot_iso_8859_1_charset_ptr; 233 dest->encoding = Parrot_fixed_8_encoding_ptr; 234 for (offs = 0; offs < src_len; ++offs) { 235 const UINTVAL c = iter.get_and_advance(interp, &iter); 232 while (iter.charpos < src_len) { 233 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 236 234 if (c >= 0x100) 237 235 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 238 236 "lossy conversion to iso-8559-1"); 239 237 240 ENCODING_SET_BYTE(interp, dest, offs, c);238 Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c); 241 239 } 240 dest->charset = Parrot_iso_8859_1_charset_ptr; 241 dest->encoding = Parrot_fixed_8_encoding_ptr; 242 242 return dest; 243 243 } 244 244 … … 258 258 { 259 259 ASSERT_ARGS(to_unicode) 260 260 if (dest) { 261 UINTVAL offs;262 261 String_iter iter; 263 262 264 263 dest->charset = Parrot_unicode_charset_ptr; 265 264 dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest); 266 265 Parrot_gc_reallocate_string_storage(interp, dest, src->strlen); 267 ENCODING_ITER_INIT(interp, dest, &iter);268 for (offs = 0; offs < src->strlen; ++offs) {269 const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);266 STRING_ITER_INIT(interp, &iter); 267 while (iter.charpos < src->strlen) { 268 const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos); 270 269 271 270 if (iter.bytepos >= Buffer_buflen(dest) - 4) { 272 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);271 UINTVAL need = (UINTVAL)((src->strlen - 
iter.charpos) * 1.5); 273 272 if (need < 16) 274 273 need = 16; 275 274 Parrot_gc_reallocate_string_storage(interp, dest, 276 275 Buffer_buflen(dest) + need); 277 276 } 278 iter.set_and_advance(interp, &iter, c);277 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c); 279 278 } 280 279 dest->bufused = iter.bytepos; 281 280 dest->strlen = iter.charpos; -
src/string/charset/unicode.c
diff --git a/src/string/charset/unicode.c b/src/string/charset/unicode.c index 77b0893..98f6e84 100644
a b 704 704 { 705 705 ASSERT_ARGS(compare) 706 706 String_iter l_iter, r_iter; 707 UINTVAL offs, cl, cr,min_len, l_len, r_len;707 UINTVAL min_len, l_len, r_len; 708 708 709 709 /* TODO make optimized equal - strings are equal length then already */ 710 ENCODING_ITER_INIT(interp, lhs, &l_iter);711 ENCODING_ITER_INIT(interp, rhs, &r_iter);710 STRING_ITER_INIT(interp, &l_iter); 711 STRING_ITER_INIT(interp, &r_iter); 712 712 713 713 l_len = lhs->strlen; 714 714 r_len = rhs->strlen; 715 715 716 716 min_len = l_len > r_len ? r_len : l_len; 717 717 718 for (offs = 0; offs < min_len; ++offs) {719 cl = l_iter.get_and_advance(interp, &l_iter);720 cr = r_iter.get_and_advance(interp, &r_iter);718 while (l_iter.charpos < min_len) { 719 UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter); 720 UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter); 721 721 722 722 if (cl != cr) 723 723 return cl < cr ? -1 : 1; … … 769 769 validate(PARROT_INTERP, ARGIN(STRING *src)) 770 770 { 771 771 ASSERT_ARGS(validate) 772 UINTVAL offset;772 UINTVAL len = Parrot_str_byte_length(interp, src); 773 773 String_iter iter; 774 774 775 ENCODING_ITER_INIT(interp, src, &iter);776 for (offset = 0; offset < Parrot_str_byte_length(interp, src); ++offset) {777 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);775 STRING_ITER_INIT(interp, &iter); 776 while (iter.charpos < len) { 777 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 778 778 /* Check for Unicode non-characters */ 779 779 if (codepoint >= 0xfdd0 780 780 && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe) … … 924 924 ASSERT_ARGS(find_cclass) 925 925 String_iter iter; 926 926 UINTVAL codepoint; 927 UINTVAL pos = offset;928 927 UINTVAL end = offset + count; 929 928 930 ENCODING_ITER_INIT(interp, source_string, &iter); 931 932 iter.set_position(interp, &iter, pos); 929 STRING_ITER_INIT(interp, &iter); 930 STRING_ITER_SET_POSITION(interp, source_string, &iter, offset); 933 931 934 932 
end = source_string->strlen < end ? source_string->strlen : end; 935 933 936 for (; pos < end; ++pos) {937 codepoint = iter.get_and_advance(interp, &iter);934 while (iter.charpos < end) { 935 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, source_string, &iter); 938 936 if (codepoint >= 256) { 939 937 if (u_iscclass(interp, codepoint, flags)) 940 return pos;938 return iter.charpos - 1; 941 939 } 942 940 else { 943 941 if (Parrot_iso_8859_1_typetable[codepoint] & flags) 944 return pos;942 return iter.charpos - 1; 945 943 } 946 944 } 947 945 … … 965 963 ASSERT_ARGS(find_not_cclass) 966 964 String_iter iter; 967 965 UINTVAL codepoint; 968 UINTVAL pos = offset;969 966 UINTVAL end = offset + count; 970 967 int bit; 971 968 972 if ( pos> source_string->strlen) {969 if (offset > source_string->strlen) { 973 970 /* XXX: Throw in this case? */ 974 971 return offset + count; 975 972 } 976 973 977 ENCODING_ITER_INIT(interp, source_string, &iter);974 STRING_ITER_INIT(interp, &iter); 978 975 979 if ( pos)980 iter.set_position(interp, &iter, pos);976 if (offset) 977 STRING_ITER_SET_POSITION(interp, source_string, &iter, offset); 981 978 982 979 end = source_string->strlen < end ? 
source_string->strlen : end; 983 980 984 981 if (flags == enum_cclass_any) 985 982 return end; 986 983 987 for (; pos < end; ++pos) {988 codepoint = iter.get_and_advance(interp, &iter);984 while (iter.charpos < end) { 985 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, source_string, &iter); 989 986 if (codepoint >= 256) { 990 987 for (bit = enum_cclass_uppercase; 991 988 bit <= enum_cclass_word ; bit <<= 1) { 992 989 if ((bit & flags) && !u_iscclass(interp, codepoint, bit)) 993 return pos;990 return iter.charpos - 1; 994 991 } 995 992 } 996 993 else { 997 994 if (!(Parrot_iso_8859_1_typetable[codepoint] & flags)) 998 return pos;995 return iter.charpos - 1; 999 996 } 1000 997 } 1001 998 … … 1023 1020 1024 1021 dest->strlen = 1; 1025 1022 1026 ENCODING_ITER_INIT(interp, dest, &iter);1027 iter.set_and_advance(interp, &iter, codepoint);1023 STRING_ITER_INIT(interp, &iter); 1024 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint); 1028 1025 dest->bufused = iter.bytepos; 1029 1026 1030 1027 return dest; … … 1047 1044 { 1048 1045 ASSERT_ARGS(compute_hash) 1049 1046 String_iter iter; 1050 UINTVAL offs;1051 1047 size_t hashval = seed; 1052 1048 1053 ENCODING_ITER_INIT(interp, src, &iter);1049 STRING_ITER_INIT(interp, &iter); 1054 1050 1055 for (offs = 0; offs < src->strlen; ++offs) {1056 const UINTVAL c = iter.get_and_advance(interp, &iter);1051 while (iter.charpos < src->strlen) { 1052 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 1057 1053 hashval += hashval << 5; 1058 1054 hashval += c; 1059 1055 } -
src/string/encoding/fixed_8.c
diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index dd41129..712479d 100644
a b 50 50 __attribute__nonnull__(2) 51 51 FUNC_MODIFIES(*iter); 52 52 53 static UINTVAL fixed8_iter_get(PARROT_INTERP, 54 ARGIN(const STRING *str), 55 ARGIN(const String_iter *iter), 56 INTVAL offset) 57 __attribute__nonnull__(1) 58 __attribute__nonnull__(2) 59 __attribute__nonnull__(3); 60 61 static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, 62 ARGIN(const STRING *str), 63 ARGMOD(String_iter *iter)) 64 __attribute__nonnull__(1) 65 __attribute__nonnull__(2) 66 __attribute__nonnull__(3) 67 FUNC_MODIFIES(*iter); 68 69 static void fixed8_iter_set_and_advance(PARROT_INTERP, 70 ARGMOD(STRING *str), 71 ARGMOD(String_iter *iter), 72 UINTVAL c) 73 __attribute__nonnull__(1) 74 __attribute__nonnull__(2) 75 __attribute__nonnull__(3) 76 FUNC_MODIFIES(*str) 77 FUNC_MODIFIES(*iter); 78 79 static void fixed8_iter_set_position(SHIM_INTERP, 80 ARGIN(const STRING *str), 81 ARGMOD(String_iter *iter), 82 UINTVAL pos) 83 __attribute__nonnull__(2) 84 __attribute__nonnull__(3) 85 FUNC_MODIFIES(*iter); 86 87 static void fixed8_iter_skip(SHIM_INTERP, 88 ARGIN(const STRING *str), 89 ARGMOD(String_iter *iter), 90 INTVAL skip) 91 __attribute__nonnull__(2) 92 __attribute__nonnull__(3) 93 FUNC_MODIFIES(*iter); 94 53 95 static void fixed8_set_next(PARROT_INTERP, 54 96 ARGMOD(String_iter *iter), 55 97 UINTVAL c) … … 181 223 #define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 182 224 PARROT_ASSERT_ARG(interp) \ 183 225 , PARROT_ASSERT_ARG(iter)) 226 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 227 PARROT_ASSERT_ARG(interp) \ 228 , PARROT_ASSERT_ARG(str) \ 229 , PARROT_ASSERT_ARG(iter)) 230 #define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 231 PARROT_ASSERT_ARG(interp) \ 232 , PARROT_ASSERT_ARG(str) \ 233 , PARROT_ASSERT_ARG(iter)) 234 #define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 235 PARROT_ASSERT_ARG(interp) \ 236 , 
PARROT_ASSERT_ARG(str) \ 237 , PARROT_ASSERT_ARG(iter)) 238 #define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 239 PARROT_ASSERT_ARG(str) \ 240 , PARROT_ASSERT_ARG(iter)) 241 #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 242 PARROT_ASSERT_ARG(str) \ 243 , PARROT_ASSERT_ARG(iter)) 184 244 #define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 185 245 PARROT_ASSERT_ARG(interp) \ 186 246 , PARROT_ASSERT_ARG(iter)) … … 581 641 582 642 /* 583 643 644 =item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const 645 String_iter *iter, INTVAL offset)> 646 647 Returns the character (read with C<get_byte>) at the character position of C<iter> plus C<offset>. The iterator itself is not modified. 648 649 =cut 650 651 */ 652 653 static UINTVAL 654 fixed8_iter_get(PARROT_INTERP, 655 ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset) 656 { 657 ASSERT_ARGS(fixed8_iter_get) 658 return get_byte(interp, str, iter->charpos + offset); 659 } 660 661 /* 662 663 =item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str, 664 String_iter *iter, INTVAL skip)> 665 666 Moves the string iterator C<iter> by C<skip> characters, adjusting its byte and character positions by the same amount, and asserts that the new byte position does not exceed C<Buffer_buflen(str)>. 667 668 =cut 669 670 */ 671 672 static void 673 fixed8_iter_skip(SHIM_INTERP, 674 ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip) 675 { 676 ASSERT_ARGS(fixed8_iter_skip) 677 iter->bytepos += skip; 678 iter->charpos += skip; 679 PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str)); 680 } 681 682 /* 683 684 =item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING 685 *str, String_iter *iter)> 686 687 Returns the character at the current position of the string iterator C<iter> and advances the iterator by one character. 
688 689 =cut 690 691 */ 692 693 static UINTVAL 694 fixed8_iter_get_and_advance(PARROT_INTERP, 695 ARGIN(const STRING *str), ARGMOD(String_iter *iter)) 696 { 697 ASSERT_ARGS(fixed8_iter_get_and_advance) 698 const UINTVAL c = get_byte(interp, str, iter->charpos++); 699 iter->bytepos++; 700 return c; 701 } 702 703 /* 704 705 =item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str, 706 String_iter *iter, UINTVAL c)> 707 708 With the string iterator C<iter>, stores C<c> at the current position (using 709 C<set_byte>) and advances the iterator by one character. 710 711 =cut 712 713 */ 714 715 static void 716 fixed8_iter_set_and_advance(PARROT_INTERP, 717 ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c) 718 { 719 ASSERT_ARGS(fixed8_iter_set_and_advance) 720 set_byte(interp, str, iter->charpos++, c); 721 iter->bytepos++; 722 } 723 724 /* 725 726 =item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str, 727 String_iter *iter, UINTVAL pos)> 728 729 Moves the string iterator C<iter> to the position C<pos> in the string; the byte and character positions are both set to C<pos>. Asserts that C<pos> does not exceed C<Buffer_buflen(str)>. 730 731 =cut 732 733 */ 734 735 static void 736 fixed8_iter_set_position(SHIM_INTERP, 737 ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos) 738 { 739 ASSERT_ARGS(fixed8_iter_set_position) 740 iter->bytepos = iter->charpos = pos; 741 PARROT_ASSERT(pos <= Buffer_buflen(str)); 742 } 743 744 /* 745 584 746 =item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)> 585 747 586 748 Moves the string iterator C<i> to the next codepoint. … … 695 857 codepoints, 696 858 bytes, 697 859 iter_init, 698 find_cclass 860 find_cclass, 861 fixed8_iter_get, 862 fixed8_iter_skip, 863 fixed8_iter_get_and_advance, 864 fixed8_iter_set_and_advance, 865 fixed8_iter_set_position 699 866 700 867 }; 701 868 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); -
src/string/encoding/ucs2.c
diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 71ef8b1..6a7459c 100644
a b 164 164 __attribute__nonnull__(2) 165 165 FUNC_MODIFIES(*i); 166 166 167 static UINTVAL ucs2_iter_get(PARROT_INTERP, 168 ARGIN(const STRING *str), 169 ARGIN(const String_iter *i), 170 INTVAL offset) 171 __attribute__nonnull__(1) 172 __attribute__nonnull__(2) 173 __attribute__nonnull__(3); 174 175 static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, 176 ARGIN(const STRING *str), 177 ARGMOD(String_iter *i)) 178 __attribute__nonnull__(1) 179 __attribute__nonnull__(2) 180 __attribute__nonnull__(3) 181 FUNC_MODIFIES(*i); 182 183 static void ucs2_iter_set_and_advance(PARROT_INTERP, 184 ARGMOD(STRING *str), 185 ARGMOD(String_iter *i), 186 UINTVAL c) 187 __attribute__nonnull__(1) 188 __attribute__nonnull__(2) 189 __attribute__nonnull__(3) 190 FUNC_MODIFIES(*str) 191 FUNC_MODIFIES(*i); 192 193 static void ucs2_iter_set_position(SHIM_INTERP, 194 ARGIN(const STRING *str), 195 ARGMOD(String_iter *i), 196 UINTVAL n) 197 __attribute__nonnull__(2) 198 __attribute__nonnull__(3) 199 FUNC_MODIFIES(*i); 200 201 static void ucs2_iter_skip(SHIM_INTERP, 202 ARGIN(const STRING *str), 203 ARGMOD(String_iter *i), 204 INTVAL skip) 205 __attribute__nonnull__(2) 206 __attribute__nonnull__(3) 207 FUNC_MODIFIES(*i); 208 167 209 static void ucs2_set_position(SHIM_INTERP, 168 210 ARGMOD(String_iter *i), 169 211 UINTVAL n) … … 219 261 #define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 220 262 PARROT_ASSERT_ARG(interp) \ 221 263 , PARROT_ASSERT_ARG(i)) 264 #define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 265 PARROT_ASSERT_ARG(interp) \ 266 , PARROT_ASSERT_ARG(str) \ 267 , PARROT_ASSERT_ARG(i)) 268 #define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 269 PARROT_ASSERT_ARG(interp) \ 270 , PARROT_ASSERT_ARG(str) \ 271 , PARROT_ASSERT_ARG(i)) 272 #define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 273 PARROT_ASSERT_ARG(interp) \ 274 
, PARROT_ASSERT_ARG(str) \ 275 , PARROT_ASSERT_ARG(i)) 276 #define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 277 PARROT_ASSERT_ARG(str) \ 278 , PARROT_ASSERT_ARG(i)) 279 #define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 280 PARROT_ASSERT_ARG(str) \ 281 , PARROT_ASSERT_ARG(i)) 222 282 #define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 223 283 PARROT_ASSERT_ARG(i)) 224 284 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ … … 397 457 String_iter iter; 398 458 UINTVAL start; 399 459 400 iter_init(interp, src, &iter);401 iter.set_position(interp, &iter, offset);460 STRING_ITER_INIT(interp, &iter); 461 ucs2_iter_set_position(interp, src, &iter, offset); 402 462 start = iter.bytepos; 403 463 return_string->strstart = (char *)return_string->strstart + start; 404 iter.set_position(interp, &iter, offset + count);464 ucs2_iter_set_position(interp, src, &iter, offset + count); 405 465 return_string->bufused = iter.bytepos - start; 406 466 } 407 467 #endif … … 576 636 577 637 /* 578 638 639 =item C<static UINTVAL ucs2_iter_get(PARROT_INTERP, const STRING *str, const 640 String_iter *i, INTVAL offset)> 641 642 Returns the character (looked up with C<get_codepoint>) at the character position of C<i> plus C<offset>. The iterator itself is not modified. 643 644 =cut 645 646 */ 647 648 static UINTVAL 649 ucs2_iter_get(PARROT_INTERP, 650 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 651 { 652 ASSERT_ARGS(ucs2_iter_get) 653 return get_codepoint(interp, str, i->charpos + offset); 654 } 655 656 /* 657 658 =item C<static void ucs2_iter_skip(PARROT_INTERP, const STRING *str, String_iter 659 *i, INTVAL skip)> 660 661 Moves the string iterator C<i> by C<skip> characters; the byte position changes by C<skip * sizeof (UChar)>. 
662 663 =cut 664 665 */ 666 667 static void 668 ucs2_iter_skip(SHIM_INTERP, 669 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 670 { 671 ASSERT_ARGS(ucs2_iter_skip) 672 673 #if PARROT_HAS_ICU 674 i->charpos += skip; 675 i->bytepos += skip * sizeof (UChar); 676 #else 677 /* This function must never be called if compiled without ICU. 678 * See TT #557 679 */ 680 PARROT_ASSERT(0); 681 #endif 682 } 683 684 /* 685 686 =item C<static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, const STRING 687 *str, String_iter *i)> 688 689 Returns the UCS-2 code unit at the current position of the string iterator C<i> and advances C<i> by one character. No bounds check is performed. 690 691 =cut 692 693 */ 694 695 static UINTVAL 696 ucs2_iter_get_and_advance(PARROT_INTERP, 697 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 698 { 699 ASSERT_ARGS(ucs2_iter_get_and_advance) 700 701 #if PARROT_HAS_ICU 702 UChar * const s = (UChar*) str->strstart; 703 size_t pos = i->bytepos / sizeof (UChar); 704 705 /* TODO either make sure that we don't go past end or use SAFE 706 * iter versions 707 */ 708 const UChar c = s[pos++]; 709 i->charpos++; 710 i->bytepos = pos * sizeof (UChar); 711 return c; 712 #else 713 /* This function must never be called if compiled without ICU. 714 * See TT #557 715 */ 716 PARROT_ASSERT(0); 717 return (UINTVAL)0; /* Stop the static analyzers from panicking */ 718 #endif 719 } 720 721 /* 722 723 =item C<static void ucs2_iter_set_and_advance(PARROT_INTERP, STRING *str, 724 String_iter *i, UINTVAL c)> 725 726 With the string iterator C<i>, stores C<c> (cast to C<UChar>) at the current position and advances to the 727 next position in the string. 
728 729 =cut 730 731 */ 732 733 static void 734 ucs2_iter_set_and_advance(PARROT_INTERP, 735 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 736 { 737 ASSERT_ARGS(ucs2_iter_set_and_advance) 738 739 #if PARROT_HAS_ICU 740 UChar * const s = (UChar*) str->strstart; 741 UINTVAL pos = i->bytepos / sizeof (UChar); 742 s[pos++] = (UChar)c; 743 i->charpos++; 744 i->bytepos = pos * sizeof (UChar); 745 #else 746 /* This function must never be called if compiled without ICU. 747 * See TT #557 748 */ 749 PARROT_ASSERT(0); 750 #endif 751 } 752 753 /* 754 755 =item C<static void ucs2_iter_set_position(PARROT_INTERP, const STRING *str, 756 String_iter *i, UINTVAL n)> 757 758 Moves the string iterator C<i> to the character position C<n> in the string; the byte position becomes C<n * sizeof (UChar)>. No bounds check is performed. 759 760 =cut 761 762 */ 763 764 static void 765 ucs2_iter_set_position(SHIM_INTERP, 766 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 767 { 768 ASSERT_ARGS(ucs2_iter_set_position) 769 770 #if PARROT_HAS_ICU 771 i->charpos = n; 772 i->bytepos = n * sizeof (UChar); 773 #else 774 /* This function must never be called if compiled without ICU. 775 * See TT #557 776 */ 777 PARROT_ASSERT(0); 778 #endif 779 } 780 781 /* 782 579 783 =item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)> 580 784 581 785 Moves the string iterator C<i> to the next UCS-2 codepoint. … … 729 933 codepoints, 730 934 bytes, 731 935 iter_init, 732 find_cclass 936 find_cclass, 937 ucs2_iter_get, 938 ucs2_iter_skip, 939 ucs2_iter_get_and_advance, 940 ucs2_iter_set_and_advance, 941 ucs2_iter_set_position 733 942 }; 734 943 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 735 944 Parrot_register_encoding(interp, "ucs2", return_encoding); -
src/string/encoding/utf16.c
diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index f94e6e8..4d810a7 100644
a b 161 161 __attribute__nonnull__(2) 162 162 FUNC_MODIFIES(*i); 163 163 164 static UINTVAL utf16_iter_get(PARROT_INTERP, 165 ARGIN(const STRING *str), 166 ARGIN(const String_iter *i), 167 INTVAL offset) 168 __attribute__nonnull__(1) 169 __attribute__nonnull__(2) 170 __attribute__nonnull__(3); 171 172 PARROT_WARN_UNUSED_RESULT 173 static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, 174 ARGIN(const STRING *str), 175 ARGMOD(String_iter *i)) 176 __attribute__nonnull__(1) 177 __attribute__nonnull__(2) 178 __attribute__nonnull__(3) 179 FUNC_MODIFIES(*i); 180 181 static void utf16_iter_set_and_advance(PARROT_INTERP, 182 ARGMOD(STRING *str), 183 ARGMOD(String_iter *i), 184 UINTVAL c) 185 __attribute__nonnull__(1) 186 __attribute__nonnull__(2) 187 __attribute__nonnull__(3) 188 FUNC_MODIFIES(*str) 189 FUNC_MODIFIES(*i); 190 191 static void utf16_iter_set_position(PARROT_INTERP, 192 ARGIN(const STRING *str), 193 ARGMOD(String_iter *i), 194 UINTVAL n) 195 __attribute__nonnull__(1) 196 __attribute__nonnull__(2) 197 __attribute__nonnull__(3) 198 FUNC_MODIFIES(*i); 199 200 static void utf16_iter_skip(PARROT_INTERP, 201 ARGIN(const STRING *str), 202 ARGMOD(String_iter *i), 203 INTVAL skip) 204 __attribute__nonnull__(1) 205 __attribute__nonnull__(2) 206 __attribute__nonnull__(3) 207 FUNC_MODIFIES(*i); 208 164 209 static void utf16_set_position(PARROT_INTERP, 165 210 ARGMOD(String_iter *i), 166 211 UINTVAL n) … … 223 268 #define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 224 269 PARROT_ASSERT_ARG(interp) \ 225 270 , PARROT_ASSERT_ARG(i)) 271 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 272 PARROT_ASSERT_ARG(interp) \ 273 , PARROT_ASSERT_ARG(str) \ 274 , PARROT_ASSERT_ARG(i)) 275 #define ASSERT_ARGS_utf16_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 276 PARROT_ASSERT_ARG(interp) \ 277 , PARROT_ASSERT_ARG(str) \ 278 , PARROT_ASSERT_ARG(i)) 279 #define 
ASSERT_ARGS_utf16_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 280 PARROT_ASSERT_ARG(interp) \ 281 , PARROT_ASSERT_ARG(str) \ 282 , PARROT_ASSERT_ARG(i)) 283 #define ASSERT_ARGS_utf16_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 284 PARROT_ASSERT_ARG(interp) \ 285 , PARROT_ASSERT_ARG(str) \ 286 , PARROT_ASSERT_ARG(i)) 287 #define ASSERT_ARGS_utf16_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 288 PARROT_ASSERT_ARG(interp) \ 289 , PARROT_ASSERT_ARG(str) \ 290 , PARROT_ASSERT_ARG(i)) 226 291 #define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 227 292 PARROT_ASSERT_ARG(interp) \ 228 293 , PARROT_ASSERT_ARG(i)) … … 498 563 UINTVAL start; 499 564 STRING * const return_string = Parrot_str_new_COW(interp, src); 500 565 501 iter_init(interp, src, &iter);502 iter.set_position(interp, &iter, offset);566 STRING_ITER_INIT(interp, &iter); 567 utf16_iter_set_position(interp, src, &iter, offset); 503 568 start = iter.bytepos; 504 569 return_string->strstart = (char *)return_string->strstart + start ; 505 iter.set_position(interp, &iter, offset +count);570 utf16_iter_skip(interp, src, &iter, count); 506 571 return_string->bufused = iter.bytepos - start; 507 572 return_string->strlen = count; 508 573 return_string->hashval = 0; … … 532 597 String_iter iter; 533 598 UINTVAL start; 534 599 Parrot_str_reuse_COW(interp, src, return_string); 535 iter_init(interp, src, &iter);536 iter.set_position(interp, &iter, offset);600 STRING_ITER_INIT(interp, &iter); 601 utf16_iter_set_position(interp, src, &iter, offset); 537 602 start = iter.bytepos; 538 603 return_string->strstart = (char *)return_string->strstart + start ; 539 iter.set_position(interp, &iter, offset +count);604 utf16_iter_skip(interp, src, &iter, count); 540 605 return_string->bufused = iter.bytepos - start; 541 606 return_string->strlen = count; 542 607 return_string->hashval = 0; … … 675 740 codepoints(PARROT_INTERP, 
ARGIN(STRING *src)) 676 741 { 677 742 ASSERT_ARGS(codepoints) 678 String_iter iter; 743 #if PARROT_HAS_ICU 744 UChar *s = (UChar*) src->strstart; 745 UINTVAL pos = 0, charpos = 0; 679 746 /* 680 747 * this is used to initially calculate src->strlen, 681 748 * therefore we must scan the whole string and count characters (not UChar units or bytes) 682 749 */ 683 iter_init(interp, src, &iter); 684 while (iter.bytepos < src->bufused) 685 iter.get_and_advance(interp, &iter); 686 return iter.charpos; 750 for (; pos * sizeof(UChar) < src->bufused; ++charpos) { 751 U16_FWD_1_UNSAFE(s, pos); 752 } 753 return charpos; 754 #else 755 UNUSED(src); 756 757 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 758 "no ICU lib loaded"); 759 #endif 687 760 } 688 761 689 762 /* … … 704 777 return src->bufused; 705 778 } 706 779 780 /* 781 782 =item C<static UINTVAL utf16_iter_get(PARROT_INTERP, const STRING *str, const 783 String_iter *i, INTVAL offset)> 784 785 Returns the codepoint C<offset> characters away from the string iterator C<i> (C<offset> may be negative). The iterator itself is not modified. 786 787 =cut 788 789 */ 790 791 static UINTVAL 792 utf16_iter_get(PARROT_INTERP, 793 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 794 { 795 ASSERT_ARGS(utf16_iter_get) 796 #if PARROT_HAS_ICU 797 UChar *s = (UChar*) str->strstart; 798 UINTVAL c, pos; 799 800 pos = i->bytepos / sizeof (UChar); 801 if (offset > 0) { 802 U16_FWD_N_UNSAFE(s, pos, offset); 803 } 804 else if (offset < 0) { 805 U16_BACK_N_UNSAFE(s, pos, -offset); 806 } 807 U16_GET_UNSAFE(s, pos, c); 808 809 return c; 810 #else 811 UNUSED(str); 812 UNUSED(i); 813 UNUSED(offset); 814 815 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 816 "no ICU lib loaded"); 817 #endif 818 } 819 820 /* 821 822 =item C<static void utf16_iter_skip(PARROT_INTERP, const STRING *str, 823 String_iter *i, INTVAL skip)> 824 825 Moves the string iterator C<i> by C<skip> characters. 
826 827 =cut 828 829 */ 830 831 static void 832 utf16_iter_skip(PARROT_INTERP, 833 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 834 { 835 ASSERT_ARGS(utf16_iter_skip) 836 #if PARROT_HAS_ICU 837 UChar * const s = (UChar*) str->strstart; 838 UINTVAL pos = i->bytepos / sizeof (UChar); 839 840 if (skip > 0) { 841 U16_FWD_N_UNSAFE(s, pos, skip); 842 } 843 else if (skip < 0) { 844 U16_BACK_N_UNSAFE(s, pos, -skip); 845 } 846 847 i->charpos += skip; 848 i->bytepos = pos * sizeof (UChar); 849 #else 850 UNUSED(str); 851 UNUSED(i); 852 UNUSED(skip); 853 854 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 855 "no ICU lib loaded"); 856 #endif 857 } 858 859 /* 860 861 =item C<static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, const STRING 862 *str, String_iter *i)> 863 864 Returns the UTF-16 codepoint at the current position of the string iterator C<i> and advances C<i> past it. No bounds check is performed. 865 866 =cut 867 868 */ 869 870 PARROT_WARN_UNUSED_RESULT 871 static UINTVAL 872 utf16_iter_get_and_advance(PARROT_INTERP, 873 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 874 { 875 ASSERT_ARGS(utf16_iter_get_and_advance) 876 #if PARROT_HAS_ICU 877 UChar *s = (UChar*) str->strstart; 878 UINTVAL c, pos; 879 pos = i->bytepos / sizeof (UChar); 880 /* TODO either make sure that we don't go past end or use SAFE 881 * iter versions 882 */ 883 U16_NEXT_UNSAFE(s, pos, c); 884 i->charpos++; 885 i->bytepos = pos * sizeof (UChar); 886 return c; 887 #else 888 UNUSED(str); 889 UNUSED(i); 890 891 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 892 "no ICU lib loaded"); 893 #endif 894 } 895 896 /* 897 898 =item C<static void utf16_iter_set_and_advance(PARROT_INTERP, STRING *str, 899 String_iter *i, UINTVAL c)> 900 901 With the string iterator C<i>, writes the codepoint C<c> at the current position (using C<U16_APPEND_UNSAFE>) and advances to the 902 next position in the string. 
903 904 =cut 905 906 */ 907 908 static void 909 utf16_iter_set_and_advance(PARROT_INTERP, 910 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 911 { 912 ASSERT_ARGS(utf16_iter_set_and_advance) 913 #if PARROT_HAS_ICU 914 UChar *s = (UChar*) str->strstart; 915 UINTVAL pos; 916 pos = i->bytepos / sizeof (UChar); 917 U16_APPEND_UNSAFE(s, pos, c); 918 i->charpos++; 919 i->bytepos = pos * sizeof (UChar); 920 #else 921 UNUSED(str); 922 UNUSED(i); 923 UNUSED(c); 924 925 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 926 "no ICU lib loaded"); 927 #endif 928 } 929 930 /* 931 932 =item C<static void utf16_iter_set_position(PARROT_INTERP, const STRING *str, 933 String_iter *i, UINTVAL n)> 934 935 Moves the string iterator C<i> to the character position C<n> in the string. The byte position is found by scanning C<n> characters forward from the start of the string. 936 937 =cut 938 939 */ 940 941 static void 942 utf16_iter_set_position(PARROT_INTERP, 943 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 944 { 945 ASSERT_ARGS(utf16_iter_set_position) 946 #if PARROT_HAS_ICU 947 UChar * const s = (UChar*) str->strstart; 948 UINTVAL pos; 949 pos = 0; 950 U16_FWD_N_UNSAFE(s, pos, n); 951 i->charpos = n; 952 i->bytepos = pos * sizeof (UChar); 953 #else 954 UNUSED(str); 955 UNUSED(i); 956 UNUSED(n); 957 958 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 959 "no ICU lib loaded"); 960 #endif 961 } 962 707 963 #if PARROT_HAS_ICU 708 964 /* 709 965 … … 843 1099 codepoints, 844 1100 bytes, 845 1101 iter_init, 846 find_cclass 1102 find_cclass, 1103 utf16_iter_get, 1104 utf16_iter_skip, 1105 utf16_iter_get_and_advance, 1106 utf16_iter_set_and_advance, 1107 utf16_iter_set_position 847 1108 }; 848 1109 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 849 1110 Parrot_register_encoding(interp, "utf16", return_encoding); -
src/string/encoding/utf8.c
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index 1da82fa..4706596 100644
a b 170 170 __attribute__nonnull__(2) 171 171 FUNC_MODIFIES(*i); 172 172 173 static UINTVAL utf8_iter_get(PARROT_INTERP, 174 ARGIN(const STRING *str), 175 ARGIN(const String_iter *i), 176 INTVAL offset) 177 __attribute__nonnull__(1) 178 __attribute__nonnull__(2) 179 __attribute__nonnull__(3); 180 181 static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, 182 ARGIN(const STRING *str), 183 ARGMOD(String_iter *i)) 184 __attribute__nonnull__(1) 185 __attribute__nonnull__(2) 186 __attribute__nonnull__(3) 187 FUNC_MODIFIES(*i); 188 189 static void utf8_iter_set_and_advance(PARROT_INTERP, 190 ARGMOD(STRING *str), 191 ARGMOD(String_iter *i), 192 UINTVAL c) 193 __attribute__nonnull__(1) 194 __attribute__nonnull__(2) 195 __attribute__nonnull__(3) 196 FUNC_MODIFIES(*str) 197 FUNC_MODIFIES(*i); 198 199 static void utf8_iter_set_position(SHIM_INTERP, 200 ARGIN(const STRING *str), 201 ARGMOD(String_iter *i), 202 UINTVAL pos) 203 __attribute__nonnull__(2) 204 __attribute__nonnull__(3) 205 FUNC_MODIFIES(*i); 206 207 static void utf8_iter_skip(SHIM_INTERP, 208 ARGIN(const STRING *str), 209 ARGMOD(String_iter *i), 210 INTVAL skip) 211 __attribute__nonnull__(2) 212 __attribute__nonnull__(3) 213 FUNC_MODIFIES(*i); 214 173 215 static void utf8_set_position(SHIM_INTERP, 174 216 ARGMOD(String_iter *i), 175 217 UINTVAL pos) … … 244 286 #define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 245 287 PARROT_ASSERT_ARG(interp) \ 246 288 , PARROT_ASSERT_ARG(i)) 289 #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 290 PARROT_ASSERT_ARG(interp) \ 291 , PARROT_ASSERT_ARG(str) \ 292 , PARROT_ASSERT_ARG(i)) 293 #define ASSERT_ARGS_utf8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 294 PARROT_ASSERT_ARG(interp) \ 295 , PARROT_ASSERT_ARG(str) \ 296 , PARROT_ASSERT_ARG(i)) 297 #define ASSERT_ARGS_utf8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 298 PARROT_ASSERT_ARG(interp) \ 
299 , PARROT_ASSERT_ARG(str) \ 300 , PARROT_ASSERT_ARG(i)) 301 #define ASSERT_ARGS_utf8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 302 PARROT_ASSERT_ARG(str) \ 303 , PARROT_ASSERT_ARG(i)) 304 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 305 PARROT_ASSERT_ARG(str) \ 306 , PARROT_ASSERT_ARG(i)) 247 307 #define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 248 308 PARROT_ASSERT_ARG(i)) 249 309 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 456 516 457 517 /* 458 518 519 =item C<static UINTVAL utf8_iter_get(PARROT_INTERP, const STRING *str, const 520 String_iter *i, INTVAL offset)> 521 522 Returns the character C<offset> characters away from the string iterator C<i> (C<offset> may be negative). The iterator itself is not modified. 523 524 =cut 525 526 */ 527 528 static UINTVAL 529 utf8_iter_get(PARROT_INTERP, 530 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 531 { 532 ASSERT_ARGS(utf8_iter_get) 533 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 534 535 if (offset > 0) { 536 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, offset); 537 } 538 else if (offset < 0) { 539 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -offset); /* skip_backward takes a positive character count */ 540 } 541 542 return utf8_decode(interp, u8ptr); 543 } 544 545 /* 546 547 =item C<static void utf8_iter_skip(PARROT_INTERP, const STRING *str, String_iter 548 *i, INTVAL skip)> 549 550 Moves the string iterator C<i> by C<skip> characters. 
551 552 =cut 553 554 */ 555 556 static void 557 utf8_iter_skip(SHIM_INTERP, 558 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 559 { 560 ASSERT_ARGS(utf8_iter_skip) 561 const utf8_t *u8ptr = (const utf8_t *)((const char *)str->strstart + i->bytepos); /* skip relative to the current position, not the string start */ 562 563 if (skip > 0) { 564 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, skip); 565 } 566 else if (skip < 0) { 567 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -skip); 568 } 569 570 i->charpos += skip; 571 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 572 } 573 574 /* 575 576 =item C<static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, const STRING 577 *str, String_iter *i)> 578 579 The UTF-8 implementation of the string iterator's C<get_and_advance> 580 function: decodes the character at C<i>, advances C<i> past it and returns the codepoint. Throws C<EXCEPTION_MALFORMED_UTF8> for malformed sequences and surrogates. 581 582 =cut 583 584 */ 585 586 static UINTVAL 587 utf8_iter_get_and_advance(PARROT_INTERP, 588 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 589 { 590 ASSERT_ARGS(utf8_iter_get_and_advance) 591 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 592 UINTVAL c = *u8ptr; 593 594 if (UTF8_IS_START(c)) { 595 UINTVAL len = UTF8SKIP(u8ptr); 596 597 c &= UTF8_START_MASK(len); 598 i->bytepos += len; 599 for (len--; len; len--) { 600 u8ptr++; 601 602 if (!UTF8_IS_CONTINUATION(*u8ptr)) 603 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 604 "Malformed UTF-8 string\n"); 605 c = UTF8_ACCUMULATE(c, *u8ptr); 606 } 607 608 if (UNICODE_IS_SURROGATE(c)) 609 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 610 "Surrogate in UTF-8 string\n"); 611 } 612 else if (!UNICODE_IS_INVARIANT(c)) { 613 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 614 "Malformed UTF-8 string\n"); 615 } 616 else { 617 i->bytepos++; 618 } 619 620 i->charpos++; 621 return c; 622 } 623 624 /* 625 626 =item C<static void utf8_iter_set_and_advance(PARROT_INTERP, STRING *str, 627 String_iter *i, UINTVAL c)> 628 629 The UTF-8 implementation of the string iterator's C<set_and_advance> 630 function. 
631 632 =cut 633 634 */ 635 636 static void 637 utf8_iter_set_and_advance(PARROT_INTERP, 638 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 639 { 640 ASSERT_ARGS(utf8_iter_set_and_advance) 641 unsigned char * const pos = (unsigned char *)str->strstart + i->bytepos; 642 unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c); 643 644 i->bytepos += (new_pos - pos); 645 /* XXX the encoded bytes have already been written when this check runs; possible buffer overrun exception? */ 646 PARROT_ASSERT(i->bytepos <= Buffer_buflen(str)); 647 i->charpos++; 648 } 649 650 /* 651 652 =item C<static void utf8_iter_set_position(PARROT_INTERP, const STRING *str, 653 String_iter *i, UINTVAL pos)> 654 655 The UTF-8 implementation of the string iterator's C<set_position> 656 function: moves C<i> to character position C<pos>, skipping forward or backward from the string start, the current position or the string end. 657 658 =cut 659 660 */ 661 662 static void 663 utf8_iter_set_position(SHIM_INTERP, 664 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL pos) 665 { 666 ASSERT_ARGS(utf8_iter_set_position) 667 const utf8_t *u8ptr = (const utf8_t *)str->strstart; 668 669 if (pos == 0) { 670 i->charpos = 0; 671 i->bytepos = 0; 672 return; 673 } 674 675 /* 676 * we know the byte offsets of three positions: start, current and end 677 * now find the shortest way to reach pos 678 */ 679 if (pos < i->charpos) { 680 if (pos <= (i->charpos >> 1)) { 681 /* go forward from start */ 682 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, pos); 683 } 684 else { 685 /* go backward from current */ 686 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + i->bytepos, i->charpos - pos); 687 } 688 } 689 else { 690 const UINTVAL len = str->strlen; 691 if (pos <= i->charpos + ((len - i->charpos) >> 1)) { 692 /* go forward from current */ 693 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr + i->bytepos, pos - i->charpos); 694 } 695 else { 696 /* go backward from end */ 697 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + str->bufused, len - pos); 698 } 699 } 700 701 i->charpos = pos; 702 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 703 } 704 705 /* 706 
459 707 =item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)> 460 708 461 709 The UTF-8 implementation of the string iterator's C<get_and_advance> … … 582 830 { 583 831 ASSERT_ARGS(to_encoding) 584 832 STRING *result; 585 String_iter src_iter;586 UINTVAL offs,dest_len, dest_pos, src_len;833 const ENCODING *src_encoding; 834 UINTVAL dest_len, dest_pos, src_len; 587 835 const int in_place = (dest == NULL); 588 836 unsigned char *new_pos, *pos, *p; 589 837 … … 597 845 result = dest; 598 846 } 599 847 600 /* init iter before possilby changing encoding*/601 ENCODING_ITER_INIT(interp, src, &src_iter);848 /* save source encoding before possibly changing it */ 849 src_encoding = src->encoding; 602 850 result->charset = Parrot_unicode_charset_ptr; 603 851 result->encoding = Parrot_utf8_encoding_ptr; 604 852 result->strlen = src_len; … … 621 869 result->bufused = dest_len; 622 870 } 623 871 else { 872 String_iter src_iter; 873 STRING_ITER_INIT(interp, &src_iter); 624 874 dest_len = src_len; 625 875 dest_pos = 0; 626 for (offs = 0; offs < src_len; ++offs) {627 const UINTVAL c = src_ iter.get_and_advance(interp, &src_iter);876 while (src_iter.charpos < src_len) { 877 const UINTVAL c = src_encoding->iter_get_and_advance(interp, src, &src_iter); 628 878 if (dest_len - dest_pos < 6) { 629 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);879 UINTVAL need = (UINTVAL)((src->strlen - src_iter.charpos + 1) * 1.5); 630 880 if (need < 16) 631 881 need = 16; 632 882 dest_len += need; … … 790 1040 String_iter iter; 791 1041 UINTVAL start; 792 1042 793 iter_init(interp, src, &iter);1043 STRING_ITER_INIT(interp, &iter); 794 1044 795 1045 if (offset) 796 iter.set_position(interp, &iter, offset);1046 utf8_iter_set_position(interp, src, &iter, offset); 797 1047 798 1048 start = iter.bytepos; 799 1049 return_string->strstart = (char *)return_string->strstart + start; 800 1050 801 1051 if (count) 802 iter.set_position(interp, &iter, offset + count);1052 
utf8_iter_set_position(interp, src, &iter, offset + count); 803 1053 804 1054 return_string->bufused = iter.bytepos - start; 805 1055 return_string->strlen = count; … … 860 1110 UINTVAL start; 861 1111 862 1112 Parrot_str_reuse_COW(interp, src, return_string); 863 iter_init(interp, src, &iter);864 iter.set_position(interp, &iter, offset);1113 STRING_ITER_INIT(interp, &iter); 1114 utf8_iter_set_position(interp, src, &iter, offset); 865 1115 866 1116 start = iter.bytepos; 867 1117 868 1118 return_string->strstart = (char *)return_string->strstart + start; 869 iter.set_position(interp, &iter, offset + count);1119 utf8_iter_set_position(interp, src, &iter, offset + count); 870 1120 871 1121 return_string->bufused = iter.bytepos - start; 872 1122 return_string->strlen = count; … … 973 1223 * this is used to initially calculate src->strlen, 974 1224 * therefore we must scan the whole string 975 1225 */ 976 iter_init(interp, src, &iter);1226 STRING_ITER_INIT(interp, &iter); 977 1227 while (iter.bytepos < src->bufused) 978 iter.get_and_advance(interp, &iter);1228 utf8_iter_get_and_advance(interp, src, &iter); 979 1229 return iter.charpos; 980 1230 } 981 1231 … … 1055 1305 codepoints, 1056 1306 bytes, 1057 1307 iter_init, 1058 find_cclass 1308 find_cclass, 1309 utf8_iter_get, 1310 utf8_iter_skip, 1311 utf8_iter_get_and_advance, 1312 utf8_iter_set_and_advance, 1313 utf8_iter_set_position 1059 1314 }; 1060 1315 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 1061 1316 Parrot_register_encoding(interp, "utf8", return_encoding);