Ticket #1456: string-iter-v5.diff
File string-iter-v5.diff, 71.2 KB (added by nwellnhof, 12 years ago) |
---|
-
include/parrot/encoding.h
diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index 2771a7e..23023bb 100644
a b 37 37 38 38 typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src, 39 39 struct string_iterator_t *); 40 typedef UINTVAL (*encoding_iter_get_t)( 41 PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); 42 typedef void (*encoding_iter_skip_t)( 43 PARROT_INTERP, const STRING *str, String_iter *i, INTVAL skip); 44 typedef UINTVAL (*encoding_iter_get_and_advance_t)( 45 PARROT_INTERP, const STRING *str, String_iter *i); 46 typedef void (*encoding_iter_set_and_advance_t)( 47 PARROT_INTERP, STRING *str, String_iter *i, UINTVAL c); 48 typedef void (*encoding_iter_set_position_t)( 49 PARROT_INTERP, const STRING *str, String_iter *i, UINTVAL pos); 40 50 41 51 struct _encoding { 42 52 ARGIN(const char *name); … … 57 67 encoding_bytes_t bytes; 58 68 encoding_iter_init_t iter_init; 59 69 encoding_find_cclass_t find_cclass; 70 encoding_iter_get_t iter_get; 71 encoding_iter_skip_t iter_skip; 72 encoding_iter_get_and_advance_t iter_get_and_advance; 73 encoding_iter_set_and_advance_t iter_set_and_advance; 74 encoding_iter_set_position_t iter_set_position; 60 75 }; 61 76 62 77 typedef struct _encoding ENCODING; -
include/parrot/string.h
diff --git a/include/parrot/string.h b/include/parrot/string.h index fb6a3be..7d87f8e 100644
a b 37 37 void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos); 38 38 } String_iter; 39 39 40 #define STRING_ITER_INIT(i, iter) \ 41 (iter)->charpos = (iter)->bytepos = 0 42 #define STRING_ITER_GET(i, str, iter, offset) \ 43 ((str)->encoding)->iter_get((i), (str), (iter), (offset)) 44 #define STRING_ITER_SKIP(i, str, iter, skip) \ 45 ((str)->encoding)->iter_skip((i), (str), (iter), (skip)) 46 #define STRING_ITER_GET_AND_ADVANCE(i, str, iter) \ 47 ((str)->encoding)->iter_get_and_advance((i), (str), (iter)) 48 #define STRING_ITER_SET_AND_ADVANCE(i, str, iter, c) \ 49 ((str)->encoding)->iter_set_and_advance((i), (str), (iter), (c)) 50 #define STRING_ITER_SET_POSITION(i, str, iter, pos) \ 51 ((str)->encoding)->iter_set_position((i), (str), (iter), (pos)) 52 40 53 #define STREQ(x, y) (strcmp((x), (y))==0) 41 54 #define STRNEQ(x, y) (strcmp((x), (y))!=0) 42 55 -
include/parrot/string_funcs.h
diff --git a/include/parrot/string_funcs.h b/include/parrot/string_funcs.h index f54af57..d445368 100644
a b 253 253 INTVAL Parrot_str_is_null(SHIM_INTERP, ARGIN_NULLOK(const STRING *s)); 254 254 255 255 PARROT_EXPORT 256 INTVAL Parrot_str_iter_index(PARROT_INTERP, 257 ARGIN(const STRING *src), 258 ARGMOD(String_iter *start), 259 ARGMOD(String_iter *end), 260 ARGIN(const STRING *search)) 261 __attribute__nonnull__(1) 262 __attribute__nonnull__(2) 263 __attribute__nonnull__(3) 264 __attribute__nonnull__(4) 265 __attribute__nonnull__(5) 266 FUNC_MODIFIES(*start) 267 FUNC_MODIFIES(*end); 268 269 PARROT_EXPORT 270 PARROT_CANNOT_RETURN_NULL 271 PARROT_WARN_UNUSED_RESULT 272 STRING * Parrot_str_iter_substr(PARROT_INTERP, 273 ARGMOD(STRING *str), 274 ARGIN(const String_iter *l), 275 ARGIN_NULLOK(const String_iter *r)) 276 __attribute__nonnull__(1) 277 __attribute__nonnull__(2) 278 __attribute__nonnull__(3) 279 FUNC_MODIFIES(*str); 280 281 PARROT_EXPORT 256 282 PARROT_WARN_UNUSED_RESULT 257 283 PARROT_CANNOT_RETURN_NULL 258 284 STRING* Parrot_str_join(PARROT_INTERP, … … 631 657 PARROT_ASSERT_ARG(interp) \ 632 658 , PARROT_ASSERT_ARG(s)) 633 659 #define ASSERT_ARGS_Parrot_str_is_null __attribute__unused__ int _ASSERT_ARGS_CHECK = (0) 660 #define ASSERT_ARGS_Parrot_str_iter_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 661 PARROT_ASSERT_ARG(interp) \ 662 , PARROT_ASSERT_ARG(src) \ 663 , PARROT_ASSERT_ARG(start) \ 664 , PARROT_ASSERT_ARG(end) \ 665 , PARROT_ASSERT_ARG(search)) 666 #define ASSERT_ARGS_Parrot_str_iter_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 667 PARROT_ASSERT_ARG(interp) \ 668 , PARROT_ASSERT_ARG(str) \ 669 , PARROT_ASSERT_ARG(l)) 634 670 #define ASSERT_ARGS_Parrot_str_join __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 635 671 PARROT_ASSERT_ARG(interp) \ 636 672 , PARROT_ASSERT_ARG(ar)) -
src/io/utf8.c
diff --git a/src/io/utf8.c b/src/io/utf8.c index 0df3d22..f2b3b5d 100644
a b 57 57 s->encoding = Parrot_utf8_encoding_ptr; 58 58 59 59 /* count chars, verify utf8 */ 60 Parrot_utf8_encoding_ptr->iter_init(interp, s, &iter);60 STRING_ITER_INIT(interp, &iter); 61 61 62 62 while (iter.bytepos < s->bufused) { 63 63 if (iter.bytepos + 4 > s->bufused) { … … 92 92 } 93 93 } 94 94 ok: 95 iter.get_and_advance(interp, &iter);95 Parrot_utf8_encoding_ptr->iter_get_and_advance(interp, *buf, &iter); 96 96 } 97 97 s->strlen = iter.charpos; 98 98 return len; -
src/pmc/stringiterator.pmc
diff --git a/src/pmc/stringiterator.pmc b/src/pmc/stringiterator.pmc index af58972..986dc8c 100644
a b 23 23 24 24 25 25 pmclass StringIterator auto_attrs extends Iterator { 26 ATTR PMC *string; /* String to iterate over */ 27 ATTR INTVAL pos; /* Current position of iterator for forward iterator */ 28 /* Previous position of iterator for reverse iterator */ 29 ATTR INTVAL length; /* Length of C<string> */ 30 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 26 ATTR PMC *string; /* String PMC to iterate over */ 27 ATTR STRING *str_val; /* The actual string */ 28 ATTR String_iter iter; /* String iterator */ 29 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 31 30 32 31 /* 33 32 … … 39 38 40 39 */ 41 40 VTABLE void init_pmc(PMC *string) { 41 Parrot_StringIterator_attributes * const attrs = 42 PARROT_STRINGITERATOR(SELF); 43 STRING * const str_val = VTABLE_get_string(INTERP, string); 44 42 45 SET_ATTR_string(INTERP, SELF, string); 46 SET_ATTR_str_val(INTERP, SELF, str_val); 47 STRING_ITER_INIT(INTERP, &attrs->iter); 43 48 44 49 /* by default, iterate from start */ 45 50 SELF.set_integer_native(ITERATE_FROM_START); … … 58 63 59 64 VTABLE void mark() { 60 65 PMC *string; 66 STRING *str_val; 67 61 68 GET_ATTR_string(INTERP, SELF, string); 62 69 Parrot_gc_mark_PMC_alive(INTERP, string); 70 GET_ATTR_str_val(INTERP, SELF, str_val); 71 Parrot_gc_mark_STRING_alive(INTERP, str_val); 63 72 } 64 73 65 74 /* … … 77 86 Parrot_StringIterator_attributes * const clone_attrs = 78 87 PARROT_STRINGITERATOR(clone); 79 88 80 clone_attrs->pos = attrs->pos; 89 /* TODO: this isn't safe if the string PMC has changed */ 90 clone_attrs->iter = attrs->iter; 81 91 clone_attrs->reverse = attrs->reverse; 82 92 return clone; 83 93 } … … 110 120 Parrot_StringIterator_attributes * const attrs = 111 121 PARROT_STRINGITERATOR(SELF); 112 122 if (attrs->reverse) 113 return attrs-> pos;123 return attrs->iter.charpos; 114 124 else 115 return attrs-> length - attrs->pos;125 return attrs->str_val->strlen - attrs->iter.charpos; 116 126 } 117 127 118 128 
VTABLE INTVAL get_integer() { … … 137 147 PARROT_STRINGITERATOR(SELF); 138 148 if (value == ITERATE_FROM_START) { 139 149 attrs->reverse = 0; 140 attrs->pos = 0; 141 attrs->length = VTABLE_elements(INTERP, attrs->string); 150 STRING_ITER_SET_POSITION(INTERP, attrs->str_val, &attrs->iter, 0); 142 151 } 143 152 else if (value == ITERATE_FROM_END) { 144 153 attrs->reverse = 1; 145 attrs->pos = attrs->length 146 = VTABLE_elements(INTERP, attrs->string); 154 STRING_ITER_SET_POSITION(INTERP, attrs->str_val, &attrs->iter, attrs->str_val->strlen); 147 155 } 148 156 else 149 157 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION, … … 179 187 Parrot_StringIterator_attributes * const attrs = 180 188 PARROT_STRINGITERATOR(SELF); 181 189 PMC *ret; 190 STRING *str; 191 const String_iter old_iter = attrs->iter; 182 192 183 if (attrs-> pos >= attrs->length)193 if (attrs->iter.charpos >= attrs->str_val->strlen) 184 194 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 185 195 "StopIteration"); 186 196 187 197 ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 188 VTABLE_set_string_native(INTERP, ret, 189 VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++)); 198 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, 1); 199 str = Parrot_str_iter_substr(INTERP, attrs->str_val, &old_iter, &attrs->iter); 200 VTABLE_set_string_native(INTERP, ret, str); 190 201 return ret; 191 202 } 192 203 … … 202 213 VTABLE STRING *shift_string() { 203 214 Parrot_StringIterator_attributes * const attrs = 204 215 PARROT_STRINGITERATOR(SELF); 216 const String_iter old_iter = attrs->iter; 205 217 206 if (attrs-> pos >= attrs->length)218 if (attrs->iter.charpos >= attrs->str_val->strlen) 207 219 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 208 220 "StopIteration"); 209 221 210 return VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++); 222 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, 
1); 223 return Parrot_str_iter_substr(INTERP, attrs->str_val, &old_iter, &attrs->iter); 211 224 } 212 225 213 226 /* … … 223 236 Parrot_StringIterator_attributes * const attrs = 224 237 PARROT_STRINGITERATOR(SELF); 225 238 226 if (attrs-> pos >= attrs->length)239 if (attrs->iter.charpos >= attrs->str_val->strlen) 227 240 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 228 241 "StopIteration"); 229 242 230 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, attrs->pos++);243 return STRING_ITER_GET_AND_ADVANCE(INTERP, attrs->str_val, &attrs->iter); 231 244 } 232 245 233 246 /* … … 243 256 Parrot_StringIterator_attributes * const attrs = 244 257 PARROT_STRINGITERATOR(SELF); 245 258 PMC *ret; 259 STRING * str; 260 const String_iter old_iter = attrs->iter; 246 261 247 if ( !STATICSELF.get_bool())262 if (attrs->iter.charpos <= 0) 248 263 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 249 264 "StopIteration"); 250 265 251 266 ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 252 VTABLE_set_string_native(INTERP, ret, 253 VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos)); 267 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 268 str = Parrot_str_iter_substr(INTERP, attrs->str_val, &attrs->iter, &old_iter); 269 VTABLE_set_string_native(INTERP, ret, str); 254 270 return ret; 255 271 } 256 272 … … 266 282 VTABLE STRING *pop_string() { 267 283 Parrot_StringIterator_attributes * const attrs = 268 284 PARROT_STRINGITERATOR(SELF); 285 const String_iter old_iter = attrs->iter; 269 286 270 if ( !STATICSELF.get_bool())287 if (attrs->iter.charpos <= 0) 271 288 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 272 289 "StopIteration"); 273 290 274 return VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos); 291 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 292 return Parrot_str_iter_substr(INTERP, attrs->str_val, &attrs->iter, &old_iter); 
275 293 } 276 294 277 295 /* … … 287 305 Parrot_StringIterator_attributes * const attrs = 288 306 PARROT_STRINGITERATOR(SELF); 289 307 290 if ( !STATICSELF.get_bool())308 if (attrs->iter.charpos <= 0) 291 309 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 292 310 "StopIteration"); 293 311 294 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, --attrs->pos); 312 STRING_ITER_SKIP(INTERP, attrs->str_val, &attrs->iter, -1); 313 return STRING_ITER_GET(INTERP, attrs->str_val, &attrs->iter, 0); 295 314 } 296 315 297 316 /* … … 305 324 */ 306 325 307 326 VTABLE INTVAL get_integer_keyed_int(INTVAL idx) { 308 return VTABLE_get_integer_keyed_int(INTERP, STATICSELF.get_pmc(), 309 PARROT_STRINGITERATOR(SELF)->pos + idx); 327 Parrot_StringIterator_attributes * const attrs = 328 PARROT_STRINGITERATOR(SELF); 329 const UINTVAL offset = attrs->iter.charpos + idx; 330 331 if (offset >= attrs->str_val->strlen) 332 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 333 "StopIteration"); 334 335 return STRING_ITER_GET(INTERP, attrs->str_val, &attrs->iter, idx); 310 336 } 311 337 312 338 /* … … 320 346 */ 321 347 322 348 VTABLE STRING *get_string_keyed_int(INTVAL idx) { 323 return VTABLE_get_string_keyed_int(INTERP, STATICSELF.get_pmc(), 324 PARROT_STRINGITERATOR(SELF)->pos + idx); 349 Parrot_StringIterator_attributes * const attrs = 350 PARROT_STRINGITERATOR(SELF); 351 const UINTVAL offset = attrs->iter.charpos + idx; 352 String_iter iter, next_iter; 353 354 if (offset >= attrs->str_val->strlen) 355 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 356 "StopIteration"); 357 358 iter = attrs->iter; 359 if (idx != 0) 360 STRING_ITER_SKIP(INTERP, attrs->str_val, &iter, idx); 361 next_iter = iter; 362 STRING_ITER_SKIP(INTERP, attrs->str_val, &next_iter, 1); 363 364 return Parrot_str_iter_substr(INTERP, attrs->str_val, &iter, &next_iter); 325 365 } 326 366 } 327 367 -
src/string/api.c
diff --git a/src/string/api.c b/src/string/api.c index 1de008e..a73393a 100644
a b 1289 1289 } 1290 1290 } 1291 1291 1292 /* 1293 1294 =item C<STRING * Parrot_str_iter_substr(PARROT_INTERP, STRING *str, const 1295 String_iter *l, const String_iter *r)> 1296 1297 Returns the substring between iterators C<l> and C<r>. 1298 1299 =cut 1300 1301 */ 1302 1303 PARROT_EXPORT 1304 PARROT_CANNOT_RETURN_NULL 1305 PARROT_WARN_UNUSED_RESULT 1306 STRING * 1307 Parrot_str_iter_substr(PARROT_INTERP, 1308 ARGMOD(STRING *str), 1309 ARGIN(const String_iter *l), ARGIN_NULLOK(const String_iter *r)) 1310 { 1311 ASSERT_ARGS(Parrot_str_iter_substr) 1312 STRING *dest = Parrot_str_new_COW(interp, str); 1313 1314 dest->strstart = (char *)dest->strstart + l->bytepos; 1315 1316 if (r == NULL) { 1317 dest->bufused = str->bufused - l->bytepos; 1318 dest->strlen = str->strlen - l->charpos; 1319 } 1320 else { 1321 dest->bufused = r->bytepos - l->bytepos; 1322 dest->strlen = r->charpos - l->charpos; 1323 } 1324 1325 dest->hashval = 0; 1326 1327 return dest; 1328 } 1329 1330 /* 1331 1332 =item C<INTVAL Parrot_str_iter_index(PARROT_INTERP, const STRING *src, 1333 String_iter *start, String_iter *end, const STRING *search)> 1334 1335 Find the next occurence of STRING C<search> in STRING C<src> starting at 1336 String_iter C<start>. If C<search> is found C<start> is modified to mark the 1337 beginning of C<search> and String_iter C<end> is set to the character after 1338 C<search> in C<src>. Returns the character position where C<search> was found 1339 or -1 if it wasn't found. 
1340 1341 =cut 1342 1343 */ 1344 1345 PARROT_EXPORT 1346 INTVAL 1347 Parrot_str_iter_index(PARROT_INTERP, 1348 ARGIN(const STRING *src), 1349 ARGMOD(String_iter *start), ARGMOD(String_iter *end), 1350 ARGIN(const STRING *search)) 1351 { 1352 ASSERT_ARGS(Parrot_str_iter_index) 1353 String_iter search_iter; 1354 const UINTVAL len = search->strlen; 1355 1356 *end = *start; 1357 1358 if (len == 0) { 1359 return start->charpos; 1360 } 1361 1362 STRING_ITER_INIT(interp, &search_iter); 1363 1364 if (len == 1) { 1365 const UINTVAL c0 = STRING_ITER_GET(interp, search, &search_iter, 0); 1366 1367 while (start->charpos < src->strlen) { 1368 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, end); 1369 if (c == c0) 1370 return start->charpos; 1371 *start = *end; 1372 } 1373 } 1374 else { 1375 const UINTVAL c0 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter); 1376 String_iter search_start = search_iter; 1377 1378 while (1) { 1379 String_iter src_start_iter; 1380 UINTVAL c1, c2; 1381 1382 do { 1383 *start = *end; 1384 if (start->charpos + len > src->strlen) 1385 return -1; 1386 c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, end); 1387 } while (c1 != c0); 1388 1389 do { 1390 if (search_iter.charpos >= len) 1391 return start->charpos; 1392 c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, end); 1393 c2 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter); 1394 } while (c1 == c2); 1395 1396 STRING_ITER_SKIP(interp, src, start, 1); 1397 *end = *start; 1398 search_iter = search_start; 1399 } 1400 } 1401 1402 return -1; 1403 } 1404 1292 1405 1293 1406 /* 1294 1407 … … 1383 1496 } 1384 1497 1385 1498 /* get byte position of the part that will be replaced */ 1386 ENCODING_ITER_INIT(interp, src, &iter);1499 STRING_ITER_INIT(interp, &iter); 1387 1500 1388 iter.set_position(interp, &iter, true_offset);1501 STRING_ITER_SET_POSITION(interp, src, &iter, true_offset); 1389 1502 start_byte = iter.bytepos; 1390 1503 1391 iter.set_position(interp, &iter, true_offset + 
true_length);1504 STRING_ITER_SET_POSITION(interp, src, &iter, true_offset + true_length); 1392 1505 end_byte = iter.bytepos; 1393 1506 1394 1507 /* not possible.... */ … … 1486 1599 Parrot_str_chopn_inplace(PARROT_INTERP, ARGMOD(STRING *s), INTVAL n) 1487 1600 { 1488 1601 ASSERT_ARGS(Parrot_str_chopn_inplace) 1489 UINTVAL new_length , uchar_size;1602 UINTVAL new_length; 1490 1603 1491 1604 if (n < 0) { 1492 1605 new_length = -n; … … 1507 1620 return; 1508 1621 } 1509 1622 1510 uchar_size = s->bufused / s->strlen;1511 s->strlen = new_length;1512 1513 1623 if (s->encoding == Parrot_fixed_8_encoding_ptr) { 1514 1624 s->bufused = new_length; 1515 1625 } 1516 1626 else if (s->encoding == Parrot_ucs2_encoding_ptr) { 1627 const UINTVAL uchar_size = s->bufused / s->strlen; 1517 1628 s->bufused = new_length * uchar_size; 1518 1629 } 1519 1630 else { 1520 1631 String_iter iter; 1521 1632 1522 ENCODING_ITER_INIT(interp, s, &iter);1523 iter.set_position(interp, &iter, new_length);1633 STRING_ITER_INIT(interp, &iter); 1634 STRING_ITER_SET_POSITION(interp, s, &iter, new_length); 1524 1635 s->bufused = iter.bytepos; 1525 1636 } 1526 1637 1638 s->strlen = new_length; 1639 1527 1640 return; 1528 1641 } 1529 1642 … … 2159 2272 int sign = 1; 2160 2273 INTVAL i = 0; 2161 2274 String_iter iter; 2162 UINTVAL offs;2163 2275 number_parse_state state = parse_start; 2164 2276 2165 ENCODING_ITER_INIT(interp, s, &iter);2277 STRING_ITER_INIT(interp, &iter); 2166 2278 2167 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {2168 const UINTVAL c = iter.get_and_advance(interp, &iter);2279 while (state != parse_end && iter.charpos < s->strlen) { 2280 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 2169 2281 /* Check for overflow */ 2170 2282 if (c > 255) 2171 2283 break; … … 2250 2362 int d_length = 0; 2251 2363 int check_nan = 0; /* Check for NaN and Inf after main loop */ 2252 2364 String_iter iter; 2253 UINTVAL offs;2254 2365 number_parse_state state = 
parse_start; 2255 2366 2256 2367 if (!s) 2257 2368 return 0.0; 2258 2369 2259 ENCODING_ITER_INIT(interp, s, &iter);2370 STRING_ITER_INIT(interp, &iter); 2260 2371 2261 2372 /* Handcrafter FSM to read float value */ 2262 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {2263 const UINTVAL c = iter.get_and_advance(interp, &iter);2373 while (state != parse_end && iter.charpos < s->strlen) { 2374 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 2264 2375 /* Check for overflow */ 2265 2376 if (c > 255) 2266 2377 break; … … 2635 2746 { 2636 2747 ASSERT_ARGS(Parrot_str_to_hashval) 2637 2748 String_iter iter; 2638 UINTVAL offs;2639 2749 size_t hashval = interp->hash_seed; 2640 2750 2641 2751 if (!s) … … 2644 2754 /* ZZZZZ workaround for something not setting up encodings right */ 2645 2755 saneify_string(s); 2646 2756 2647 ENCODING_ITER_INIT(interp, s, &iter);2757 STRING_ITER_INIT(interp, &iter); 2648 2758 2649 for (offs = 0; offs < s->strlen; ++offs) {2650 const UINTVAL c = iter.get_and_advance(interp, &iter);2759 while (iter.charpos < s->strlen) { 2760 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 2651 2761 hashval += hashval << 5; 2652 2762 hashval += c; 2653 2763 } … … 2725 2835 Parrot_fixed_8_encoding_ptr, Parrot_ascii_charset_ptr, 0); 2726 2836 2727 2837 /* more work TODO */ 2728 ENCODING_ITER_INIT(interp, src, &iter);2838 STRING_ITER_INIT(interp, &iter); 2729 2839 dp = (unsigned char *)result->strstart; 2730 2840 2731 2841 for (i = 0; len > 0; --len) { 2732 UINTVAL c = iter.get_and_advance(interp, &iter);2842 UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 2733 2843 if (c < 0x7f) { 2734 2844 /* process ASCII chars */ 2735 2845 if (i >= charlen - 2) { … … 2883 2993 encoding = result->encoding; 2884 2994 } 2885 2995 2886 encoding->iter_init(interp, result, &iter);2996 STRING_ITER_INIT(interp, &iter); 2887 2997 2888 2998 for (offs = d = 0; offs < clength; ++offs) { 2889 2999 r = (Parrot_UInt4)((unsigned 
char *)result->strstart)[offs]; … … 2906 3016 } 2907 3017 2908 3018 PARROT_ASSERT(d < offs); 2909 iter.set_and_advance(interp, &iter, r);3019 encoding->iter_set_and_advance(interp, result, &iter, r); 2910 3020 ++d; 2911 3021 } 2912 3022 … … 3441 3551 ARGIN_NULLOK(STRING *delim), ARGIN_NULLOK(STRING *str)) 3442 3552 { 3443 3553 ASSERT_ARGS(Parrot_str_split) 3444 PMC *res; 3445 INTVAL slen, dlen, ps, pe; 3554 PMC *res; 3555 STRING *tstr; 3556 UINTVAL slen, dlen; 3557 String_iter iter; 3446 3558 3447 3559 if (STRING_IS_NULL(delim) || STRING_IS_NULL(str)) 3448 3560 return PMCNULL; … … 3453 3565 if (!slen) 3454 3566 return res; 3455 3567 3568 STRING_ITER_INIT(interp, &iter); 3456 3569 dlen = Parrot_str_byte_length(interp, delim); 3457 3570 3458 3571 if (dlen == 0) { 3459 int i;3460 3572 VTABLE_set_integer_native(interp, res, slen); 3461 3573 3462 for (i = 0; i < slen; ++i) { 3463 STRING * const p = Parrot_str_substr(interp, str, i, 1, NULL, 0); 3464 VTABLE_set_string_keyed_int(interp, res, i, p); 3465 } 3466 3467 return res; 3468 } 3574 do { 3575 const String_iter old_iter = iter; 3469 3576 3470 pe = Parrot_str_find_index(interp, str, delim, 0); 3577 STRING_ITER_SKIP(interp, str, &iter, 1); 3578 tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter); 3579 VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr); 3580 } while (iter.charpos < slen); 3471 3581 3472 if (pe < 0) {3473 VTABLE_push_string(interp, res, str);3474 3582 return res; 3475 3583 } 3476 3584 3477 ps = 0; 3478 3479 while (ps <= slen) { 3480 const int pl = pe - ps; 3481 STRING * const tstr = Parrot_str_substr(interp, str, ps, pl, NULL, 0); 3482 3483 VTABLE_push_string(interp, res, tstr); 3484 ps = pe + Parrot_str_byte_length(interp, delim); 3585 do { 3586 String_iter start, end; 3587 INTVAL pos; 3485 3588 3486 if (ps > slen) 3589 start = iter; 3590 if (Parrot_str_iter_index(interp, str, &start, &end, delim) < 0) 3487 3591 break; 3488 3592 3489 pe = Parrot_str_find_index(interp, str, delim, 
ps); 3593 tstr = Parrot_str_iter_substr(interp, str, &iter, &start); 3594 VTABLE_push_string(interp, res, tstr); 3595 iter = end; 3596 } while (iter.charpos < slen); 3490 3597 3491 if (pe < 0) 3492 pe = slen; 3493 } 3598 tstr = Parrot_str_iter_substr(interp, str, &iter, NULL); 3599 VTABLE_push_string(interp, res, tstr); 3494 3600 3495 3601 return res; 3496 3602 } -
src/string/charset/ascii.c
diff --git a/src/string/charset/ascii.c b/src/string/charset/ascii.c index 1cb0f23..f5ba605 100644
a b 263 263 { 264 264 ASSERT_ARGS(to_ascii) 265 265 String_iter iter; 266 UINTVAL offs;267 266 unsigned char *p; 268 267 const UINTVAL len = src->strlen; 269 268 … … 275 274 dest = src; 276 275 } 277 276 p = (unsigned char *)dest->strstart; 278 ENCODING_ITER_INIT(interp, src, &iter);279 for (offs = 0; offs < len; ++offs) {280 const UINTVAL c = iter.get_and_advance(interp, &iter);277 STRING_ITER_INIT(interp, &iter); 278 while (iter.charpos < len) { 279 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 281 280 if (c >= 128) 282 281 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 283 282 "can't convert unicode string to ascii"); … … 557 556 return ret_val < 0 ? -1 : 1; 558 557 } 559 558 else { 560 UINTVAL offs; 561 ENCODING_ITER_INIT(interp, rhs, &iter); 562 for (offs = 0; offs < min_len; ++offs) { 563 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, offs); 564 const UINTVAL cr = iter.get_and_advance(interp, &iter); 559 STRING_ITER_INIT(interp, &iter); 560 while (iter.charpos < min_len) { 561 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos); 562 const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter); 565 563 if (cl != cr) 566 564 return cl < cr ? 
-1 : 1; 567 565 } … … 595 593 UINTVAL offs) 596 594 { 597 595 ASSERT_ARGS(mixed_cs_index) 598 String_iter src_iter, search_iter; 599 UINTVAL len, next_pos; 600 INTVAL found_at; 601 602 ENCODING_ITER_INIT(interp, src, &src_iter); 603 src_iter.set_position(interp, &src_iter, offs); 604 ENCODING_ITER_INIT(interp, search, &search_iter); 605 len = search->strlen; 606 607 found_at = -1; 608 next_pos = offs; 609 610 for (; len && offs < src->strlen ;) { 611 const UINTVAL c1 = src_iter.get_and_advance(interp, &src_iter); 612 const UINTVAL c2 = search_iter.get_and_advance(interp, &search_iter); 613 614 if (c1 == c2) { 615 --len; 616 if (found_at == -1) 617 found_at = offs; 618 ++offs; 619 } 620 else { 621 len = search->strlen; 622 ++offs; 623 ++next_pos; 624 if (offs != next_pos) { 625 src_iter.set_position(interp, &src_iter, next_pos); 626 offs = next_pos; 627 } 628 629 found_at = -1; 630 search_iter.set_position(interp, &search_iter, 0); 631 } 632 } 633 if (len == 0) 634 return found_at; 635 return -1; 596 String_iter start, end; 597 598 STRING_ITER_INIT(interp, &start); 599 STRING_ITER_SET_POSITION(interp, src, &start, offs); 600 601 return Parrot_str_iter_index(interp, src, &start, &end, search); 636 602 } 637 603 638 604 /* … … 711 677 validate(PARROT_INTERP, ARGIN(STRING *src)) 712 678 { 713 679 ASSERT_ARGS(validate) 714 UINTVAL offset;680 const UINTVAL len = Parrot_str_byte_length(interp, src); 715 681 String_iter iter; 716 682 717 ENCODING_ITER_INIT(interp, src, &iter);718 for (offset = 0; offset < Parrot_str_byte_length(interp, src); ++offset) {719 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);683 STRING_ITER_INIT(interp, &iter); 684 while (iter.charpos < len) { 685 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 720 686 if (codepoint >= 0x80) 721 687 return 0; 722 688 } -
src/string/charset/iso-8859-1.c
diff --git a/src/string/charset/iso-8859-1.c b/src/string/charset/iso-8859-1.c index b88c11d..65c663a 100644
a b 215 215 to_iso_8859_1(PARROT_INTERP, ARGIN(STRING *src), ARGMOD_NULLOK(STRING *dest)) 216 216 { 217 217 ASSERT_ARGS(to_iso_8859_1) 218 UINTVAL offs,src_len;218 UINTVAL src_len; 219 219 String_iter iter; 220 220 221 ENCODING_ITER_INIT(interp, src, &iter);221 STRING_ITER_INIT(interp, &iter); 222 222 src_len = src->strlen; 223 223 if (dest) { 224 224 Parrot_gc_reallocate_string_storage(interp, dest, src_len); … … 229 229 dest = src; 230 230 } 231 231 dest->bufused = src_len; 232 dest->charset = Parrot_iso_8859_1_charset_ptr; 233 dest->encoding = Parrot_fixed_8_encoding_ptr; 234 for (offs = 0; offs < src_len; ++offs) { 235 const UINTVAL c = iter.get_and_advance(interp, &iter); 232 while (iter.charpos < src_len) { 233 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 236 234 if (c >= 0x100) 237 235 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 238 236 "lossy conversion to iso-8559-1"); 239 237 240 ENCODING_SET_BYTE(interp, dest, offs, c);238 Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c); 241 239 } 240 dest->charset = Parrot_iso_8859_1_charset_ptr; 241 dest->encoding = Parrot_fixed_8_encoding_ptr; 242 242 return dest; 243 243 } 244 244 … … 258 258 { 259 259 ASSERT_ARGS(to_unicode) 260 260 if (dest) { 261 UINTVAL offs;262 261 String_iter iter; 263 262 264 263 dest->charset = Parrot_unicode_charset_ptr; 265 264 dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest); 266 265 Parrot_gc_reallocate_string_storage(interp, dest, src->strlen); 267 ENCODING_ITER_INIT(interp, dest, &iter);268 for (offs = 0; offs < src->strlen; ++offs) {269 const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);266 STRING_ITER_INIT(interp, &iter); 267 while (iter.charpos < src->strlen) { 268 const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos); 270 269 271 270 if (iter.bytepos >= Buffer_buflen(dest) - 4) { 272 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);271 UINTVAL need = (UINTVAL)((src->strlen - 
iter.charpos) * 1.5); 273 272 if (need < 16) 274 273 need = 16; 275 274 Parrot_gc_reallocate_string_storage(interp, dest, 276 275 Buffer_buflen(dest) + need); 277 276 } 278 iter.set_and_advance(interp, &iter, c);277 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c); 279 278 } 280 279 dest->bufused = iter.bytepos; 281 280 dest->strlen = iter.charpos; -
src/string/charset/unicode.c
diff --git a/src/string/charset/unicode.c b/src/string/charset/unicode.c index 77b0893..98f6e84 100644
a b 704 704 { 705 705 ASSERT_ARGS(compare) 706 706 String_iter l_iter, r_iter; 707 UINTVAL offs, cl, cr,min_len, l_len, r_len;707 UINTVAL min_len, l_len, r_len; 708 708 709 709 /* TODO make optimized equal - strings are equal length then already */ 710 ENCODING_ITER_INIT(interp, lhs, &l_iter);711 ENCODING_ITER_INIT(interp, rhs, &r_iter);710 STRING_ITER_INIT(interp, &l_iter); 711 STRING_ITER_INIT(interp, &r_iter); 712 712 713 713 l_len = lhs->strlen; 714 714 r_len = rhs->strlen; 715 715 716 716 min_len = l_len > r_len ? r_len : l_len; 717 717 718 for (offs = 0; offs < min_len; ++offs) {719 cl = l_iter.get_and_advance(interp, &l_iter);720 cr = r_iter.get_and_advance(interp, &r_iter);718 while (l_iter.charpos < min_len) { 719 UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter); 720 UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter); 721 721 722 722 if (cl != cr) 723 723 return cl < cr ? -1 : 1; … … 769 769 validate(PARROT_INTERP, ARGIN(STRING *src)) 770 770 { 771 771 ASSERT_ARGS(validate) 772 UINTVAL offset;772 UINTVAL len = Parrot_str_byte_length(interp, src); 773 773 String_iter iter; 774 774 775 ENCODING_ITER_INIT(interp, src, &iter);776 for (offset = 0; offset < Parrot_str_byte_length(interp, src); ++offset) {777 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);775 STRING_ITER_INIT(interp, &iter); 776 while (iter.charpos < len) { 777 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 778 778 /* Check for Unicode non-characters */ 779 779 if (codepoint >= 0xfdd0 780 780 && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe) … … 924 924 ASSERT_ARGS(find_cclass) 925 925 String_iter iter; 926 926 UINTVAL codepoint; 927 UINTVAL pos = offset;928 927 UINTVAL end = offset + count; 929 928 930 ENCODING_ITER_INIT(interp, source_string, &iter); 931 932 iter.set_position(interp, &iter, pos); 929 STRING_ITER_INIT(interp, &iter); 930 STRING_ITER_SET_POSITION(interp, source_string, &iter, offset); 933 931 934 932 
end = source_string->strlen < end ? source_string->strlen : end; 935 933 936 for (; pos < end; ++pos) {937 codepoint = iter.get_and_advance(interp, &iter);934 while (iter.charpos < end) { 935 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, source_string, &iter); 938 936 if (codepoint >= 256) { 939 937 if (u_iscclass(interp, codepoint, flags)) 940 return pos;938 return iter.charpos - 1; 941 939 } 942 940 else { 943 941 if (Parrot_iso_8859_1_typetable[codepoint] & flags) 944 return pos;942 return iter.charpos - 1; 945 943 } 946 944 } 947 945 … … 965 963 ASSERT_ARGS(find_not_cclass) 966 964 String_iter iter; 967 965 UINTVAL codepoint; 968 UINTVAL pos = offset;969 966 UINTVAL end = offset + count; 970 967 int bit; 971 968 972 if ( pos> source_string->strlen) {969 if (offset > source_string->strlen) { 973 970 /* XXX: Throw in this case? */ 974 971 return offset + count; 975 972 } 976 973 977 ENCODING_ITER_INIT(interp, source_string, &iter);974 STRING_ITER_INIT(interp, &iter); 978 975 979 if ( pos)980 iter.set_position(interp, &iter, pos);976 if (offset) 977 STRING_ITER_SET_POSITION(interp, source_string, &iter, offset); 981 978 982 979 end = source_string->strlen < end ? 
source_string->strlen : end; 983 980 984 981 if (flags == enum_cclass_any) 985 982 return end; 986 983 987 for (; pos < end; ++pos) {988 codepoint = iter.get_and_advance(interp, &iter);984 while (iter.charpos < end) { 985 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, source_string, &iter); 989 986 if (codepoint >= 256) { 990 987 for (bit = enum_cclass_uppercase; 991 988 bit <= enum_cclass_word ; bit <<= 1) { 992 989 if ((bit & flags) && !u_iscclass(interp, codepoint, bit)) 993 return pos;990 return iter.charpos - 1; 994 991 } 995 992 } 996 993 else { 997 994 if (!(Parrot_iso_8859_1_typetable[codepoint] & flags)) 998 return pos;995 return iter.charpos - 1; 999 996 } 1000 997 } 1001 998 … … 1023 1020 1024 1021 dest->strlen = 1; 1025 1022 1026 ENCODING_ITER_INIT(interp, dest, &iter);1027 iter.set_and_advance(interp, &iter, codepoint);1023 STRING_ITER_INIT(interp, &iter); 1024 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint); 1028 1025 dest->bufused = iter.bytepos; 1029 1026 1030 1027 return dest; … … 1047 1044 { 1048 1045 ASSERT_ARGS(compute_hash) 1049 1046 String_iter iter; 1050 UINTVAL offs;1051 1047 size_t hashval = seed; 1052 1048 1053 ENCODING_ITER_INIT(interp, src, &iter);1049 STRING_ITER_INIT(interp, &iter); 1054 1050 1055 for (offs = 0; offs < src->strlen; ++offs) {1056 const UINTVAL c = iter.get_and_advance(interp, &iter);1051 while (iter.charpos < src->strlen) { 1052 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 1057 1053 hashval += hashval << 5; 1058 1054 hashval += c; 1059 1055 } -
src/string/encoding/fixed_8.c
diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index dd41129..712479d 100644
a b 50 50 __attribute__nonnull__(2) 51 51 FUNC_MODIFIES(*iter); 52 52 53 static UINTVAL fixed8_iter_get(PARROT_INTERP, 54 ARGIN(const STRING *str), 55 ARGIN(const String_iter *iter), 56 INTVAL offset) 57 __attribute__nonnull__(1) 58 __attribute__nonnull__(2) 59 __attribute__nonnull__(3); 60 61 static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, 62 ARGIN(const STRING *str), 63 ARGMOD(String_iter *iter)) 64 __attribute__nonnull__(1) 65 __attribute__nonnull__(2) 66 __attribute__nonnull__(3) 67 FUNC_MODIFIES(*iter); 68 69 static void fixed8_iter_set_and_advance(PARROT_INTERP, 70 ARGMOD(STRING *str), 71 ARGMOD(String_iter *iter), 72 UINTVAL c) 73 __attribute__nonnull__(1) 74 __attribute__nonnull__(2) 75 __attribute__nonnull__(3) 76 FUNC_MODIFIES(*str) 77 FUNC_MODIFIES(*iter); 78 79 static void fixed8_iter_set_position(SHIM_INTERP, 80 ARGIN(const STRING *str), 81 ARGMOD(String_iter *iter), 82 UINTVAL pos) 83 __attribute__nonnull__(2) 84 __attribute__nonnull__(3) 85 FUNC_MODIFIES(*iter); 86 87 static void fixed8_iter_skip(SHIM_INTERP, 88 ARGIN(const STRING *str), 89 ARGMOD(String_iter *iter), 90 INTVAL skip) 91 __attribute__nonnull__(2) 92 __attribute__nonnull__(3) 93 FUNC_MODIFIES(*iter); 94 53 95 static void fixed8_set_next(PARROT_INTERP, 54 96 ARGMOD(String_iter *iter), 55 97 UINTVAL c) … … 181 223 #define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 182 224 PARROT_ASSERT_ARG(interp) \ 183 225 , PARROT_ASSERT_ARG(iter)) 226 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 227 PARROT_ASSERT_ARG(interp) \ 228 , PARROT_ASSERT_ARG(str) \ 229 , PARROT_ASSERT_ARG(iter)) 230 #define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 231 PARROT_ASSERT_ARG(interp) \ 232 , PARROT_ASSERT_ARG(str) \ 233 , PARROT_ASSERT_ARG(iter)) 234 #define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 235 PARROT_ASSERT_ARG(interp) \ 236 , 
PARROT_ASSERT_ARG(str) \ 237 , PARROT_ASSERT_ARG(iter)) 238 #define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 239 PARROT_ASSERT_ARG(str) \ 240 , PARROT_ASSERT_ARG(iter)) 241 #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 242 PARROT_ASSERT_ARG(str) \ 243 , PARROT_ASSERT_ARG(iter)) 184 244 #define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 185 245 PARROT_ASSERT_ARG(interp) \ 186 246 , PARROT_ASSERT_ARG(iter)) … … 581 641 582 642 /* 583 643 644 =item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const 645 String_iter *iter, INTVAL offset)> 646 647 Get the character at C<iter> plus C<offset>. 648 649 =cut 650 651 */ 652 653 static UINTVAL 654 fixed8_iter_get(PARROT_INTERP, 655 ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset) 656 { 657 ASSERT_ARGS(fixed8_iter_get) 658 return get_byte(interp, str, iter->charpos + offset); 659 } 660 661 /* 662 663 =item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str, 664 String_iter *iter, INTVAL skip)> 665 666 Moves the string iterator C<iter> by C<skip> characters. 667 668 =cut 669 670 */ 671 672 static void 673 fixed8_iter_skip(SHIM_INTERP, 674 ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip) 675 { 676 ASSERT_ARGS(fixed8_iter_skip) 677 iter->bytepos += skip; 678 iter->charpos += skip; 679 PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str)); 680 } 681 682 /* 683 684 =item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING 685 *str, String_iter *iter)> 686 687 Returns the character at the string iterator C<iter> and moves the iterator to the next codepoint. 
688 689 =cut 690 691 */ 692 693 static UINTVAL 694 fixed8_iter_get_and_advance(PARROT_INTERP, 695 ARGIN(const STRING *str), ARGMOD(String_iter *iter)) 696 { 697 ASSERT_ARGS(fixed8_iter_get_and_advance) 698 const UINTVAL c = get_byte(interp, str, iter->charpos++); 699 iter->bytepos++; 700 return c; 701 } 702 703 /* 704 705 =item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str, 706 String_iter *iter, UINTVAL c)> 707 708 With the string iterator C<iter>, appends the codepoint C<c> and advances to the 709 next position in the string. 710 711 =cut 712 713 */ 714 715 static void 716 fixed8_iter_set_and_advance(PARROT_INTERP, 717 ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c) 718 { 719 ASSERT_ARGS(fixed8_iter_set_and_advance) 720 set_byte(interp, str, iter->charpos++, c); 721 iter->bytepos++; 722 } 723 724 /* 725 726 =item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str, 727 String_iter *iter, UINTVAL pos)> 728 729 Moves the string iterator C<iter> to the position C<pos> in the string. 730 731 =cut 732 733 */ 734 735 static void 736 fixed8_iter_set_position(SHIM_INTERP, 737 ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos) 738 { 739 ASSERT_ARGS(fixed8_iter_set_position) 740 iter->bytepos = iter->charpos = pos; 741 PARROT_ASSERT(pos <= Buffer_buflen(str)); 742 } 743 744 /* 745 584 746 =item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)> 585 747 586 748 Moves the string iterator C<i> to the next codepoint. … … 695 857 codepoints, 696 858 bytes, 697 859 iter_init, 698 find_cclass 860 find_cclass, 861 fixed8_iter_get, 862 fixed8_iter_skip, 863 fixed8_iter_get_and_advance, 864 fixed8_iter_set_and_advance, 865 fixed8_iter_set_position 699 866 700 867 }; 701 868 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); -
src/string/encoding/ucs2.c
diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 71ef8b1..6a7459c 100644
a b 164 164 __attribute__nonnull__(2) 165 165 FUNC_MODIFIES(*i); 166 166 167 static UINTVAL ucs2_iter_get(PARROT_INTERP, 168 ARGIN(const STRING *str), 169 ARGIN(const String_iter *i), 170 INTVAL offset) 171 __attribute__nonnull__(1) 172 __attribute__nonnull__(2) 173 __attribute__nonnull__(3); 174 175 static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, 176 ARGIN(const STRING *str), 177 ARGMOD(String_iter *i)) 178 __attribute__nonnull__(1) 179 __attribute__nonnull__(2) 180 __attribute__nonnull__(3) 181 FUNC_MODIFIES(*i); 182 183 static void ucs2_iter_set_and_advance(PARROT_INTERP, 184 ARGMOD(STRING *str), 185 ARGMOD(String_iter *i), 186 UINTVAL c) 187 __attribute__nonnull__(1) 188 __attribute__nonnull__(2) 189 __attribute__nonnull__(3) 190 FUNC_MODIFIES(*str) 191 FUNC_MODIFIES(*i); 192 193 static void ucs2_iter_set_position(SHIM_INTERP, 194 ARGIN(const STRING *str), 195 ARGMOD(String_iter *i), 196 UINTVAL n) 197 __attribute__nonnull__(2) 198 __attribute__nonnull__(3) 199 FUNC_MODIFIES(*i); 200 201 static void ucs2_iter_skip(SHIM_INTERP, 202 ARGIN(const STRING *str), 203 ARGMOD(String_iter *i), 204 INTVAL skip) 205 __attribute__nonnull__(2) 206 __attribute__nonnull__(3) 207 FUNC_MODIFIES(*i); 208 167 209 static void ucs2_set_position(SHIM_INTERP, 168 210 ARGMOD(String_iter *i), 169 211 UINTVAL n) … … 219 261 #define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 220 262 PARROT_ASSERT_ARG(interp) \ 221 263 , PARROT_ASSERT_ARG(i)) 264 #define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 265 PARROT_ASSERT_ARG(interp) \ 266 , PARROT_ASSERT_ARG(str) \ 267 , PARROT_ASSERT_ARG(i)) 268 #define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 269 PARROT_ASSERT_ARG(interp) \ 270 , PARROT_ASSERT_ARG(str) \ 271 , PARROT_ASSERT_ARG(i)) 272 #define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 273 PARROT_ASSERT_ARG(interp) \ 274 
, PARROT_ASSERT_ARG(str) \ 275 , PARROT_ASSERT_ARG(i)) 276 #define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 277 PARROT_ASSERT_ARG(str) \ 278 , PARROT_ASSERT_ARG(i)) 279 #define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 280 PARROT_ASSERT_ARG(str) \ 281 , PARROT_ASSERT_ARG(i)) 222 282 #define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 223 283 PARROT_ASSERT_ARG(i)) 224 284 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ … … 397 457 String_iter iter; 398 458 UINTVAL start; 399 459 400 iter_init(interp, src, &iter);401 iter.set_position(interp, &iter, offset);460 STRING_ITER_INIT(interp, &iter); 461 ucs2_iter_set_position(interp, src, &iter, offset); 402 462 start = iter.bytepos; 403 463 return_string->strstart = (char *)return_string->strstart + start; 404 iter.set_position(interp, &iter, offset + count);464 ucs2_iter_set_position(interp, src, &iter, offset + count); 405 465 return_string->bufused = iter.bytepos - start; 406 466 } 407 467 #endif … … 576 636 577 637 /* 578 638 639 =item C<static UINTVAL ucs2_iter_get(PARROT_INTERP, const STRING *str, const 640 String_iter *i, INTVAL offset)> 641 642 Get the character at C<i> + C<offset>. 643 644 =cut 645 646 */ 647 648 static UINTVAL 649 ucs2_iter_get(PARROT_INTERP, 650 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 651 { 652 ASSERT_ARGS(ucs2_iter_get) 653 return get_codepoint(interp, str, i->charpos + offset); 654 } 655 656 /* 657 658 =item C<static void ucs2_iter_skip(PARROT_INTERP, const STRING *str, String_iter 659 *i, INTVAL skip)> 660 661 Moves the string iterator C<i> by C<skip> characters. 
662 663 =cut 664 665 */ 666 667 static void 668 ucs2_iter_skip(SHIM_INTERP, 669 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 670 { 671 ASSERT_ARGS(ucs2_iter_skip) 672 673 #if PARROT_HAS_ICU 674 i->charpos += skip; 675 i->bytepos += skip * sizeof (UChar); 676 #else 677 /* This function must never be called if compiled without ICU. 678 * See TT #557 679 */ 680 PARROT_ASSERT(0); 681 #endif 682 } 683 684 /* 685 686 =item C<static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, const STRING 687 *str, String_iter *i)> 688 689 Moves the string iterator C<i> to the next UCS-2 codepoint. 690 691 =cut 692 693 */ 694 695 static UINTVAL 696 ucs2_iter_get_and_advance(PARROT_INTERP, 697 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 698 { 699 ASSERT_ARGS(ucs2_iter_get_and_advance) 700 701 #if PARROT_HAS_ICU 702 UChar * const s = (UChar*) str->strstart; 703 size_t pos = i->bytepos / sizeof (UChar); 704 705 /* TODO either make sure that we don't go past end or use SAFE 706 * iter versions 707 */ 708 const UChar c = s[pos++]; 709 i->charpos++; 710 i->bytepos = pos * sizeof (UChar); 711 return c; 712 #else 713 /* This function must never be called if compiled without ICU. 714 * See TT #557 715 */ 716 PARROT_ASSERT(0); 717 return (UINTVAL)0; /* Stop the static analyzers from panicking */ 718 #endif 719 } 720 721 /* 722 723 =item C<static void ucs2_iter_set_and_advance(PARROT_INTERP, STRING *str, 724 String_iter *i, UINTVAL c)> 725 726 With the string iterator C<i>, appends the codepoint C<c> and advances to the 727 next position in the string. 
728 729 =cut 730 731 */ 732 733 static void 734 ucs2_iter_set_and_advance(PARROT_INTERP, 735 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 736 { 737 ASSERT_ARGS(ucs2_iter_set_and_advance) 738 739 #if PARROT_HAS_ICU 740 UChar * const s = (UChar*) str->strstart; 741 UINTVAL pos = i->bytepos / sizeof (UChar); 742 s[pos++] = (UChar)c; 743 i->charpos++; 744 i->bytepos = pos * sizeof (UChar); 745 #else 746 /* This function must never be called if compiled without ICU. 747 * See TT #557 748 */ 749 PARROT_ASSERT(0); 750 #endif 751 } 752 753 /* 754 755 =item C<static void ucs2_iter_set_position(PARROT_INTERP, const STRING *str, 756 String_iter *i, UINTVAL n)> 757 758 Moves the string iterator C<i> to the position C<n> in the string. 759 760 =cut 761 762 */ 763 764 static void 765 ucs2_iter_set_position(SHIM_INTERP, 766 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 767 { 768 ASSERT_ARGS(ucs2_iter_set_position) 769 770 #if PARROT_HAS_ICU 771 i->charpos = n; 772 i->bytepos = n * sizeof (UChar); 773 #else 774 /* This function must never be called if compiled without ICU. 775 * See TT #557 776 */ 777 PARROT_ASSERT(0); 778 #endif 779 } 780 781 /* 782 579 783 =item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)> 580 784 581 785 Moves the string iterator C<i> to the next UCS-2 codepoint. … … 729 933 codepoints, 730 934 bytes, 731 935 iter_init, 732 find_cclass 936 find_cclass, 937 ucs2_iter_get, 938 ucs2_iter_skip, 939 ucs2_iter_get_and_advance, 940 ucs2_iter_set_and_advance, 941 ucs2_iter_set_position 733 942 }; 734 943 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 735 944 Parrot_register_encoding(interp, "ucs2", return_encoding); -
src/string/encoding/utf16.c
diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index 6fa5bb4..f0749d9 100644
a b 161 161 __attribute__nonnull__(2) 162 162 FUNC_MODIFIES(*i); 163 163 164 static UINTVAL utf16_iter_get(PARROT_INTERP, 165 ARGIN(const STRING *str), 166 ARGIN(const String_iter *i), 167 INTVAL offset) 168 __attribute__nonnull__(1) 169 __attribute__nonnull__(2) 170 __attribute__nonnull__(3); 171 172 PARROT_WARN_UNUSED_RESULT 173 static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, 174 ARGIN(const STRING *str), 175 ARGMOD(String_iter *i)) 176 __attribute__nonnull__(1) 177 __attribute__nonnull__(2) 178 __attribute__nonnull__(3) 179 FUNC_MODIFIES(*i); 180 181 static void utf16_iter_set_and_advance(PARROT_INTERP, 182 ARGMOD(STRING *str), 183 ARGMOD(String_iter *i), 184 UINTVAL c) 185 __attribute__nonnull__(1) 186 __attribute__nonnull__(2) 187 __attribute__nonnull__(3) 188 FUNC_MODIFIES(*str) 189 FUNC_MODIFIES(*i); 190 191 static void utf16_iter_set_position(PARROT_INTERP, 192 ARGIN(const STRING *str), 193 ARGMOD(String_iter *i), 194 UINTVAL n) 195 __attribute__nonnull__(1) 196 __attribute__nonnull__(2) 197 __attribute__nonnull__(3) 198 FUNC_MODIFIES(*i); 199 200 static void utf16_iter_skip(PARROT_INTERP, 201 ARGIN(const STRING *str), 202 ARGMOD(String_iter *i), 203 INTVAL skip) 204 __attribute__nonnull__(1) 205 __attribute__nonnull__(2) 206 __attribute__nonnull__(3) 207 FUNC_MODIFIES(*i); 208 164 209 static void utf16_set_position(PARROT_INTERP, 165 210 ARGMOD(String_iter *i), 166 211 UINTVAL n) … … 223 268 #define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 224 269 PARROT_ASSERT_ARG(interp) \ 225 270 , PARROT_ASSERT_ARG(i)) 271 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 272 PARROT_ASSERT_ARG(interp) \ 273 , PARROT_ASSERT_ARG(str) \ 274 , PARROT_ASSERT_ARG(i)) 275 #define ASSERT_ARGS_utf16_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 276 PARROT_ASSERT_ARG(interp) \ 277 , PARROT_ASSERT_ARG(str) \ 278 , PARROT_ASSERT_ARG(i)) 279 #define 
ASSERT_ARGS_utf16_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 280 PARROT_ASSERT_ARG(interp) \ 281 , PARROT_ASSERT_ARG(str) \ 282 , PARROT_ASSERT_ARG(i)) 283 #define ASSERT_ARGS_utf16_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 284 PARROT_ASSERT_ARG(interp) \ 285 , PARROT_ASSERT_ARG(str) \ 286 , PARROT_ASSERT_ARG(i)) 287 #define ASSERT_ARGS_utf16_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 288 PARROT_ASSERT_ARG(interp) \ 289 , PARROT_ASSERT_ARG(str) \ 290 , PARROT_ASSERT_ARG(i)) 226 291 #define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 227 292 PARROT_ASSERT_ARG(interp) \ 228 293 , PARROT_ASSERT_ARG(i)) … … 498 563 UINTVAL start; 499 564 STRING * const return_string = Parrot_str_new_COW(interp, src); 500 565 501 iter_init(interp, src, &iter);502 iter.set_position(interp, &iter, offset);566 STRING_ITER_INIT(interp, &iter); 567 utf16_iter_set_position(interp, src, &iter, offset); 503 568 start = iter.bytepos; 504 569 return_string->strstart = (char *)return_string->strstart + start ; 505 iter.set_position(interp, &iter, offset +count);570 utf16_iter_skip(interp, src, &iter, count); 506 571 return_string->bufused = iter.bytepos - start; 507 572 return_string->strlen = count; 508 573 return_string->hashval = 0; … … 532 597 String_iter iter; 533 598 UINTVAL start; 534 599 Parrot_str_reuse_COW(interp, src, return_string); 535 iter_init(interp, src, &iter);536 iter.set_position(interp, &iter, offset);600 STRING_ITER_INIT(interp, &iter); 601 utf16_iter_set_position(interp, src, &iter, offset); 537 602 start = iter.bytepos; 538 603 return_string->strstart = (char *)return_string->strstart + start ; 539 iter.set_position(interp, &iter, offset +count);604 utf16_iter_skip(interp, src, &iter, count); 540 605 return_string->bufused = iter.bytepos - start; 541 606 return_string->strlen = count; 542 607 return_string->hashval = 0; … … 675 740 codepoints(PARROT_INTERP, 
ARGIN(STRING *src)) 676 741 { 677 742 ASSERT_ARGS(codepoints) 678 String_iter iter; 743 #if PARROT_HAS_ICU 744 UChar *s = (UChar*) src->strstart; 745 UINTVAL pos = 0; 679 746 /* 680 747 * this is used to initially calculate src->strlen, 681 748 * therefore we must scan the whole string 682 749 */ 683 iter_init(interp, src, &iter); 684 while (iter.bytepos < src->bufused) 685 iter.get_and_advance(interp, &iter); 686 return iter.charpos; 750 while (pos * sizeof(UChar) < src->bufused) { 751 U16_FWD_1_UNSAFE(s, pos); 752 } 753 return pos * sizeof(UChar); 754 #else 755 UNUSED(src); 756 757 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 758 "no ICU lib loaded"); 759 #endif 687 760 } 688 761 689 762 /* … … 704 777 return src->bufused; 705 778 } 706 779 780 /* 781 782 =item C<static UINTVAL utf16_iter_get(PARROT_INTERP, const STRING *str, const 783 String_iter *i, INTVAL offset)> 784 785 Get the character at C<i> plus C<offset>. 786 787 =cut 788 789 */ 790 791 static UINTVAL 792 utf16_iter_get(PARROT_INTERP, 793 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 794 { 795 ASSERT_ARGS(utf16_iter_get) 796 #if PARROT_HAS_ICU 797 UChar *s = (UChar*) str->strstart; 798 UINTVAL c, pos; 799 800 pos = i->bytepos / sizeof (UChar); 801 if (offset > 0) { 802 U16_FWD_N_UNSAFE(s, pos, offset); 803 } 804 else if (offset < 0) { 805 U16_BACK_N_UNSAFE(s, pos, -offset); 806 } 807 U16_GET_UNSAFE(s, pos, c); 808 809 return c; 810 #else 811 UNUSED(str); 812 UNUSED(i); 813 UNUSED(offset); 814 815 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 816 "no ICU lib loaded"); 817 #endif 818 } 819 820 /* 821 822 =item C<static void utf16_iter_skip(PARROT_INTERP, const STRING *str, 823 String_iter *i, INTVAL skip)> 824 825 Moves the string iterator C<i> by C<skip> characters. 
826 827 =cut 828 829 */ 830 831 static void 832 utf16_iter_skip(PARROT_INTERP, 833 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 834 { 835 ASSERT_ARGS(utf16_iter_skip) 836 #if PARROT_HAS_ICU 837 UChar * const s = (UChar*) str->strstart; 838 UINTVAL pos = i->bytepos / sizeof (UChar); 839 840 if (skip > 0) { 841 U16_FWD_N_UNSAFE(s, pos, skip); 842 } 843 else if (skip < 0) { 844 U16_BACK_N_UNSAFE(s, pos, -skip); 845 } 846 847 i->charpos += skip; 848 i->bytepos = pos * sizeof (UChar); 849 #else 850 UNUSED(str); 851 UNUSED(i); 852 UNUSED(skip); 853 854 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 855 "no ICU lib loaded"); 856 #endif 857 } 858 859 /* 860 861 =item C<static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, const STRING 862 *str, String_iter *i)> 863 864 Moves the string iterator C<i> to the next UTF-16 codepoint. 865 866 =cut 867 868 */ 869 870 PARROT_WARN_UNUSED_RESULT 871 static UINTVAL 872 utf16_iter_get_and_advance(PARROT_INTERP, 873 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 874 { 875 ASSERT_ARGS(utf16_iter_get_and_advance) 876 #if PARROT_HAS_ICU 877 UChar *s = (UChar*) str->strstart; 878 UINTVAL c, pos; 879 pos = i->bytepos / sizeof (UChar); 880 /* TODO either make sure that we don't go past end or use SAFE 881 * iter versions 882 */ 883 U16_NEXT_UNSAFE(s, pos, c); 884 i->charpos++; 885 i->bytepos = pos * sizeof (UChar); 886 return c; 887 #else 888 UNUSED(str); 889 UNUSED(i); 890 891 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 892 "no ICU lib loaded"); 893 #endif 894 } 895 896 /* 897 898 =item C<static void utf16_iter_set_and_advance(PARROT_INTERP, STRING *str, 899 String_iter *i, UINTVAL c)> 900 901 With the string iterator C<i>, appends the codepoint C<c> and advances to the 902 next position in the string. 
903 904 =cut 905 906 */ 907 908 static void 909 utf16_iter_set_and_advance(PARROT_INTERP, 910 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 911 { 912 ASSERT_ARGS(utf16_iter_set_and_advance) 913 #if PARROT_HAS_ICU 914 UChar *s = (UChar*) str->strstart; 915 UINTVAL pos; 916 pos = i->bytepos / sizeof (UChar); 917 U16_APPEND_UNSAFE(s, pos, c); 918 i->charpos++; 919 i->bytepos = pos * sizeof (UChar); 920 #else 921 UNUSED(str); 922 UNUSED(i); 923 UNUSED(c); 924 925 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 926 "no ICU lib loaded"); 927 #endif 928 } 929 930 /* 931 932 =item C<static void utf16_iter_set_position(PARROT_INTERP, const STRING *str, 933 String_iter *i, UINTVAL n)> 934 935 Moves the string iterator C<i> to the position C<n> in the string. 936 937 =cut 938 939 */ 940 941 static void 942 utf16_iter_set_position(PARROT_INTERP, 943 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 944 { 945 ASSERT_ARGS(utf16_iter_set_position) 946 #if PARROT_HAS_ICU 947 UChar * const s = (UChar*) str->strstart; 948 UINTVAL pos; 949 pos = 0; 950 U16_FWD_N_UNSAFE(s, pos, n); 951 i->charpos = n; 952 i->bytepos = pos * sizeof (UChar); 953 #else 954 UNUSED(str); 955 UNUSED(i); 956 UNUSED(n); 957 958 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 959 "no ICU lib loaded"); 960 #endif 961 } 962 707 963 #if PARROT_HAS_ICU 708 964 /* 709 965 … … 843 1099 codepoints, 844 1100 bytes, 845 1101 iter_init, 846 find_cclass 1102 find_cclass, 1103 utf16_iter_get, 1104 utf16_iter_skip, 1105 utf16_iter_get_and_advance, 1106 utf16_iter_set_and_advance, 1107 utf16_iter_set_position 847 1108 }; 848 1109 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 849 1110 Parrot_register_encoding(interp, "utf16", return_encoding); -
src/string/encoding/utf8.c
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index e81505f..fc8c262 100644
a b 170 170 __attribute__nonnull__(2) 171 171 FUNC_MODIFIES(*i); 172 172 173 static UINTVAL utf8_iter_get(PARROT_INTERP, 174 ARGIN(const STRING *str), 175 ARGIN(const String_iter *i), 176 INTVAL offset) 177 __attribute__nonnull__(1) 178 __attribute__nonnull__(2) 179 __attribute__nonnull__(3); 180 181 static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, 182 ARGIN(const STRING *str), 183 ARGMOD(String_iter *i)) 184 __attribute__nonnull__(1) 185 __attribute__nonnull__(2) 186 __attribute__nonnull__(3) 187 FUNC_MODIFIES(*i); 188 189 static void utf8_iter_set_and_advance(PARROT_INTERP, 190 ARGMOD(STRING *str), 191 ARGMOD(String_iter *i), 192 UINTVAL c) 193 __attribute__nonnull__(1) 194 __attribute__nonnull__(2) 195 __attribute__nonnull__(3) 196 FUNC_MODIFIES(*str) 197 FUNC_MODIFIES(*i); 198 199 static void utf8_iter_set_position(SHIM_INTERP, 200 ARGIN(const STRING *str), 201 ARGMOD(String_iter *i), 202 UINTVAL pos) 203 __attribute__nonnull__(2) 204 __attribute__nonnull__(3) 205 FUNC_MODIFIES(*i); 206 207 static void utf8_iter_skip(SHIM_INTERP, 208 ARGIN(const STRING *str), 209 ARGMOD(String_iter *i), 210 INTVAL skip) 211 __attribute__nonnull__(2) 212 __attribute__nonnull__(3) 213 FUNC_MODIFIES(*i); 214 173 215 static void utf8_set_position(SHIM_INTERP, 174 216 ARGMOD(String_iter *i), 175 217 UINTVAL pos) … … 244 286 #define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 245 287 PARROT_ASSERT_ARG(interp) \ 246 288 , PARROT_ASSERT_ARG(i)) 289 #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 290 PARROT_ASSERT_ARG(interp) \ 291 , PARROT_ASSERT_ARG(str) \ 292 , PARROT_ASSERT_ARG(i)) 293 #define ASSERT_ARGS_utf8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 294 PARROT_ASSERT_ARG(interp) \ 295 , PARROT_ASSERT_ARG(str) \ 296 , PARROT_ASSERT_ARG(i)) 297 #define ASSERT_ARGS_utf8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 298 PARROT_ASSERT_ARG(interp) \ 
299 , PARROT_ASSERT_ARG(str) \ 300 , PARROT_ASSERT_ARG(i)) 301 #define ASSERT_ARGS_utf8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 302 PARROT_ASSERT_ARG(str) \ 303 , PARROT_ASSERT_ARG(i)) 304 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 305 PARROT_ASSERT_ARG(str) \ 306 , PARROT_ASSERT_ARG(i)) 247 307 #define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 248 308 PARROT_ASSERT_ARG(i)) 249 309 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 456 516 457 517 /* 458 518 519 =item C<static UINTVAL utf8_iter_get(PARROT_INTERP, const STRING *str, const 520 String_iter *i, INTVAL offset)> 521 522 Get the character at C<i> plus C<offset>. 523 524 =cut 525 526 */ 527 528 static UINTVAL 529 utf8_iter_get(PARROT_INTERP, 530 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 531 { 532 ASSERT_ARGS(utf8_iter_get) 533 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 534 535 if (offset > 0) { 536 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, offset); 537 } 538 else if (offset < 0) { 539 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -offset); 540 } 541 542 return utf8_decode(interp, u8ptr); 543 } 544 545 /* 546 547 =item C<static void utf8_iter_skip(PARROT_INTERP, const STRING *str, String_iter 548 *i, INTVAL skip)> 549 550 Moves the string iterator C<i> by C<skip> characters. 
551 552 =cut 553 554 */ 555 556 static void 557 utf8_iter_skip(SHIM_INTERP, 558 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 559 { 560 ASSERT_ARGS(utf8_iter_skip) 561 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 562 563 if (skip > 0) { 564 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, skip); 565 } 566 else if (skip < 0) { 567 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -skip); 568 } 569 570 i->charpos += skip; 571 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 572 } 573 574 /* 575 576 =item C<static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, const STRING 577 *str, String_iter *i)> 578 579 The UTF-8 implementation of the string iterator's C<get_and_advance> 580 function. 581 582 =cut 583 584 */ 585 586 static UINTVAL 587 utf8_iter_get_and_advance(PARROT_INTERP, 588 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 589 { 590 ASSERT_ARGS(utf8_iter_get_and_advance) 591 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 592 UINTVAL c = *u8ptr; 593 594 if (UTF8_IS_START(c)) { 595 UINTVAL len = UTF8SKIP(u8ptr); 596 597 c &= UTF8_START_MASK(len); 598 i->bytepos += len; 599 for (len--; len; len--) { 600 u8ptr++; 601 602 if (!UTF8_IS_CONTINUATION(*u8ptr)) 603 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 604 "Malformed UTF-8 string\n"); 605 c = UTF8_ACCUMULATE(c, *u8ptr); 606 } 607 608 if (UNICODE_IS_SURROGATE(c)) 609 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 610 "Surrogate in UTF-8 string\n"); 611 } 612 else if (!UNICODE_IS_INVARIANT(c)) { 613 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 614 "Malformed UTF-8 string\n"); 615 } 616 else { 617 i->bytepos++; 618 } 619 620 i->charpos++; 621 return c; 622 } 623 624 /* 625 626 =item C<static void utf8_iter_set_and_advance(PARROT_INTERP, STRING *str, 627 String_iter *i, UINTVAL c)> 628 629 The UTF-8 implementation of the string iterator's C<set_and_advance> 630 
function. 631 632 =cut 633 634 */ 635 636 static void 637 utf8_iter_set_and_advance(PARROT_INTERP, 638 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 639 { 640 ASSERT_ARGS(utf8_iter_set_and_advance) 641 unsigned char * const pos = (unsigned char *)str->strstart + i->bytepos; 642 unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c); 643 644 i->bytepos += (new_pos - pos); 645 /* XXX possible buffer overrun exception? */ 646 PARROT_ASSERT(i->bytepos <= Buffer_buflen(str)); 647 i->charpos++; 648 } 649 650 /* 651 652 =item C<static void utf8_iter_set_position(PARROT_INTERP, const STRING *str, 653 String_iter *i, UINTVAL pos)> 654 655 The UTF-8 implementation of the string iterator's C<set_position> 656 function. 657 658 =cut 659 660 */ 661 662 static void 663 utf8_iter_set_position(SHIM_INTERP, 664 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL pos) 665 { 666 ASSERT_ARGS(utf8_iter_set_position) 667 const utf8_t *u8ptr = (const utf8_t *)str->strstart; 668 669 if (pos == 0) { 670 i->charpos = 0; 671 i->bytepos = 0; 672 return; 673 } 674 675 /* 676 * we know the byte offsets of three positions: start, current and end 677 * now find the shortest way to reach pos 678 */ 679 if (pos < i->charpos) { 680 if (pos <= (i->charpos >> 1)) { 681 /* go forward from start */ 682 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, pos); 683 } 684 else { 685 /* go backward from current */ 686 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + i->bytepos, i->charpos - pos); 687 } 688 } 689 else { 690 const UINTVAL len = str->strlen; 691 if (pos <= i->charpos + ((len - i->charpos) >> 1)) { 692 /* go forward from current */ 693 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr + i->bytepos, pos - i->charpos); 694 } 695 else { 696 /* go backward from end */ 697 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + str->bufused, len - pos); 698 } 699 } 700 701 i->charpos = pos; 702 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 703 } 704 
705 /* 706 459 707 =item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)> 460 708 461 709 The UTF-8 implementation of the string iterator's C<get_and_advance> … … 582 830 { 583 831 ASSERT_ARGS(to_encoding) 584 832 STRING *result; 585 String_iter src_iter;586 UINTVAL offs,dest_len, dest_pos, src_len;833 const ENCODING *src_encoding; 834 UINTVAL dest_len, dest_pos, src_len; 587 835 const int in_place = (dest == NULL); 588 836 unsigned char *new_pos, *pos, *p; 589 837 … … 597 845 result = dest; 598 846 } 599 847 600 /* init iter before possilby changing encoding*/601 ENCODING_ITER_INIT(interp, src, &src_iter);848 /* save source encoding before possibly changing it */ 849 src_encoding = src->encoding; 602 850 result->charset = Parrot_unicode_charset_ptr; 603 851 result->encoding = Parrot_utf8_encoding_ptr; 604 852 result->strlen = src_len; … … 621 869 result->bufused = dest_len; 622 870 } 623 871 else { 872 String_iter src_iter; 873 STRING_ITER_INIT(interp, &src_iter); 624 874 dest_len = src_len; 625 875 dest_pos = 0; 626 for (offs = 0; offs < src_len; ++offs) {627 const UINTVAL c = src_ iter.get_and_advance(interp, &src_iter);876 while (src_iter.charpos < src_len) { 877 const UINTVAL c = src_encoding->iter_get_and_advance(interp, src, &src_iter); 628 878 if (dest_len - dest_pos < 6) { 629 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);879 UINTVAL need = (UINTVAL)((src->strlen - src_iter.charpos + 1) * 1.5); 630 880 if (need < 16) 631 881 need = 16; 632 882 dest_len += need; … … 790 1040 String_iter iter; 791 1041 UINTVAL start; 792 1042 793 iter_init(interp, src, &iter);1043 STRING_ITER_INIT(interp, &iter); 794 1044 795 1045 if (offset) 796 iter.set_position(interp, &iter, offset);1046 utf8_iter_set_position(interp, src, &iter, offset); 797 1047 798 1048 start = iter.bytepos; 799 1049 return_string->strstart = (char *)return_string->strstart + start; 800 1050 801 1051 if (count) 802 iter.set_position(interp, &iter, offset + count);1052 
utf8_iter_set_position(interp, src, &iter, offset + count); 803 1053 804 1054 return_string->bufused = iter.bytepos - start; 805 1055 return_string->strlen = count; … … 860 1110 UINTVAL start; 861 1111 862 1112 Parrot_str_reuse_COW(interp, src, return_string); 863 iter_init(interp, src, &iter);864 iter.set_position(interp, &iter, offset);1113 STRING_ITER_INIT(interp, &iter); 1114 utf8_iter_set_position(interp, src, &iter, offset); 865 1115 866 1116 start = iter.bytepos; 867 1117 868 1118 return_string->strstart = (char *)return_string->strstart + start; 869 iter.set_position(interp, &iter, offset + count);1119 utf8_iter_set_position(interp, src, &iter, offset + count); 870 1120 871 1121 return_string->bufused = iter.bytepos - start; 872 1122 return_string->strlen = count; … … 973 1223 * this is used to initially calculate src->strlen, 974 1224 * therefore we must scan the whole string 975 1225 */ 976 iter_init(interp, src, &iter);1226 STRING_ITER_INIT(interp, &iter); 977 1227 while (iter.bytepos < src->bufused) 978 iter.get_and_advance(interp, &iter);1228 utf8_iter_get_and_advance(interp, src, &iter); 979 1229 return iter.charpos; 980 1230 } 981 1231 … … 1055 1305 codepoints, 1056 1306 bytes, 1057 1307 iter_init, 1058 find_cclass 1308 find_cclass, 1309 utf8_iter_get, 1310 utf8_iter_skip, 1311 utf8_iter_get_and_advance, 1312 utf8_iter_set_and_advance, 1313 utf8_iter_set_position 1059 1314 }; 1060 1315 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 1061 1316 Parrot_register_encoding(interp, "utf8", return_encoding);