Ticket #1456: string-iter-v7.diff
| File string-iter-v7.diff, 84.2 KB (added by nwellnhof, 3 years ago) |
|---|
-
include/parrot/encoding.h
diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index 84d42eb..5965ad6 100644
a b 32 32 33 33 typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src, 34 34 struct string_iterator_t *); 35 typedef UINTVAL (*encoding_iter_get_t)( 36 PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); 37 typedef void (*encoding_iter_skip_t)( 38 PARROT_INTERP, const STRING *str, String_iter *i, INTVAL skip); 39 typedef UINTVAL (*encoding_iter_get_and_advance_t)( 40 PARROT_INTERP, const STRING *str, String_iter *i); 41 typedef void (*encoding_iter_set_and_advance_t)( 42 PARROT_INTERP, STRING *str, String_iter *i, UINTVAL c); 43 typedef void (*encoding_iter_set_position_t)( 44 PARROT_INTERP, const STRING *str, String_iter *i, UINTVAL pos); 35 45 36 46 struct _encoding { 37 47 ARGIN(const char *name); … … 47 57 encoding_iter_init_t iter_init; 48 58 encoding_find_cclass_t find_cclass; 49 59 encoding_hash_t hash; 60 encoding_iter_get_t iter_get; 61 encoding_iter_skip_t iter_skip; 62 encoding_iter_get_and_advance_t iter_get_and_advance; 63 encoding_iter_set_and_advance_t iter_set_and_advance; 64 encoding_iter_set_position_t iter_set_position; 50 65 }; 51 66 52 67 typedef struct _encoding ENCODING; -
include/parrot/string.h
diff --git a/include/parrot/string.h b/include/parrot/string.h index 8914db3..d02f5c1 100644
a b 38 38 void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos); 39 39 } String_iter; 40 40 41 #define STRING_ITER_INIT(i, iter) \ 42 (iter)->charpos = (iter)->bytepos = 0 43 #define STRING_ITER_GET(i, str, iter, offset) \ 44 ((str)->encoding)->iter_get((i), (str), (iter), (offset)) 45 #define STRING_ITER_SKIP(i, str, iter, skip) \ 46 ((str)->encoding)->iter_skip((i), (str), (iter), (skip)) 47 #define STRING_ITER_GET_AND_ADVANCE(i, str, iter) \ 48 ((str)->encoding)->iter_get_and_advance((i), (str), (iter)) 49 #define STRING_ITER_SET_AND_ADVANCE(i, str, iter, c) \ 50 ((str)->encoding)->iter_set_and_advance((i), (str), (iter), (c)) 51 #define STRING_ITER_SET_POSITION(i, str, iter, pos) \ 52 ((str)->encoding)->iter_set_position((i), (str), (iter), (pos)) 53 41 54 #define STREQ(x, y) (strcmp((x), (y))==0) 42 55 #define STRNEQ(x, y) (strcmp((x), (y))!=0) 43 56 -
include/parrot/string_funcs.h
diff --git a/include/parrot/string_funcs.h b/include/parrot/string_funcs.h index 7c35265..63501fe 100644
a b 226 226 INTVAL Parrot_str_is_null(SHIM_INTERP, ARGIN_NULLOK(const STRING *s)); 227 227 228 228 PARROT_EXPORT 229 INTVAL Parrot_str_iter_index(PARROT_INTERP, 230 ARGIN(const STRING *src), 231 ARGMOD(String_iter *start), 232 ARGOUT(String_iter *end), 233 ARGIN(const STRING *search)) 234 __attribute__nonnull__(1) 235 __attribute__nonnull__(2) 236 __attribute__nonnull__(3) 237 __attribute__nonnull__(4) 238 __attribute__nonnull__(5) 239 FUNC_MODIFIES(*start) 240 FUNC_MODIFIES(*end); 241 242 PARROT_EXPORT 243 PARROT_CANNOT_RETURN_NULL 244 PARROT_WARN_UNUSED_RESULT 245 STRING * Parrot_str_iter_substr(PARROT_INTERP, 246 ARGIN(const STRING *str), 247 ARGIN(const String_iter *l), 248 ARGIN_NULLOK(const String_iter *r)) 249 __attribute__nonnull__(1) 250 __attribute__nonnull__(2) 251 __attribute__nonnull__(3); 252 253 PARROT_EXPORT 229 254 PARROT_WARN_UNUSED_RESULT 230 255 PARROT_CANNOT_RETURN_NULL 231 256 STRING* Parrot_str_join(PARROT_INTERP, … … 559 584 PARROT_ASSERT_ARG(interp) \ 560 585 , PARROT_ASSERT_ARG(s)) 561 586 #define ASSERT_ARGS_Parrot_str_is_null __attribute__unused__ int _ASSERT_ARGS_CHECK = (0) 587 #define ASSERT_ARGS_Parrot_str_iter_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 588 PARROT_ASSERT_ARG(interp) \ 589 , PARROT_ASSERT_ARG(src) \ 590 , PARROT_ASSERT_ARG(start) \ 591 , PARROT_ASSERT_ARG(end) \ 592 , PARROT_ASSERT_ARG(search)) 593 #define ASSERT_ARGS_Parrot_str_iter_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 594 PARROT_ASSERT_ARG(interp) \ 595 , PARROT_ASSERT_ARG(str) \ 596 , PARROT_ASSERT_ARG(l)) 562 597 #define ASSERT_ARGS_Parrot_str_join __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 563 598 PARROT_ASSERT_ARG(interp) \ 564 599 , PARROT_ASSERT_ARG(ar)) -
src/io/utf8.c
diff --git a/src/io/utf8.c b/src/io/utf8.c index a4dc6a4..1c5b034 100644
a b 57 57 s->encoding = Parrot_utf8_encoding_ptr; 58 58 59 59 /* count chars, verify utf8 */ 60 Parrot_utf8_encoding_ptr->iter_init(interp, s, &iter);60 STRING_ITER_INIT(interp, &iter); 61 61 62 62 while (iter.bytepos < s->bufused) { 63 63 if (iter.bytepos + 4 > s->bufused) { … … 84 84 85 85 s->strlen = iter.charpos; 86 86 s = Parrot_str_concat(interp, s, s2); 87 /* String is updated. Poke into iterator to replace old string */88 iter.str = s;89 87 *buf = s; 90 88 len += len2 + 1; 91 89 … … 93 91 } 94 92 } 95 93 ok: 96 iter.get_and_advance(interp, &iter);94 Parrot_utf8_encoding_ptr->iter_get_and_advance(interp, *buf, &iter); 97 95 } 98 96 s->strlen = iter.charpos; 99 97 return len; -
src/pmc/stringiterator.pmc
diff --git a/src/pmc/stringiterator.pmc b/src/pmc/stringiterator.pmc index ce9001c..bda81b7 100644
a b 27 27 /* HEADERIZER END: static */ 28 28 29 29 pmclass StringIterator auto_attrs extends Iterator { 30 ATTR PMC *string; /* String to iterate over */ 31 ATTR INTVAL pos; /* Current position of iterator for forward iterator */ 32 /* Previous position of iterator for reverse iterator */ 33 ATTR INTVAL length; /* Length of C<string> */ 34 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 30 ATTR STRING *str_val; /* String to iterate over */ 31 ATTR String_iter iter; /* String iterator */ 32 ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */ 35 33 36 34 /* 37 35 … … 43 41 44 42 */ 45 43 VTABLE void init_pmc(PMC *string) { 46 SET_ATTR_string(INTERP, SELF, string); 44 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 45 STRING * const str_val = VTABLE_get_string(INTERP, string); 46 47 SET_ATTR_str_val(INTERP, SELF, str_val); 48 STRING_ITER_INIT(INTERP, iter); 49 SET_ATTR_reverse(INTERP, SELF, ITERATE_FROM_START); 47 50 48 /* by default, iterate from start */49 SELF.set_integer_native(ITERATE_FROM_START);50 51 PObj_custom_mark_SET(SELF); 51 52 } 52 53 … … 61 62 */ 62 63 63 64 VTABLE void mark() { 64 PMC *string; 65 GET_ATTR_string(INTERP, SELF, string); 66 Parrot_gc_mark_PMC_alive(INTERP, string); 65 STRING *str_val; 66 67 GET_ATTR_str_val(INTERP, SELF, str_val); 68 Parrot_gc_mark_STRING_alive(INTERP, str_val); 67 69 } 68 70 69 71 /* … … 74 76 75 77 */ 76 78 VTABLE PMC* clone() { 77 Parrot_StringIterator_attributes * const attrs = 78 PARROT_STRINGITERATOR(SELF); 79 PMC * const clone = 80 Parrot_pmc_new_init(INTERP, enum_class_StringIterator, attrs->string); 81 Parrot_StringIterator_attributes * const clone_attrs = 82 PARROT_STRINGITERATOR(clone); 83 84 clone_attrs->pos = attrs->pos; 85 clone_attrs->reverse = attrs->reverse; 79 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 80 PMC *clone, *str_pmc; 81 String_iter *clone_iter; 82 STRING *str_val; 83 INTVAL reverse; 84 85 str_pmc = 
Parrot_pmc_new(INTERP, enum_class_String); 86 GET_ATTR_str_val(INTERP, SELF, str_val); 87 VTABLE_set_string_native(INTERP, str_pmc, str_val); 88 clone = Parrot_pmc_new_init(INTERP, enum_class_StringIterator, str_pmc); 89 clone_iter = &PARROT_STRINGITERATOR(clone)->iter; 90 *clone_iter = *iter; 91 GET_ATTR_reverse(INTERP, SELF, reverse); 92 SET_ATTR_reverse(INTERP, clone, reverse); 93 86 94 return clone; 87 95 } 88 96 … … 111 119 */ 112 120 113 121 VTABLE INTVAL elements() { 114 Parrot_StringIterator_attributes * const attrs = 115 PARROT_STRINGITERATOR(SELF); 116 if (attrs->reverse) 117 return attrs->pos; 122 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 123 STRING *str_val; 124 INTVAL reverse; 125 126 GET_ATTR_str_val(INTERP, SELF, str_val); 127 GET_ATTR_reverse(INTERP, SELF, reverse); 128 129 if (reverse) 130 return iter->charpos; 118 131 else 119 return attrs->length - attrs->pos;132 return str_val->strlen - iter->charpos; 120 133 } 121 134 122 135 VTABLE INTVAL get_integer() { … … 137 150 */ 138 151 139 152 VTABLE void set_integer_native(INTVAL value) { 140 Parrot_StringIterator_attributes * const attrs = 141 PARROT_STRINGITERATOR(SELF); 142 switch (value) { 143 case ITERATE_FROM_START: 144 attrs->reverse = 0; 145 attrs->pos = 0; 146 attrs->length = VTABLE_elements(INTERP, attrs->string); 147 break; 148 case ITERATE_FROM_END: 149 attrs->reverse = 1; 150 attrs->pos = attrs->length 151 = VTABLE_elements(INTERP, attrs->string); 152 break; 153 default: 153 STRING *str_val; 154 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 155 156 GET_ATTR_str_val(INTERP, SELF, str_val); 157 if (value == ITERATE_FROM_START) { 158 SET_ATTR_reverse(INTERP, SELF, 0); 159 STRING_ITER_SET_POSITION(INTERP, str_val, iter, 0); 160 } 161 else if (value == ITERATE_FROM_END) { 162 SET_ATTR_reverse(INTERP, SELF, 1); 163 STRING_ITER_SET_POSITION(INTERP, str_val, iter, str_val->strlen); 164 } 165 else { 154 166 Parrot_ex_throw_from_c_args(INTERP, NULL, 
EXCEPTION_INVALID_OPERATION, 155 167 "Wrong direction for StringIterator"); 156 168 } … … 167 179 */ 168 180 169 181 VTABLE PMC *get_pmc() { 170 PMC *string; 171 GET_ATTR_string(INTERP, SELF, string); 172 return string ? string : PMCNULL; 182 PMC * const string = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type( 183 interp, enum_class_String)); 184 STRING *str_val; 185 186 GET_ATTR_str_val(INTERP, SELF, str_val); 187 VTABLE_set_string_native(interp, string, str_val); 188 return string; 173 189 } 174 190 175 191 /* … … 182 198 183 199 */ 184 200 VTABLE PMC *shift_pmc() { 185 Parrot_StringIterator_attributes * const attrs = 186 PARROT_STRINGITERATOR(SELF); 201 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 187 202 PMC *ret; 203 STRING *str_val, *substr; 204 const String_iter old_iter = *iter; 188 205 189 if (attrs->pos >= attrs->length) 206 GET_ATTR_str_val(INTERP, SELF, str_val); 207 if (iter->charpos >= str_val->strlen) 190 208 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 191 209 "StopIteration"); 192 210 193 211 ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 194 VTABLE_set_string_native(INTERP, ret, 195 VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++)); 212 STRING_ITER_SKIP(INTERP, str_val, iter, 1); 213 substr = Parrot_str_iter_substr(INTERP, str_val, &old_iter, iter); 214 VTABLE_set_string_native(INTERP, ret, substr); 196 215 return ret; 197 216 } 198 217 … … 206 225 207 226 */ 208 227 VTABLE STRING *shift_string() { 209 Parrot_StringIterator_attributes * const attrs = 210 PARROT_STRINGITERATOR(SELF); 228 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 229 STRING *str_val; 230 const String_iter old_iter = *iter; 211 231 212 if (attrs->pos >= attrs->length) 232 GET_ATTR_str_val(INTERP, SELF, str_val); 233 if (iter->charpos >= str_val->strlen) 213 234 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 214 235 "StopIteration"); 215 236 216 return 
VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++); 237 STRING_ITER_SKIP(INTERP, str_val, iter, 1); 238 return Parrot_str_iter_substr(INTERP, str_val, &old_iter, iter); 217 239 } 218 240 219 241 /* … … 226 248 227 249 */ 228 250 VTABLE INTVAL shift_integer() { 229 Parrot_StringIterator_attributes * const attrs =230 PARROT_STRINGITERATOR(SELF);251 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 252 STRING *str_val; 231 253 232 if (attrs->pos >= attrs->length) 254 GET_ATTR_str_val(INTERP, SELF, str_val); 255 if (iter->charpos >= str_val->strlen) 233 256 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 234 257 "StopIteration"); 235 258 236 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, attrs->pos++);259 return STRING_ITER_GET_AND_ADVANCE(INTERP, str_val, iter); 237 260 } 238 261 239 262 /* … … 246 269 247 270 */ 248 271 VTABLE PMC *pop_pmc() { 249 Parrot_StringIterator_attributes * const attrs =250 PARROT_STRINGITERATOR(SELF);272 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 273 STRING *str_val, *substr; 251 274 PMC *ret; 275 const String_iter old_iter = *iter; 252 276 253 if (!STATICSELF.get_bool()) 277 GET_ATTR_str_val(INTERP, SELF, str_val); 278 /* Shouldn't this test be (iter->charpos <= 0) ? 
*/ 279 if (SELF.elements() <= 0) 254 280 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 255 281 "StopIteration"); 256 282 257 283 ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String)); 258 VTABLE_set_string_native(INTERP, ret, 259 VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos)); 284 STRING_ITER_SKIP(INTERP, str_val, iter, -1); 285 substr = Parrot_str_iter_substr(INTERP, str_val, iter, &old_iter); 286 VTABLE_set_string_native(INTERP, ret, substr); 260 287 return ret; 261 288 } 262 289 … … 270 297 271 298 */ 272 299 VTABLE STRING *pop_string() { 273 Parrot_StringIterator_attributes * const attrs = 274 PARROT_STRINGITERATOR(SELF); 300 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 301 STRING *str_val; 302 const String_iter old_iter = *iter; 275 303 276 if (!STATICSELF.get_bool()) 304 GET_ATTR_str_val(INTERP, SELF, str_val); 305 /* Shouldn't this test be (iter->charpos <= 0) ? */ 306 if (SELF.elements() <= 0) 277 307 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 278 308 "StopIteration"); 279 309 280 return VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos); 310 STRING_ITER_SKIP(INTERP, str_val, iter, -1); 311 return Parrot_str_iter_substr(INTERP, str_val, iter, &old_iter); 281 312 } 282 313 283 314 /* … … 290 321 291 322 */ 292 323 VTABLE INTVAL pop_integer() { 293 Parrot_StringIterator_attributes * const attrs =294 PARROT_STRINGITERATOR(SELF);324 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 325 STRING *str_val; 295 326 296 if (!STATICSELF.get_bool()) 327 GET_ATTR_str_val(INTERP, SELF, str_val); 328 /* Shouldn't this test be (iter->charpos <= 0) ? 
*/ 329 if (SELF.elements() <= 0) 297 330 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 298 331 "StopIteration"); 299 332 300 return VTABLE_get_integer_keyed_int(INTERP, attrs->string, --attrs->pos); 333 STRING_ITER_SKIP(INTERP, str_val, iter, -1); 334 return STRING_ITER_GET(INTERP, str_val, iter, 0); 301 335 } 302 336 303 337 /* … … 311 345 */ 312 346 313 347 VTABLE INTVAL get_integer_keyed_int(INTVAL idx) { 314 return VTABLE_get_integer_keyed_int(INTERP, STATICSELF.get_pmc(), 315 PARROT_STRINGITERATOR(SELF)->pos + idx); 348 String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter; 349 STRING *str_val; 350 const UINTVAL offset = iter->charpos + idx; 351 352 GET_ATTR_str_val(INTERP, SELF, str_val); 353 if (offset >= str_val->strlen) 354 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 355 "StopIteration"); 356 357 return STRING_ITER_GET(INTERP, str_val, iter, idx); 316 358 } 317 359 318 360 /* … … 326 368 */ 327 369 328 370 VTABLE STRING *get_string_keyed_int(INTVAL idx) { 329 return VTABLE_get_string_keyed_int(INTERP, STATICSELF.get_pmc(), 330 PARROT_STRINGITERATOR(SELF)->pos + idx); 371 String_iter iter = PARROT_STRINGITERATOR(SELF)->iter; 372 String_iter next_iter; 373 STRING *str_val; 374 const UINTVAL offset = iter.charpos + idx; 375 376 GET_ATTR_str_val(INTERP, SELF, str_val); 377 if (offset >= str_val->strlen) 378 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS, 379 "StopIteration"); 380 381 if (idx != 0) 382 STRING_ITER_SKIP(INTERP, str_val, &iter, idx); 383 next_iter = iter; 384 STRING_ITER_SKIP(INTERP, str_val, &next_iter, 1); 385 386 return Parrot_str_iter_substr(INTERP, str_val, &iter, &next_iter); 331 387 } 332 388 } 333 389 -
src/string/api.c
diff --git a/src/string/api.c b/src/string/api.c index e37354e..e2a558e 100644
a b 1104 1104 return CHARSET_GET_CODEPOINTS(interp, src, true_offset, true_length); 1105 1105 } 1106 1106 1107 /* 1108 1109 =item C<STRING * Parrot_str_iter_substr(PARROT_INTERP, const STRING *str, const 1110 String_iter *l, const String_iter *r)> 1111 1112 Returns the substring between iterators C<l> and C<r>. 1113 1114 =cut 1115 1116 */ 1117 1118 PARROT_EXPORT 1119 PARROT_CANNOT_RETURN_NULL 1120 PARROT_WARN_UNUSED_RESULT 1121 STRING * 1122 Parrot_str_iter_substr(PARROT_INTERP, 1123 ARGIN(const STRING *str), 1124 ARGIN(const String_iter *l), ARGIN_NULLOK(const String_iter *r)) 1125 { 1126 ASSERT_ARGS(Parrot_str_iter_substr) 1127 STRING *dest = Parrot_str_copy(interp, str); 1128 1129 dest->strstart = (char *)dest->strstart + l->bytepos; 1130 1131 if (r == NULL) { 1132 dest->bufused = str->bufused - l->bytepos; 1133 dest->strlen = str->strlen - l->charpos; 1134 } 1135 else { 1136 dest->bufused = r->bytepos - l->bytepos; 1137 dest->strlen = r->charpos - l->charpos; 1138 } 1139 1140 dest->hashval = 0; 1141 1142 return dest; 1143 } 1144 1145 /* 1146 1147 =item C<INTVAL Parrot_str_iter_index(PARROT_INTERP, const STRING *src, 1148 String_iter *start, String_iter *end, const STRING *search)> 1149 1150 Find the next occurrence of STRING C<search> in STRING C<src> starting at 1151 String_iter C<start>. If C<search> is found C<start> is modified to mark the 1152 beginning of C<search> and String_iter C<end> is set to the character after 1153 C<search> in C<src>. Returns the character position where C<search> was found 1154 or -1 if it wasn't found. 
1155 1156 =cut 1157 1158 */ 1159 1160 PARROT_EXPORT 1161 INTVAL 1162 Parrot_str_iter_index(PARROT_INTERP, 1163 ARGIN(const STRING *src), 1164 ARGMOD(String_iter *start), ARGOUT(String_iter *end), 1165 ARGIN(const STRING *search)) 1166 { 1167 ASSERT_ARGS(Parrot_str_iter_index) 1168 String_iter search_iter, search_start, next_start; 1169 const UINTVAL len = search->strlen; 1170 UINTVAL c0; 1171 1172 if (len == 0) { 1173 *end = *start; 1174 return start->charpos; 1175 } 1176 1177 STRING_ITER_INIT(interp, &search_iter); 1178 c0 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter); 1179 search_start = search_iter; 1180 next_start = *start; 1181 1182 while (start->charpos + len <= src->strlen) { 1183 UINTVAL c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, &next_start); 1184 1185 if (c1 == c0) { 1186 UINTVAL c2; 1187 *end = next_start; 1188 1189 do { 1190 if (search_iter.charpos >= len) 1191 return start->charpos; 1192 c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, end); 1193 c2 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter); 1194 } while (c1 == c2); 1195 1196 search_iter = search_start; 1197 } 1198 1199 *start = next_start; 1200 } 1201 1202 return -1; 1203 } 1204 1107 1205 1108 1206 /* 1109 1207 … … 1145 1243 UINTVAL true_offset = (UINTVAL)offset; 1146 1244 UINTVAL true_length = (UINTVAL)length; 1147 1245 1148 UINTVAL start_byte, end_byte ;1246 UINTVAL start_byte, end_byte, start_char, end_char; 1149 1247 INTVAL buf_size; 1150 1248 1151 1249 if (STRING_IS_NULL(src)) { … … 1181 1279 } 1182 1280 1183 1281 /* get byte position of the part that will be replaced */ 1184 ENCODING_ITER_INIT(interp, src, &iter);1282 STRING_ITER_INIT(interp, &iter); 1185 1283 1186 iter.set_position(interp, &iter, true_offset);1284 STRING_ITER_SET_POSITION(interp, src, &iter, true_offset); 1187 1285 start_byte = iter.bytepos; 1286 start_char = iter.charpos; 1188 1287 1189 iter.set_position(interp, &iter, true_offset +true_length);1288 STRING_ITER_SKIP(interp, src, &iter, 
true_length); 1190 1289 end_byte = iter.bytepos; 1290 end_char = iter.charpos; 1191 1291 1192 1292 /* not possible.... */ 1193 1293 if (end_byte < start_byte) … … 1226 1326 (char *)src->strstart + end_byte, 1227 1327 src->bufused - end_byte); 1228 1328 1229 dest->strlen = CHARSET_CODEPOINTS(interp, dest);1329 dest->strlen = src->strlen - (end_char - start_char) + rep->strlen; 1230 1330 dest->hashval = 0; 1231 1331 1232 1332 return dest; … … 1252 1352 ASSERT_ARGS(Parrot_str_chopn) 1253 1353 1254 1354 STRING * const chopped = Parrot_str_copy(interp, s); 1255 UINTVAL new_length , uchar_size;1355 UINTVAL new_length; 1256 1356 1257 1357 if (n < 0) { 1258 1358 new_length = -n; … … 1273 1373 return chopped; 1274 1374 } 1275 1375 1276 uchar_size = chopped->bufused / chopped->strlen;1277 chopped->strlen = new_length;1278 1279 1376 if (chopped->encoding == Parrot_fixed_8_encoding_ptr) { 1280 1377 chopped->bufused = new_length; 1281 1378 } 1282 1379 else if (chopped->encoding == Parrot_ucs2_encoding_ptr) { 1380 const UINTVAL uchar_size = chopped->bufused / chopped->strlen; 1283 1381 chopped->bufused = new_length * uchar_size; 1284 1382 } 1285 1383 else { 1286 1384 String_iter iter; 1287 1385 1288 ENCODING_ITER_INIT(interp, s, &iter);1289 iter.set_position(interp, &iter, new_length);1386 STRING_ITER_INIT(interp, &iter); 1387 STRING_ITER_SET_POSITION(interp, s, &iter, new_length); 1290 1388 chopped->bufused = iter.bytepos; 1291 1389 } 1292 1390 1391 chopped->strlen = new_length; 1392 1293 1393 return chopped; 1294 1394 } 1295 1395 … … 1860 1960 int sign = 1; 1861 1961 UINTVAL i = 0; 1862 1962 String_iter iter; 1863 UINTVAL offs;1864 1963 number_parse_state state = parse_start; 1865 1964 1866 ENCODING_ITER_INIT(interp, s, &iter);1965 STRING_ITER_INIT(interp, &iter); 1867 1966 1868 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {1869 const UINTVAL c = iter.get_and_advance(interp, &iter);1967 while (state != parse_end && iter.charpos < s->strlen) { 1968 const 
UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 1870 1969 /* Check for overflow */ 1871 1970 if (c > 255) 1872 1971 break; … … 1956 2055 int d_length = 0; 1957 2056 int check_nan = 0; /* Check for NaN and Inf after main loop */ 1958 2057 String_iter iter; 1959 UINTVAL offs;1960 2058 number_parse_state state = parse_start; 1961 2059 1962 2060 if (STRING_IS_NULL(s)) 1963 2061 return 0.0; 1964 2062 1965 ENCODING_ITER_INIT(interp, s, &iter);2063 STRING_ITER_INIT(interp, &iter); 1966 2064 1967 2065 /* Handcrafter FSM to read float value */ 1968 for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {1969 const UINTVAL c = iter.get_and_advance(interp, &iter);2066 while (state != parse_end && iter.charpos < s->strlen) { 2067 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter); 1970 2068 /* Check for overflow */ 1971 2069 if (c > 255) 1972 2070 break; … … 2417 2515 Parrot_fixed_8_encoding_ptr, Parrot_ascii_charset_ptr, 0); 2418 2516 2419 2517 /* more work TODO */ 2420 ENCODING_ITER_INIT(interp, src, &iter);2518 STRING_ITER_INIT(interp, &iter); 2421 2519 dp = (unsigned char *)result->strstart; 2422 2520 2423 2521 for (i = 0; len > 0; --len) { 2424 UINTVAL c = iter.get_and_advance(interp, &iter);2522 UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 2425 2523 if (c < 0x7f) { 2426 2524 /* process ASCII chars */ 2427 2525 if (i >= charlen - 2) { … … 2561 2659 Parrot_gc_allocate_string_storage(interp, result, reserved); 2562 2660 result->bufused = reserved; 2563 2661 2564 src->encoding->iter_init(interp, src, &itersrc);2565 encoding->iter_init(interp, result, &iterdest);2662 STRING_ITER_INIT(interp, &itersrc); 2663 STRING_ITER_INIT(interp, &iterdest); 2566 2664 while (itersrc.bytepos < srclen) { 2567 INTVAL c = itersrc.get_and_advance(interp, &itersrc);2665 INTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2568 2666 INTVAL next; 2569 2667 2570 2668 do { 2571 2669 pending = 0; 2572 2670 next = c; 2573 2671 if (c == '\\') 
{ 2574 c = itersrc.get_and_advance(interp, &itersrc);2672 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2575 2673 switch (c) { 2576 2674 /* Common one char sequences */ 2577 2675 case 'a': next = '\a'; break; … … 2584 2682 case 'e': next = '\e'; break; 2585 2683 /* Escape character */ 2586 2684 case 'c': 2587 c = itersrc.get_and_advance(interp, &itersrc);2685 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2588 2686 /* This assumes ascii-alike encoding */ 2589 2687 if (c < 'A' || c > 'Z') 2590 2688 throw_illegal_escape(interp); … … 2592 2690 break; 2593 2691 case 'x': 2594 2692 digcount = 0; 2595 c = itersrc.get_and_advance(interp, &itersrc);2693 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2596 2694 if (c == '{') { 2597 2695 /* \x{h..h} 1..8 hex digits */ 2598 2696 while (itersrc.bytepos < srclen) { 2599 c = itersrc.get_and_advance(interp, &itersrc);2697 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2600 2698 if (c == '}') 2601 2699 break; 2602 2700 if (!isxdigit(c)) … … 2620 2718 pending = 0; 2621 2719 break; 2622 2720 } 2623 c = itersrc.get_and_advance(interp, &itersrc);2721 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2624 2722 } 2625 2723 } 2626 2724 if (digcount == 0) … … 2631 2729 case 'u': 2632 2730 /* \uhhhh 4 hex digits */ 2633 2731 for (digcount = 0; digcount < 4; ++digcount) { 2634 c = itersrc.get_and_advance(interp, &itersrc);2732 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2635 2733 if (!isxdigit(c)) 2636 2734 throw_illegal_escape(interp); 2637 2735 digbuf[digcount] = c; … … 2642 2740 case 'U': 2643 2741 /* \Uhhhhhhhh 8 hex digits */ 2644 2742 for (digcount = 0; digcount < 8; ++digcount) { 2645 c = itersrc.get_and_advance(interp, &itersrc);2743 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2646 2744 if (!isxdigit(c)) 2647 2745 throw_illegal_escape(interp); 2648 2746 digbuf[digcount] = c; … … 2655 2753 /* \ooo 1..3 oct digits */ 2656 2754 digbuf[0] = c; 2657 2755 for (digcount = 1; 
digcount < 3; ++digcount) { 2658 c = itersrc.get_and_advance(interp, &itersrc);2756 c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc); 2659 2757 if (c < '0' || c > '7') 2660 2758 break; 2661 2759 digbuf[digcount] = c; … … 2669 2767 next = c; 2670 2768 } 2671 2769 } 2672 iterdest.set_and_advance(interp, &iterdest, next);2770 STRING_ITER_SET_AND_ADVANCE(interp, result, &iterdest, next); 2673 2771 } while (pending); 2674 2772 } 2675 2773 result->bufused = iterdest.bytepos; … … 2750 2848 encoding = result->encoding; 2751 2849 } 2752 2850 2753 encoding->iter_init(interp, result, &iter);2851 STRING_ITER_INIT(interp, &iter); 2754 2852 2755 2853 for (offs = d = 0; offs < clength; ++offs) { 2756 2854 r = (Parrot_UInt4)((unsigned char *)result->strstart)[offs]; … … 2773 2871 } 2774 2872 2775 2873 PARROT_ASSERT(d < offs); 2776 iter.set_and_advance(interp, &iter, r);2874 encoding->iter_set_and_advance(interp, result, &iter, r); 2777 2875 ++d; 2778 2876 } 2779 2877 … … 3271 3369 ARGIN_NULLOK(const STRING *delim), ARGIN_NULLOK(STRING *str)) 3272 3370 { 3273 3371 ASSERT_ARGS(Parrot_str_split) 3274 PMC *res; 3275 INTVAL slen, dlen, ps, pe; 3372 PMC *res; 3373 STRING *tstr; 3374 UINTVAL slen, dlen; 3375 String_iter iter; 3276 3376 3277 3377 if (STRING_IS_NULL(delim) || STRING_IS_NULL(str)) 3278 3378 return PMCNULL; … … 3284 3384 if (!slen) 3285 3385 return res; 3286 3386 3387 STRING_ITER_INIT(interp, &iter); 3287 3388 dlen = Parrot_str_length(interp, delim); 3288 3389 3289 3390 if (dlen == 0) { 3290 int i;3291 3391 VTABLE_set_integer_native(interp, res, slen); 3292 3392 3293 for (i = 0; i < slen; ++i) { 3294 STRING * const p = Parrot_str_substr(interp, str, i, 1); 3295 VTABLE_set_string_keyed_int(interp, res, i, p); 3296 } 3297 3298 return res; 3299 } 3393 do { 3394 const String_iter old_iter = iter; 3300 3395 3301 pe = Parrot_str_find_index(interp, str, delim, 0); 3396 STRING_ITER_SKIP(interp, str, &iter, 1); 3397 tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter); 
3398 VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr); 3399 } while (iter.charpos < slen); 3302 3400 3303 if (pe < 0) {3304 VTABLE_push_string(interp, res, str);3305 3401 return res; 3306 3402 } 3307 3403 3308 ps = 0; 3309 3310 while (ps <= slen) { 3311 const int pl = pe - ps; 3312 STRING * const tstr = Parrot_str_substr(interp, str, ps, pl); 3313 3314 VTABLE_push_string(interp, res, tstr); 3315 ps = pe + Parrot_str_length(interp, delim); 3404 do { 3405 String_iter start, end; 3406 INTVAL pos; 3316 3407 3317 if (ps > slen) 3408 start = iter; 3409 if (Parrot_str_iter_index(interp, str, &start, &end, delim) < 0) 3318 3410 break; 3319 3411 3320 pe = Parrot_str_find_index(interp, str, delim, ps); 3412 tstr = Parrot_str_iter_substr(interp, str, &iter, &start); 3413 VTABLE_push_string(interp, res, tstr); 3414 iter = end; 3415 } while (iter.charpos < slen); 3321 3416 3322 if (pe < 0) 3323 pe = slen; 3324 } 3417 tstr = Parrot_str_iter_substr(interp, str, &iter, NULL); 3418 VTABLE_push_string(interp, res, tstr); 3325 3419 3326 3420 return res; 3327 3421 } -
src/string/charset/ascii.c
diff --git a/src/string/charset/ascii.c b/src/string/charset/ascii.c index 5c8371a..761a60e 100644
a b 201 201 { 202 202 ASSERT_ARGS(to_ascii) 203 203 String_iter iter; 204 UINTVAL offs;205 204 unsigned char *p; 206 205 const UINTVAL len = src->strlen; 207 206 … … 209 208 STRING * const dest = Parrot_str_clone(interp, src); 210 209 211 210 p = (unsigned char *)dest->strstart; 212 ENCODING_ITER_INIT(interp, src, &iter);213 for (offs = 0; offs < len; ++offs) {214 const UINTVAL c = iter.get_and_advance(interp, &iter);211 STRING_ITER_INIT(interp, &iter); 212 while (iter.charpos < len) { 213 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 215 214 if (c >= 128) 216 215 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 217 216 "can't convert unicode string to ascii"); … … 493 492 return ret_val < 0 ? -1 : 1; 494 493 } 495 494 else { 496 UINTVAL offs; 497 ENCODING_ITER_INIT(interp, rhs, &iter); 498 for (offs = 0; offs < min_len; ++offs) { 499 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, offs); 500 const UINTVAL cr = iter.get_and_advance(interp, &iter); 495 STRING_ITER_INIT(interp, &iter); 496 while (iter.charpos < min_len) { 497 const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos); 498 const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter); 501 499 if (cl != cr) 502 500 return cl < cr ? 
-1 : 1; 503 501 } … … 531 529 UINTVAL offs) 532 530 { 533 531 ASSERT_ARGS(mixed_cs_index) 532 String_iter start, end; 534 533 535 if (search->strlen <= src->strlen) { 536 String_iter src_iter, search_iter; 537 const UINTVAL maxpos = src->strlen - search->strlen + 1; 538 const UINTVAL cfirst = Parrot_str_indexed(interp, search, 0); 539 540 ENCODING_ITER_INIT(interp, src, &src_iter); 541 src_iter.set_position(interp, &src_iter, offs); 542 ENCODING_ITER_INIT(interp, search, &search_iter); 543 544 while (src_iter.charpos < maxpos) { 545 if (cfirst == src_iter.get_and_advance(interp, &src_iter)) { 546 const INTVAL next_pos = src_iter.charpos; 547 const INTVAL next_byte = src_iter.bytepos; 548 UINTVAL len; 549 search_iter.set_position(interp, &search_iter, 1); 550 for (len = search->strlen - 1; len; --len) { 551 if ((src_iter.get_and_advance(interp, &src_iter)) != 552 (search_iter.get_and_advance(interp, &search_iter))) 553 break; 554 } 555 if (len == 0) 556 return next_pos - 1; 557 src_iter.charpos = next_pos; 558 src_iter.bytepos = next_byte; 559 } 560 } 561 } 562 return -1; 534 STRING_ITER_INIT(interp, &start); 535 STRING_ITER_SET_POSITION(interp, src, &start, offs); 536 537 return Parrot_str_iter_index(interp, src, &start, &end, search); 563 538 } 564 539 565 540 /* … … 638 613 validate(PARROT_INTERP, ARGIN(const STRING *src)) 639 614 { 640 615 ASSERT_ARGS(validate) 641 INTVAL offset;642 616 String_iter iter; 643 617 const INTVAL length = Parrot_str_length(interp, src); 644 618 645 ENCODING_ITER_INIT(interp, src, &iter);646 for (offset = 0; offset < length; ++offset) {647 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);619 STRING_ITER_INIT(interp, &iter); 620 while (iter.charpos < length) { 621 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 648 622 if (codepoint >= 0x80) 649 623 return 0; 650 624 } -
src/string/charset/iso-8859-1.c
diff --git a/src/string/charset/iso-8859-1.c b/src/string/charset/iso-8859-1.c index 8e965fa..b795e0d 100644
a b 178 178 to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src)) 179 179 { 180 180 ASSERT_ARGS(to_iso_8859_1) 181 UINTVAL offs,src_len;181 UINTVAL src_len; 182 182 String_iter iter; 183 183 /* iso-8859-1 is never bigger then source */ 184 184 STRING * dest = Parrot_str_clone(interp, src); 185 185 186 ENCODING_ITER_INIT(interp, src, &iter);186 STRING_ITER_INIT(interp, &iter); 187 187 src_len = src->strlen; 188 188 dest->bufused = src_len; 189 dest->charset = Parrot_iso_8859_1_charset_ptr; 190 dest->encoding = Parrot_fixed_8_encoding_ptr; 191 for (offs = 0; offs < src_len; ++offs) { 192 const UINTVAL c = iter.get_and_advance(interp, &iter); 189 while (iter.charpos < src_len) { 190 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 193 191 if (c >= 0x100) 194 192 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION, 195 193 "lossy conversion to iso-8559-1"); 196 194 197 ENCODING_SET_BYTE(interp, dest, offs, c);195 Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c); 198 196 } 197 dest->charset = Parrot_iso_8859_1_charset_ptr; 198 dest->encoding = Parrot_fixed_8_encoding_ptr; 199 199 return dest; 200 200 } 201 201 … … 221 221 dest->charset = Parrot_unicode_charset_ptr; 222 222 dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest); 223 223 Parrot_gc_reallocate_string_storage(interp, dest, src->strlen); 224 ENCODING_ITER_INIT(interp, dest, &iter);225 for (offs = 0; offs < src->strlen; ++offs) {226 const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);224 STRING_ITER_INIT(interp, &iter); 225 while (iter.charpos < src->strlen) { 226 const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos); 227 227 228 228 if (iter.bytepos >= Buffer_buflen(dest) - 4) { 229 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);229 UINTVAL need = (UINTVAL)((src->strlen - iter.charpos) * 1.5); 230 230 if (need < 16) 231 231 need = 16; 232 232 Parrot_gc_reallocate_string_storage(interp, dest, 233 233 Buffer_buflen(dest) + 
need); 234 234 } 235 iter.set_and_advance(interp, &iter, c);235 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c); 236 236 } 237 237 dest->bufused = iter.bytepos; 238 238 dest->strlen = iter.charpos; -
src/string/charset/unicode.c
diff --git a/src/string/charset/unicode.c b/src/string/charset/unicode.c index e1de74c..03555e2 100644
a b 651 651 { 652 652 ASSERT_ARGS(compare) 653 653 String_iter l_iter, r_iter; 654 UINTVAL offs, cl, cr,min_len, l_len, r_len;654 UINTVAL min_len, l_len, r_len; 655 655 656 656 /* TODO make optimized equal - strings are equal length then already */ 657 ENCODING_ITER_INIT(interp, lhs, &l_iter);658 ENCODING_ITER_INIT(interp, rhs, &r_iter);657 STRING_ITER_INIT(interp, &l_iter); 658 STRING_ITER_INIT(interp, &r_iter); 659 659 660 660 l_len = lhs->strlen; 661 661 r_len = rhs->strlen; 662 662 663 663 min_len = l_len > r_len ? r_len : l_len; 664 664 665 for (offs = 0; offs < min_len; ++offs) {666 c l = l_iter.get_and_advance(interp, &l_iter);667 c r = r_iter.get_and_advance(interp, &r_iter);665 while (l_iter.charpos < min_len) { 666 const UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter); 667 const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter); 668 668 669 669 if (cl != cr) 670 670 return cl < cr ? -1 : 1; … … 716 716 validate(PARROT_INTERP, ARGIN(const STRING *src)) 717 717 { 718 718 ASSERT_ARGS(validate) 719 INTVAL offset;720 719 String_iter iter; 721 720 const INTVAL length = Parrot_str_length(interp, src); 722 721 723 ENCODING_ITER_INIT(interp, src, &iter);724 for (offset = 0; offset < length; ++offset) {725 const UINTVAL codepoint = iter.get_and_advance(interp, &iter);722 STRING_ITER_INIT(interp, &iter); 723 while (iter.charpos < length) { 724 const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 726 725 /* Check for Unicode non-characters */ 727 726 if (codepoint >= 0xfdd0 728 727 && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe) … … 877 876 ASSERT_ARGS(find_cclass) 878 877 String_iter iter; 879 878 UINTVAL codepoint; 880 UINTVAL pos = offset;881 879 UINTVAL end = offset + count; 882 880 883 ENCODING_ITER_INIT(interp, src, &iter); 884 885 iter.set_position(interp, &iter, pos); 881 STRING_ITER_INIT(interp, &iter); 882 STRING_ITER_SET_POSITION(interp, src, &iter, offset); 886 883 887 884 end = src->strlen < 
end ? src->strlen : end; 888 885 889 for (; pos < end; ++pos) {890 codepoint = iter.get_and_advance(interp, &iter);886 while (iter.charpos < end) { 887 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 891 888 if (codepoint >= 256) { 892 889 if (u_iscclass(interp, codepoint, flags)) 893 return pos;890 return iter.charpos - 1; 894 891 } 895 892 else { 896 893 if (Parrot_iso_8859_1_typetable[codepoint] & flags) 897 return pos;894 return iter.charpos - 1; 898 895 } 899 896 } 900 897 … … 920 917 ASSERT_ARGS(find_not_cclass) 921 918 String_iter iter; 922 919 UINTVAL codepoint; 923 UINTVAL pos = offset;924 920 UINTVAL end = offset + count; 925 921 int bit; 926 922 927 if ( pos> src->strlen) {923 if (offset > src->strlen) { 928 924 /* XXX: Throw in this case? */ 929 925 return offset + count; 930 926 } 931 927 932 ENCODING_ITER_INIT(interp, src, &iter);928 STRING_ITER_INIT(interp, &iter); 933 929 934 if ( pos)935 iter.set_position(interp, &iter, pos);930 if (offset) 931 STRING_ITER_SET_POSITION(interp, src, &iter, offset); 936 932 937 933 end = src->strlen < end ? 
src->strlen : end; 938 934 939 935 if (flags == enum_cclass_any) 940 936 return end; 941 937 942 for (; pos < end; ++pos) {943 codepoint = iter.get_and_advance(interp, &iter);938 while (iter.charpos < end) { 939 codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 944 940 if (codepoint >= 256) { 945 941 for (bit = enum_cclass_uppercase; 946 942 bit <= enum_cclass_word ; bit <<= 1) { 947 943 if ((bit & flags) && !u_iscclass(interp, codepoint, bit)) 948 return pos;944 return iter.charpos - 1; 949 945 } 950 946 } 951 947 else { 952 948 if (!(Parrot_iso_8859_1_typetable[codepoint] & flags)) 953 return pos;949 return iter.charpos - 1; 954 950 } 955 951 } 956 952 … … 978 974 979 975 dest->strlen = 1; 980 976 981 ENCODING_ITER_INIT(interp, dest, &iter);982 iter.set_and_advance(interp, &iter, codepoint);977 STRING_ITER_INIT(interp, &iter); 978 STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint); 983 979 dest->bufused = iter.bytepos; 984 980 985 981 return dest; … … 1002 998 { 1003 999 ASSERT_ARGS(compute_hash) 1004 1000 String_iter iter; 1005 UINTVAL offs;1006 1001 size_t hashval = seed; 1007 1002 1008 ENCODING_ITER_INIT(interp, src, &iter);1003 STRING_ITER_INIT(interp, &iter); 1009 1004 1010 for (offs = 0; offs < src->strlen; ++offs) {1011 const UINTVAL c = iter.get_and_advance(interp, &iter);1005 while (iter.charpos < src->strlen) { 1006 const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter); 1012 1007 hashval += hashval << 5; 1013 1008 hashval += c; 1014 1009 } -
src/string/encoding/fixed_8.c
diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index 52008a5..ec51147 100644
a b 46 46 __attribute__nonnull__(2) 47 47 FUNC_MODIFIES(*iter); 48 48 49 static UINTVAL fixed8_iter_get(PARROT_INTERP, 50 ARGIN(const STRING *str), 51 ARGIN(const String_iter *iter), 52 INTVAL offset) 53 __attribute__nonnull__(1) 54 __attribute__nonnull__(2) 55 __attribute__nonnull__(3); 56 57 static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, 58 ARGIN(const STRING *str), 59 ARGMOD(String_iter *iter)) 60 __attribute__nonnull__(1) 61 __attribute__nonnull__(2) 62 __attribute__nonnull__(3) 63 FUNC_MODIFIES(*iter); 64 65 static void fixed8_iter_set_and_advance(PARROT_INTERP, 66 ARGMOD(STRING *str), 67 ARGMOD(String_iter *iter), 68 UINTVAL c) 69 __attribute__nonnull__(1) 70 __attribute__nonnull__(2) 71 __attribute__nonnull__(3) 72 FUNC_MODIFIES(*str) 73 FUNC_MODIFIES(*iter); 74 75 static void fixed8_iter_set_position(SHIM_INTERP, 76 ARGIN(const STRING *str), 77 ARGMOD(String_iter *iter), 78 UINTVAL pos) 79 __attribute__nonnull__(2) 80 __attribute__nonnull__(3) 81 FUNC_MODIFIES(*iter); 82 83 static void fixed8_iter_skip(SHIM_INTERP, 84 ARGIN(const STRING *str), 85 ARGMOD(String_iter *iter), 86 INTVAL skip) 87 __attribute__nonnull__(2) 88 __attribute__nonnull__(3) 89 FUNC_MODIFIES(*iter); 90 49 91 static void fixed8_set_next(PARROT_INTERP, 50 92 ARGMOD(String_iter *iter), 51 93 UINTVAL c) … … 125 167 #define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 126 168 PARROT_ASSERT_ARG(interp) \ 127 169 , PARROT_ASSERT_ARG(iter)) 170 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 171 PARROT_ASSERT_ARG(interp) \ 172 , PARROT_ASSERT_ARG(str) \ 173 , PARROT_ASSERT_ARG(iter)) 174 #define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 175 PARROT_ASSERT_ARG(interp) \ 176 , PARROT_ASSERT_ARG(str) \ 177 , PARROT_ASSERT_ARG(iter)) 178 #define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 179 PARROT_ASSERT_ARG(interp) \ 180 , 
PARROT_ASSERT_ARG(str) \ 181 , PARROT_ASSERT_ARG(iter)) 182 #define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 183 PARROT_ASSERT_ARG(str) \ 184 , PARROT_ASSERT_ARG(iter)) 185 #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 186 PARROT_ASSERT_ARG(str) \ 187 , PARROT_ASSERT_ARG(iter)) 128 188 #define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 129 189 PARROT_ASSERT_ARG(interp) \ 130 190 , PARROT_ASSERT_ARG(iter)) … … 375 435 376 436 /* 377 437 438 =item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const 439 String_iter *iter, INTVAL offset)> 440 441 Get the character at C<iter> plus C<offset>. 442 443 =cut 444 445 */ 446 447 static UINTVAL 448 fixed8_iter_get(PARROT_INTERP, 449 ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset) 450 { 451 ASSERT_ARGS(fixed8_iter_get) 452 return get_byte(interp, str, iter->charpos + offset); 453 } 454 455 /* 456 457 =item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str, 458 String_iter *iter, INTVAL skip)> 459 460 Moves the string iterator C<i> by C<skip> characters. 461 462 =cut 463 464 */ 465 466 static void 467 fixed8_iter_skip(SHIM_INTERP, 468 ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip) 469 { 470 ASSERT_ARGS(fixed8_iter_skip) 471 iter->bytepos += skip; 472 iter->charpos += skip; 473 PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str)); 474 } 475 476 /* 477 478 =item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING 479 *str, String_iter *iter)> 480 481 Moves the string iterator C<i> to the next codepoint. 
482 483 =cut 484 485 */ 486 487 static UINTVAL 488 fixed8_iter_get_and_advance(PARROT_INTERP, 489 ARGIN(const STRING *str), ARGMOD(String_iter *iter)) 490 { 491 ASSERT_ARGS(fixed8_iter_get_and_advance) 492 const UINTVAL c = get_byte(interp, str, iter->charpos++); 493 iter->bytepos++; 494 return c; 495 } 496 497 /* 498 499 =item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str, 500 String_iter *iter, UINTVAL c)> 501 502 With the string iterator C<i>, appends the codepoint C<c> and advances to the 503 next position in the string. 504 505 =cut 506 507 */ 508 509 static void 510 fixed8_iter_set_and_advance(PARROT_INTERP, 511 ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c) 512 { 513 ASSERT_ARGS(fixed8_iter_set_and_advance) 514 set_byte(interp, str, iter->charpos++, c); 515 iter->bytepos++; 516 } 517 518 /* 519 520 =item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str, 521 String_iter *iter, UINTVAL pos)> 522 523 Moves the string iterator C<i> to the position C<n> in the string. 524 525 =cut 526 527 */ 528 529 static void 530 fixed8_iter_set_position(SHIM_INTERP, 531 ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos) 532 { 533 ASSERT_ARGS(fixed8_iter_set_position) 534 iter->bytepos = iter->charpos = pos; 535 PARROT_ASSERT(pos <= Buffer_buflen(str)); 536 } 537 538 /* 539 378 540 =item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)> 379 541 380 542 Moves the string iterator C<i> to the next codepoint. … … 511 673 bytes, 512 674 iter_init, 513 675 find_cclass, 514 fixed_8_hash 676 fixed_8_hash, 677 fixed8_iter_get, 678 fixed8_iter_skip, 679 fixed8_iter_get_and_advance, 680 fixed8_iter_set_and_advance, 681 fixed8_iter_set_position 515 682 }; 516 683 517 684 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); -
src/string/encoding/ucs2.c
diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 1c9cd48..6e2ec93 100644
a b 120 120 __attribute__nonnull__(1) 121 121 __attribute__nonnull__(2); 122 122 123 static UINTVAL ucs2_iter_get(PARROT_INTERP, 124 ARGIN(const STRING *str), 125 ARGIN(const String_iter *i), 126 INTVAL offset) 127 __attribute__nonnull__(1) 128 __attribute__nonnull__(2) 129 __attribute__nonnull__(3); 130 131 static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, 132 ARGIN(const STRING *str), 133 ARGMOD(String_iter *i)) 134 __attribute__nonnull__(1) 135 __attribute__nonnull__(2) 136 __attribute__nonnull__(3) 137 FUNC_MODIFIES(*i); 138 139 static void ucs2_iter_set_and_advance(PARROT_INTERP, 140 ARGMOD(STRING *str), 141 ARGMOD(String_iter *i), 142 UINTVAL c) 143 __attribute__nonnull__(1) 144 __attribute__nonnull__(2) 145 __attribute__nonnull__(3) 146 FUNC_MODIFIES(*str) 147 FUNC_MODIFIES(*i); 148 149 static void ucs2_iter_set_position(PARROT_INTERP, 150 ARGIN(const STRING *str), 151 ARGMOD(String_iter *i), 152 UINTVAL n) 153 __attribute__nonnull__(1) 154 __attribute__nonnull__(2) 155 __attribute__nonnull__(3) 156 FUNC_MODIFIES(*i); 157 158 static void ucs2_iter_skip(PARROT_INTERP, 159 ARGIN(const STRING *str), 160 ARGMOD(String_iter *i), 161 INTVAL skip) 162 __attribute__nonnull__(1) 163 __attribute__nonnull__(2) 164 __attribute__nonnull__(3) 165 FUNC_MODIFIES(*i); 166 123 167 static void ucs2_set_position(SHIM_INTERP, 124 168 ARGMOD(String_iter *i), 125 169 UINTVAL n) … … 161 205 #define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 162 206 PARROT_ASSERT_ARG(interp) \ 163 207 , PARROT_ASSERT_ARG(s)) 208 #define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 209 PARROT_ASSERT_ARG(interp) \ 210 , PARROT_ASSERT_ARG(str) \ 211 , PARROT_ASSERT_ARG(i)) 212 #define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 213 PARROT_ASSERT_ARG(interp) \ 214 , PARROT_ASSERT_ARG(str) \ 215 , PARROT_ASSERT_ARG(i)) 216 #define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int 
_ASSERT_ARGS_CHECK = (\ 217 PARROT_ASSERT_ARG(interp) \ 218 , PARROT_ASSERT_ARG(str) \ 219 , PARROT_ASSERT_ARG(i)) 220 #define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 221 PARROT_ASSERT_ARG(interp) \ 222 , PARROT_ASSERT_ARG(str) \ 223 , PARROT_ASSERT_ARG(i)) 224 #define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 225 PARROT_ASSERT_ARG(interp) \ 226 , PARROT_ASSERT_ARG(str) \ 227 , PARROT_ASSERT_ARG(i)) 164 228 #define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 165 229 PARROT_ASSERT_ARG(i)) 166 230 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ … … 323 387 String_iter iter; 324 388 UINTVAL start; 325 389 326 iter_init(interp, src, &iter);327 iter.set_position(interp, &iter, offset);390 STRING_ITER_INIT(interp, &iter); 391 ucs2_iter_set_position(interp, src, &iter, offset); 328 392 start = iter.bytepos; 329 393 return_string->strstart = (char *)return_string->strstart + start; 330 iter.set_position(interp, &iter, offset + count);394 ucs2_iter_set_position(interp, src, &iter, offset + count); 331 395 return_string->bufused = iter.bytepos - start; 332 396 } 333 397 #endif … … 402 466 403 467 /* 404 468 469 =item C<static UINTVAL ucs2_iter_get(PARROT_INTERP, const STRING *str, const 470 String_iter *i, INTVAL offset)> 471 472 Get the character at C<i> + C<offset>. 473 474 =cut 475 476 */ 477 478 static UINTVAL 479 ucs2_iter_get(PARROT_INTERP, 480 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 481 { 482 ASSERT_ARGS(ucs2_iter_get) 483 return get_codepoint(interp, str, i->charpos + offset); 484 } 485 486 /* 487 488 =item C<static void ucs2_iter_skip(PARROT_INTERP, const STRING *str, String_iter 489 *i, INTVAL skip)> 490 491 Moves the string iterator C<i> by C<skip> characters. 
492 493 =cut 494 495 */ 496 497 static void 498 ucs2_iter_skip(PARROT_INTERP, 499 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 500 { 501 ASSERT_ARGS(ucs2_iter_skip) 502 UNUSED(str); 503 504 #if PARROT_HAS_ICU 505 i->charpos += skip; 506 i->bytepos += skip * sizeof (UChar); 507 #else 508 UNUSED(i); 509 UNUSED(skip); 510 no_ICU_lib(interp); 511 #endif 512 } 513 514 /* 515 516 =item C<static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, const STRING 517 *str, String_iter *i)> 518 519 Moves the string iterator C<i> to the next UCS-2 codepoint. 520 521 =cut 522 523 */ 524 525 static UINTVAL 526 ucs2_iter_get_and_advance(PARROT_INTERP, 527 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 528 { 529 ASSERT_ARGS(ucs2_iter_get_and_advance) 530 531 #if PARROT_HAS_ICU 532 UChar * const s = (UChar*) str->strstart; 533 size_t pos = i->bytepos / sizeof (UChar); 534 535 /* TODO either make sure that we don't go past end or use SAFE 536 * iter versions 537 */ 538 const UChar c = s[pos++]; 539 i->charpos++; 540 i->bytepos = pos * sizeof (UChar); 541 return c; 542 #else 543 UNUSED(str); 544 UNUSED(i); 545 no_ICU_lib(interp); 546 return (UINTVAL)0; /* Stop the static analyzers from panicing */ 547 #endif 548 } 549 550 /* 551 552 =item C<static void ucs2_iter_set_and_advance(PARROT_INTERP, STRING *str, 553 String_iter *i, UINTVAL c)> 554 555 With the string iterator C<i>, appends the codepoint C<c> and advances to the 556 next position in the string. 
557 558 =cut 559 560 */ 561 562 static void 563 ucs2_iter_set_and_advance(PARROT_INTERP, 564 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 565 { 566 ASSERT_ARGS(ucs2_iter_set_and_advance) 567 568 #if PARROT_HAS_ICU 569 UChar * const s = (UChar*) str->strstart; 570 UINTVAL pos = i->bytepos / sizeof (UChar); 571 s[pos++] = (UChar)c; 572 i->charpos++; 573 i->bytepos = pos * sizeof (UChar); 574 #else 575 UNUSED(str); 576 UNUSED(i); 577 UNUSED(c); 578 no_ICU_lib(interp); 579 #endif 580 } 581 582 /* 583 584 =item C<static void ucs2_iter_set_position(PARROT_INTERP, const STRING *str, 585 String_iter *i, UINTVAL n)> 586 587 Moves the string iterator C<i> to the position C<n> in the string. 588 589 =cut 590 591 */ 592 593 static void 594 ucs2_iter_set_position(PARROT_INTERP, 595 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 596 { 597 ASSERT_ARGS(ucs2_iter_set_position) 598 UNUSED(str); 599 600 #if PARROT_HAS_ICU 601 i->charpos = n; 602 i->bytepos = n * sizeof (UChar); 603 #else 604 UNUSED(i); 605 UNUSED(n); 606 no_ICU_lib(interp); 607 #endif 608 } 609 610 /* 611 405 612 =item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)> 406 613 407 614 Moves the string iterator C<i> to the next UCS-2 codepoint. … … 592 799 bytes, 593 800 iter_init, 594 801 find_cclass, 595 ucs2_hash 802 ucs2_hash, 803 ucs2_iter_get, 804 ucs2_iter_skip, 805 ucs2_iter_get_and_advance, 806 ucs2_iter_set_and_advance, 807 ucs2_iter_set_position 596 808 }; 597 809 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 598 810 Parrot_register_encoding(interp, "ucs2", return_encoding); -
src/string/encoding/ucs4.c
diff --git a/src/string/encoding/ucs4.c b/src/string/encoding/ucs4.c index c608ef8..e4d0409 100644
a b 123 123 __attribute__nonnull__(1) 124 124 __attribute__nonnull__(2); 125 125 126 static UINTVAL ucs4_iter_get(PARROT_INTERP, 127 ARGIN(const STRING *str), 128 ARGIN(const String_iter *i), 129 INTVAL offset) 130 __attribute__nonnull__(1) 131 __attribute__nonnull__(2) 132 __attribute__nonnull__(3); 133 134 static UINTVAL ucs4_iter_get_and_advance(PARROT_INTERP, 135 ARGIN(const STRING *str), 136 ARGMOD(String_iter *i)) 137 __attribute__nonnull__(1) 138 __attribute__nonnull__(2) 139 __attribute__nonnull__(3) 140 FUNC_MODIFIES(*i); 141 142 static void ucs4_iter_set_and_advance(PARROT_INTERP, 143 ARGMOD(STRING *str), 144 ARGMOD(String_iter *i), 145 UINTVAL c) 146 __attribute__nonnull__(1) 147 __attribute__nonnull__(2) 148 __attribute__nonnull__(3) 149 FUNC_MODIFIES(*str) 150 FUNC_MODIFIES(*i); 151 152 static void ucs4_iter_set_position(PARROT_INTERP, 153 ARGIN(const STRING *str), 154 ARGMOD(String_iter *i), 155 UINTVAL n) 156 __attribute__nonnull__(1) 157 __attribute__nonnull__(2) 158 __attribute__nonnull__(3) 159 FUNC_MODIFIES(*i); 160 161 static void ucs4_iter_skip(PARROT_INTERP, 162 ARGIN(const STRING *str), 163 ARGMOD(String_iter *i), 164 INTVAL skip) 165 __attribute__nonnull__(1) 166 __attribute__nonnull__(2) 167 __attribute__nonnull__(3) 168 FUNC_MODIFIES(*i); 169 126 170 static void ucs4_set_position(PARROT_INTERP, 127 171 ARGMOD(String_iter *i), 128 172 UINTVAL n) … … 167 211 #define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 168 212 PARROT_ASSERT_ARG(interp) \ 169 213 , PARROT_ASSERT_ARG(s)) 214 #define ASSERT_ARGS_ucs4_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 215 PARROT_ASSERT_ARG(interp) \ 216 , PARROT_ASSERT_ARG(str) \ 217 , PARROT_ASSERT_ARG(i)) 218 #define ASSERT_ARGS_ucs4_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 219 PARROT_ASSERT_ARG(interp) \ 220 , PARROT_ASSERT_ARG(str) \ 221 , PARROT_ASSERT_ARG(i)) 222 #define ASSERT_ARGS_ucs4_iter_set_and_advance __attribute__unused__ int 
_ASSERT_ARGS_CHECK = (\ 223 PARROT_ASSERT_ARG(interp) \ 224 , PARROT_ASSERT_ARG(str) \ 225 , PARROT_ASSERT_ARG(i)) 226 #define ASSERT_ARGS_ucs4_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 227 PARROT_ASSERT_ARG(interp) \ 228 , PARROT_ASSERT_ARG(str) \ 229 , PARROT_ASSERT_ARG(i)) 230 #define ASSERT_ARGS_ucs4_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 231 PARROT_ASSERT_ARG(interp) \ 232 , PARROT_ASSERT_ARG(str) \ 233 , PARROT_ASSERT_ARG(i)) 170 234 #define ASSERT_ARGS_ucs4_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 171 235 PARROT_ASSERT_ARG(interp) \ 172 236 , PARROT_ASSERT_ARG(i)) … … 414 478 415 479 /* 416 480 481 =item C<static UINTVAL ucs4_iter_get(PARROT_INTERP, const STRING *str, const 482 String_iter *i, INTVAL offset)> 483 484 Get the character at C<i> + C<offset>. 485 486 =cut 487 488 */ 489 490 static UINTVAL 491 ucs4_iter_get(PARROT_INTERP, 492 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 493 { 494 ASSERT_ARGS(ucs4_iter_get) 495 return get_codepoint(interp, str, i->charpos + offset); 496 } 497 498 /* 499 500 =item C<static void ucs4_iter_skip(PARROT_INTERP, const STRING *str, String_iter 501 *i, INTVAL skip)> 502 503 Moves the string iterator C<i> by C<skip> characters. 504 505 =cut 506 507 */ 508 509 static void 510 ucs4_iter_skip(PARROT_INTERP, 511 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 512 { 513 ASSERT_ARGS(ucs4_iter_skip) 514 UNUSED(str); 515 516 #if PARROT_HAS_ICU 517 i->charpos += skip; 518 i->bytepos += skip * sizeof (UChar32); 519 #else 520 UNUSED(i); 521 UNUSED(skip); 522 no_ICU_lib(interp); 523 #endif 524 } 525 526 /* 527 528 =item C<static UINTVAL ucs4_iter_get_and_advance(PARROT_INTERP, const STRING 529 *str, String_iter *i)> 530 531 Moves the string iterator C<i> to the next codepoint. 
532 533 =cut 534 535 */ 536 537 static UINTVAL 538 ucs4_iter_get_and_advance(PARROT_INTERP, 539 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 540 { 541 ASSERT_ARGS(ucs4_iter_get_and_advance) 542 543 #if PARROT_HAS_ICU 544 const UChar32 * const s = (const UChar32*) str->strstart; 545 const UChar32 c = s[i->charpos++]; 546 i->bytepos += sizeof (UChar32); 547 return c; 548 #else 549 UNUSED(str); 550 UNUSED(i); 551 no_ICU_lib(interp); 552 return (UINTVAL)0; /* Stop the static analyzers from panicing */ 553 #endif 554 } 555 556 /* 557 558 =item C<static void ucs4_iter_set_and_advance(PARROT_INTERP, STRING *str, 559 String_iter *i, UINTVAL c)> 560 561 With the string iterator C<i>, appends the codepoint C<c> and advances to the 562 next position in the string. 563 564 =cut 565 566 */ 567 568 static void 569 ucs4_iter_set_and_advance(PARROT_INTERP, 570 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 571 { 572 ASSERT_ARGS(ucs4_iter_set_and_advance) 573 574 #if PARROT_HAS_ICU 575 UChar32 * const s = (UChar32*) str->strstart; 576 s[i->charpos++] = (UChar32)c; 577 i->bytepos += sizeof (UChar32); 578 #else 579 UNUSED(str); 580 UNUSED(i); 581 UNUSED(c); 582 no_ICU_lib(interp); 583 #endif 584 } 585 586 /* 587 588 =item C<static void ucs4_iter_set_position(PARROT_INTERP, const STRING *str, 589 String_iter *i, UINTVAL n)> 590 591 Moves the string iterator C<i> to the position C<n> in the string. 592 593 =cut 594 595 */ 596 597 static void 598 ucs4_iter_set_position(PARROT_INTERP, 599 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 600 { 601 ASSERT_ARGS(ucs4_iter_set_position) 602 UNUSED(str); 603 604 #if PARROT_HAS_ICU 605 i->charpos = n; 606 i->bytepos = n * sizeof (UChar32); 607 #else 608 UNUSED(i); 609 UNUSED(n); 610 no_ICU_lib(interp); 611 #endif 612 } 613 614 /* 615 417 616 =item C<static UINTVAL ucs4_decode_and_advance(PARROT_INTERP, String_iter *i)> 418 617 419 618 Moves the string iterator C<i> to the next UCS-4 codepoint. 
… … 580 779 iter_init, 581 780 find_cclass, 582 781 #if PARROT_HAS_ICU 583 ucs4_hash 782 ucs4_hash, 584 783 #else 585 NULL 784 NULL, 586 785 #endif 786 ucs4_iter_get, 787 ucs4_iter_skip, 788 ucs4_iter_get_and_advance, 789 ucs4_iter_set_and_advance, 790 ucs4_iter_set_position 587 791 }; 588 792 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 589 793 Parrot_register_encoding(interp, "ucs4", return_encoding); -
src/string/encoding/utf16.c
diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index 0c5ec2d..2570de5 100644
a b 107 107 __attribute__nonnull__(2) 108 108 FUNC_MODIFIES(*i); 109 109 110 static UINTVAL utf16_iter_get(PARROT_INTERP, 111 ARGIN(const STRING *str), 112 ARGIN(const String_iter *i), 113 INTVAL offset) 114 __attribute__nonnull__(1) 115 __attribute__nonnull__(2) 116 __attribute__nonnull__(3); 117 118 PARROT_WARN_UNUSED_RESULT 119 static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, 120 ARGIN(const STRING *str), 121 ARGMOD(String_iter *i)) 122 __attribute__nonnull__(1) 123 __attribute__nonnull__(2) 124 __attribute__nonnull__(3) 125 FUNC_MODIFIES(*i); 126 127 static void utf16_iter_set_and_advance(PARROT_INTERP, 128 ARGMOD(STRING *str), 129 ARGMOD(String_iter *i), 130 UINTVAL c) 131 __attribute__nonnull__(1) 132 __attribute__nonnull__(2) 133 __attribute__nonnull__(3) 134 FUNC_MODIFIES(*str) 135 FUNC_MODIFIES(*i); 136 137 static void utf16_iter_set_position(PARROT_INTERP, 138 ARGIN(const STRING *str), 139 ARGMOD(String_iter *i), 140 UINTVAL n) 141 __attribute__nonnull__(1) 142 __attribute__nonnull__(2) 143 __attribute__nonnull__(3) 144 FUNC_MODIFIES(*i); 145 146 static void utf16_iter_skip(PARROT_INTERP, 147 ARGIN(const STRING *str), 148 ARGMOD(String_iter *i), 149 INTVAL skip) 150 __attribute__nonnull__(1) 151 __attribute__nonnull__(2) 152 __attribute__nonnull__(3) 153 FUNC_MODIFIES(*i); 154 110 155 static void utf16_set_position(SHIM_INTERP, 111 156 ARGMOD(String_iter *i), 112 157 UINTVAL n) … … 147 192 PARROT_ASSERT_ARG(i)) 148 193 #define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 149 194 PARROT_ASSERT_ARG(i)) 195 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 196 PARROT_ASSERT_ARG(interp) \ 197 , PARROT_ASSERT_ARG(str) \ 198 , PARROT_ASSERT_ARG(i)) 199 #define ASSERT_ARGS_utf16_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 200 PARROT_ASSERT_ARG(interp) \ 201 , PARROT_ASSERT_ARG(str) \ 202 , PARROT_ASSERT_ARG(i)) 203 #define 
ASSERT_ARGS_utf16_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 204 PARROT_ASSERT_ARG(interp) \ 205 , PARROT_ASSERT_ARG(str) \ 206 , PARROT_ASSERT_ARG(i)) 207 #define ASSERT_ARGS_utf16_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 208 PARROT_ASSERT_ARG(interp) \ 209 , PARROT_ASSERT_ARG(str) \ 210 , PARROT_ASSERT_ARG(i)) 211 #define ASSERT_ARGS_utf16_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 212 PARROT_ASSERT_ARG(interp) \ 213 , PARROT_ASSERT_ARG(str) \ 214 , PARROT_ASSERT_ARG(i)) 150 215 #define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 151 216 PARROT_ASSERT_ARG(i)) 152 217 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ … … 377 442 get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count) 378 443 { 379 444 ASSERT_ARGS(get_codepoints) 380 String_iter iter; 381 UINTVAL start; 445 #if PARROT_HAS_ICU 446 UINTVAL pos = 0, start; 447 const UChar * const s = (UChar*) src->strstart; 382 448 STRING * const return_string = Parrot_str_copy(interp, src); 383 449 384 iter_init(interp, src, &iter); 385 iter.set_position(interp, &iter, offset); 386 start = iter.bytepos; 387 return_string->strstart = (char *)return_string->strstart + start ; 388 iter.set_position(interp, &iter, offset + count); 389 return_string->bufused = iter.bytepos - start; 450 U16_FWD_N_UNSAFE(s, pos, offset); 451 start = pos * sizeof (UChar); 452 return_string->strstart = (char *)return_string->strstart + start; 453 U16_FWD_N_UNSAFE(s, pos, count); 454 return_string->bufused = pos * sizeof (UChar) - start; 390 455 return_string->strlen = count; 391 456 return_string->hashval = 0; 392 457 return return_string; 458 #else 459 UNUSED(src); 460 UNUSED(offset); 461 UNUSED(count); 462 463 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 464 "no ICU lib loaded"); 465 #endif 393 466 } 394 467 395 468 … … 432 505 
codepoints(PARROT_INTERP, ARGIN(const STRING *src)) 433 506 { 434 507 ASSERT_ARGS(codepoints) 435 String_iter iter; 508 #if PARROT_HAS_ICU 509 const UChar * const s = (UChar*) src->strstart; 510 UINTVAL pos = 0, charpos = 0; 436 511 /* 437 512 * this is used to initially calculate src->strlen, 438 513 * therefore we must scan the whole string 439 514 */ 440 iter_init(interp, src, &iter); 441 while (iter.bytepos < src->bufused) 442 iter.get_and_advance(interp, &iter); 443 return iter.charpos; 515 while (pos * sizeof (UChar) < src->bufused) { 516 U16_FWD_1_UNSAFE(s, pos); 517 ++charpos; 518 } 519 return charpos; 520 #else 521 UNUSED(src); 522 523 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 524 "no ICU lib loaded"); 525 #endif 444 526 } 445 527 446 528 /* … … 461 543 return src->bufused; 462 544 } 463 545 546 /* 547 548 =item C<static UINTVAL utf16_iter_get(PARROT_INTERP, const STRING *str, const 549 String_iter *i, INTVAL offset)> 550 551 Get the character at C<i> plus C<offset>. 552 553 =cut 554 555 */ 556 557 static UINTVAL 558 utf16_iter_get(PARROT_INTERP, 559 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 560 { 561 ASSERT_ARGS(utf16_iter_get) 562 #if PARROT_HAS_ICU 563 const UChar * const s = (UChar*) str->strstart; 564 UINTVAL c, pos; 565 566 pos = i->bytepos / sizeof (UChar); 567 if (offset > 0) { 568 U16_FWD_N_UNSAFE(s, pos, offset); 569 } 570 else if (offset < 0) { 571 U16_BACK_N_UNSAFE(s, pos, -offset); 572 } 573 U16_GET_UNSAFE(s, pos, c); 574 575 return c; 576 #else 577 UNUSED(str); 578 UNUSED(i); 579 UNUSED(offset); 580 581 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 582 "no ICU lib loaded"); 583 #endif 584 } 585 586 /* 587 588 =item C<static void utf16_iter_skip(PARROT_INTERP, const STRING *str, 589 String_iter *i, INTVAL skip)> 590 591 Moves the string iterator C<i> by C<skip> characters. 
592 593 =cut 594 595 */ 596 597 static void 598 utf16_iter_skip(PARROT_INTERP, 599 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 600 { 601 ASSERT_ARGS(utf16_iter_skip) 602 #if PARROT_HAS_ICU 603 const UChar * const s = (UChar*) str->strstart; 604 UINTVAL pos = i->bytepos / sizeof (UChar); 605 606 if (skip > 0) { 607 U16_FWD_N_UNSAFE(s, pos, skip); 608 } 609 else if (skip < 0) { 610 U16_BACK_N_UNSAFE(s, pos, -skip); 611 } 612 613 i->charpos += skip; 614 i->bytepos = pos * sizeof (UChar); 615 #else 616 UNUSED(str); 617 UNUSED(i); 618 UNUSED(skip); 619 620 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 621 "no ICU lib loaded"); 622 #endif 623 } 624 625 /* 626 627 =item C<static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, const STRING 628 *str, String_iter *i)> 629 630 Moves the string iterator C<i> to the next UTF-16 codepoint. 631 632 =cut 633 634 */ 635 636 PARROT_WARN_UNUSED_RESULT 637 static UINTVAL 638 utf16_iter_get_and_advance(PARROT_INTERP, 639 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 640 { 641 ASSERT_ARGS(utf16_iter_get_and_advance) 642 #if PARROT_HAS_ICU 643 const UChar * const s = (UChar*) str->strstart; 644 UINTVAL c, pos; 645 pos = i->bytepos / sizeof (UChar); 646 /* TODO either make sure that we don't go past end or use SAFE 647 * iter versions 648 */ 649 U16_NEXT_UNSAFE(s, pos, c); 650 i->charpos++; 651 i->bytepos = pos * sizeof (UChar); 652 return c; 653 #else 654 UNUSED(str); 655 UNUSED(i); 656 657 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 658 "no ICU lib loaded"); 659 #endif 660 } 661 662 /* 663 664 =item C<static void utf16_iter_set_and_advance(PARROT_INTERP, STRING *str, 665 String_iter *i, UINTVAL c)> 666 667 With the string iterator C<i>, appends the codepoint C<c> and advances to the 668 next position in the string. 
669 670 =cut 671 672 */ 673 674 static void 675 utf16_iter_set_and_advance(PARROT_INTERP, 676 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 677 { 678 ASSERT_ARGS(utf16_iter_set_and_advance) 679 #if PARROT_HAS_ICU 680 UChar * const s = (UChar*) str->strstart; 681 UINTVAL pos; 682 pos = i->bytepos / sizeof (UChar); 683 U16_APPEND_UNSAFE(s, pos, c); 684 i->charpos++; 685 i->bytepos = pos * sizeof (UChar); 686 #else 687 UNUSED(str); 688 UNUSED(i); 689 UNUSED(c); 690 691 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 692 "no ICU lib loaded"); 693 #endif 694 } 695 696 /* 697 698 =item C<static void utf16_iter_set_position(PARROT_INTERP, const STRING *str, 699 String_iter *i, UINTVAL n)> 700 701 Moves the string iterator C<i> to the position C<n> in the string. 702 703 =cut 704 705 */ 706 707 static void 708 utf16_iter_set_position(PARROT_INTERP, 709 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n) 710 { 711 ASSERT_ARGS(utf16_iter_set_position) 712 #if PARROT_HAS_ICU 713 UChar * const s = (UChar*) str->strstart; 714 UINTVAL pos; 715 pos = 0; 716 U16_FWD_N_UNSAFE(s, pos, n); 717 i->charpos = n; 718 i->bytepos = pos * sizeof (UChar); 719 #else 720 UNUSED(str); 721 UNUSED(i); 722 UNUSED(n); 723 724 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, 725 "no ICU lib loaded"); 726 #endif 727 } 728 464 729 #if PARROT_HAS_ICU 465 730 /* 466 731 … … 595 860 bytes, 596 861 iter_init, 597 862 find_cclass, 598 NULL 863 NULL, 864 utf16_iter_get, 865 utf16_iter_skip, 866 utf16_iter_get_and_advance, 867 utf16_iter_set_and_advance, 868 utf16_iter_set_position 599 869 }; 600 870 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 601 871 Parrot_register_encoding(interp, "utf16", return_encoding); -
src/string/encoding/utf8.c
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index ff17761..811ce76 100644
a b 118 118 __attribute__nonnull__(2) 119 119 FUNC_MODIFIES(*i); 120 120 121 static UINTVAL utf8_iter_get(PARROT_INTERP, 122 ARGIN(const STRING *str), 123 ARGIN(const String_iter *i), 124 INTVAL offset) 125 __attribute__nonnull__(1) 126 __attribute__nonnull__(2) 127 __attribute__nonnull__(3); 128 129 static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, 130 ARGIN(const STRING *str), 131 ARGMOD(String_iter *i)) 132 __attribute__nonnull__(1) 133 __attribute__nonnull__(2) 134 __attribute__nonnull__(3) 135 FUNC_MODIFIES(*i); 136 137 static void utf8_iter_set_and_advance(PARROT_INTERP, 138 ARGMOD(STRING *str), 139 ARGMOD(String_iter *i), 140 UINTVAL c) 141 __attribute__nonnull__(1) 142 __attribute__nonnull__(2) 143 __attribute__nonnull__(3) 144 FUNC_MODIFIES(*str) 145 FUNC_MODIFIES(*i); 146 147 static void utf8_iter_set_position(SHIM_INTERP, 148 ARGIN(const STRING *str), 149 ARGMOD(String_iter *i), 150 UINTVAL pos) 151 __attribute__nonnull__(2) 152 __attribute__nonnull__(3) 153 FUNC_MODIFIES(*i); 154 155 static void utf8_iter_skip(SHIM_INTERP, 156 ARGIN(const STRING *str), 157 ARGMOD(String_iter *i), 158 INTVAL skip) 159 __attribute__nonnull__(2) 160 __attribute__nonnull__(3) 161 FUNC_MODIFIES(*i); 162 121 163 static void utf8_set_position(SHIM_INTERP, 122 164 ARGMOD(String_iter *i), 123 165 UINTVAL pos) … … 175 217 #define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 176 218 PARROT_ASSERT_ARG(interp) \ 177 219 , PARROT_ASSERT_ARG(i)) 220 #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 221 PARROT_ASSERT_ARG(interp) \ 222 , PARROT_ASSERT_ARG(str) \ 223 , PARROT_ASSERT_ARG(i)) 224 #define ASSERT_ARGS_utf8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 225 PARROT_ASSERT_ARG(interp) \ 226 , PARROT_ASSERT_ARG(str) \ 227 , PARROT_ASSERT_ARG(i)) 228 #define ASSERT_ARGS_utf8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 229 PARROT_ASSERT_ARG(interp) \ 
230 , PARROT_ASSERT_ARG(str) \ 231 , PARROT_ASSERT_ARG(i)) 232 #define ASSERT_ARGS_utf8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 233 PARROT_ASSERT_ARG(str) \ 234 , PARROT_ASSERT_ARG(i)) 235 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 236 PARROT_ASSERT_ARG(str) \ 237 , PARROT_ASSERT_ARG(i)) 178 238 #define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 179 239 PARROT_ASSERT_ARG(i)) 180 240 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 387 447 388 448 /* 389 449 450 =item C<static UINTVAL utf8_iter_get(PARROT_INTERP, const STRING *str, const 451 String_iter *i, INTVAL offset)> 452 453 Get the character at C<i> plus C<offset>. 454 455 =cut 456 457 */ 458 459 static UINTVAL 460 utf8_iter_get(PARROT_INTERP, 461 ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset) 462 { 463 ASSERT_ARGS(utf8_iter_get) 464 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 465 466 if (offset > 0) { 467 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, offset); 468 } 469 else if (offset < 0) { 470 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -offset); 471 } 472 473 return utf8_decode(interp, u8ptr); 474 } 475 476 /* 477 478 =item C<static void utf8_iter_skip(PARROT_INTERP, const STRING *str, String_iter 479 *i, INTVAL skip)> 480 481 Moves the string iterator C<i> by C<skip> characters. 
482 483 =cut 484 485 */ 486 487 static void 488 utf8_iter_skip(SHIM_INTERP, 489 ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip) 490 { 491 ASSERT_ARGS(utf8_iter_skip) 492 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 493 494 if (skip > 0) { 495 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, skip); 496 } 497 else if (skip < 0) { 498 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -skip); 499 } 500 501 i->charpos += skip; 502 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 503 } 504 505 /* 506 507 =item C<static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, const STRING 508 *str, String_iter *i)> 509 510 The UTF-8 implementation of the string iterator's C<get_and_advance> 511 function. 512 513 =cut 514 515 */ 516 517 static UINTVAL 518 utf8_iter_get_and_advance(PARROT_INTERP, 519 ARGIN(const STRING *str), ARGMOD(String_iter *i)) 520 { 521 ASSERT_ARGS(utf8_iter_get_and_advance) 522 const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos); 523 UINTVAL c = *u8ptr; 524 525 if (UTF8_IS_START(c)) { 526 UINTVAL len = UTF8SKIP(u8ptr); 527 528 c &= UTF8_START_MASK(len); 529 i->bytepos += len; 530 for (len--; len; len--) { 531 u8ptr++; 532 533 if (!UTF8_IS_CONTINUATION(*u8ptr)) 534 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 535 "Malformed UTF-8 string\n"); 536 c = UTF8_ACCUMULATE(c, *u8ptr); 537 } 538 539 if (UNICODE_IS_SURROGATE(c)) 540 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 541 "Surrogate in UTF-8 string\n"); 542 } 543 else if (!UNICODE_IS_INVARIANT(c)) { 544 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8, 545 "Malformed UTF-8 string\n"); 546 } 547 else { 548 i->bytepos++; 549 } 550 551 i->charpos++; 552 return c; 553 } 554 555 /* 556 557 =item C<static void utf8_iter_set_and_advance(PARROT_INTERP, STRING *str, 558 String_iter *i, UINTVAL c)> 559 560 The UTF-8 implementation of the string iterator's C<set_and_advance> 561 
function. 562 563 =cut 564 565 */ 566 567 static void 568 utf8_iter_set_and_advance(PARROT_INTERP, 569 ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c) 570 { 571 ASSERT_ARGS(utf8_iter_set_and_advance) 572 unsigned char * const pos = (unsigned char *)str->strstart + i->bytepos; 573 unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c); 574 575 i->bytepos += (new_pos - pos); 576 /* XXX possible buffer overrun exception? */ 577 PARROT_ASSERT(i->bytepos <= Buffer_buflen(str)); 578 i->charpos++; 579 } 580 581 /* 582 583 =item C<static void utf8_iter_set_position(PARROT_INTERP, const STRING *str, 584 String_iter *i, UINTVAL pos)> 585 586 The UTF-8 implementation of the string iterator's C<set_position> 587 function. 588 589 =cut 590 591 */ 592 593 static void 594 utf8_iter_set_position(SHIM_INTERP, 595 ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL pos) 596 { 597 ASSERT_ARGS(utf8_iter_set_position) 598 const utf8_t *u8ptr = (const utf8_t *)str->strstart; 599 600 if (pos == 0) { 601 i->charpos = 0; 602 i->bytepos = 0; 603 return; 604 } 605 606 /* 607 * we know the byte offsets of three positions: start, current and end 608 * now find the shortest way to reach pos 609 */ 610 if (pos < i->charpos) { 611 if (pos <= (i->charpos >> 1)) { 612 /* go forward from start */ 613 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, pos); 614 } 615 else { 616 /* go backward from current */ 617 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + i->bytepos, i->charpos - pos); 618 } 619 } 620 else { 621 const UINTVAL len = str->strlen; 622 if (pos <= i->charpos + ((len - i->charpos) >> 1)) { 623 /* go forward from current */ 624 u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr + i->bytepos, pos - i->charpos); 625 } 626 else { 627 /* go backward from end */ 628 u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + str->bufused, len - pos); 629 } 630 } 631 632 i->charpos = pos; 633 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 634 } 635 
636 /* 637 390 638 =item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)> 391 639 392 640 The UTF-8 implementation of the string iterator's C<get_and_advance> … … 513 761 { 514 762 ASSERT_ARGS(to_encoding) 515 763 STRING *result; 516 String_iter src_iter; 517 UINTVAL offs, dest_len, dest_pos, src_len; 764 const ENCODING *src_encoding; 765 UINTVAL dest_len, dest_pos, src_len; 518 766 unsigned char *p; 519 767 520 768 if (src->encoding == Parrot_utf8_encoding_ptr) … … 523 771 result = Parrot_gc_new_string_header(interp, 0); 524 772 src_len = src->strlen; 525 773 526 /* init iter before possilby changing encoding*/ 527 ENCODING_ITER_INIT(interp, src, &src_iter); 774 /* save source encoding before possibly changing it */ 775 src_encoding = src->encoding; 528 776 result->charset = Parrot_unicode_charset_ptr; 529 777 result->encoding = Parrot_utf8_encoding_ptr; 530 778 result->strlen = src_len; … … 542 790 result->bufused = dest_len; 543 791 } 544 792 else { 793 String_iter src_iter; 794 STRING_ITER_INIT(interp, &src_iter); 545 795 dest_len = src_len; 546 796 dest_pos = 0; 547 for (offs = 0; offs < src_len; ++offs) { 548 const UINTVAL c = src_iter.get_and_advance(interp, &src_iter); 797 while (src_iter.charpos < src_len) { 798 const UINTVAL c = src_encoding->iter_get_and_advance(interp, src, &src_iter); 549 799 unsigned char *new_pos; 550 800 unsigned char *pos; 551 801 552 802 if (dest_len - dest_pos < 6) { 553 UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5); 803 UINTVAL need = (UINTVAL)((src->strlen - src_iter.charpos + 1) * 1.5); 554 804 if (need < 16) 555 805 need = 16; 556 806 dest_len += need; … … 683 933 String_iter iter; 684 934 UINTVAL start; 685 935 686 iter_init(interp, src, &iter); 936 STRING_ITER_INIT(interp, &iter); 687 937 688 938 if (offset) 689 iter.set_position(interp, &iter, offset); 939 utf8_iter_set_position(interp, src, &iter, offset); 690 940 691 941 start = iter.bytepos; 692 942 return_string->strstart = (char 
*)return_string->strstart + start; 693 943 694 944 if (count) 695 iter.set_position(interp, &iter, offset + count); 945 utf8_iter_set_position(interp, src, &iter, offset + count); 696 946 697 947 return_string->bufused = iter.bytepos - start; 698 948 return_string->strlen = count; … … 749 999 * this is used to initially calculate src->strlen, 750 1000 * therefore we must scan the whole string 751 1001 */ 752 iter_init(interp, src, &iter); 1002 STRING_ITER_INIT(interp, &iter); 753 1003 while (iter.bytepos < src->bufused) 754 iter.get_and_advance(interp, &iter); 1004 utf8_iter_get_and_advance(interp, src, &iter); 755 1005 return iter.charpos; 756 1006 } 757 1007 … … 825 1075 bytes, 826 1076 iter_init, 827 1077 find_cclass, 828 NULL 1078 NULL, 1079 utf8_iter_get, 1080 utf8_iter_skip, 1081 utf8_iter_get_and_advance, 1082 utf8_iter_set_and_advance, 1083 utf8_iter_set_position 829 1084 }; 830 1085 STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding); 831 1086 Parrot_register_encoding(interp, "utf8", return_encoding);
