Ticket #1456: string-iter-remove-old-api-v7.diff
File string-iter-remove-old-api-v7.diff, 33.1 KB (added by nwellnhof, 11 years ago) |
---|
-
include/parrot/encoding.h
diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index 5965ad6..0aa07f9 100644
a b 30 30 31 31 struct string_iterator_t; /* s. parrot/string.h */ 32 32 33 typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src,34 struct string_iterator_t *);35 33 typedef UINTVAL (*encoding_iter_get_t)( 36 34 PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); 37 35 typedef void (*encoding_iter_skip_t)( … … 54 52 encoding_get_bytes_t get_bytes; 55 53 encoding_codepoints_t codepoints; 56 54 encoding_bytes_t bytes; 57 encoding_iter_init_t iter_init;58 55 encoding_find_cclass_t find_cclass; 59 56 encoding_hash_t hash; 60 57 encoding_iter_get_t iter_get; … … 224 221 ((src)->encoding)->codepoints((i), (src)) 225 222 #define ENCODING_BYTES(i, src) \ 226 223 ((src)->encoding)->bytes((i), (src)) 227 #define ENCODING_ITER_INIT(i, src, iter) \228 ((src)->encoding)->iter_init((i), (src), (iter))229 224 #define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \ 230 225 ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end)) 231 226 #define ENCODING_HASH(i, src, seed) \ -
include/parrot/string.h
diff --git a/include/parrot/string.h b/include/parrot/string.h index d02f5c1..55df3c3 100644
a b 30 30 31 31 /* String iterator */ 32 32 typedef struct string_iterator_t { 33 const STRING *str;34 33 UINTVAL bytepos; 35 34 UINTVAL charpos; 36 UINTVAL (*get_and_advance)(PARROT_INTERP, struct string_iterator_t *i);37 void (*set_and_advance)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL c);38 void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos);39 35 } String_iter; 40 36 41 37 #define STRING_ITER_INIT(i, iter) \ -
src/string/encoding/fixed_8.c
diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index ec51147..be45421 100644
a b 41 41 __attribute__nonnull__(2) 42 42 __attribute__nonnull__(3); 43 43 44 static UINTVAL fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))45 __attribute__nonnull__(1)46 __attribute__nonnull__(2)47 FUNC_MODIFIES(*iter);48 49 44 static UINTVAL fixed8_iter_get(PARROT_INTERP, 50 45 ARGIN(const STRING *str), 51 46 ARGIN(const String_iter *iter), … … 88 83 __attribute__nonnull__(3) 89 84 FUNC_MODIFIES(*iter); 90 85 91 static void fixed8_set_next(PARROT_INTERP,92 ARGMOD(String_iter *iter),93 UINTVAL c)94 __attribute__nonnull__(1)95 __attribute__nonnull__(2)96 FUNC_MODIFIES(*iter);97 98 static void fixed8_set_position(SHIM_INTERP,99 ARGMOD(String_iter *iter),100 UINTVAL pos)101 __attribute__nonnull__(2)102 FUNC_MODIFIES(*iter);103 104 86 static size_t fixed_8_hash(SHIM_INTERP, 105 87 ARGIN(const STRING *s), 106 88 size_t hashval) … … 137 119 __attribute__nonnull__(1) 138 120 __attribute__nonnull__(2); 139 121 140 static void iter_init(SHIM_INTERP,141 ARGIN(const STRING *src),142 ARGOUT(String_iter *iter))143 __attribute__nonnull__(2)144 __attribute__nonnull__(3)145 FUNC_MODIFIES(*iter);146 147 122 static void set_byte(PARROT_INTERP, 148 123 ARGIN(const STRING *src), 149 124 UINTVAL offset, … … 164 139 #define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 165 140 PARROT_ASSERT_ARG(s) \ 166 141 , PARROT_ASSERT_ARG(typetable)) 167 #define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\168 PARROT_ASSERT_ARG(interp) \169 , PARROT_ASSERT_ARG(iter))170 142 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 171 143 PARROT_ASSERT_ARG(interp) \ 172 144 , PARROT_ASSERT_ARG(str) \ … … 185 157 #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 186 158 PARROT_ASSERT_ARG(str) \ 187 159 , PARROT_ASSERT_ARG(iter)) 188 #define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\189 PARROT_ASSERT_ARG(interp) \190 , PARROT_ASSERT_ARG(iter))191 #define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\192 PARROT_ASSERT_ARG(iter))193 160 #define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 194 161 PARROT_ASSERT_ARG(s)) 195 162 #define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 203 170 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 204 171 PARROT_ASSERT_ARG(interp) \ 205 172 , PARROT_ASSERT_ARG(src)) 206 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\207 PARROT_ASSERT_ARG(src) \208 , PARROT_ASSERT_ARG(iter))209 173 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 210 174 PARROT_ASSERT_ARG(interp) \ 211 175 , PARROT_ASSERT_ARG(src)) … … 537 501 538 502 /* 539 503 540 =item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)>541 542 Moves the string iterator C<i> to the next codepoint.543 544 =cut545 546 */547 548 static UINTVAL549 fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))550 {551 ASSERT_ARGS(fixed8_get_next)552 const UINTVAL c = get_byte(interp, iter->str, iter->charpos++);553 ++iter->bytepos;554 return c;555 }556 557 /*558 559 =item C<static void fixed8_set_next(PARROT_INTERP, String_iter *iter, UINTVAL560 c)>561 562 With the string iterator C<i>, appends the codepoint C<c> and advances to the563 next position in the string.564 565 =cut566 567 */568 569 static void570 fixed8_set_next(PARROT_INTERP, ARGMOD(String_iter *iter), UINTVAL c)571 {572 ASSERT_ARGS(fixed8_set_next)573 set_byte(interp, iter->str, iter->charpos++, c);574 ++iter->bytepos;575 }576 577 /*578 579 =item C<static void fixed8_set_position(PARROT_INTERP, String_iter *iter,580 UINTVAL pos)>581 582 Moves the string iterator C<i> to the position C<n> in the string.583 584 =cut585 586 */587 588 static void589 fixed8_set_position(SHIM_INTERP, ARGMOD(String_iter *iter), UINTVAL pos)590 {591 ASSERT_ARGS(fixed8_set_position)592 iter->bytepos = iter->charpos = pos;593 PARROT_ASSERT(pos <= Buffer_buflen(iter->str));594 }595 596 597 /*598 599 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter600 *iter)>601 602 Initializes for string C<src> the string iterator C<iter>.603 604 =cut605 606 */607 608 static void609 iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))610 {611 ASSERT_ARGS(iter_init)612 iter->str = src;613 iter->bytepos = iter->charpos = 0;614 iter->get_and_advance = fixed8_get_next;615 iter->set_and_advance = fixed8_set_next;616 iter->set_position = fixed8_set_position;617 }618 619 620 /*621 622 504 =item C<static size_t fixed_8_hash(PARROT_INTERP, const STRING *s, size_t 623 505 hashval)> 624 506 … … 671 553 get_bytes, 672 554 codepoints, 673 555 bytes, 674 iter_init,675 556 find_cclass, 676 557 fixed_8_hash, 677 558 fixed8_iter_get, -
src/string/encoding/ucs2.c
diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 6e2ec93..2f91bd5 100644
a b 84 84 __attribute__nonnull__(1) 85 85 __attribute__nonnull__(2); 86 86 87 static void iter_init(PARROT_INTERP,88 ARGIN(const STRING *src),89 ARGOUT(String_iter *iter))90 __attribute__nonnull__(1)91 __attribute__nonnull__(2)92 __attribute__nonnull__(3)93 FUNC_MODIFIES(*iter);94 95 87 static void set_byte(PARROT_INTERP, 96 88 SHIM(const STRING *src), 97 89 SHIM(UINTVAL offset), … … 104 96 __attribute__nonnull__(1) 105 97 __attribute__nonnull__(2); 106 98 107 static UINTVAL ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))108 __attribute__nonnull__(2)109 FUNC_MODIFIES(*i);110 111 static void ucs2_encode_and_advance(SHIM_INTERP,112 ARGMOD(String_iter *i),113 UINTVAL c)114 __attribute__nonnull__(2)115 FUNC_MODIFIES(*i);116 117 99 static size_t ucs2_hash(PARROT_INTERP, 118 100 ARGIN(const STRING *s), 119 101 size_t hashval) … … 164 146 __attribute__nonnull__(3) 165 147 FUNC_MODIFIES(*i); 166 148 167 static void ucs2_set_position(SHIM_INTERP,168 ARGMOD(String_iter *i),169 UINTVAL n)170 __attribute__nonnull__(2)171 FUNC_MODIFIES(*i);172 173 149 #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 174 150 PARROT_ASSERT_ARG(src)) 175 151 #define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 189 165 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 190 166 PARROT_ASSERT_ARG(interp) \ 191 167 , PARROT_ASSERT_ARG(src)) 192 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\193 PARROT_ASSERT_ARG(interp) \194 , PARROT_ASSERT_ARG(src) \195 , PARROT_ASSERT_ARG(iter))196 168 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 197 169 PARROT_ASSERT_ARG(interp)) 198 170 #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 199 171 PARROT_ASSERT_ARG(interp) \ 200 172 , PARROT_ASSERT_ARG(src)) 201 #define ASSERT_ARGS_ucs2_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\202 PARROT_ASSERT_ARG(i))203 #define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\204 PARROT_ASSERT_ARG(i))205 173 #define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 206 174 PARROT_ASSERT_ARG(interp) \ 207 175 , PARROT_ASSERT_ARG(s)) … … 225 193 PARROT_ASSERT_ARG(interp) \ 226 194 , PARROT_ASSERT_ARG(str) \ 227 195 , PARROT_ASSERT_ARG(i)) 228 #define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\229 PARROT_ASSERT_ARG(i))230 196 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 231 197 /* HEADERIZER END: static */ 232 198 … … 609 575 610 576 /* 611 577 612 =item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)>613 614 Moves the string iterator C<i> to the next UCS-2 codepoint.615 616 =cut617 618 */619 620 static UINTVAL621 ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))622 {623 ASSERT_ARGS(ucs2_decode_and_advance)624 625 #if PARROT_HAS_ICU626 const UChar * const s = (const UChar*) i->str->strstart;627 size_t pos = i->bytepos / sizeof (UChar);628 629 /* TODO either make sure that we don't go past end or use SAFE630 * iter versions631 */632 const UChar c = s[pos++];633 ++i->charpos;634 i->bytepos = pos * sizeof (UChar);635 return c;636 #else637 /* This function must never be called if compiled without ICU.638 * See TT #557639 */640 PARROT_ASSERT(0);641 UNUSED(i);642 return (UINTVAL)0; /* Stop the static analyzers from panicing */643 #endif644 }645 646 /*647 648 =item C<static void ucs2_encode_and_advance(PARROT_INTERP, String_iter *i,649 UINTVAL c)>650 651 With the string iterator C<i>, appends the codepoint C<c> and advances to the652 next position in the string.653 654 =cut655 656 */657 658 static void659 ucs2_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c)660 {661 ASSERT_ARGS(ucs2_encode_and_advance)662 663 #if PARROT_HAS_ICU664 UChar *s = (UChar*) i->str->strstart;665 UINTVAL pos = i->bytepos / sizeof (UChar);666 s[pos++] = (UChar)c;667 ++i->charpos;668 i->bytepos = pos * sizeof (UChar);669 #else670 /* This function must never be called if compiled without ICU.671 * See TT #557672 */673 UNUSED(i);674 UNUSED(c);675 PARROT_ASSERT(0);676 #endif677 }678 679 /*680 681 578 =item C<static size_t ucs2_hash(PARROT_INTERP, const STRING *s, size_t hashval)> 682 579 683 580 Returns the hashed value of the string, given a seed in hashval. … … 710 607 #endif 711 608 } 712 609 713 714 /*715 716 =item C<static void ucs2_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>717 718 Moves the string iterator C<i> to the position C<n> in the string.719 720 =cut721 722 */723 724 static void725 ucs2_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n)726 {727 ASSERT_ARGS(ucs2_set_position)728 729 #if PARROT_HAS_ICU730 i->charpos = n;731 i->bytepos = n * sizeof (UChar);732 #else733 /* This function must never be called if compiled without ICU.734 * See TT #557735 */736 UNUSED(i);737 UNUSED(n);738 PARROT_ASSERT(0);739 #endif740 }741 742 743 /*744 745 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter746 *iter)>747 748 Initializes for string C<src> the string iterator C<iter>.749 750 =cut751 752 */753 754 static void755 iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))756 {757 ASSERT_ARGS(iter_init)758 #if PARROT_HAS_ICU759 UNUSED(interp);760 iter->str = src;761 iter->bytepos = 0;762 iter->charpos = 0;763 iter->get_and_advance = ucs2_decode_and_advance;764 iter->set_and_advance = ucs2_encode_and_advance;765 iter->set_position = ucs2_set_position;766 #else767 UNUSED(src);768 UNUSED(iter);769 no_ICU_lib(interp);770 #endif771 }772 773 610 /* 774 611 775 612 =item C<void Parrot_encoding_ucs2_init(PARROT_INTERP)> … … 797 634 get_bytes, 798 635 codepoints, 799 636 bytes, 800 iter_init,801 637 find_cclass, 802 638 ucs2_hash, 803 639 ucs2_iter_get, -
src/string/encoding/ucs4.c
diff --git a/src/string/encoding/ucs4.c b/src/string/encoding/ucs4.c index e4d0409..462cc96 100644
a b 84 84 __attribute__nonnull__(1) 85 85 __attribute__nonnull__(2); 86 86 87 static void iter_init(PARROT_INTERP,88 ARGIN(const STRING *src),89 ARGOUT(String_iter *iter))90 __attribute__nonnull__(1)91 __attribute__nonnull__(2)92 __attribute__nonnull__(3)93 FUNC_MODIFIES(*iter);94 95 87 static void set_byte(PARROT_INTERP, 96 88 SHIM(const STRING *src), 97 89 SHIM(UINTVAL offset), … … 104 96 __attribute__nonnull__(1) 105 97 __attribute__nonnull__(2); 106 98 107 static UINTVAL ucs4_decode_and_advance(PARROT_INTERP,108 ARGMOD(String_iter *i))109 __attribute__nonnull__(1)110 __attribute__nonnull__(2)111 FUNC_MODIFIES(*i);112 113 static void ucs4_encode_and_advance(PARROT_INTERP,114 ARGMOD(String_iter *i),115 UINTVAL c)116 __attribute__nonnull__(1)117 __attribute__nonnull__(2)118 FUNC_MODIFIES(*i);119 120 99 static size_t ucs4_hash(PARROT_INTERP, 121 100 ARGIN(const STRING *s), 122 101 size_t hashval) … … 167 146 __attribute__nonnull__(3) 168 147 FUNC_MODIFIES(*i); 169 148 170 static void ucs4_set_position(PARROT_INTERP,171 ARGMOD(String_iter *i),172 UINTVAL n)173 __attribute__nonnull__(1)174 __attribute__nonnull__(2)175 FUNC_MODIFIES(*i);176 177 149 #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 178 150 PARROT_ASSERT_ARG(src)) 179 151 #define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 193 165 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 194 166 PARROT_ASSERT_ARG(interp) \ 195 167 , PARROT_ASSERT_ARG(src)) 196 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\197 PARROT_ASSERT_ARG(interp) \198 , PARROT_ASSERT_ARG(src) \199 , PARROT_ASSERT_ARG(iter))200 168 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 201 169 PARROT_ASSERT_ARG(interp)) 202 170 #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 203 171 PARROT_ASSERT_ARG(interp) \ 204 172 , PARROT_ASSERT_ARG(src)) 205 #define ASSERT_ARGS_ucs4_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\206 PARROT_ASSERT_ARG(interp) \207 , PARROT_ASSERT_ARG(i))208 #define ASSERT_ARGS_ucs4_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\209 PARROT_ASSERT_ARG(interp) \210 , PARROT_ASSERT_ARG(i))211 173 #define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 212 174 PARROT_ASSERT_ARG(interp) \ 213 175 , PARROT_ASSERT_ARG(s)) … … 231 193 PARROT_ASSERT_ARG(interp) \ 232 194 , PARROT_ASSERT_ARG(str) \ 233 195 , PARROT_ASSERT_ARG(i)) 234 #define ASSERT_ARGS_ucs4_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\235 PARROT_ASSERT_ARG(interp) \236 , PARROT_ASSERT_ARG(i))237 196 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 238 197 /* HEADERIZER END: static */ 239 198 … … 611 570 #endif 612 571 } 613 572 614 /*615 616 =item C<static UINTVAL ucs4_decode_and_advance(PARROT_INTERP, String_iter *i)>617 618 Moves the string iterator C<i> to the next UCS-4 codepoint.619 620 =cut621 622 */623 624 static UINTVAL625 ucs4_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))626 {627 ASSERT_ARGS(ucs4_decode_and_advance)628 #if PARROT_HAS_ICU629 const UChar32 * const s = (const UChar32 *) i->str->strstart;630 size_t pos = i->bytepos / sizeof (UChar32);631 const UChar32 c = s[pos++];632 ++i->charpos;633 i->bytepos = pos * sizeof (UChar32);634 return c;635 #else636 UNUSED(i);637 no_ICU_lib(interp);638 #endif639 }640 641 /*642 643 =item C<static void ucs4_encode_and_advance(PARROT_INTERP, String_iter *i,644 UINTVAL c)>645 646 With the string iterator C<i>, appends the codepoint C<c> and advances to the647 next position in the string.648 649 =cut650 651 */652 653 static void654 ucs4_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)655 {656 ASSERT_ARGS(ucs4_encode_and_advance)657 #if PARROT_HAS_ICU658 UChar32 *s = (UChar32 *) i->str->strstart;659 size_t pos = i->bytepos / sizeof (UChar32);660 s[pos++] = (UChar32) c;661 ++i->charpos;662 i->bytepos = pos * sizeof (UChar32);663 #else664 UNUSED(i);665 no_ICU_lib(interp);666 #endif667 }668 669 573 #if PARROT_HAS_ICU 670 574 /* 671 575 … … 696 600 697 601 /* 698 602 699 =item C<static void ucs4_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>700 701 Moves the string iterator C<i> to the position C<n> in the string.702 703 =cut704 705 */706 707 static void708 ucs4_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n)709 {710 ASSERT_ARGS(ucs4_set_position)711 #if PARROT_HAS_ICU712 i->charpos = n;713 i->bytepos = n * sizeof (UChar32);714 #else715 UNUSED(i);716 UNUSED(n);717 no_ICU_lib(interp);718 #endif719 }720 721 722 /*723 724 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter725 *iter)>726 727 Initializes for string C<src> the string iterator C<iter>.728 729 =cut730 731 */732 733 static void734 iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))735 {736 ASSERT_ARGS(iter_init)737 #if PARROT_HAS_ICU738 UNUSED(interp);739 iter->str = src;740 iter->bytepos = 0;741 iter->charpos = 0;742 iter->get_and_advance = ucs4_decode_and_advance;743 iter->set_and_advance = ucs4_encode_and_advance;744 iter->set_position = ucs4_set_position;745 #else746 UNUSED(src);747 UNUSED(iter);748 no_ICU_lib(interp);749 #endif750 }751 752 /*753 754 603 =item C<void Parrot_encoding_ucs4_init(PARROT_INTERP)> 755 604 756 605 Initializes the UCS-4 encoding. … … 776 625 get_bytes, 777 626 codepoints, 778 627 bytes, 779 iter_init,780 628 find_cclass, 781 629 #if PARROT_HAS_ICU 782 630 ucs4_hash, -
src/string/encoding/utf16.c
diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index 2570de5..d43bcdf 100644
a b 75 75 __attribute__nonnull__(1) 76 76 __attribute__nonnull__(2); 77 77 78 static void iter_init(PARROT_INTERP,79 ARGIN(const STRING *src),80 ARGOUT(String_iter *iter))81 __attribute__nonnull__(1)82 __attribute__nonnull__(2)83 __attribute__nonnull__(3)84 FUNC_MODIFIES(*iter);85 86 78 static void set_byte(PARROT_INTERP, 87 79 ARGIN(const STRING *src), 88 80 UINTVAL offset, … … 96 88 __attribute__nonnull__(1) 97 89 __attribute__nonnull__(2); 98 90 99 PARROT_WARN_UNUSED_RESULT100 static UINTVAL utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))101 __attribute__nonnull__(2)102 FUNC_MODIFIES(*i);103 104 static void utf16_encode_and_advance(SHIM_INTERP,105 ARGMOD(String_iter *i),106 UINTVAL c)107 __attribute__nonnull__(2)108 FUNC_MODIFIES(*i);109 110 91 static UINTVAL utf16_iter_get(PARROT_INTERP, 111 92 ARGIN(const STRING *str), 112 93 ARGIN(const String_iter *i), … … 152 133 __attribute__nonnull__(3) 153 134 FUNC_MODIFIES(*i); 154 135 155 static void utf16_set_position(SHIM_INTERP,156 ARGMOD(String_iter *i),157 UINTVAL n)158 __attribute__nonnull__(2)159 FUNC_MODIFIES(*i);160 161 136 #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 162 137 PARROT_ASSERT_ARG(src)) 163 138 #define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 178 153 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 179 154 PARROT_ASSERT_ARG(interp) \ 180 155 , PARROT_ASSERT_ARG(src)) 181 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\182 PARROT_ASSERT_ARG(interp) \183 , PARROT_ASSERT_ARG(src) \184 , PARROT_ASSERT_ARG(iter))185 156 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 186 157 PARROT_ASSERT_ARG(interp) \ 187 158 , PARROT_ASSERT_ARG(src)) 188 159 #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 189 160 PARROT_ASSERT_ARG(interp) \ 190 161 , PARROT_ASSERT_ARG(src)) 191 #define ASSERT_ARGS_utf16_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\192 PARROT_ASSERT_ARG(i))193 #define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\194 PARROT_ASSERT_ARG(i))195 162 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 196 163 PARROT_ASSERT_ARG(interp) \ 197 164 , PARROT_ASSERT_ARG(str) \ … … 212 179 PARROT_ASSERT_ARG(interp) \ 213 180 , PARROT_ASSERT_ARG(str) \ 214 181 , PARROT_ASSERT_ARG(i)) 215 #define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\216 PARROT_ASSERT_ARG(i))217 182 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 218 183 /* HEADERIZER END: static */ 219 184 … … 726 691 #endif 727 692 } 728 693 729 #if PARROT_HAS_ICU730 /*731 732 =item C<static UINTVAL utf16_decode_and_advance(PARROT_INTERP, String_iter *i)>733 734 Moves the string iterator C<i> to the next UTF-16 codepoint.735 736 =cut737 738 */739 740 PARROT_WARN_UNUSED_RESULT741 static UINTVAL742 utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))743 {744 ASSERT_ARGS(utf16_decode_and_advance)745 const UChar * const s = (const UChar*) i->str->strstart;746 UINTVAL pos = i->bytepos / sizeof (UChar);747 UINTVAL c;748 749 /* TODO either make sure that we don't go past end or use SAFE750 * iter versions751 */752 U16_NEXT_UNSAFE(s, pos, c);753 ++i->charpos;754 i->bytepos = pos * sizeof (UChar);755 return c;756 }757 758 /*759 760 =item C<static void utf16_encode_and_advance(PARROT_INTERP, String_iter *i,761 UINTVAL c)>762 763 With the string iterator C<i>, appends the codepoint C<c> and advances to the764 next position in the string.765 766 =cut767 768 */769 770 static void771 utf16_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c)772 {773 ASSERT_ARGS(utf16_encode_and_advance)774 UChar * const s = (UChar*) i->str->strstart;775 UINTVAL pos = i->bytepos / sizeof (UChar);776 U16_APPEND_UNSAFE(s, pos, c);777 ++i->charpos;778 i->bytepos = pos * sizeof (UChar);779 }780 781 /*782 783 =item C<static void utf16_set_position(PARROT_INTERP, String_iter *i, UINTVAL784 n)>785 786 Moves the string iterator C<i> to the position C<n> in the string.787 788 =cut789 790 */791 792 static void793 utf16_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n)794 {795 ASSERT_ARGS(utf16_set_position)796 UChar * const s = (UChar*) i->str->strstart;797 UINTVAL pos;798 pos = 0;799 U16_FWD_N_UNSAFE(s, pos, n);800 i->charpos = n;801 i->bytepos = pos * sizeof (UChar);802 }803 804 #endif805 806 /*807 808 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter809 *iter)>810 811 Initializes for string C<src> the string iterator C<iter>.812 813 =cut814 815 */816 817 static void818 iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))819 {820 ASSERT_ARGS(iter_init)821 iter->str = src;822 iter->bytepos = iter->charpos = 0;823 #if PARROT_HAS_ICU824 UNUSED(interp);825 iter->get_and_advance = utf16_decode_and_advance;826 iter->set_and_advance = utf16_encode_and_advance;827 iter->set_position = utf16_set_position;828 #else829 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,830 "no ICU lib loaded");831 #endif832 }833 834 694 /* 835 695 836 696 =item C<void Parrot_encoding_utf16_init(PARROT_INTERP)> … … 858 718 get_bytes, 859 719 codepoints, 860 720 bytes, 861 iter_init,862 721 find_cclass, 863 722 NULL, 864 723 utf16_iter_get, -
src/string/encoding/utf8.c
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index 811ce76..b07cadb 100644
a b 71 71 __attribute__nonnull__(1) 72 72 __attribute__nonnull__(2); 73 73 74 static void iter_init(SHIM_INTERP,75 ARGIN(const STRING *src),76 ARGOUT(String_iter *iter))77 __attribute__nonnull__(2)78 __attribute__nonnull__(3)79 FUNC_MODIFIES(*iter);80 81 74 static void set_byte(PARROT_INTERP, 82 75 ARGIN(const STRING *src), 83 76 UINTVAL offset, … … 100 93 __attribute__nonnull__(1) 101 94 __attribute__nonnull__(2); 102 95 103 static UINTVAL utf8_decode_and_advance(PARROT_INTERP,104 ARGMOD(String_iter *i))105 __attribute__nonnull__(1)106 __attribute__nonnull__(2)107 FUNC_MODIFIES(*i);108 109 96 PARROT_CANNOT_RETURN_NULL 110 97 static void * utf8_encode(PARROT_INTERP, ARGIN(void *ptr), UINTVAL c) 111 98 __attribute__nonnull__(1) 112 99 __attribute__nonnull__(2); 113 100 114 static void utf8_encode_and_advance(PARROT_INTERP,115 ARGMOD(String_iter *i),116 UINTVAL c)117 __attribute__nonnull__(1)118 __attribute__nonnull__(2)119 FUNC_MODIFIES(*i);120 121 101 static UINTVAL utf8_iter_get(PARROT_INTERP, 122 102 ARGIN(const STRING *str), 123 103 ARGIN(const String_iter *i), … … 160 140 __attribute__nonnull__(3) 161 141 FUNC_MODIFIES(*i); 162 142 163 static void utf8_set_position(SHIM_INTERP,164 ARGMOD(String_iter *i),165 UINTVAL pos)166 __attribute__nonnull__(2)167 FUNC_MODIFIES(*i);168 169 143 PARROT_WARN_UNUSED_RESULT 170 144 PARROT_CANNOT_RETURN_NULL 171 145 static const void * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n) … … 193 167 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 194 168 PARROT_ASSERT_ARG(interp) \ 195 169 , PARROT_ASSERT_ARG(src)) 196 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\197 PARROT_ASSERT_ARG(src) \198 , PARROT_ASSERT_ARG(iter))199 170 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 200 171 PARROT_ASSERT_ARG(interp) \ 201 172 , PARROT_ASSERT_ARG(src)) … … 208 179 #define ASSERT_ARGS_utf8_decode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 209 180 PARROT_ASSERT_ARG(interp) \ 210 181 , PARROT_ASSERT_ARG(ptr)) 211 #define ASSERT_ARGS_utf8_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\212 PARROT_ASSERT_ARG(interp) \213 , PARROT_ASSERT_ARG(i))214 182 #define ASSERT_ARGS_utf8_encode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 215 183 PARROT_ASSERT_ARG(interp) \ 216 184 , PARROT_ASSERT_ARG(ptr)) 217 #define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\218 PARROT_ASSERT_ARG(interp) \219 , PARROT_ASSERT_ARG(i))220 185 #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 221 186 PARROT_ASSERT_ARG(interp) \ 222 187 , PARROT_ASSERT_ARG(str) \ … … 235 200 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 236 201 PARROT_ASSERT_ARG(str) \ 237 202 , PARROT_ASSERT_ARG(i)) 238 #define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\239 PARROT_ASSERT_ARG(i))240 203 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 241 204 PARROT_ASSERT_ARG(ptr)) 242 205 #define ASSERT_ARGS_utf8_skip_forward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 633 596 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 634 597 } 635 598 636 /*637 638 =item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)>639 640 The UTF-8 implementation of the string iterator's C<get_and_advance>641 function.642 643 =cut644 645 */646 647 static UINTVAL648 utf8_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))649 {650 ASSERT_ARGS(utf8_decode_and_advance)651 const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);652 UINTVAL c = *u8ptr;653 654 if (UTF8_IS_START(c)) {655 UINTVAL len = UTF8SKIP(u8ptr);656 657 c &= UTF8_START_MASK(len);658 i->bytepos += len;659 for (--len; len; --len) {660 ++u8ptr;661 662 if (!UTF8_IS_CONTINUATION(*u8ptr))663 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,664 "Malformed UTF-8 string\n");665 666 c = UTF8_ACCUMULATE(c, *u8ptr);667 }668 669 if (UNICODE_IS_SURROGATE(c))670 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,671 "Surrogate in UTF-8 string\n");672 }673 else if (!UNICODE_IS_INVARIANT(c)) {674 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,675 "Malformed UTF-8 string\n");676 }677 else {678 ++i->bytepos;679 }680 681 ++i->charpos;682 return c;683 }684 685 /*686 687 =item C<static void utf8_encode_and_advance(PARROT_INTERP, String_iter *i,688 UINTVAL c)>689 690 The UTF-8 implementation of the string iterator's C<set_and_advance>691 function.692 693 =cut694 695 */696 697 static void698 utf8_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)699 {700 ASSERT_ARGS(utf8_encode_and_advance)701 const STRING * const s = i->str;702 unsigned char * const pos = (unsigned char *)s->strstart + i->bytepos;703 unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c);704 705 i->bytepos += (new_pos - pos);706 /* XXX possible buffer overrun exception? */707 PARROT_ASSERT(i->bytepos <= Buffer_buflen(s));708 ++i->charpos;709 }710 711 /*712 713 =item C<static void utf8_set_position(PARROT_INTERP, String_iter *i, UINTVAL714 pos)>715 716 The UTF-8 implementation of the string iterator's C<set_position>717 function.718 719 =cut720 721 */722 723 static void724 utf8_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL pos)725 {726 ASSERT_ARGS(utf8_set_position)727 const utf8_t *u8ptr = (const utf8_t *)i->str->strstart;728 729 /* start from last known charpos, if we can */730 if (i->charpos <= pos) {731 const UINTVAL old_pos = pos;732 pos -= i->charpos;733 u8ptr += i->bytepos;734 i->charpos = old_pos;735 }736 else737 i->charpos = pos;738 739 while (pos-- > 0)740 u8ptr += UTF8SKIP(u8ptr);741 742 i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart;743 }744 745 599 746 600 /* 747 601 … … 1025 879 1026 880 /* 1027 881 1028 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter1029 *iter)>1030 1031 Initializes for string C<src> the string iterator C<iter>.1032 1033 =cut1034 1035 */1036 1037 static void1038 iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))1039 {1040 ASSERT_ARGS(iter_init)1041 iter->str = src;1042 iter->bytepos = 0;1043 iter->charpos = 0;1044 iter->get_and_advance = utf8_decode_and_advance;1045 iter->set_and_advance = utf8_encode_and_advance;1046 iter->set_position = utf8_set_position;1047 }1048 1049 /*1050 1051 882 =item C<void Parrot_encoding_utf8_init(PARROT_INTERP)> 1052 883 1053 884 Initializes the UTF-8 encoding. … … 1073 904 get_bytes, 1074 905 codepoints, 1075 906 bytes, 1076 iter_init,1077 907 find_cclass, 1078 908 NULL, 1079 909 utf8_iter_get,